Skip to content

Commit cf7de96

Browse files
committedAug 13, 2024·
fix: Revert "feat(execute): allocate memory for string content. (#5482)"
This reverts commit bea9586.
1 parent b624a57 commit cf7de96

File tree

3 files changed

+49
-136
lines changed

3 files changed

+49
-136
lines changed
 

‎execute/allocator.go

+10-19
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
package execute
22

33
import (
4-
arrowmem "github.com/apache/arrow/go/v7/arrow/memory"
5-
64
"github.com/influxdata/flux/memory"
75
)
86

@@ -158,14 +156,17 @@ func (a *Allocator) GrowFloats(slice []float64, n int) []float64 {
158156
return s
159157
}
160158

161-
// Strings makes a slice of String values.
162-
func (a *Allocator) Strings(l, c int) []String {
159+
// Strings makes a slice of string values.
160+
// Only the string headers are accounted for.
161+
func (a *Allocator) Strings(l, c int) []string {
163162
a.account(c, stringSize)
164-
return make([]String, l, c)
163+
return make([]string, l, c)
165164
}
166165

167-
// AppendStrings appends Strings to a slice.
168-
func (a *Allocator) AppendStrings(slice []String, vs ...String) []String {
166+
// AppendStrings appends strings to a slice.
167+
// Only the string headers are accounted for.
168+
func (a *Allocator) AppendStrings(slice []string, vs ...string) []string {
169+
// TODO(nathanielc): Account for actual size of strings
169170
if cap(slice)-len(slice) >= len(vs) {
170171
return append(slice, vs...)
171172
}
@@ -175,14 +176,14 @@ func (a *Allocator) AppendStrings(slice []String, vs ...String) []String {
175176
return s
176177
}
177178

178-
func (a *Allocator) GrowStrings(slice []String, n int) []String {
179+
func (a *Allocator) GrowStrings(slice []string, n int) []string {
179180
newCap := len(slice) + n
180181
if newCap < cap(slice) {
181182
return slice[:newCap]
182183
}
183184
// grow capacity same way as built-in append
184185
newCap = newCap*3/2 + 1
185-
s := make([]String, len(slice)+n, newCap)
186+
s := make([]string, len(slice)+n, newCap)
186187
copy(s, slice)
187188
diff := cap(s) - cap(slice)
188189
a.account(diff, stringSize)
@@ -219,13 +220,3 @@ func (a *Allocator) GrowTimes(slice []Time, n int) []Time {
219220
a.account(diff, timeSize)
220221
return s
221222
}
222-
223-
// String represents a string stored in some backing byte slice.
224-
type String struct {
225-
offset int
226-
len int
227-
}
228-
229-
func (s String) Bytes(buf *arrowmem.Buffer) []byte {
230-
return buf.Bytes()[s.offset : s.offset+s.len]
231-
}

‎execute/table.go

+39-65
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,11 @@
11
package execute
22

33
import (
4-
"bytes"
54
"fmt"
65
"sort"
76
"sync/atomic"
87

9-
arrowmem "github.com/apache/arrow/go/v7/arrow/memory"
108
"github.com/google/go-cmp/cmp"
11-
129
"github.com/influxdata/flux"
1310
"github.com/influxdata/flux/array"
1411
"github.com/influxdata/flux/arrow"
@@ -298,9 +295,8 @@ func TablesEqual(left, right flux.Table, alloc memory.Allocator) (bool, error) {
298295
eq = cmp.Equal(leftBuffer.cols[j].(*floatColumnBuilder).data,
299296
rightBuffer.cols[j].(*floatColumnBuilder).data)
300297
case flux.TString:
301-
eq = cmp.Equal(leftBuffer.cols[j].(*stringColumnBuilder),
302-
rightBuffer.cols[j].(*stringColumnBuilder),
303-
cmp.Comparer(stringColumnBuilderEqual))
298+
eq = cmp.Equal(leftBuffer.cols[j].(*stringColumnBuilder).data,
299+
rightBuffer.cols[j].(*stringColumnBuilder).data)
304300
case flux.TTime:
305301
eq = cmp.Equal(leftBuffer.cols[j].(*timeColumnBuilder).data,
306302
rightBuffer.cols[j].(*timeColumnBuilder).data)
@@ -328,27 +324,6 @@ func colsMatch(left, right []flux.ColMeta) bool {
328324
return true
329325
}
330326

331-
func stringColumnBuilderEqual(x, y *stringColumnBuilder) bool {
332-
if x.Len() != y.Len() {
333-
return false
334-
}
335-
for i := 0; i < x.Len(); i++ {
336-
if x.IsNil(i) {
337-
if !y.IsNil(i) {
338-
return false
339-
}
340-
continue
341-
}
342-
if y.IsNil(i) {
343-
return false
344-
}
345-
if !bytes.Equal(x.data[i].Bytes(x.buf), y.data[i].Bytes(y.buf)) {
346-
return false
347-
}
348-
}
349-
return true
350-
}
351-
352327
// ColMap writes a mapping of builder index to cols index into colMap.
353328
// When colMap does not have enough capacity a new colMap is allocated.
354329
// The colMap is always returned
@@ -623,7 +598,6 @@ func (b *ColListTableBuilder) AddCol(c flux.ColMeta) (int, error) {
623598
case flux.TString:
624599
b.cols = append(b.cols, &stringColumnBuilder{
625600
columnBuilderBase: colBase,
626-
buf: arrowmem.NewResizableBuffer(b.alloc.Allocator),
627601
})
628602
if b.NRows() > 0 {
629603
if err := b.GrowStrings(newIdx, b.NRows()); err != nil {
@@ -945,9 +919,8 @@ func (b *ColListTableBuilder) SetString(i int, j int, value string) error {
945919
if err := b.checkCol(j, flux.TString); err != nil {
946920
return err
947921
}
948-
col := b.cols[j].(*stringColumnBuilder)
949-
col.data[i] = col.makeString(value)
950-
col.SetNil(i, false)
922+
b.cols[j].(*stringColumnBuilder).data[i] = value
923+
b.cols[j].SetNil(i, false)
951924
return nil
952925
}
953926

@@ -956,7 +929,7 @@ func (b *ColListTableBuilder) AppendString(j int, value string) error {
956929
return err
957930
}
958931
col := b.cols[j].(*stringColumnBuilder)
959-
col.data = b.alloc.AppendStrings(col.data, col.makeString(value))
932+
col.data = b.alloc.AppendStrings(col.data, value)
960933
b.nrows = len(col.data)
961934
return nil
962935
}
@@ -1179,6 +1152,11 @@ func (b *ColListTableBuilder) Floats(j int) []float64 {
11791152
CheckColType(b.colMeta[j], flux.TFloat)
11801153
return b.cols[j].(*floatColumnBuilder).data
11811154
}
1155+
func (b *ColListTableBuilder) Strings(j int) []string {
1156+
meta := b.colMeta[j]
1157+
CheckColType(meta, flux.TString)
1158+
return b.cols[j].(*stringColumnBuilder).data
1159+
}
11821160
func (b *ColListTableBuilder) Times(j int) []values.Time {
11831161
CheckColType(b.colMeta[j], flux.TTime)
11841162
return b.cols[j].(*timeColumnBuilder).data
@@ -1202,9 +1180,7 @@ func (b *ColListTableBuilder) GetRow(row int) values.Object {
12021180
case flux.TFloat:
12031181
val = values.NewFloat(b.cols[j].(*floatColumnBuilder).data[row])
12041182
case flux.TString:
1205-
// TODO(mhilton): avoid a copy
1206-
col := b.cols[j].(*stringColumnBuilder)
1207-
val = values.NewString(string(col.data[row].Bytes(col.buf)))
1183+
val = values.NewString(b.cols[j].(*stringColumnBuilder).data[row])
12081184
case flux.TTime:
12091185
val = values.NewTime(b.cols[j].(*timeColumnBuilder).data[row])
12101186
}
@@ -1890,38 +1866,46 @@ func (c *stringColumn) Copy() column {
18901866

18911867
type stringColumnBuilder struct {
18921868
columnBuilderBase
1893-
data []String
1894-
1895-
// buf contains a backing buffer containing the bytes of the
1896-
// strings.
1897-
buf *arrowmem.Buffer
1869+
data []string
18981870
}
18991871

19001872
func (c *stringColumnBuilder) Clear() {
1901-
c.buf.Release()
1902-
c.buf = arrowmem.NewResizableBuffer(c.alloc.Allocator)
1903-
c.data = c.data[:0]
1873+
c.data = c.data[0:0]
19041874
}
19051875

19061876
func (c *stringColumnBuilder) Release() {
1907-
c.buf.Release()
19081877
c.alloc.Free(cap(c.data), stringSize)
1878+
c.data = nil
19091879
}
19101880

19111881
func (c *stringColumnBuilder) Copy() column {
1912-
builder := arrow.NewStringBuilder(c.alloc.Allocator)
1913-
builder.Reserve(len(c.data))
1914-
builder.ReserveData(c.buf.Len())
1915-
for i, v := range c.data {
1916-
if c.nils[i] {
1917-
builder.AppendNull()
1918-
continue
1882+
var data *array.String
1883+
if len(c.nils) > 0 {
1884+
b := arrow.NewStringBuilder(c.alloc.Allocator)
1885+
b.Reserve(len(c.data))
1886+
sz := 0
1887+
for i, v := range c.data {
1888+
if c.nils[i] {
1889+
continue
1890+
}
1891+
sz += len(v)
19191892
}
1920-
builder.AppendBytes(v.Bytes(c.buf))
1893+
b.ReserveData(sz)
1894+
for i, v := range c.data {
1895+
if c.nils[i] {
1896+
b.AppendNull()
1897+
continue
1898+
}
1899+
b.Append(v)
1900+
}
1901+
data = b.NewStringArray()
1902+
b.Release()
1903+
} else {
1904+
data = arrow.NewString(c.data, c.alloc.Allocator)
19211905
}
19221906
col := &stringColumn{
19231907
ColMeta: c.ColMeta,
1924-
data: builder.NewStringArray(),
1908+
data: data,
19251909
}
19261910
return col
19271911
}
@@ -1932,13 +1916,13 @@ func (c *stringColumnBuilder) Len() int {
19321916

19331917
func (c *stringColumnBuilder) Equal(i, j int) bool {
19341918
return c.EqualFunc(i, j, func(i, j int) bool {
1935-
return bytes.Equal(c.data[i].Bytes(c.buf), c.data[j].Bytes(c.buf))
1919+
return c.data[i] == c.data[j]
19361920
})
19371921
}
19381922

19391923
func (c *stringColumnBuilder) Less(i, j int) bool {
19401924
return c.LessFunc(i, j, func(i, j int) bool {
1941-
return bytes.Compare(c.data[i].Bytes(c.buf), c.data[j].Bytes(c.buf)) < 0
1925+
return c.data[i] < c.data[j]
19421926
})
19431927
}
19441928

@@ -1947,16 +1931,6 @@ func (c *stringColumnBuilder) Swap(i, j int) {
19471931
c.data[i], c.data[j] = c.data[j], c.data[i]
19481932
}
19491933

1950-
func (c *stringColumnBuilder) makeString(s string) String {
1951-
offset := c.buf.Len()
1952-
c.buf.Resize(offset + len(s))
1953-
copy(c.buf.Bytes()[offset:], s)
1954-
return String{
1955-
offset: offset,
1956-
len: len(s),
1957-
}
1958-
}
1959-
19601934
type timeColumn struct {
19611935
flux.ColMeta
19621936
data *array.Int

‎execute/table_test.go

-52
Original file line numberDiff line numberDiff line change
@@ -148,58 +148,6 @@ func TestTablesEqual(t *testing.T) {
148148
},
149149
want: false,
150150
},
151-
{
152-
name: "string values",
153-
data0: &executetest.Table{
154-
ColMeta: []flux.ColMeta{
155-
{Label: "_time", Type: flux.TTime},
156-
{Label: "_value", Type: flux.TString},
157-
},
158-
Data: [][]interface{}{
159-
{execute.Time(1), "1"},
160-
{execute.Time(2), "2"},
161-
{execute.Time(3), "3"},
162-
},
163-
},
164-
data1: &executetest.Table{
165-
ColMeta: []flux.ColMeta{
166-
{Label: "_time", Type: flux.TTime},
167-
{Label: "_value", Type: flux.TString},
168-
},
169-
Data: [][]interface{}{
170-
{execute.Time(1), "1"},
171-
{execute.Time(2), "2"},
172-
{execute.Time(3), "3"},
173-
},
174-
},
175-
want: true,
176-
},
177-
{
178-
name: "string mismatch",
179-
data0: &executetest.Table{
180-
ColMeta: []flux.ColMeta{
181-
{Label: "_time", Type: flux.TTime},
182-
{Label: "_value", Type: flux.TString},
183-
},
184-
Data: [][]interface{}{
185-
{execute.Time(1), "1"},
186-
{execute.Time(2), "2"},
187-
{execute.Time(3), "3"},
188-
},
189-
},
190-
data1: &executetest.Table{
191-
ColMeta: []flux.ColMeta{
192-
{Label: "_time", Type: flux.TTime},
193-
{Label: "_value", Type: flux.TString},
194-
},
195-
Data: [][]interface{}{
196-
{execute.Time(1), "1"},
197-
{execute.Time(2), "2"},
198-
{execute.Time(3), "4"},
199-
},
200-
},
201-
want: false,
202-
},
203151
}
204152
for _, tc := range testCases {
205153
tc := tc

0 commit comments

Comments
 (0)