Skip to content

Commit 32f7947

Browse files
authoredJun 5, 2024··
feat: return to internal string references (#5486)
* feat: stop making string copies Undo the changes introduced in #5377. This is a trade-off where it is considerd better to have as much memory as possible allocated by the allocator, with the risk that it will be freed when still in use. Than to have significant amounts of memory allocated outside of the allocator. It should be noted that because of Go's garbage collector it is perfectly safe for memory to be "freed" when still in use. It just mean that any accounting done by the allocator will be inaccurate. * feat: remove unnecessary StringRef type Remove the recently added StringRef type as it is now unnecessary.
1 parent 68c831c commit 32f7947

File tree

26 files changed

+36
-123
lines changed

26 files changed

+36
-123
lines changed
 

‎array/array.go

-55
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import (
55

66
"github.com/apache/arrow/go/v7/arrow"
77
"github.com/apache/arrow/go/v7/arrow/array"
8-
arrowmem "github.com/apache/arrow/go/v7/arrow/memory"
98

109
"github.com/influxdata/flux/codes"
1110
"github.com/influxdata/flux/internal/errors"
@@ -158,65 +157,11 @@ func (a *String) Value(i int) string {
158157
return a.ValueString(i)
159158
}
160159

161-
// ValueRef returns a reference to the memory buffer and location that
162-
// stores the value at i. The reference is only valid for as long as the
163-
// array is, the buffer needs to be retained if further access is
164-
// required.
165-
func (a *String) ValueRef(i int) StringRef {
166-
if vr, ok := a.binaryArray.(interface{ ValueRef(int) StringRef }); ok {
167-
return vr.ValueRef(i)
168-
}
169-
return StringRef{
170-
buf: a.Data().Buffers()[2],
171-
off: a.ValueOffset(i),
172-
len: a.ValueLen(i),
173-
}
174-
}
175-
176-
// ValueCopy returns the value at the requested position copied into a
177-
// new memory location. This value will remain valid after the array is
178-
// released, but is not tracked by the memory allocator.
179-
//
180-
// This function is intended to be temporary while changes are being
181-
// made to reduce the amount of unaccounted data memory.
182-
func (a *String) ValueCopy(i int) string {
183-
return string(a.ValueRef(i).Bytes())
184-
}
185-
186160
func (a *String) IsConstant() bool {
187161
ic, ok := a.binaryArray.(interface{ IsConstant() bool })
188162
return ok && ic.IsConstant()
189163
}
190164

191-
// StringRef contains a referenct to the storage for a value.
192-
type StringRef struct {
193-
buf *arrowmem.Buffer
194-
off int
195-
len int
196-
}
197-
198-
// Buffer returns the memory buffer that contains the value.
199-
func (r StringRef) Buffer() *arrowmem.Buffer {
200-
return r.buf
201-
}
202-
203-
// Offset returns the offset into the memory buffer at which the value
204-
// starts.
205-
func (r StringRef) Offset() int {
206-
return r.off
207-
}
208-
209-
// Len returns the length of the value.
210-
func (r StringRef) Len() int {
211-
return r.len
212-
}
213-
214-
// Bytes returns the bytes from the memory buffer that contain the
215-
// value.
216-
func (r StringRef) Bytes() []byte {
217-
return r.buf.Bytes()[r.off : r.off+r.len]
218-
}
219-
220165
type sliceable interface {
221166
Slice(i, j int) Array
222167
}

‎array/repeat.go

-8
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,3 @@ func (b *repeatedBinary) Slice(i, j int) binaryArray {
8383
buf: b.buf,
8484
}
8585
}
86-
87-
func (b *repeatedBinary) ValueRef(int) StringRef {
88-
return StringRef{
89-
buf: b.buf,
90-
off: 0,
91-
len: b.buf.Len(),
92-
}
93-
}

‎arrow/arrow_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ func TestSlice_String(t *testing.T) {
472472

473473
vs := make([]string, l)
474474
for i := 0; i < l; i++ {
475-
vs[i] = arr.ValueCopy(i)
475+
vs[i] = arr.Value(i)
476476
}
477477

478478
if !cmp.Equal(values, vs) {
@@ -540,7 +540,7 @@ func TestSlice_String(t *testing.T) {
540540

541541
vs = vs[:0]
542542
for i := 0; i < l; i++ {
543-
vs = append(vs, arr.ValueCopy(i))
543+
vs = append(vs, arr.Value(i))
544544
}
545545

546546
if !cmp.Equal(tc.want, vs) {

‎csv/result.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -1257,7 +1257,7 @@ func encodeValueFrom(i, j int, c colMeta, cr flux.ColReader) (string, error) {
12571257
}
12581258
case flux.TString:
12591259
if cr.Strings(j).IsValid(i) {
1260-
v = cr.Strings(j).ValueCopy(i)
1260+
v = cr.Strings(j).Value(i)
12611261
}
12621262
case flux.TTime:
12631263
if cr.Times(j).IsValid(i) {

‎execute/executetest/allocator.go

+1-25
Original file line numberDiff line numberDiff line change
@@ -7,29 +7,5 @@ import (
77
)
88

99
var UnlimitedAllocator = &memory.ResourceAllocator{
10-
Allocator: Allocator{},
11-
}
12-
13-
// Allocator is an allocator for use in test. When a buffer is freed the
14-
// contents are overwritten with a predictable pattern to help detect
15-
// use-after-free scenarios.
16-
type Allocator struct{}
17-
18-
func (Allocator) Allocate(size int) []byte {
19-
return arrowmem.DefaultAllocator.Allocate(size)
20-
}
21-
22-
func (a Allocator) Reallocate(size int, b []byte) []byte {
23-
b1 := a.Allocate(size)
24-
copy(b1, b)
25-
a.Free(b)
26-
return b1
27-
}
28-
29-
func (a Allocator) Free(b []byte) {
30-
var pattern = [...]byte{0x00, 0x33, 0xcc, 0xff}
31-
for i := range b {
32-
b[i] = pattern[i%len(pattern)]
33-
}
34-
arrowmem.DefaultAllocator.Free(b)
10+
Allocator: arrowmem.DefaultAllocator,
3511
}

‎execute/executetest/table.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -544,7 +544,7 @@ func ConvertTable(tbl flux.Table) (*Table, error) {
544544
}
545545
case flux.TString:
546546
if col := cr.Strings(j); col.IsValid(i) {
547-
row[j] = col.ValueCopy(i)
547+
row[j] = col.Value(i)
548548
}
549549
case flux.TTime:
550550
if col := cr.Times(j); col.IsValid(i) {

‎execute/table.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -437,7 +437,7 @@ func ValueForRow(cr flux.ColReader, i, j int) values.Value {
437437
if cr.Strings(j).IsNull(i) {
438438
return values.NewNull(semantic.BasicString)
439439
}
440-
return values.NewString(cr.Strings(j).ValueCopy(i))
440+
return values.NewString(cr.Strings(j).Value(i))
441441
case flux.TInt:
442442
if cr.Ints(j).IsNull(i) {
443443
return values.NewNull(semantic.BasicInt)

‎execute/table/stringify.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ func valueForRow(cr flux.ColReader, i, j int) values.Value {
124124
if cr.Strings(j).IsNull(i) {
125125
return values.NewNull(semantic.BasicString)
126126
}
127-
return values.NewString(cr.Strings(j).ValueCopy(i))
127+
return values.NewString(cr.Strings(j).Value(i))
128128
case flux.TInt:
129129
if cr.Ints(j).IsNull(i) {
130130
return values.NewNull(semantic.BasicInt)

‎internal/arrowutil/array_values.gen.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -498,7 +498,7 @@ func (v StringArrayValue) Get(i int) values.Value {
498498
if v.arr.IsNull(i) {
499499
return values.Null
500500
}
501-
return values.New(v.arr.ValueCopy(i))
501+
return values.New(v.arr.Value(i))
502502
}
503503

504504
func (v StringArrayValue) Set(i int, value values.Value) {

‎internal/arrowutil/compare.gen.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ func StringCompare(x, y *array.String, i, j int) int {
237237
return 1
238238
}
239239

240-
if l, r := x.ValueCopy(i), y.ValueCopy(j); l < r {
240+
if l, r := x.Value(i), y.Value(j); l < r {
241241
return -1
242242
} else if l == r {
243243
return 0
@@ -256,7 +256,7 @@ func StringCompareDesc(x, y *array.String, i, j int) int {
256256
return 1
257257
}
258258

259-
if l, r := x.ValueCopy(i), y.ValueCopy(j); l > r {
259+
if l, r := x.Value(i), y.Value(j); l > r {
260260
return -1
261261
} else if l == r {
262262
return 0

‎internal/arrowutil/copy.gen.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ func CopyStringsTo(b *array.StringBuilder, arr *array.String) {
284284
b.AppendNull()
285285
continue
286286
}
287-
b.Append(arr.ValueCopy(i))
287+
b.Append(arr.Value(i))
288288
}
289289
}
290290

@@ -315,7 +315,7 @@ func CopyStringsByIndexTo(b *array.StringBuilder, arr *array.String, indices *ar
315315
b.AppendNull()
316316
continue
317317
}
318-
b.Append(arr.ValueCopy(offset))
318+
b.Append(arr.Value(offset))
319319
}
320320
}
321321

@@ -324,5 +324,5 @@ func CopyStringValue(b *array.StringBuilder, arr *array.String, i int) {
324324
b.AppendNull()
325325
return
326326
}
327-
b.Append(arr.ValueCopy(i))
327+
b.Append(arr.Value(i))
328328
}

‎internal/arrowutil/filter.gen.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ func FilterStrings(arr *array.String, bitset []byte, mem memory.Allocator) *arra
108108
for i := 0; i < len(bitset); i++ {
109109
if bitutil.BitIsSet(bitset, i) {
110110
if arr.IsValid(i) {
111-
b.Append(arr.ValueCopy(i))
111+
b.Append(arr.Value(i))
112112
} else {
113113
b.AppendNull()
114114
}

‎internal/arrowutil/iterator.gen.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -289,10 +289,10 @@ func IterateStrings(arrs []array.Array) StringIterator {
289289
return StringIterator{Values: values}
290290
}
291291

292-
// ValueCopy returns the current value in the iterator.
292+
// Value returns the current value in the iterator.
293293
func (i *StringIterator) Value() string {
294294
vs := i.Values[0]
295-
return vs.ValueCopy(i.i)
295+
return vs.Value(i.i)
296296
}
297297

298298
// IsValid returns if the current value is valid.

‎internal/arrowutil/types.tmpldata

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
"MonoType": "semantic.BasicString",
5151
"IsNumeric": false,
5252
"IsComparable": true,
53-
"Value": "ValueCopy",
53+
"Value": "Value",
5454
"Append": "Append",
5555
"NewArray": "NewStringArray"
5656
}

‎internal/moving_average/array_container.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ func (a *ArrayContainer) Value(i int) values.Value {
3737
case *array.Float:
3838
return values.New(float64(a.array.(*array.Float).Value(i)))
3939
case *array.String:
40-
return values.New(string(a.array.(*array.String).ValueCopy(i)))
40+
return values.New(string(a.array.(*array.String).Value(i)))
4141
default:
4242
return nil
4343
}
@@ -54,7 +54,7 @@ func (a *ArrayContainer) OrigValue(i int) interface{} {
5454
case *array.Float:
5555
return a.array.(*array.Float).Value(i)
5656
case *array.String:
57-
return a.array.(*array.String).ValueCopy(i)
57+
return a.array.(*array.String).Value(i)
5858
default:
5959
return nil
6060
}

‎result_iterator_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ func TestQueryResultIterator_Results(t *testing.T) {
192192
for i := 0; i < cr.Len(); i++ {
193193
r := row{
194194
Value: cr.Ints(0).Value(i),
195-
Tag: cr.Strings(1).ValueCopy(i),
195+
Tag: cr.Strings(1).Value(i),
196196
}
197197
got = append(got, r)
198198
}

‎semantic/semantictest/cmp.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ func getValue(arr array.Array, i int) values.Value {
219219
case *array.Float:
220220
return values.New(arr.Value(i))
221221
case *array.String:
222-
return values.New(arr.ValueCopy(i))
222+
return values.New(arr.Value(i))
223223
case *array.Boolean:
224224
return values.New(arr.Value(i))
225225
default:

‎stdlib/experimental/mqtt/to.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -316,12 +316,12 @@ func (t *ToMQTTTransformation) Process(id execute.DatasetID, tbl flux.Table) err
316316
if col.Type != flux.TString {
317317
return errors.Newf(codes.FailedPrecondition, "invalid type for measurement column: %s", col.Type)
318318
}
319-
m.name = er.Strings(j).ValueCopy(i)
319+
m.name = er.Strings(j).Value(i)
320320
case isTag[j]:
321321
if col.Type != flux.TString {
322322
return errors.Newf(codes.FailedPrecondition, "invalid type for tag column: %s", col.Type)
323323
}
324-
m.tags = append(m.tags, &protocol.Tag{Key: col.Label, Value: er.Strings(j).ValueCopy(i)})
324+
m.tags = append(m.tags, &protocol.Tag{Key: col.Label, Value: er.Strings(j).Value(i)})
325325

326326
case isValue[j]:
327327
switch col.Type {
@@ -332,7 +332,7 @@ func (t *ToMQTTTransformation) Process(id execute.DatasetID, tbl flux.Table) err
332332
case flux.TUInt:
333333
m.fields = append(m.fields, &protocol.Field{Key: col.Label, Value: er.UInts(j).Value(i)})
334334
case flux.TString:
335-
m.fields = append(m.fields, &protocol.Field{Key: col.Label, Value: er.Strings(j).ValueCopy(i)})
335+
m.fields = append(m.fields, &protocol.Field{Key: col.Label, Value: er.Strings(j).Value(i)})
336336
case flux.TTime:
337337
m.fields = append(m.fields, &protocol.Field{Key: col.Label, Value: values.Time(er.Times(j).Value(i))})
338338
case flux.TBool:

‎stdlib/influxdata/influxdb/to.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ outer:
217217
for j, col := range chunk.Cols() {
218218
switch {
219219
case col.Label == spec.MeasurementColumn:
220-
metric.NameStr = er.Strings(j).ValueCopy(i)
220+
metric.NameStr = er.Strings(j).Value(i)
221221
case col.Label == timeColLabel:
222222
valueTime := execute.ValueForRow(&er, i, j)
223223
if valueTime.IsNull() {
@@ -230,7 +230,7 @@ outer:
230230
return errors.New(codes.Invalid, "invalid type for tag column")
231231
}
232232

233-
value := er.Strings(j).ValueCopy(i)
233+
value := er.Strings(j).Value(i)
234234
if value == "" {
235235
// Skip tag value if it is empty.
236236
continue

‎stdlib/kafka/to.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -353,12 +353,12 @@ func (t *ToKafkaTransformation) Process(id execute.DatasetID, tbl flux.Table) (e
353353
if col.Type != flux.TString {
354354
return errors.New(codes.FailedPrecondition, "invalid type for measurement column")
355355
}
356-
m.name = er.Strings(j).ValueCopy(i)
356+
m.name = er.Strings(j).Value(i)
357357
case isTag[j]:
358358
if col.Type != flux.TString {
359359
return errors.New(codes.FailedPrecondition, "invalid type for measurement column")
360360
}
361-
m.tags = append(m.tags, &protocol.Tag{Key: col.Label, Value: er.Strings(j).ValueCopy(i)})
361+
m.tags = append(m.tags, &protocol.Tag{Key: col.Label, Value: er.Strings(j).Value(i)})
362362
case isValue[j]:
363363
switch col.Type {
364364
case flux.TFloat:
@@ -368,7 +368,7 @@ func (t *ToKafkaTransformation) Process(id execute.DatasetID, tbl flux.Table) (e
368368
case flux.TUInt:
369369
m.fields = append(m.fields, &protocol.Field{Key: col.Label, Value: er.UInts(j).Value(i)})
370370
case flux.TString:
371-
m.fields = append(m.fields, &protocol.Field{Key: col.Label, Value: er.Strings(j).ValueCopy(i)})
371+
m.fields = append(m.fields, &protocol.Field{Key: col.Label, Value: er.Strings(j).Value(i)})
372372
case flux.TTime:
373373
m.fields = append(m.fields, &protocol.Field{Key: col.Label, Value: values.Time(er.Times(j).Value(i))})
374374
case flux.TBool:

‎stdlib/sql/to.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -471,7 +471,7 @@ func CreateInsertComponents(t *ToSQLTransformation, tbl flux.Table) (colNames []
471471
valueArgs = append(valueArgs, nil)
472472
break
473473
}
474-
valueArgs = append(valueArgs, er.Strings(j).ValueCopy(i))
474+
valueArgs = append(valueArgs, er.Strings(j).Value(i))
475475
case flux.TTime:
476476
if er.Times(j).IsNull(i) {
477477
valueArgs = append(valueArgs, nil)

‎stdlib/universe/distinct.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ func (t *distinctTransformation) Process(id execute.DatasetID, tbl flux.Table) e
309309
}
310310
nullDistinct = true
311311
} else {
312-
v := cr.Strings(j).ValueCopy(i)
312+
v := cr.Strings(j).Value(i)
313313
if stringDistinct[v] {
314314
continue
315315
}

‎stdlib/universe/key_values.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,7 @@ func (t *keyValuesTransformation) Process(id execute.DatasetID, tbl flux.Table)
396396
}
397397
nullDistinct = true
398398
} else {
399-
v := vs.ValueCopy(i)
399+
v := vs.Value(i)
400400
if stringDistinct[[2]string{c.name, v}] {
401401
continue
402402
}

‎stdlib/universe/mode.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ func (t *modeTransformation) doString(cr flux.ColReader, tbl flux.Table, builder
220220
if cr.Strings(j).IsNull(i) {
221221
continue
222222
}
223-
v := cr.Strings(j).ValueCopy(i)
223+
v := cr.Strings(j).Value(i)
224224
stringMode[v]++
225225
}
226226

‎stdlib/universe/pivot.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,7 @@ func valueToStr(cr flux.ColReader, c flux.ColMeta, row, col int) string {
384384
}
385385
case flux.TString:
386386
if v := cr.Strings(col); v.IsValid(row) {
387-
result = v.ValueCopy(row)
387+
result = v.Value(row)
388388
}
389389
case flux.TTime:
390390
if v := cr.Times(col); v.IsValid(row) {

‎stdlib/universe/table_fns.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ func arrayFromColumn(idx int, tbl flux.Table) (values.Array, error) {
189189
switch typ {
190190
case flux.TString:
191191
if vs := cr.Strings(idx); vs.IsValid(i) {
192-
vsSlice = append(vsSlice, values.New(vs.ValueCopy(i)))
192+
vsSlice = append(vsSlice, values.New(vs.Value(i)))
193193
} else {
194194
vsSlice = append(vsSlice, values.NewNull(semantic.BasicString))
195195
}
@@ -279,7 +279,7 @@ func objectFromRow(idx int, cr flux.ColReader) values.Object {
279279
switch c.Type {
280280
case flux.TString:
281281
if vs := cr.Strings(j); vs.IsValid(idx) {
282-
v = values.New(vs.ValueCopy(idx))
282+
v = values.New(vs.Value(idx))
283283
} else {
284284
v = values.NewNull(semantic.BasicString)
285285
}

0 commit comments

Comments
 (0)
Please sign in to comment.