1
1
package execute
2
2
3
3
import (
4
+ "bytes"
4
5
"fmt"
5
6
"sort"
6
7
"sync/atomic"
7
8
9
+ arrowmem "github.com/apache/arrow/go/v7/arrow/memory"
8
10
"github.com/google/go-cmp/cmp"
11
+
9
12
"github.com/influxdata/flux"
10
13
"github.com/influxdata/flux/array"
11
14
"github.com/influxdata/flux/arrow"
@@ -295,8 +298,9 @@ func TablesEqual(left, right flux.Table, alloc memory.Allocator) (bool, error) {
295
298
eq = cmp .Equal (leftBuffer .cols [j ].(* floatColumnBuilder ).data ,
296
299
rightBuffer .cols [j ].(* floatColumnBuilder ).data )
297
300
case flux .TString :
298
- eq = cmp .Equal (leftBuffer .cols [j ].(* stringColumnBuilder ).data ,
299
- rightBuffer .cols [j ].(* stringColumnBuilder ).data )
301
+ eq = cmp .Equal (leftBuffer .cols [j ].(* stringColumnBuilder ),
302
+ rightBuffer .cols [j ].(* stringColumnBuilder ),
303
+ cmp .Comparer (stringColumnBuilderEqual ))
300
304
case flux .TTime :
301
305
eq = cmp .Equal (leftBuffer .cols [j ].(* timeColumnBuilder ).data ,
302
306
rightBuffer .cols [j ].(* timeColumnBuilder ).data )
@@ -324,6 +328,27 @@ func colsMatch(left, right []flux.ColMeta) bool {
324
328
return true
325
329
}
326
330
331
+ func stringColumnBuilderEqual (x , y * stringColumnBuilder ) bool {
332
+ if x .Len () != y .Len () {
333
+ return false
334
+ }
335
+ for i := 0 ; i < x .Len (); i ++ {
336
+ if x .IsNil (i ) {
337
+ if ! y .IsNil (i ) {
338
+ return false
339
+ }
340
+ continue
341
+ }
342
+ if y .IsNil (i ) {
343
+ return false
344
+ }
345
+ if ! bytes .Equal (x .data [i ].Bytes (x .buf ), y .data [i ].Bytes (y .buf )) {
346
+ return false
347
+ }
348
+ }
349
+ return true
350
+ }
351
+
327
352
// ColMap writes a mapping of builder index to cols index into colMap.
328
353
// When colMap does not have enough capacity a new colMap is allocated.
329
354
// The colMap is always returned.
@@ -598,6 +623,7 @@ func (b *ColListTableBuilder) AddCol(c flux.ColMeta) (int, error) {
598
623
case flux .TString :
599
624
b .cols = append (b .cols , & stringColumnBuilder {
600
625
columnBuilderBase : colBase ,
626
+ buf : arrowmem .NewResizableBuffer (b .alloc .Allocator ),
601
627
})
602
628
if b .NRows () > 0 {
603
629
if err := b .GrowStrings (newIdx , b .NRows ()); err != nil {
@@ -919,8 +945,9 @@ func (b *ColListTableBuilder) SetString(i int, j int, value string) error {
919
945
if err := b .checkCol (j , flux .TString ); err != nil {
920
946
return err
921
947
}
922
- b .cols [j ].(* stringColumnBuilder ).data [i ] = value
923
- b .cols [j ].SetNil (i , false )
948
+ col := b .cols [j ].(* stringColumnBuilder )
949
+ col .data [i ] = col .makeString (value )
950
+ col .SetNil (i , false )
924
951
return nil
925
952
}
926
953
@@ -929,7 +956,7 @@ func (b *ColListTableBuilder) AppendString(j int, value string) error {
929
956
return err
930
957
}
931
958
col := b .cols [j ].(* stringColumnBuilder )
932
- col .data = b .alloc .AppendStrings (col .data , value )
959
+ col .data = b .alloc .AppendStrings (col .data , col . makeString ( value ) )
933
960
b .nrows = len (col .data )
934
961
return nil
935
962
}
@@ -1152,11 +1179,6 @@ func (b *ColListTableBuilder) Floats(j int) []float64 {
1152
1179
CheckColType (b .colMeta [j ], flux .TFloat )
1153
1180
return b .cols [j ].(* floatColumnBuilder ).data
1154
1181
}
1155
- func (b * ColListTableBuilder ) Strings (j int ) []string {
1156
- meta := b .colMeta [j ]
1157
- CheckColType (meta , flux .TString )
1158
- return b .cols [j ].(* stringColumnBuilder ).data
1159
- }
1160
1182
func (b * ColListTableBuilder ) Times (j int ) []values.Time {
1161
1183
CheckColType (b .colMeta [j ], flux .TTime )
1162
1184
return b .cols [j ].(* timeColumnBuilder ).data
@@ -1180,7 +1202,9 @@ func (b *ColListTableBuilder) GetRow(row int) values.Object {
1180
1202
case flux .TFloat :
1181
1203
val = values .NewFloat (b .cols [j ].(* floatColumnBuilder ).data [row ])
1182
1204
case flux .TString :
1183
- val = values .NewString (b .cols [j ].(* stringColumnBuilder ).data [row ])
1205
+ // TODO(mhilton): avoid a copy
1206
+ col := b .cols [j ].(* stringColumnBuilder )
1207
+ val = values .NewString (string (col .data [row ].Bytes (col .buf )))
1184
1208
case flux .TTime :
1185
1209
val = values .NewTime (b .cols [j ].(* timeColumnBuilder ).data [row ])
1186
1210
}
@@ -1866,46 +1890,38 @@ func (c *stringColumn) Copy() column {
1866
1890
1867
1891
type stringColumnBuilder struct {
1868
1892
columnBuilderBase
1869
- data []string
1893
+ data []String
1894
+
1895
+ // buf contains a backing buffer containing the bytes of the
1896
+ // strings.
1897
+ buf * arrowmem.Buffer
1870
1898
}
1871
1899
1872
1900
// Clear discards the values held by the builder. The current byte buffer is
// released and replaced with a new resizable buffer created from the same
// allocator, and the row slice is truncated to length zero while keeping its
// capacity.
func (c *stringColumnBuilder) Clear() {
	c.buf.Release()
	fresh := arrowmem.NewResizableBuffer(c.alloc.Allocator)
	c.buf = fresh
	c.data = c.data[0:0]
}
1875
1905
1876
1906
// Release gives up the builder's storage: the byte buffer is released and the
// capacity of the row slice, in units of stringSize, is reported back to the
// allocator through Free.
//
// NOTE(review): c.data is left untouched here, so calling Release a second
// time would pass the same capacity to Free again — confirm that callers
// release a builder only once.
func (c *stringColumnBuilder) Release() {
	rows := cap(c.data)
	c.buf.Release()
	c.alloc.Free(rows, stringSize)
}
1880
1910
1881
1911
func (c * stringColumnBuilder ) Copy () column {
1882
- var data * array.String
1883
- if len (c .nils ) > 0 {
1884
- b := arrow .NewStringBuilder (c .alloc .Allocator )
1885
- b .Reserve (len (c .data ))
1886
- sz := 0
1887
- for i , v := range c .data {
1888
- if c .nils [i ] {
1889
- continue
1890
- }
1891
- sz += len (v )
1892
- }
1893
- b .ReserveData (sz )
1894
- for i , v := range c .data {
1895
- if c .nils [i ] {
1896
- b .AppendNull ()
1897
- continue
1898
- }
1899
- b .Append (v )
1912
+ builder := arrow .NewStringBuilder (c .alloc .Allocator )
1913
+ builder .Reserve (len (c .data ))
1914
+ builder .ReserveData (c .buf .Len ())
1915
+ for i , v := range c .data {
1916
+ if c .nils [i ] {
1917
+ builder .AppendNull ()
1918
+ continue
1900
1919
}
1901
- data = b .NewStringArray ()
1902
- b .Release ()
1903
- } else {
1904
- data = arrow .NewString (c .data , c .alloc .Allocator )
1920
+ builder .AppendBytes (v .Bytes (c .buf ))
1905
1921
}
1906
1922
col := & stringColumn {
1907
1923
ColMeta : c .ColMeta ,
1908
- data : data ,
1924
+ data : builder . NewStringArray () ,
1909
1925
}
1910
1926
return col
1911
1927
}
@@ -1916,13 +1932,13 @@ func (c *stringColumnBuilder) Len() int {
1916
1932
1917
1933
func (c * stringColumnBuilder ) Equal (i , j int ) bool {
1918
1934
return c .EqualFunc (i , j , func (i , j int ) bool {
1919
- return c .data [i ] == c .data [j ]
1935
+ return bytes . Equal ( c .data [i ]. Bytes ( c . buf ), c .data [j ]. Bytes ( c . buf ))
1920
1936
})
1921
1937
}
1922
1938
1923
1939
func (c * stringColumnBuilder ) Less (i , j int ) bool {
1924
1940
return c .LessFunc (i , j , func (i , j int ) bool {
1925
- return c .data [i ] < c .data [j ]
1941
+ return bytes . Compare ( c .data [i ]. Bytes ( c . buf ), c .data [j ]. Bytes ( c . buf )) < 0
1926
1942
})
1927
1943
}
1928
1944
@@ -1931,6 +1947,16 @@ func (c *stringColumnBuilder) Swap(i, j int) {
1931
1947
c .data [i ], c .data [j ] = c .data [j ], c .data [i ]
1932
1948
}
1933
1949
1950
+ func (c * stringColumnBuilder ) makeString (s string ) String {
1951
+ offset := c .buf .Len ()
1952
+ c .buf .Resize (offset + len (s ))
1953
+ copy (c .buf .Bytes ()[offset :], s )
1954
+ return String {
1955
+ offset : offset ,
1956
+ len : len (s ),
1957
+ }
1958
+ }
1959
+
1934
1960
type timeColumn struct {
1935
1961
flux.ColMeta
1936
1962
data * array.Int
0 commit comments