@@ -2,53 +2,100 @@ package array
2
2
3
3
import (
4
4
"bytes"
5
+ "sync/atomic"
6
+ "unsafe"
5
7
8
+ "github.com/apache/arrow/go/v7/arrow"
6
9
"github.com/apache/arrow/go/v7/arrow/array"
7
10
"github.com/apache/arrow/go/v7/arrow/bitutil"
8
11
"github.com/apache/arrow/go/v7/arrow/memory"
9
12
)
10
13
11
14
type StringBuilder struct {
12
- mem memory.Allocator
13
- builder * array.BinaryBuilder
14
- constant bool
15
+ mem memory.Allocator
16
+ len int
17
+ cap int
18
+ reserveData int
19
+ buffer * memory.Buffer
20
+ builder * array.BinaryBuilder
21
+ refCount int64
15
22
}
16
23
17
24
func NewStringBuilder (mem memory.Allocator ) * StringBuilder {
18
25
return & StringBuilder {
19
- mem : mem ,
20
- builder : array .NewBinaryBuilder (mem , StringType ),
21
- constant : true ,
26
+ mem : mem ,
27
+ len : 0 ,
28
+ cap : 0 ,
29
+ reserveData : 0 ,
30
+ buffer : nil ,
31
+ builder : nil ,
32
+ refCount : 1 ,
22
33
}
23
34
}
24
35
25
36
// Retain atomically increments the builder's reference count by one.
func (b *StringBuilder) Retain() {
	const delta int64 = 1
	atomic.AddInt64(&b.refCount, delta)
}
28
39
func (b * StringBuilder ) Release () {
29
- b .builder .Release ()
40
+ if atomic .AddInt64 (& b .refCount , - 1 ) == 0 {
41
+ if b .buffer != nil {
42
+ b .buffer .Release ()
43
+ }
44
+ if b .builder != nil {
45
+ b .builder .Release ()
46
+ }
47
+ }
30
48
}
31
49
func (b * StringBuilder ) Len () int {
32
- return b .builder .Len ()
50
+ if b .builder != nil {
51
+ return b .builder .Len ()
52
+ }
53
+ return b .len
33
54
}
34
55
func (b * StringBuilder ) Cap () int {
35
- return b .builder .Cap ()
56
+ if b .builder != nil {
57
+ return b .builder .Cap ()
58
+ }
59
+ if b .cap > b .len {
60
+ return b .cap
61
+ }
62
+ return b .len
36
63
}
37
64
func (b * StringBuilder ) NullN () int {
38
- return b .builder .NullN ()
65
+ if b .builder != nil {
66
+ return b .builder .NullN ()
67
+ }
68
+ return 0
39
69
}
40
70
41
71
func (b * StringBuilder ) AppendBytes (buf []byte ) {
42
- if b .builder .Len () > 0 {
43
- b .constant = b .constant && bytes .Equal (buf , b .builder .Value (0 ))
72
+ if b .builder != nil {
73
+ b .builder .Append (buf )
74
+ return
75
+ }
76
+ if b .len == 0 {
77
+ b .buffer = memory .NewResizableBuffer (b .mem )
78
+ b .buffer .Resize (len (buf ))
79
+ copy (b .buffer .Bytes (), buf )
80
+ b .len = 1
81
+ return
44
82
}
45
- b .builder .Append (buf )
83
+ if bytes .Equal (b .buffer .Bytes (), buf ) {
84
+ b .len ++
85
+ return
86
+ }
87
+ b .makeBuilder (buf )
88
+
46
89
}
47
90
48
- // Append appends a string to the array being built. The input string
49
- // will always be copied.
91
+ // Append appends a string to the array being built. A reference
92
+ // to the input string will not be retained by the builder. The
93
+ // string will be copied, if necessary.
50
94
func (b * StringBuilder ) Append (v string ) {
51
- b .AppendBytes ([]byte (v ))
95
+ // Avoid copying the input string as AppendBytes
96
+ // will never keep a reference or modify the input.
97
+ bytes := unsafe .Slice (unsafe .StringData (v ), len (v ))
98
+ b .AppendBytes (bytes )
52
99
}
53
100
54
101
func (b * StringBuilder ) AppendValues (v []string , valid []bool ) {
@@ -61,38 +108,66 @@ func (b *StringBuilder) AppendValues(v []string, valid []bool) {
61
108
}
62
109
}
63
110
func (b * StringBuilder ) AppendNull () {
64
- b .constant = false
111
+ if b .builder == nil {
112
+ b .makeBuilder (nil )
113
+ }
65
114
b .builder .AppendNull ()
66
115
}
67
116
68
- func (b * StringBuilder ) UnsafeAppendBoolToBitmap (isValid bool ) {
69
- b .builder .UnsafeAppendBoolToBitmap (isValid )
70
- }
71
-
72
117
func (b * StringBuilder ) Reserve (n int ) {
73
- b .builder .Reserve (n )
118
+ if b .builder != nil {
119
+ b .builder .Reserve (n )
120
+ return
121
+ }
122
+ if b .len + n > b .cap {
123
+ b .cap = b .len + n
124
+ }
74
125
}
75
126
76
127
func (b * StringBuilder ) ReserveData (n int ) {
77
- b .builder .ReserveData (n )
128
+ if b .builder != nil {
129
+ b .builder .ReserveData (n )
130
+ return
131
+ }
132
+ b .reserveData = n
78
133
}
79
134
80
135
func (b * StringBuilder ) Resize (n int ) {
81
- b .builder .Resize (n )
136
+ if b .builder != nil {
137
+ b .builder .Resize (n )
138
+ }
139
+ b .cap = n
140
+ if b .len > n {
141
+ b .len = n
142
+ }
82
143
}
83
144
84
145
// NewArray returns the result of NewStringArray as an Array.
func (b *StringBuilder) NewArray() Array {
	strs := b.NewStringArray()
	return strs
}
87
148
88
149
func (b * StringBuilder ) NewStringArray () * String {
89
- arr := b .builder .NewBinaryArray ()
90
- if ! b .constant || arr .Len () < 1 {
91
- b .constant = true
92
- return & String {arr }
150
+ if b .builder != nil {
151
+ arr := & String {b .builder .NewBinaryArray ()}
152
+ b .builder .Release ()
153
+ b .builder = nil
154
+ return arr
155
+ }
156
+ if b .buffer != nil {
157
+ arr := & String {& repeatedBinary {
158
+ len : b .len ,
159
+ buf : b .buffer ,
160
+ }}
161
+ b .buffer = nil
162
+ b .len = 0
163
+ b .cap = 0
164
+ return arr
93
165
}
94
- defer arr .Release ()
95
- return StringRepeat (arr .ValueString (0 ), arr .Len (), b .mem )
166
+ // getting this far means we have an empty array.
167
+ arr := StringRepeat ("" , b .len , b .mem )
168
+ b .len = 0
169
+ b .cap = 0
170
+ return arr
96
171
}
97
172
98
173
func (b * StringBuilder ) CopyValidValues (values * String , nullCheckArray Array ) {
@@ -110,6 +185,43 @@ func (b *StringBuilder) CopyValidValues(values *String, nullCheckArray Array) {
110
185
}
111
186
}
112
187
188
+ func (b * StringBuilder ) makeBuilder (value []byte ) {
189
+ bufferLen := 0
190
+ if b .buffer != nil {
191
+ bufferLen = b .buffer .Len ()
192
+ }
193
+ size := b .len
194
+ if b .cap > b .len {
195
+ size = b .cap
196
+ }
197
+ dataSize := b .len * bufferLen
198
+ if value != nil {
199
+ if b .cap <= b .len {
200
+ size ++
201
+ }
202
+ dataSize += len (value )
203
+ }
204
+ if b .reserveData > dataSize {
205
+ dataSize = b .reserveData
206
+ }
207
+ b .builder = array .NewBinaryBuilder (b .mem , arrow .BinaryTypes .String )
208
+ b .builder .Resize (size )
209
+ b .builder .ReserveData (dataSize )
210
+ for i := 0 ; i < b .len ; i ++ {
211
+ b .builder .Append (b .buffer .Bytes ())
212
+ }
213
+ if value != nil {
214
+ b .builder .Append (value )
215
+ }
216
+ if b .buffer != nil {
217
+ b .buffer .Release ()
218
+ b .buffer = nil
219
+ }
220
+ b .len = 0
221
+ b .cap = 0
222
+ b .reserveData = 0
223
+ }
224
+
113
225
// Copy of Array.IsValid from arrow, allowing the IsValid check to be done without going through an interface
114
226
func isValid (nullBitmapBytes []byte , offset int , i int ) bool {
115
227
return len (nullBitmapBytes ) == 0 || bitutil .BitIsSet (nullBitmapBytes , offset + i )
0 commit comments