1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
use std::sync::Arc;
use crate::{
array::{Array, Offset, Utf8Array},
bitmap::MutableBitmap,
};
use super::{
utils::{build_extend_null_bits, extend_offset_values, extend_offsets, ExtendNullBits},
Growable,
};
/// Incrementally builds a [`Utf8Array`] by copying slices out of a fixed set
/// of borrowed source arrays.
pub struct GrowableUtf8<'a, O: Offset> {
/// Source arrays; `extend` selects one of them by index.
arrays: Vec<&'a Utf8Array<O>>,
/// Validity bitmap of the array under construction.
validity: MutableBitmap,
/// Value bytes of the array under construction.
values: Vec<u8>,
/// Offsets of the array under construction; `new` seeds it with a single
/// `O::default()` entry.
offsets: Vec<O>,
/// Running last offset: updated by `extend` and used as the fill value for
/// the offsets appended by `extend_validity`.
length: O,
/// One validity-extension callback per source array, in the same order as
/// `arrays`; invoked with `(validity, start, len)`.
extend_null_bits: Vec<ExtendNullBits<'a>>,
}
impl<'a, O: Offset> GrowableUtf8<'a, O> {
    /// Creates a new [`GrowableUtf8`] bound to `arrays`.
    ///
    /// Validity tracking is forced on when any of `arrays` reports a non-zero
    /// null count, regardless of the `use_validity` argument. `capacity` is
    /// the number of slots reserved up front in the offsets (`capacity + 1`
    /// entries) and validity buffers; the values buffer starts empty.
    pub fn new(arrays: Vec<&'a Utf8Array<O>>, mut use_validity: bool, capacity: usize) -> Self {
        // If at least one source has nulls, every insertion (from any source)
        // has to write validity bits.
        if arrays.iter().any(|array| array.null_count() > 0) {
            use_validity = true;
        }

        let extend_null_bits = arrays
            .iter()
            .map(|array| build_extend_null_bits(*array, use_validity))
            .collect();

        // The offsets buffer always starts with a single zero-length offset.
        let mut offsets = Vec::with_capacity(capacity + 1);
        let length = O::default();
        offsets.push(length);

        Self {
            // `arrays` is already owned: move it instead of cloning the vector.
            arrays,
            values: Vec::new(),
            offsets,
            length,
            validity: MutableBitmap::with_capacity(capacity),
            extend_null_bits,
        }
    }

    /// Moves the accumulated buffers out of `self` and assembles them into a
    /// [`Utf8Array`] whose data type is cloned from the first source array.
    ///
    /// Afterwards `self` is reset to the same empty state `new` produces
    /// (a single default offset, zero running length, empty values and
    /// validity), so it can keep being extended.
    ///
    /// # Panics
    ///
    /// Panics if the growable was created with an empty `arrays` vector.
    fn to(&mut self) -> Utf8Array<O> {
        let validity = std::mem::take(&mut self.validity);
        let values = std::mem::take(&mut self.values);
        // Leave a fresh `[0]` offsets buffer behind (and reset the running
        // length) rather than an empty one: `extend` only ever appends *end*
        // offsets, so an empty buffer plus a stale `length` would yield
        // offsets without the leading zero on any later use.
        let offsets = std::mem::replace(&mut self.offsets, vec![O::default()]);
        self.length = O::default();

        // SAFETY: `offsets`, `values` and `validity` are only written by
        // `new`, `extend` and `extend_validity`. This relies on
        // `extend_offsets` / `extend_offset_values` producing offsets and
        // bytes consistent with the (already valid) source arrays.
        // NOTE(review): confirm against those helpers.
        unsafe {
            Utf8Array::<O>::from_data_unchecked(
                self.arrays[0].data_type().clone(),
                offsets.into(),
                values.into(),
                validity.into(),
            )
        }
    }
}
impl<'a, O: Offset> Growable<'a> for GrowableUtf8<'a, O> {
    /// Appends `len` slots, beginning at `start`, from the source array at
    /// position `index`.
    ///
    /// Validity bits are extended first, then the offsets, then the value
    /// bytes.
    fn extend(&mut self, index: usize, start: usize, len: usize) {
        // Validity: delegate to the callback prepared for this source array.
        let push_validity = &self.extend_null_bits[index];
        push_validity(&mut self.validity, start, len);

        let source = self.arrays[index];
        let source_offsets = source.offsets();
        let source_values = source.values();

        // `len` slots are delimited by `len + 1` consecutive offsets.
        let offset_window = &source_offsets[start..start + len + 1];
        extend_offsets::<O>(&mut self.offsets, &mut self.length, offset_window);

        extend_offset_values::<O>(&mut self.values, source_offsets, source_values, start, len);
    }

    /// Appends `additional` null slots: each gets an offset equal to the
    /// current running length and an unset validity bit.
    fn extend_validity(&mut self, additional: usize) {
        let grown_len = self.offsets.len() + additional;
        let last_offset = self.length;
        self.offsets.resize(grown_len, last_offset);
        self.validity.extend_constant(additional, false);
    }

    /// Builds the array accumulated so far and returns it behind an [`Arc`].
    fn as_arc(&mut self) -> Arc<dyn Array> {
        let built = self.to();
        Arc::new(built)
    }

    /// Builds the array accumulated so far and returns it behind a [`Box`].
    fn as_box(&mut self) -> Box<dyn Array> {
        let built = self.to();
        Box::new(built)
    }
}
impl<'a, O: Offset> From<GrowableUtf8<'a, O>> for Utf8Array<O> {
fn from(val: GrowableUtf8<'a, O>) -> Self {
unsafe {
Utf8Array::<O>::from_data_unchecked(
val.arrays[0].data_type().clone(),
val.offsets.into(),
val.values.into(),
val.validity.into(),
)
}
}
}