1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
use parquet_format_async_temp::RowGroup;
use super::{column_chunk_metadata::ColumnChunkMetaData, schema_descriptor::SchemaDescriptor};
use crate::{error::Result, write::ColumnOffsetsMetadata};
/// Metadata describing a single row group: the metadata of its column
/// chunks together with the row count and byte size recorded for it.
#[derive(Clone, Debug)]
pub struct RowGroupMetaData {
    /// Metadata of every column chunk belonging to this row group.
    columns: Vec<ColumnChunkMetaData>,
    /// Number of rows stored in this row group.
    num_rows: i64,
    /// Total byte size recorded for this row group.
    total_byte_size: i64,
}
impl RowGroupMetaData {
    /// Creates a new [`RowGroupMetaData`] from its parts.
    pub fn new(
        columns: Vec<ColumnChunkMetaData>,
        num_rows: i64,
        total_byte_size: i64,
    ) -> RowGroupMetaData {
        RowGroupMetaData {
            columns,
            num_rows,
            total_byte_size,
        }
    }

    /// Number of column chunks in this row group.
    pub fn num_columns(&self) -> usize {
        self.columns().len()
    }

    /// Returns the metadata of the column chunk at position `i`.
    ///
    /// # Panics
    ///
    /// Panics if `i` is not smaller than [`Self::num_columns`].
    pub fn column(&self, i: usize) -> &ColumnChunkMetaData {
        &self.columns[i]
    }

    /// Returns the metadata of all column chunks as a slice.
    pub fn columns(&self) -> &[ColumnChunkMetaData] {
        self.columns.as_slice()
    }

    /// Number of rows in this row group.
    pub fn num_rows(&self) -> i64 {
        self.num_rows
    }

    /// Total byte size recorded for this row group.
    pub fn total_byte_size(&self) -> i64 {
        self.total_byte_size
    }

    /// Sum of the compressed sizes reported by every column chunk.
    pub fn compressed_size(&self) -> i64 {
        let chunk_sizes = self.columns.iter().map(|chunk| chunk.compressed_size());
        chunk_sizes.sum()
    }

    /// Builds a [`RowGroupMetaData`] from its thrift representation.
    ///
    /// Each thrift column chunk of `rg` is paired, in order, with the column
    /// descriptor at the same position in `schema_descr`.
    ///
    /// # Errors
    ///
    /// Returns the first error produced while converting a column chunk.
    ///
    /// # Panics
    ///
    /// Panics if the number of columns in `schema_descr` differs from the
    /// number of column chunks in `rg`.
    pub fn try_from_thrift(
        schema_descr: &SchemaDescriptor,
        rg: RowGroup,
    ) -> Result<RowGroupMetaData> {
        assert_eq!(schema_descr.num_columns(), rg.columns.len());

        let RowGroup {
            columns: chunks,
            num_rows,
            total_byte_size,
            ..
        } = rg;

        // Collecting into `Result` stops at the first chunk that fails to convert.
        let columns = chunks
            .into_iter()
            .zip(schema_descr.columns())
            .map(|(chunk, descriptor)| {
                ColumnChunkMetaData::try_from_thrift(descriptor.clone(), chunk)
            })
            .collect::<Result<Vec<_>>>()?;

        Ok(Self {
            columns,
            num_rows,
            total_byte_size,
        })
    }

    /// Converts this metadata into its thrift representation.
    ///
    /// The `file_offset` of the resulting [`RowGroup`] is derived from the
    /// first column chunk only; it is `None` when there are no column chunks
    /// or when no offset can be computed for that chunk. The
    /// `sorting_columns`, `total_compressed_size` and `ordinal` fields are
    /// left unset.
    pub fn into_thrift(self) -> RowGroup {
        // Only the first column chunk determines the row group's file offset.
        let file_offset = self.columns.first().and_then(|first| {
            ColumnOffsetsMetadata::from_column_chunk_metadata(first).calc_row_group_file_offset()
        });

        let columns = self
            .columns
            .into_iter()
            .map(|chunk| chunk.into_thrift())
            .collect();

        RowGroup {
            columns,
            total_byte_size: self.total_byte_size,
            num_rows: self.num_rows,
            sorting_columns: None,
            file_offset,
            total_compressed_size: None,
            ordinal: None,
        }
    }
}