1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
use crate::prelude::*;
use polars_core::prelude::*;
use polars_io::RowCount;
/// Options accepted by `LazyFrame::scan_parquet`.
///
/// A default set of options is available through the `Default` implementation.
#[derive(Clone)]
pub struct ScanArgsParquet {
/// Optional cap on the number of rows to read. It is forwarded to every
/// underlying file scan; for glob patterns the concatenated result is
/// additionally sliced to this length.
pub n_rows: Option<usize>,
/// Forwarded as the `cache` argument of `LogicalPlanBuilder::scan_parquet`.
// NOTE(review): presumably controls caching of the scan result — confirm
// against the logical-plan builder.
pub cache: bool,
/// Forwarded as the `parallel` argument when a single file is scanned.
/// Scans created from a glob pattern always pass `false` instead.
pub parallel: bool,
/// Passed to `concat` when several files matched a glob pattern; not read
/// when a single file is scanned.
pub rechunk: bool,
/// Optional row-count column. For a single file it is handed to the scan
/// itself; for glob patterns it is added after concatenation via
/// `with_row_count`, using the `name` and `offset` of the `RowCount`.
pub row_count: Option<RowCount>,
}
impl Default for ScanArgsParquet {
fn default() -> Self {
Self {
n_rows: None,
cache: true,
parallel: true,
rechunk: true,
row_count: None,
}
}
}
impl LazyFrame {
    /// Builds a `LazyFrame` that scans the single parquet file at `path`.
    ///
    /// All arguments are forwarded unchanged to
    /// `LogicalPlanBuilder::scan_parquet`. The resulting frame has its
    /// `agg_scan_projection` optimization flag switched on.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `LogicalPlanBuilder::scan_parquet`.
    fn scan_parquet_impl(
        path: String,
        n_rows: Option<usize>,
        cache: bool,
        parallel: bool,
        row_count: Option<RowCount>,
    ) -> Result<Self> {
        let mut lf: LazyFrame =
            LogicalPlanBuilder::scan_parquet(path, n_rows, cache, parallel, row_count)?
                .build()
                .into();
        lf.opt_state.agg_scan_projection = true;
        Ok(lf)
    }

    /// Creates a `LazyFrame` from a parquet file, or from every file matching
    /// a glob pattern.
    ///
    /// If `path` contains a `*` it is treated as a glob pattern: each matching
    /// file is scanned separately and the scans are concatenated (honouring
    /// `args.rechunk`). In that mode the per-file scans are created with
    /// `parallel = false` and without a row count; `args.n_rows` and
    /// `args.row_count` are applied to the concatenated frame instead.
    /// Otherwise `path` is scanned directly with all of `args` forwarded.
    ///
    /// # Errors
    ///
    /// Returns `PolarsError::ComputeError` when the glob pattern is invalid,
    /// when a matched entry cannot be read, or when no file matches the
    /// pattern. Errors from the underlying scans and from `concat` are
    /// propagated unchanged.
    #[cfg_attr(docsrs, doc(cfg(feature = "parquet")))]
    pub fn scan_parquet(path: String, args: ScanArgsParquet) -> Result<Self> {
        // Plain path: a single scan that receives every option as-is.
        if !path.contains('*') {
            return Self::scan_parquet_impl(
                path,
                args.n_rows,
                args.cache,
                args.parallel,
                args.row_count,
            );
        }

        let paths = glob::glob(&path)
            .map_err(|_| PolarsError::ComputeError("invalid glob pattern given".into()))?;

        let lfs = paths
            .map(|entry| {
                let path = entry.map_err(|e| PolarsError::ComputeError(e.to_string().into()))?;
                let path_string = path.to_string_lossy().into_owned();
                // The row count is deliberately left out here; it is added once,
                // after concatenation, so that it spans all files.
                Self::scan_parquet_impl(path_string, args.n_rows, args.cache, false, None)
            })
            .collect::<Result<Vec<_>>>()?;

        // Report an empty match explicitly instead of relabelling every
        // `concat` failure as "no matching files found".
        if lfs.is_empty() {
            return Err(PolarsError::ComputeError(
                "no matching files found".into(),
            ));
        }

        let mut lf = concat(&lfs, args.rechunk)?;

        if let Some(n_rows) = args.n_rows {
            // Clamp before narrowing: a bare `n_rows as u32` wraps for values
            // above `u32::MAX` (e.g. `1 << 32` would become a slice of length 0).
            let len = n_rows.min(u32::MAX as usize) as u32;
            lf = lf.slice(0, len);
        }
        if let Some(rc) = args.row_count {
            lf = lf.with_row_count(&rc.name, Some(rc.offset));
        }
        Ok(lf)
    }
}