1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
use polars_core::prelude::*;

pub trait PhysicalIoExpr: Send + Sync {
    /// Takes a `DataFrame` and produces a boolean `Series` that serves
    /// as a predicate mask.
    ///
    /// # Errors
    ///
    /// Returns whatever error the implementation produces while evaluating
    /// the expression against `df`.
    fn evaluate(&self, df: &DataFrame) -> Result<Series>;

    /// Returns this expression as a [`StatsEvaluator`], if it can act as one.
    ///
    /// A `StatsEvaluator` can take batch statistics and determine if a file
    /// should be read -> `true`
    /// or not -> `false`.
    ///
    /// The default implementation returns `None`.
    #[cfg(feature = "parquet")]
    fn as_stats_evaluator(&self) -> Option<&dyn StatsEvaluator> {
        None
    }
}

/// An evaluator that decides from statistics alone whether data should be read.
#[cfg(feature = "parquet")]
pub trait StatsEvaluator {
    /// Inspects `stats` and reports whether the data they describe should be
    /// read (`Ok(true)`) or can be skipped (`Ok(false)`).
    ///
    /// # Errors
    ///
    /// Returns an error if the implementation fails to evaluate the statistics.
    fn should_read(&self, stats: &crate::parquet::predicates::BatchStats) -> Result<bool>;
}

/// Builds a zero-row `DataFrame` that mirrors the given arrow schema.
///
/// Every field in `schema` becomes one column: an all-null `Series` of
/// length 0 carrying the field's name and its data type converted from the
/// arrow representation. The columns are handed to
/// `DataFrame::new_no_checks` in schema order.
#[cfg(feature = "parquet")]
pub(crate) fn arrow_schema_to_empty_df(schema: &ArrowSchema) -> DataFrame {
    let mut empty_columns = Vec::with_capacity(schema.fields.len());
    for field in schema.fields.iter() {
        // Convert the arrow data type into the crate's own `DataType`.
        let dtype: DataType = field.data_type().into();
        empty_columns.push(Series::full_null(&field.name, 0, &dtype));
    }
    DataFrame::new_no_checks(empty_columns)
}

/// Filters `df` in place with the rows selected by `predicate`.
///
/// Nothing happens when `predicate` is `None` or when `df` is empty.
/// Otherwise the predicate is evaluated against `df`, the result is used as
/// a boolean mask, and `df` is replaced by the filtered frame.
///
/// # Errors
///
/// Propagates any error from evaluating the predicate or from filtering.
///
/// # Panics
///
/// Panics if the predicate evaluates to a `Series` that is not boolean.
#[cfg(any(feature = "ipc", feature = "parquet", feature = "json",))]
pub(crate) fn apply_predicate(
    df: &mut DataFrame,
    predicate: Option<&dyn PhysicalIoExpr>,
) -> Result<()> {
    // Bail out early when there is nothing to filter or nothing to filter with.
    let expr = match predicate {
        Some(expr) if !df.is_empty() => expr,
        _ => return Ok(()),
    };

    let evaluated = expr.evaluate(df)?;
    let keep = evaluated.bool().expect("filter predicates was not of type boolean");
    *df = df.filter(keep)?;
    Ok(())
}