Enum polars::prelude::Expr

source · [−]

pub enum Expr {
Show 30 variants
    Alias(Box<Expr, Global>, Arc<str>),
    Column(Arc<str>),
    Columns(Vec<String, Global>),
    DtypeColumn(Vec<DataType, Global>),
    Literal(LiteralValue),
    BinaryExpr {
        left: Box<Expr, Global>,
        op: Operator,
        right: Box<Expr, Global>,
    },
    Not(Box<Expr, Global>),
    IsNotNull(Box<Expr, Global>),
    IsNull(Box<Expr, Global>),
    Cast {
        expr: Box<Expr, Global>,
        data_type: DataType,
        strict: bool,
    },
    Sort {
        expr: Box<Expr, Global>,
        options: SortOptions,
    },
    Take {
        expr: Box<Expr, Global>,
        idx: Box<Expr, Global>,
    },
    SortBy {
        expr: Box<Expr, Global>,
        by: Vec<Expr, Global>,
        reverse: Vec<bool, Global>,
    },
    Agg(AggExpr),
    Ternary {
        predicate: Box<Expr, Global>,
        truthy: Box<Expr, Global>,
        falsy: Box<Expr, Global>,
    },
    Function {
        input: Vec<Expr, Global>,
        function: NoEq<Arc<dyn SeriesUdf + 'static>>,
        output_type: NoEq<Arc<dyn FunctionOutputField + 'static>>,
        options: FunctionOptions,
    },
    Shift {
        input: Box<Expr, Global>,
        periods: i64,
    },
    Reverse(Box<Expr, Global>),
    Duplicated(Box<Expr, Global>),
    IsUnique(Box<Expr, Global>),
    Explode(Box<Expr, Global>),
    Filter {
        input: Box<Expr, Global>,
        by: Box<Expr, Global>,
    },
    Window {
        function: Box<Expr, Global>,
        partition_by: Vec<Expr, Global>,
        order_by: Option<Box<Expr, Global>>,
        options: WindowOptions,
    },
    Wildcard,
    Slice {
        input: Box<Expr, Global>,
        offset: Box<Expr, Global>,
        length: Box<Expr, Global>,
    },
    Exclude(Box<Expr, Global>, Vec<Excluded, Global>),
    KeepName(Box<Expr, Global>),
    RenameAlias {
        function: NoEq<Arc<dyn RenameAliasFn + 'static>>,
        expr: Box<Expr, Global>,
    },
    Count,
    Nth(i64),
}

Expand description

Queries consist of multiple expressions.

Variants

`Alias(Box<Expr, Global>, Arc<str>)`

`Column(Arc<str>)`

`Columns(Vec<String, Global>)`

`DtypeColumn(Vec<DataType, Global>)`

`BinaryExpr`

Fields

left: Box<Expr, Global>

op: Operator

right: Box<Expr, Global>

`IsNotNull(Box<Expr, Global>)`

`IsNull(Box<Expr, Global>)`

`Cast`

Fields

expr: Box<Expr, Global>

data_type: DataType

strict: bool

`Sort`

Fields

expr: Box<Expr, Global>

options: SortOptions

`Take`

Fields

expr: Box<Expr, Global>

idx: Box<Expr, Global>

`SortBy`

Fields

expr: Box<Expr, Global>

by: Vec<Expr, Global>

reverse: Vec<bool, Global>

`Ternary`

Fields

predicate: Box<Expr, Global>

truthy: Box<Expr, Global>

falsy: Box<Expr, Global>

A ternary operation if true then “foo” else “bar”

`Function`

Fields

input: Vec<Expr, Global>

function arguments

function: NoEq<Arc<dyn SeriesUdf + 'static>>

function to apply

output_type: NoEq<Arc<dyn FunctionOutputField + 'static>>

output dtype of the function

options: FunctionOptions

`Shift`

Fields

input: Box<Expr, Global>

periods: i64

`Reverse(Box<Expr, Global>)`

`Duplicated(Box<Expr, Global>)`

`IsUnique(Box<Expr, Global>)`

`Explode(Box<Expr, Global>)`

`Filter`

Fields

input: Box<Expr, Global>

by: Box<Expr, Global>

`Window`

Fields

function: Box<Expr, Global>

Also has the input. i.e. avg(“foo”)

partition_by: Vec<Expr, Global>

order_by: Option<Box<Expr, Global>>

options: WindowOptions

See postgres window functions

`Wildcard`

`Slice`

Fields

input: Box<Expr, Global>

offset: Box<Expr, Global>

length is not yet known so we accept negative offsets

length: Box<Expr, Global>

`Exclude(Box<Expr, Global>, Vec<Excluded, Global>)`

Can be used in a select statement to exclude a column from selection

`KeepName(Box<Expr, Global>)`

Set root name as Alias

`RenameAlias`

Fields

function: NoEq<Arc<dyn RenameAliasFn + 'static>>

expr: Box<Expr, Global>

`Count`

Special case that does not need columns

`Nth(i64)`

Take the nth column in the DataFrame

Implementations

source

impl Expr

source

pub fn eq(self, other: Expr) -> Expr

Compare Expr with other Expr on equality

source

pub fn neq(self, other: Expr) -> Expr

Compare Expr with other Expr on non-equality

source

pub fn lt(self, other: Expr) -> Expr

Check if Expr < Expr

source

pub fn gt(self, other: Expr) -> Expr

Check if Expr > Expr

source

pub fn gt_eq(self, other: Expr) -> Expr

Check if Expr >= Expr

source

pub fn lt_eq(self, other: Expr) -> Expr

Check if Expr <= Expr

source

pub fn not(self) -> Expr

Negate Expr

source

pub fn alias(self, name: &str) -> Expr

Rename Column.

source

pub fn is_null(self) -> Expr

Run is_null operation on Expr.

source

pub fn is_not_null(self) -> Expr

Run is_not_null operation on Expr.

source

pub fn drop_nulls(self) -> Expr

Drop null values

source

pub fn drop_nans(self) -> Expr

Drop NaN values

source

pub fn min(self) -> Expr

Reduce groups to minimal value.

source

pub fn max(self) -> Expr

Reduce groups to maximum value.

source

pub fn mean(self) -> Expr

Reduce groups to the mean value.

source

pub fn median(self) -> Expr

Reduce groups to the median value.

source

pub fn sum(self) -> Expr

Reduce groups to the sum of all the values.

source

pub fn n_unique(self) -> Expr

Get the number of unique values in the groups.

source

pub fn first(self) -> Expr

Get the first value in the group.

source

pub fn last(self) -> Expr

Get the last value in the group.

source

pub fn list(self) -> Expr

Aggregate the group to a Series

source

pub fn quantile(self, quantile: f64, interpol: QuantileInterpolOptions) -> Expr

Compute the quantile per group.

source

pub fn agg_groups(self) -> Expr

Get the group indexes of the group by operation.

source

pub fn flatten(self) -> Expr

Alias for explode

source

pub fn explode(self) -> Expr

Explode the utf8/list column

source

pub fn slice(self, offset: Expr, length: Expr) -> Expr

Slice the Series. offset may be negative.

source

pub fn head(self, length: Option<usize>) -> Expr

Get the first n elements of the Expr result

source

pub fn tail(self, length: Option<usize>) -> Expr

Get the last n elements of the Expr result

source

pub fn unique(self) -> Expr

Get unique values of this expression.

source

pub fn unique_stable(self) -> Expr

Get unique values of this expression, while maintaining order. This requires more work than Expr::unique.

source

pub fn arg_unique(self) -> Expr

Get the first index of unique values of this expression.

source

pub fn arg_min(self) -> Expr

Get the index value that has the minimum value

source

pub fn arg_max(self) -> Expr

Get the index value that has the maximum value

source

pub fn arg_sort(self, reverse: bool) -> Expr

Get the index values that would sort this expression.

source

pub fn strict_cast(self, data_type: DataType) -> Expr

Cast expression to another data type. Throws an error if the conversion overflows.

source

pub fn cast(self, data_type: DataType) -> Expr

Cast expression to another data type.

source

pub fn take(self, idx: Expr) -> Expr

Take the values by idx.

source

pub fn sort(self, reverse: bool) -> Expr

Sort in increasing order. See the eager implementation.

source

pub fn sort_with(self, options: SortOptions) -> Expr

Sort with given options.

source

pub fn reverse(self) -> Expr

Reverse column

source

pub fn map<F>(
 self,
 function: F,
 output_type: NoEq<Arc<dyn FunctionOutputField + 'static>>
) -> Expr where
 F: 'static + Fn(Series) -> Result<Series, PolarsError> + Send + Sync,

Apply a function/closure once the logical plan gets executed.

This function is very similar to Expr::apply, but differs in how it handles aggregations.

map should be used for operations that are independent of groups, e.g. multiply * 2, or raise to the power
apply should be used for operations that work on a group of data. e.g. sum, count, etc.

It is the responsibility of the caller that the schema is correct by giving the correct output_type. If None is given, the output type of the input expr is used.

source

pub fn map_many<F>(
 self,
 function: F,
 arguments: &[Expr ],
 output_type: NoEq<Arc<dyn FunctionOutputField + 'static>>
) -> Expr where
 F: 'static + Fn(&mut [Series ]) -> Result<Series, PolarsError> + Send + Sync,

Apply a function/closure with many arguments once the logical plan gets executed.

See the Expr::map function for the differences between map and apply.

source

pub fn map_list<F>(
 self,
 function: F,
 output_type: NoEq<Arc<dyn FunctionOutputField + 'static>>
) -> Expr where
 F: 'static + Fn(Series) -> Result<Series, PolarsError> + Send + Sync,

Apply a function/closure once the logical plan gets executed.

This function is very similar to apply, but differs in how it handles aggregations.

map should be used for operations that are independent of groups, e.g. multiply * 2, or raise to the power
apply should be used for operations that work on a group of data. e.g. sum, count, etc.
map_list should be used when the function expects a list aggregated series.

source

pub fn function_with_options<F>(
 self,
 function: F,
 output_type: NoEq<Arc<dyn FunctionOutputField + 'static>>,
 options: FunctionOptions
) -> Expr where
 F: 'static + Fn(Series) -> Result<Series, PolarsError> + Send + Sync,

A function that cannot be expressed with map or apply and requires extra settings.

source

pub fn apply<F>(
 self,
 function: F,
 output_type: NoEq<Arc<dyn FunctionOutputField + 'static>>
) -> Expr where
 F: 'static + Fn(Series) -> Result<Series, PolarsError> + Send + Sync,

Apply a function/closure over the groups. This should only be used in a groupby aggregation.

It is the responsibility of the caller that the schema is correct by giving the correct output_type. If None is given, the output type of the input expr is used.

The difference with map is that apply will create a separate Series per group.

map should be used for operations that are independent of groups, e.g. multiply * 2, or raise to the power
apply should be used for operations that work on a group of data. e.g. sum, count, etc.

source

pub fn apply_many<F>(
 self,
 function: F,
 arguments: &[Expr ],
 output_type: NoEq<Arc<dyn FunctionOutputField + 'static>>
) -> Expr where
 F: 'static + Fn(&mut [Series ]) -> Result<Series, PolarsError> + Send + Sync,

Apply a function/closure over the groups with many arguments. This should only be used in a groupby aggregation.

See the Expr::apply function for the differences between map and apply.

source

pub fn is_finite(self) -> Expr

Get mask of finite values if dtype is Float

source

pub fn is_infinite(self) -> Expr

Get mask of infinite values if dtype is Float

source

pub fn is_nan(self) -> Expr

Get mask of NaN values if dtype is Float

source

pub fn is_not_nan(self) -> Expr

Get inverse mask of NaN values if dtype is Float

source

pub fn shift(self, periods: i64) -> Expr

Shift the values in the array by some period. See the eager implementation.

source

pub fn shift_and_fill(self, periods: i64, fill_value: Expr) -> Expr

Shift the values in the array by some period and fill the resulting empty values.

source

pub fn cumsum(self, reverse: bool) -> Expr

This is supported on crate feature cum_agg only.

Get an array with the cumulative sum computed at every element

source

pub fn cumprod(self, reverse: bool) -> Expr

This is supported on crate feature cum_agg only.

Get an array with the cumulative product computed at every element

source

pub fn cummin(self, reverse: bool) -> Expr

This is supported on crate feature cum_agg only.

Get an array with the cumulative min computed at every element

source

pub fn cummax(self, reverse: bool) -> Expr

This is supported on crate feature cum_agg only.

Get an array with the cumulative max computed at every element

source

pub fn product(self) -> Expr

This is supported on crate feature product only.

Get the product aggregation of an expression

source

pub fn backward_fill(self) -> Expr

Fill missing value with next non-null.

source

pub fn forward_fill(self) -> Expr

Fill missing value with previous non-null.

source

pub fn round(self, decimals: u32) -> Expr

This is supported on crate feature round_series only.

Round underlying floating point array to given decimal numbers.

source

pub fn floor(self) -> Expr

This is supported on crate feature round_series only.

Floor underlying floating point array to the highest integers smaller than or equal to the float value.

source

pub fn ceil(self) -> Expr

This is supported on crate feature round_series only.

Ceil underlying floating point array to the lowest integers greater than or equal to the float value.

source

pub fn clip(self, min: f64, max: f64) -> Expr

This is supported on crate feature round_series only.

Clip underlying values to a set boundary.

source

pub fn abs(self) -> Expr

This is supported on crate feature abs only.

Convert all values to their absolute/positive value.

source

pub fn over<E>(self, partition_by: E) -> Expr where
E: AsRef<[Expr ]>,

Apply window function over a subgroup. This is similar to a groupby + aggregation + self join. Or similar to window functions in Postgres.

pub fn fill_null(self, fill_value: Expr) -> Expr

Replace the null values by a value.

source

pub fn fill_nan(self, fill_value: Expr) -> Expr

Replace the floating point NaN values by a value.

source

pub fn count(self) -> Expr

Count the values of the Series, or get the counts of the group by operation.

source

pub fn std(self) -> Expr

Standard deviation of the values of the Series

source

pub fn var(self) -> Expr

Variance of the values of the Series

source

pub fn is_duplicated(self) -> Expr

Get a mask of duplicated values

source

pub fn is_unique(self) -> Expr

Get a mask of unique values

source

pub fn and(self, expr: Expr) -> Expr

and operation

source

pub fn xor(self, expr: Expr) -> Expr

source

pub fn or(self, expr: Expr) -> Expr

or operation

source

pub fn pow(self, exponent: f64) -> Expr

Raise expression to the power exponent

source

pub fn filter(self, predicate: Expr) -> Expr

Filter a single column. Should be used in aggregation context. If you want to filter on a DataFrame level, use LazyFrame::filter

source

pub fn is_in(self, other: Expr) -> Expr

This is supported on crate feature is_in only.

Check if the values of the left expression are in the lists of the right expr.

source

pub fn sort_by<E, R>(self, by: E, reverse: R) -> Expr where
E: AsRef<[Expr ]>,
R: AsRef<[bool ]>,

Sort this column by the ordering of another column. Can also be used in a groupby context to sort the groups.

source

pub fn repeat_by(self, by: Expr) -> Expr

This is supported on crate feature repeat_by only.

source

pub fn is_first(self) -> Expr

This is supported on crate feature is_first only.

Get a mask of the first unique value.

source

pub fn mode(self) -> Expr

This is supported on crate feature mode only.

Compute the mode(s) of this column. This is the most occurring value.

source

pub fn keep_name(self) -> Expr

Keep the original root name

use polars_core::prelude::*;
use polars_lazy::prelude::*;

fn example(df: LazyFrame) -> LazyFrame {
    df.select([
// even though the alias yields a different column name,
// `keep_name` will make sure that the original column name is used
        col("*").alias("foo").keep_name()
])
}

source

pub fn map_alias<F>(self, function: F) -> Expr where
F: 'static + Fn(&str) -> String + Send + Sync,

Define an alias by mapping a function over the original root column name.

source

pub fn suffix(self, suffix: &str) -> Expr

Add a suffix to the root column name.

source

pub fn prefix(self, prefix: &str) -> Expr

Add a prefix to the root column name.

source

pub fn exclude(self, columns: impl IntoVec<String>) -> Expr

Exclude a column from a wildcard/regex selection.

You may also use regexes in the exclude as long as they start with ^ and end with $.

pub fn exclude_dtype<D>(self, dtypes: D) -> Expr where
D: AsRef<[DataType ]>,

source