Enum polars_lazy::dsl::Expr
source · [−]pub enum Expr {
Show 30 variants
Alias(Box<Expr>, Arc<str>),
Column(Arc<str>),
Columns(Vec<String>),
DtypeColumn(Vec<DataType>),
Literal(LiteralValue),
BinaryExpr {
left: Box<Expr>,
op: Operator,
right: Box<Expr>,
},
Not(Box<Expr>),
IsNotNull(Box<Expr>),
IsNull(Box<Expr>),
Cast {
expr: Box<Expr>,
data_type: DataType,
strict: bool,
},
Sort {
expr: Box<Expr>,
options: SortOptions,
},
Take {
expr: Box<Expr>,
idx: Box<Expr>,
},
SortBy {
expr: Box<Expr>,
by: Vec<Expr>,
reverse: Vec<bool>,
},
Agg(AggExpr),
Ternary {
predicate: Box<Expr>,
truthy: Box<Expr>,
falsy: Box<Expr>,
},
Function {
input: Vec<Expr>,
function: NoEq<Arc<dyn SeriesUdf>>,
output_type: GetOutput,
options: FunctionOptions,
},
Shift {
input: Box<Expr>,
periods: i64,
},
Reverse(Box<Expr>),
Duplicated(Box<Expr>),
IsUnique(Box<Expr>),
Explode(Box<Expr>),
Filter {
input: Box<Expr>,
by: Box<Expr>,
},
Window {
function: Box<Expr>,
partition_by: Vec<Expr>,
order_by: Option<Box<Expr>>,
options: WindowOptions,
},
Wildcard,
Slice {
input: Box<Expr>,
offset: Box<Expr>,
length: Box<Expr>,
},
Exclude(Box<Expr>, Vec<Excluded>),
KeepName(Box<Expr>),
RenameAlias {
function: NoEq<Arc<dyn RenameAliasFn>>,
expr: Box<Expr>,
},
Count,
Nth(i64),
}
Expand description
Queries consist of multiple expressions.
Variants
Alias(Box<Expr>, Arc<str>)
Column(Arc<str>)
Columns(Vec<String>)
DtypeColumn(Vec<DataType>)
Literal(LiteralValue)
BinaryExpr
Not(Box<Expr>)
IsNotNull(Box<Expr>)
IsNull(Box<Expr>)
Cast
Sort
Take
SortBy
Agg(AggExpr)
Ternary
A ternary operation if true then “foo” else “bar”
Function
Shift
Reverse(Box<Expr>)
Duplicated(Box<Expr>)
IsUnique(Box<Expr>)
Explode(Box<Expr>)
Filter
Window
Fields
options: WindowOptions
See postgres window functions
Wildcard
Slice
Fields
Exclude(Box<Expr>, Vec<Excluded>)
Can be used in a select statement to exclude a column from selection
KeepName(Box<Expr>)
Set root name as Alias
RenameAlias
Count
Special case that does not need columns
Nth(i64)
Take the nth column in the DataFrame
Implementations
sourceimpl Expr
impl Expr
sourcepub fn is_not_null(self) -> Self
pub fn is_not_null(self) -> Self
Run is_not_null operation on Expr
.
sourcepub fn drop_nulls(self) -> Self
pub fn drop_nulls(self) -> Self
Drop null values
sourcepub fn quantile(self, quantile: f64, interpol: QuantileInterpolOptions) -> Self
pub fn quantile(self, quantile: f64, interpol: QuantileInterpolOptions) -> Self
Compute the quantile per group.
sourcepub fn agg_groups(self) -> Self
pub fn agg_groups(self) -> Self
Get the group indexes of the group by operation.
sourcepub fn slice(self, offset: Expr, length: Expr) -> Self
pub fn slice(self, offset: Expr, length: Expr) -> Self
Slice the Series.
offset
may be negative.
sourcepub fn unique_stable(self) -> Self
pub fn unique_stable(self) -> Self
Get unique values of this expression, while maintaining order.
This requires more work than Expr::unique
.
sourcepub fn arg_unique(self) -> Self
pub fn arg_unique(self) -> Self
Get the first index of unique values of this expression.
sourcepub fn arg_sort(self, reverse: bool) -> Self
pub fn arg_sort(self, reverse: bool) -> Self
Get the index values that would sort this expression.
sourcepub fn strict_cast(self, data_type: DataType) -> Self
pub fn strict_cast(self, data_type: DataType) -> Self
Cast expression to another data type. Throws an error if the conversion overflows.
sourcepub fn sort(self, reverse: bool) -> Self
pub fn sort(self, reverse: bool) -> Self
Sort in increasing order. See the eager implementation.
sourcepub fn sort_with(self, options: SortOptions) -> Self
pub fn sort_with(self, options: SortOptions) -> Self
Sort with given options.
sourcepub fn map<F>(self, function: F, output_type: GetOutput) -> Self where
F: Fn(Series) -> Result<Series> + 'static + Send + Sync,
pub fn map<F>(self, function: F, output_type: GetOutput) -> Self where
F: Fn(Series) -> Result<Series> + 'static + Send + Sync,
Apply a function/closure once the logical plan gets executed.
This function is very similar to Expr::apply
, but differs in how it handles aggregations.
map should be used for operations that are independent of groups, e.g. multiply * 2, or raise to the power.
apply should be used for operations that work on a group of data, e.g. sum, count, etc.
It is the responsibility of the caller that the schema is correct by giving the correct output_type. If None is given, the output type of the input expr is used.
sourcepub fn map_many<F>(
self,
function: F,
arguments: &[Expr],
output_type: GetOutput
) -> Self where
F: Fn(&mut [Series]) -> Result<Series> + 'static + Send + Sync,
pub fn map_many<F>(
self,
function: F,
arguments: &[Expr],
output_type: GetOutput
) -> Self where
F: Fn(&mut [Series]) -> Result<Series> + 'static + Send + Sync,
sourcepub fn map_list<F>(self, function: F, output_type: GetOutput) -> Self where
F: Fn(Series) -> Result<Series> + 'static + Send + Sync,
pub fn map_list<F>(self, function: F, output_type: GetOutput) -> Self where
F: Fn(Series) -> Result<Series> + 'static + Send + Sync,
Apply a function/closure once the logical plan gets executed.
This function is very similar to apply, but differs in how it handles aggregations.
map should be used for operations that are independent of groups, e.g. multiply * 2, or raise to the power.
apply should be used for operations that work on a group of data, e.g. sum, count, etc.
map_list should be used when the function expects a list aggregated series.
sourcepub fn function_with_options<F>(
self,
function: F,
output_type: GetOutput,
options: FunctionOptions
) -> Self where
F: Fn(Series) -> Result<Series> + 'static + Send + Sync,
pub fn function_with_options<F>(
self,
function: F,
output_type: GetOutput,
options: FunctionOptions
) -> Self where
F: Fn(Series) -> Result<Series> + 'static + Send + Sync,
A function that cannot be expressed with map
or apply
and requires extra settings.
sourcepub fn apply<F>(self, function: F, output_type: GetOutput) -> Self where
F: Fn(Series) -> Result<Series> + 'static + Send + Sync,
pub fn apply<F>(self, function: F, output_type: GetOutput) -> Self where
F: Fn(Series) -> Result<Series> + 'static + Send + Sync,
Apply a function/closure over the groups. This should only be used in a groupby aggregation.
It is the responsibility of the caller that the schema is correct by giving the correct output_type. If None is given, the output type of the input expr is used.
The difference with map is that apply will create a separate Series per group.
map should be used for operations that are independent of groups, e.g. multiply * 2, or raise to the power.
apply should be used for operations that work on a group of data, e.g. sum, count, etc.
sourcepub fn apply_many<F>(
self,
function: F,
arguments: &[Expr],
output_type: GetOutput
) -> Self where
F: Fn(&mut [Series]) -> Result<Series> + 'static + Send + Sync,
pub fn apply_many<F>(
self,
function: F,
arguments: &[Expr],
output_type: GetOutput
) -> Self where
F: Fn(&mut [Series]) -> Result<Series> + 'static + Send + Sync,
Apply a function/closure over the groups with many arguments. This should only be used in a groupby aggregation.
See the Expr::apply
function for the differences between map
and apply
.
sourcepub fn is_infinite(self) -> Self
pub fn is_infinite(self) -> Self
Get mask of infinite values if dtype is Float
sourcepub fn is_not_nan(self) -> Self
pub fn is_not_nan(self) -> Self
Get inverse mask of NaN values if dtype is Float
sourcepub fn shift(self, periods: i64) -> Self
pub fn shift(self, periods: i64) -> Self
Shift the values in the array by some period. See the eager implementation.
sourcepub fn shift_and_fill(self, periods: i64, fill_value: Expr) -> Self
pub fn shift_and_fill(self, periods: i64, fill_value: Expr) -> Self
Shift the values in the array by some period and fill the resulting empty values.
sourcepub fn cumsum(self, reverse: bool) -> Self
pub fn cumsum(self, reverse: bool) -> Self
Get an array with the cumulative sum computed at every element
sourcepub fn cumprod(self, reverse: bool) -> Self
pub fn cumprod(self, reverse: bool) -> Self
Get an array with the cumulative product computed at every element
sourcepub fn cummin(self, reverse: bool) -> Self
pub fn cummin(self, reverse: bool) -> Self
Get an array with the cumulative min computed at every element
sourcepub fn cummax(self, reverse: bool) -> Self
pub fn cummax(self, reverse: bool) -> Self
Get an array with the cumulative max computed at every element
sourcepub fn backward_fill(self) -> Self
pub fn backward_fill(self) -> Self
Fill missing value with next non-null.
sourcepub fn forward_fill(self) -> Self
pub fn forward_fill(self) -> Self
Fill missing value with previous non-null.
sourcepub fn round(self, decimals: u32) -> Self
pub fn round(self, decimals: u32) -> Self
Round underlying floating point array to the given number of decimals.
sourcepub fn floor(self) -> Self
pub fn floor(self) -> Self
Floor underlying floating point array to the highest integers smaller or equal to the float value.
sourcepub fn ceil(self) -> Self
pub fn ceil(self) -> Self
Ceil underlying floating point array to the lowest integers greater or equal to the float value.
sourcepub fn over<E: AsRef<[Expr]>>(self, partition_by: E) -> Self
pub fn over<E: AsRef<[Expr]>>(self, partition_by: E) -> Self
Apply window function over a subgroup. This is similar to a groupby + aggregation + self join. Or similar to window functions in Postgres.
Example
#[macro_use] extern crate polars_core;
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn example() -> Result<()> {
let df = df! {
"groups" => &[1, 1, 2, 2, 1, 2, 3, 3, 1],
"values" => &[1, 2, 3, 4, 5, 6, 7, 8, 8]
}?;
let out = df
.lazy()
.select(&[
col("groups"),
sum("values").over([col("groups")]),
])
.collect()?;
dbg!(&out);
Ok(())
}
Outputs:
╭────────┬────────╮
│ groups ┆ values │
│ --- ┆ --- │
│ i32 ┆ i32 │
╞════════╪════════╡
│ 1 ┆ 16 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 1 ┆ 16 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 2 ┆ 13 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 2 ┆ 13 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ ... ┆ ... │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 1 ┆ 16 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 2 ┆ 13 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 3 ┆ 15 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 3 ┆ 15 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ 1 ┆ 16 │
╰────────┴────────╯
sourcepub fn fill_nan(self, fill_value: Expr) -> Self
pub fn fill_nan(self, fill_value: Expr) -> Self
Replace the floating point NaN
values by a value.
sourcepub fn count(self) -> Self
pub fn count(self) -> Self
Count the values of the Series, or get the counts of the group by operation.
sourcepub fn is_duplicated(self) -> Self
pub fn is_duplicated(self) -> Self
Get a mask of duplicated values
pub fn xor(self, expr: Expr) -> Self
sourcepub fn filter(self, predicate: Expr) -> Self
pub fn filter(self, predicate: Expr) -> Self
Filter a single column. Should be used in aggregation context. If you want to filter on a DataFrame level, use LazyFrame::filter.
sourcepub fn is_in(self, other: Expr) -> Self
pub fn is_in(self, other: Expr) -> Self
Check if the values of the left expression are in the lists of the right expr.
sourcepub fn sort_by<E: AsRef<[Expr]>, R: AsRef<[bool]>>(
self,
by: E,
reverse: R
) -> Expr
pub fn sort_by<E: AsRef<[Expr]>, R: AsRef<[bool]>>(
self,
by: E,
reverse: R
) -> Expr
Sort this column by the ordering of another column. Can also be used in a groupby context to sort the groups.
pub fn repeat_by(self, by: Expr) -> Expr
sourcepub fn mode(self) -> Expr
pub fn mode(self) -> Expr
Compute the mode(s) of this column. This is the most frequently occurring value.
sourcepub fn keep_name(self) -> Expr
pub fn keep_name(self) -> Expr
Keep the original root name
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn example(df: LazyFrame) -> LazyFrame {
df.select([
// even though the alias yields a different column name,
// `keep_name` will make sure that the original column name is used
col("*").alias("foo").keep_name()
])
}
sourcepub fn map_alias<F>(self, function: F) -> Expr where
F: Fn(&str) -> String + 'static + Send + Sync,
pub fn map_alias<F>(self, function: F) -> Expr where
F: Fn(&str) -> String + 'static + Send + Sync,
Define an alias by mapping a function over the original root column name.
sourcepub fn exclude(self, columns: impl IntoVec<String>) -> Expr
pub fn exclude(self, columns: impl IntoVec<String>) -> Expr
Exclude a column from a wildcard/regex selection.
You may also use regexes in the exclude as long as they start with ^ and end with $.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
// Select all columns except foo.
fn example(df: DataFrame) -> LazyFrame {
df.lazy()
.select(&[
col("*").exclude(&["foo"])
])
}
pub fn exclude_dtype<D: AsRef<[DataType]>>(self, dtypes: D) -> Expr
pub fn interpolate(self) -> Expr
sourcepub fn rolling_min(self, options: RollingOptions) -> Expr
pub fn rolling_min(self, options: RollingOptions) -> Expr
Apply a rolling min. See: ChunkedArray::rolling_min
sourcepub fn rolling_max(self, options: RollingOptions) -> Expr
pub fn rolling_max(self, options: RollingOptions) -> Expr
Apply a rolling max. See: ChunkedArray::rolling_max
sourcepub fn rolling_mean(self, options: RollingOptions) -> Expr
pub fn rolling_mean(self, options: RollingOptions) -> Expr
Apply a rolling mean. See: ChunkedArray::rolling_mean
sourcepub fn rolling_sum(self, options: RollingOptions) -> Expr
pub fn rolling_sum(self, options: RollingOptions) -> Expr
Apply a rolling sum. See: ChunkedArray::rolling_sum
sourcepub fn rolling_median(self, options: RollingOptions) -> Expr
pub fn rolling_median(self, options: RollingOptions) -> Expr
Apply a rolling median. See: ChunkedArray::rolling_median
sourcepub fn rolling_quantile(
self,
quantile: f64,
interpolation: QuantileInterpolOptions,
options: RollingOptions
) -> Expr
pub fn rolling_quantile(
self,
quantile: f64,
interpolation: QuantileInterpolOptions,
options: RollingOptions
) -> Expr
Apply a rolling quantile. See: ChunkedArray::rolling_quantile
sourcepub fn rolling_var(self, options: RollingOptions) -> Expr
pub fn rolling_var(self, options: RollingOptions) -> Expr
Apply a rolling variance
sourcepub fn rolling_std(self, options: RollingOptions) -> Expr
pub fn rolling_std(self, options: RollingOptions) -> Expr
Apply a rolling std-dev
sourcepub fn rolling_apply(
self,
f: Arc<dyn Fn(&Series) -> Series + Send + Sync>,
output_type: GetOutput,
options: RollingOptions
) -> Expr
pub fn rolling_apply(
self,
f: Arc<dyn Fn(&Series) -> Series + Send + Sync>,
output_type: GetOutput,
options: RollingOptions
) -> Expr
Apply a custom function over a rolling/moving window of the array. This has quite some dynamic dispatch, so prefer rolling_min, max, mean, sum over this.
sourcepub fn rolling_apply_float<F>(self, window_size: usize, f: F) -> Expr where
F: 'static + Fn(&Float64Chunked) -> Option<f64> + Send + Sync + Copy,
pub fn rolling_apply_float<F>(self, window_size: usize, f: F) -> Expr where
F: 'static + Fn(&Float64Chunked) -> Option<f64> + Send + Sync + Copy,
Apply a custom function over a rolling/moving window of the array. Prefer this over rolling_apply in case of floating point numbers as this is faster. This has quite some dynamic dispatch, so prefer rolling_min, max, mean, sum over this.
pub fn rank(self, options: RankOptions) -> Expr
pub fn diff(self, n: usize, null_behavior: NullBehavior) -> Expr
sourcepub fn upper_bound(self) -> Expr
pub fn upper_bound(self) -> Expr
Get the maximal value that can be held by this dtype.
sourcepub fn lower_bound(self) -> Expr
pub fn lower_bound(self) -> Expr
Get the minimal value that can be held by this dtype.
pub fn reshape(self, dims: &[i64]) -> Self
pub fn shuffle(self, seed: u64) -> Self
pub fn sample_frac(self, frac: f64, with_replacement: bool, seed: u64) -> Self
sourcepub fn to_float(self) -> Self
pub fn to_float(self) -> Self
This is useful if an apply
function needs a floating point type.
Because this cast is done on a map
level, it will be faster.
pub fn str(self) -> StringNameSpace
pub fn dt(self) -> DateLikeNameSpace
pub fn arr(self) -> ListNameSpace
pub fn cat(self) -> CategoricalNameSpace
pub fn struct_(self) -> StructNameSpace
Trait Implementations
sourceimpl<'a> IntoIterator for &'a Expr
impl<'a> IntoIterator for &'a Expr
impl StructuralPartialEq for Expr
Auto Trait Implementations
impl !RefUnwindSafe for Expr
impl Send for Expr
impl Sync for Expr
impl Unpin for Expr
impl !UnwindSafe for Expr
Blanket Implementations
sourceimpl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
const: unstable · sourcefn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
sourceimpl<T> Pointable for T
impl<T> Pointable for T
sourceimpl<T> ToOwned for T where
T: Clone,
impl<T> ToOwned for T where
T: Clone,
type Owned = T
type Owned = T
The resulting type after obtaining ownership.
sourcefn clone_into(&self, target: &mut T)
fn clone_into(&self, target: &mut T)
(toowned_clone_into)
Uses borrowed data to replace owned data, usually by cloning. Read more