pub struct GroupBy<'df> { /* private fields */ }
Returned by a groupby operation on a DataFrame. This struct supports several aggregations.
Unless stated otherwise, the examples in this struct are performed on the following DataFrame:
use polars_core::prelude::*;
let dates = &[
"2020-08-21",
"2020-08-21",
"2020-08-22",
"2020-08-23",
"2020-08-22",
];
// date format
let fmt = "%Y-%m-%d";
// create date series
let s0 = DateChunked::parse_from_str_slice("date", dates, fmt)
        .into_series();
// create temperature series
let s1 = Series::new("temp", [20, 10, 7, 9, 1]);
// create rain series
let s2 = Series::new("rain", [0.2, 0.1, 0.3, 0.1, 0.01]);
// create a new DataFrame
let df = DataFrame::new(vec![s0, s1, s2]).unwrap();
println!("{:?}", df);
Outputs:
+------------+------+------+
| date       | temp | rain |
| ---        | ---  | ---  |
| Date       | i32  | f64  |
+============+======+======+
| 2020-08-21 | 20   | 0.2  |
+------------+------+------+
| 2020-08-21 | 10   | 0.1  |
+------------+------+------+
| 2020-08-22 | 7    | 0.3  |
+------------+------+------+
| 2020-08-23 | 9    | 0.1  |
+------------+------+------+
| 2020-08-22 | 1    | 0.01 |
+------------+------+------+
Implementations
sourceimpl<'df> GroupBy<'df>
 
impl<'df> GroupBy<'df>
sourcepub fn pivot(
    &mut self, 
    columns: impl IntoVec<String>, 
    values: impl IntoVec<String>
) -> Pivot<'_>
 This is supported on crate feature rows only.
pub fn pivot(
    &mut self, 
    columns: impl IntoVec<String>, 
    values: impl IntoVec<String>
) -> Pivot<'_>
This is supported on crate feature rows only.
Pivot a column of the current DataFrame and perform one of the following aggregations:
- first
 - last
 - sum
 - min
 - max
 - mean
 - median
 
The pivot operation consists of a group by on one or multiple columns (these will be the new y-axis), a column that will be pivoted (this will be the new x-axis), and an aggregation.
Panics
If the values column is not a numerical type, the code will panic.
Example
use polars_core::prelude::*;
use polars_core::df;
fn example() -> Result<DataFrame> {
    let df = df!["foo" => ["A", "A", "B", "B", "C"],
        "N" => [1, 2, 2, 4, 2],
        "bar" => ["k", "l", "m", "n", "o"]
        ]?;
    df.groupby(["foo"])?
    .pivot(["bar"], ["N"])
    .first()
}
Transforms:
+-----+-----+-----+
| foo | N   | bar |
| --- | --- | --- |
| str | i32 | str |
+=====+=====+=====+
| "A" | 1   | "k" |
+-----+-----+-----+
| "A" | 2   | "l" |
+-----+-----+-----+
| "B" | 2   | "m" |
+-----+-----+-----+
| "B" | 4   | "n" |
+-----+-----+-----+
| "C" | 2   | "o" |
+-----+-----+-----+
Into:
+-----+------+------+------+------+------+
| foo | o    | n    | m    | l    | k    |
| --- | ---  | ---  | ---  | ---  | ---  |
| str | i32  | i32  | i32  | i32  | i32  |
+=====+======+======+======+======+======+
| "A" | null | null | null | 2    | 1    |
+-----+------+------+------+------+------+
| "B" | null | 4    | 2    | null | null |
+-----+------+------+------+------+------+
| "C" | 2    | null | null | null | null |
+-----+------+------+------+------+------+
sourceimpl<'df> GroupBy<'df>
 
impl<'df> GroupBy<'df>
pub fn new(
    df: &'df DataFrame, 
    by: Vec<Series, Global>, 
    groups: GroupsProxy, 
    selected_agg: Option<Vec<String, Global>>
) -> GroupBy<'df>
sourcepub fn select<I, S>(self, selection: I) -> GroupBy<'df> where
    I: IntoIterator<Item = S>,
    S: AsRef<str>, 
 
pub fn select<I, S>(self, selection: I) -> GroupBy<'df> where
    I: IntoIterator<Item = S>,
    S: AsRef<str>, 
Select the column(s) that should be aggregated. You can select a single column or a slice of columns.
Note that making a selection with this method is not required. If you skip it all columns (except for the keys) will be selected for aggregation.
sourcepub fn get_groups(&self) -> &GroupsProxy
 
pub fn get_groups(&self) -> &GroupsProxy
Get the internal representation of the GroupBy operation.
The Vec returned contains:
(first_idx, Vec<indexes>), where the second value in the tuple is a vector with all matching indexes.
sourcepub fn get_groups_mut(&mut self) -> &mut GroupsProxy
 
pub fn get_groups_mut(&mut self) -> &mut GroupsProxy
Get the internal representation of the GroupBy operation.
The Vec returned contains:
(first_idx, Vec<indexes>), where the second value in the tuple is a vector with all matching indexes.
pub fn take_groups(self) -> GroupsProxy
pub fn keys(&self) -> Vec<Series, Global>
Notable traits for Vec<u8, A>
impl<A> Write for Vec<u8, A> where
    A: Allocator,
sourcepub fn mean(&self) -> Result<DataFrame, PolarsError>
 
pub fn mean(&self) -> Result<DataFrame, PolarsError>
Aggregate grouped series and compute the mean per group.
Example
fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby(["date"])?.select(&["temp", "rain"]).mean()
}
Returns:
+------------+-----------+-----------+
| date       | temp_mean | rain_mean |
| ---        | ---       | ---       |
| Date       | f64       | f64       |
+============+===========+===========+
| 2020-08-23 | 9         | 0.1       |
+------------+-----------+-----------+
| 2020-08-22 | 4         | 0.155     |
+------------+-----------+-----------+
| 2020-08-21 | 15        | 0.15      |
+------------+-----------+-----------+
sourcepub fn sum(&self) -> Result<DataFrame, PolarsError>
 
pub fn sum(&self) -> Result<DataFrame, PolarsError>
Aggregate grouped series and compute the sum per group.
Example
fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby(["date"])?.select(["temp"]).sum()
}
Returns:
+------------+----------+
| date       | temp_sum |
| ---        | ---      |
| Date       | i32      |
+============+==========+
| 2020-08-23 | 9        |
+------------+----------+
| 2020-08-22 | 8        |
+------------+----------+
| 2020-08-21 | 30       |
+------------+----------+
sourcepub fn min(&self) -> Result<DataFrame, PolarsError>
 
pub fn min(&self) -> Result<DataFrame, PolarsError>
Aggregate grouped series and compute the minimal value per group.
Example
fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby(["date"])?.select(["temp"]).min()
}
Returns:
+------------+----------+
| date       | temp_min |
| ---        | ---      |
| Date       | i32      |
+============+==========+
| 2020-08-23 | 9        |
+------------+----------+
| 2020-08-22 | 1        |
+------------+----------+
| 2020-08-21 | 10       |
+------------+----------+
sourcepub fn max(&self) -> Result<DataFrame, PolarsError>
 
pub fn max(&self) -> Result<DataFrame, PolarsError>
Aggregate grouped series and compute the maximum value per group.
Example
fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby(["date"])?.select(["temp"]).max()
}
Returns:
+------------+----------+
| date       | temp_max |
| ---        | ---      |
| Date       | i32      |
+============+==========+
| 2020-08-23 | 9        |
+------------+----------+
| 2020-08-22 | 7        |
+------------+----------+
| 2020-08-21 | 20       |
+------------+----------+
sourcepub fn first(&self) -> Result<DataFrame, PolarsError>
 
pub fn first(&self) -> Result<DataFrame, PolarsError>
Aggregate grouped Series and find the first value per group.
Example
fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby(["date"])?.select(["temp"]).first()
}
Returns:
+------------+------------+
| date       | temp_first |
| ---        | ---        |
| Date       | i32        |
+============+============+
| 2020-08-23 | 9          |
+------------+------------+
| 2020-08-22 | 7          |
+------------+------------+
| 2020-08-21 | 20         |
+------------+------------+
sourcepub fn last(&self) -> Result<DataFrame, PolarsError>
 
pub fn last(&self) -> Result<DataFrame, PolarsError>
Aggregate grouped Series and return the last value per group.
Example
fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby(["date"])?.select(["temp"]).last()
}
Returns:
+------------+------------+
| date       | temp_last  |
| ---        | ---        |
| Date       | i32        |
+============+============+
| 2020-08-23 | 9          |
+------------+------------+
| 2020-08-22 | 1          |
+------------+------------+
| 2020-08-21 | 10         |
+------------+------------+
sourcepub fn n_unique(&self) -> Result<DataFrame, PolarsError>
 
pub fn n_unique(&self) -> Result<DataFrame, PolarsError>
Aggregate grouped Series by counting the number of unique values.
Example
fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby(["date"])?.select(["temp"]).n_unique()
}
Returns:
+------------+---------------+
| date       | temp_n_unique |
| ---        | ---           |
| Date       | u32           |
+============+===============+
| 2020-08-23 | 1             |
+------------+---------------+
| 2020-08-22 | 2             |
+------------+---------------+
| 2020-08-21 | 2             |
+------------+---------------+
sourcepub fn quantile(
    &self, 
    quantile: f64, 
    interpol: QuantileInterpolOptions
) -> Result<DataFrame, PolarsError>
 
pub fn quantile(
    &self, 
    quantile: f64, 
    interpol: QuantileInterpolOptions
) -> Result<DataFrame, PolarsError>
Aggregate grouped Series and determine the quantile per group.
Example
fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby(["date"])?.select(["temp"]).quantile(0.2, QuantileInterpolOptions::default())
}
sourcepub fn median(&self) -> Result<DataFrame, PolarsError>
 
pub fn median(&self) -> Result<DataFrame, PolarsError>
Aggregate grouped Series and determine the median per group.
Example
fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby(["date"])?.select(["temp"]).median()
}
sourcepub fn var(&self) -> Result<DataFrame, PolarsError>
 
pub fn var(&self) -> Result<DataFrame, PolarsError>
Aggregate grouped Series and determine the variance per group.
sourcepub fn std(&self) -> Result<DataFrame, PolarsError>
 
pub fn std(&self) -> Result<DataFrame, PolarsError>
Aggregate grouped Series and determine the standard deviation per group.
sourcepub fn count(&self) -> Result<DataFrame, PolarsError>
 
pub fn count(&self) -> Result<DataFrame, PolarsError>
Aggregate grouped series and compute the number of values per group.
Example
fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby(["date"])?.select(["temp"]).count()
}
Returns:
+------------+------------+
| date       | temp_count |
| ---        | ---        |
| Date       | u32        |
+============+============+
| 2020-08-23 | 1          |
+------------+------------+
| 2020-08-22 | 2          |
+------------+------------+
| 2020-08-21 | 2          |
+------------+------------+
sourcepub fn groups(&self) -> Result<DataFrame, PolarsError>
 
pub fn groups(&self) -> Result<DataFrame, PolarsError>
Get the groupby group indexes.
Example
fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby(["date"])?.groups()
}
Returns:
+--------------+------------+
| date         | groups     |
| ---          | ---        |
| Date(days)   | list [u32] |
+==============+============+
| 2020-08-23   | "[3]"      |
+--------------+------------+
| 2020-08-22   | "[2, 4]"   |
+--------------+------------+
| 2020-08-21   | "[0, 1]"   |
+--------------+------------+
sourcepub fn agg<Column, S, Slice>(
    &self, 
    column_to_agg: &[(Column, Slice)]
) -> Result<DataFrame, PolarsError> where
    S: AsRef<str>,
    Slice: AsRef<[S]>,
    Column: AsRef<str>, 
 
pub fn agg<Column, S, Slice>(
    &self, 
    column_to_agg: &[(Column, Slice)]
) -> Result<DataFrame, PolarsError> where
    S: AsRef<str>,
    Slice: AsRef<[S]>,
    Column: AsRef<str>, 
Combine different aggregations on columns
Operations
- count
- first
- last
- sum
- min
- max
- mean
- median
- n_unique
 
Example
fn example(df: DataFrame) -> Result<DataFrame> {
    df.groupby(["date"])?.agg(&[("temp", &["n_unique", "sum", "min"])])
}
Returns:
+--------------+---------------+----------+----------+
| date         | temp_n_unique | temp_sum | temp_min |
| ---          | ---           | ---      | ---      |
| Date(days)   | u32           | i32      | i32      |
+==============+===============+==========+==========+
| 2020-08-23   | 1             | 9        | 9        |
+--------------+---------------+----------+----------+
| 2020-08-22   | 2             | 8        | 1        |
+--------------+---------------+----------+----------+
| 2020-08-21   | 2             | 30       | 10       |
+--------------+---------------+----------+----------+
sourcepub fn agg_list(&self) -> Result<DataFrame, PolarsError>
 
pub fn agg_list(&self) -> Result<DataFrame, PolarsError>
Aggregate the groups of the groupby operation into lists.
Example
fn example(df: DataFrame) -> Result<DataFrame> {
    // GroupBy and aggregate to Lists
    df.groupby(["date"])?.select(["temp"]).agg_list()
}
Returns:
+------------+------------------------+
| date       | temp_agg_list          |
| ---        | ---                    |
| Date       | list [i32]             |
+============+========================+
| 2020-08-23 | "[Some(9)]"            |
+------------+------------------------+
| 2020-08-22 | "[Some(7), Some(1)]"   |
+------------+------------------------+
| 2020-08-21 | "[Some(20), Some(10)]" |
+------------+------------------------+
Trait Implementations
Auto Trait Implementations
impl<'df> !RefUnwindSafe for GroupBy<'df>
impl<'df> Send for GroupBy<'df>
impl<'df> Sync for GroupBy<'df>
impl<'df> Unpin for GroupBy<'df>
impl<'df> !UnwindSafe for GroupBy<'df>
Blanket Implementations
sourceimpl<T> BorrowMut<T> for T where
    T: ?Sized, 
 
impl<T> BorrowMut<T> for T where
    T: ?Sized, 
const: unstable · sourcefn borrow_mut(&mut self) -> &mut T
 
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
sourceimpl<T> Pointable for T
 
impl<T> Pointable for T
sourceimpl<T> ToOwned for T where
    T: Clone, 
 
impl<T> ToOwned for T where
    T: Clone, 
type Owned = T
type Owned = T
The resulting type after obtaining ownership.
sourcefn clone_into(&self, target: &mut T)
 
fn clone_into(&self, target: &mut T)
This is a nightly-only experimental API. (toowned_clone_into)
Uses borrowed data to replace owned data, usually by cloning. Read more