pub struct CsvReader<'a, R> where
R: MmapBytesReader, { /* private fields */ }
Expand description
Create a new DataFrame by reading a csv file.
Example
use polars_core::prelude::*;
use polars_io::prelude::*;
use std::fs::File;
fn example() -> Result<DataFrame> {
CsvReader::from_path("iris_csv")?
.has_header(true)
.finish()
}
Implementations
sourceimpl<'a, R> CsvReader<'a, R> where
R: 'a + MmapBytesReader,
impl<'a, R> CsvReader<'a, R> where
R: 'a + MmapBytesReader,
sourcepub fn with_skip_rows_after_header(self, offset: usize) -> Self
pub fn with_skip_rows_after_header(self, offset: usize) -> Self
Skip these rows after the header
sourcepub fn with_row_count(self, rc: Option<RowCount>) -> Self
pub fn with_row_count(self, rc: Option<RowCount>) -> Self
Add a row_count
column.
sourcepub fn with_chunk_size(self, chunk_size: usize) -> Self
pub fn with_chunk_size(self, chunk_size: usize) -> Self
Sets the chunk size used by the parser. This influences performance
sourcepub fn with_encoding(self, enc: CsvEncoding) -> Self
pub fn with_encoding(self, enc: CsvEncoding) -> Self
Set CsvEncoding
sourcepub fn with_n_rows(self, num_rows: Option<usize>) -> Self
pub fn with_n_rows(self, num_rows: Option<usize>) -> Self
Try to stop parsing when n rows are parsed. During multithreaded parsing the upper bound n cannot be guaranteed.
sourcepub fn with_ignore_parser_errors(self, ignore: bool) -> Self
pub fn with_ignore_parser_errors(self, ignore: bool) -> Self
Continue with next batch when a ParserError is encountered.
sourcepub fn with_schema(self, schema: &'a Schema) -> Self
pub fn with_schema(self, schema: &'a Schema) -> Self
Set the CSV file’s schema. This only accepts datatypes that are implemented in the csv parser and expects a complete Schema.
It is recommended to use with_dtypes instead.
sourcepub fn with_skip_rows(self, skip_rows: usize) -> Self
pub fn with_skip_rows(self, skip_rows: usize) -> Self
Skip the first n rows during parsing. The header will then be parsed at line n.
sourcepub fn with_rechunk(self, rechunk: bool) -> Self
pub fn with_rechunk(self, rechunk: bool) -> Self
Rechunk the DataFrame to contiguous memory after the CSV is parsed.
sourcepub fn has_header(self, has_header: bool) -> Self
pub fn has_header(self, has_header: bool) -> Self
Set whether the CSV file has headers
sourcepub fn with_delimiter(self, delimiter: u8) -> Self
pub fn with_delimiter(self, delimiter: u8) -> Self
Set the CSV file’s column delimiter as a byte character
sourcepub fn with_comment_char(self, comment_char: Option<u8>) -> Self
pub fn with_comment_char(self, comment_char: Option<u8>) -> Self
Set the comment character. Lines starting with this character will be ignored.
sourcepub fn with_null_values(self, null_values: Option<NullValues>) -> Self
pub fn with_null_values(self, null_values: Option<NullValues>) -> Self
Set values that will be interpreted as missing/null. Note that any value you set as null value will not be escaped, so if quotation marks are part of the null value you should include them.
sourcepub fn with_dtypes(self, schema: Option<&'a Schema>) -> Self
pub fn with_dtypes(self, schema: Option<&'a Schema>) -> Self
Overwrite the schema with the dtypes in this given Schema. The given schema may be a subset of the total schema.
sourcepub fn with_dtypes_slice(self, dtypes: Option<&'a [DataType]>) -> Self
pub fn with_dtypes_slice(self, dtypes: Option<&'a [DataType]>) -> Self
Overwrite the dtypes in the schema in the order of the slice that’s given. This is useful if you don’t know the column names beforehand
sourcepub fn infer_schema(self, max_records: Option<usize>) -> Self
pub fn infer_schema(self, max_records: Option<usize>) -> Self
Set the CSV reader to infer the schema of the file
Arguments
max_records
- Maximum number of rows read for schema inference. Setting this to None
will do a full table scan (slow).
sourcepub fn with_projection(self, projection: Option<Vec<usize>>) -> Self
pub fn with_projection(self, projection: Option<Vec<usize>>) -> Self
Set the reader’s column projection. This counts from 0, meaning that
vec![0, 4]
would select the 1st and 5th column.
sourcepub fn with_columns(self, columns: Option<Vec<String>>) -> Self
pub fn with_columns(self, columns: Option<Vec<String>>) -> Self
Columns to select/ project
sourcepub fn with_n_threads(self, n: Option<usize>) -> Self
pub fn with_n_threads(self, n: Option<usize>) -> Self
Set the number of threads used in CSV reading. The default uses the number of cores of your cpu.
Note that this only works if this is initialized with CsvReader::from_path.
Note that the number of cores is the maximum allowed number of threads.
sourcepub fn with_path<P: Into<PathBuf>>(self, path: Option<P>) -> Self
pub fn with_path<P: Into<PathBuf>>(self, path: Option<P>) -> Self
The preferred way to initialize this builder. This allows the CSV file to be memory mapped and thereby greatly increases parsing performance.
sourcepub fn sample_size(self, size: usize) -> Self
pub fn sample_size(self, size: usize) -> Self
Sets the size of the sample taken from the CSV file. The sample is used to get statistics about the file. These statistics are used to try to optimally allocate up front. Increasing this may improve performance.
sourcepub fn low_memory(self, toggle: bool) -> Self
pub fn low_memory(self, toggle: bool) -> Self
Reduce memory consumption at the expense of performance
sourcepub fn with_quote_char(self, quote: Option<u8>) -> Self
pub fn with_quote_char(self, quote: Option<u8>) -> Self
Set the char used as quote char. The default is b'"'. If set to None, quoting is disabled.
sourcepub fn with_parse_dates(self, toggle: bool) -> Self
pub fn with_parse_dates(self, toggle: bool) -> Self
Automatically try to parse dates/datetimes and time. If parsing fails, columns remain of dtype DataType::Utf8.
pub fn with_predicate(self, predicate: Option<Arc<dyn PhysicalIoExpr>>) -> Self
pub fn with_aggregate(self, aggregate: Option<&'a [ScanAggregation]>) -> Self
Trait Implementations
sourceimpl<'a, R> SerReader<R> for CsvReader<'a, R> where
R: MmapBytesReader,
impl<'a, R> SerReader<R> for CsvReader<'a, R> where
R: MmapBytesReader,
Auto Trait Implementations
impl<'a, R> !RefUnwindSafe for CsvReader<'a, R>
impl<'a, R> Send for CsvReader<'a, R>
impl<'a, R> Sync for CsvReader<'a, R>
impl<'a, R> Unpin for CsvReader<'a, R> where
R: Unpin,
impl<'a, R> !UnwindSafe for CsvReader<'a, R>
Blanket Implementations
sourceimpl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
const: unstable · sourcefn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more