Interface DataFrame

A DataFrame is a two-dimensional data structure that represents data as a table with rows and columns.

Param: data

Object, Array, or Series Two-dimensional data in various forms. object must contain Arrays. Array may contain Series or other Arrays.

Param: columns

Array of str, default undefined Column labels to use for resulting DataFrame. If specified, overrides any labels already present in the data. Must match data dimensions.

Param: orient

'col' | 'row' default undefined Whether to interpret two-dimensional data as columns or as rows. If undefined, the orientation is inferred by matching the columns and data dimensions. If this does not yield conclusive results, column orientation is used.

Example

Constructing a DataFrame from an object :

> const data = {'a': [1n, 2n], 'b': [3, 4]};
> const df = pl.DataFrame(data);
> console.log(df.toString());
shape: (2, 2)
╭─────┬─────╮
│ a   ┆ b   │
│ --- ┆ --- │
│ u64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 3   │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 2   ┆ 4   │
╰─────┴─────╯

Notice that the dtype is automatically inferred as a polars Int64:

> df.dtypes
['UInt64', 'Int64']

In order to specify dtypes for your columns, initialize the DataFrame with a list of Series instead:

> const series = [pl.Series('col1', [1, 2], pl.Float32), pl.Series('col2', [3, 4], pl.Int64)];
> const df2 = pl.DataFrame(series);
> console.log(df2.toString());
shape: (2, 2)
╭──────┬──────╮
│ col1 ┆ col2 │
│ ---  ┆ ---  │
│ f32  ┆ i64  │
╞══════╪══════╡
│ 1    ┆ 3    │
├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 2    ┆ 4    │
╰──────┴──────╯

Constructing a DataFrame from a list of lists, row orientation inferred:

> const data = [[1, 2, 3], [4, 5, 6]];
> const df4 = pl.DataFrame(data, ['a', 'b', 'c']);
> console.log(df4.toString());
shape: (2, 3)
╭─────┬─────┬─────╮
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ 1   ┆ 2   ┆ 3   │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ 4   ┆ 5   ┆ 6   │
╰─────┴─────┴─────╯
interface DataFrame {
    [inspect](): string;
    [iterator](): Generator<any, void, any>;
    add(other): pl.DataFrame;
    clone(): pl.DataFrame;
    columns: string[];
    describe(): pl.DataFrame;
    distinct(maintainOrder?, subset?, keep?): pl.DataFrame;
    div(other): pl.DataFrame;
    divideBy(other): pl.DataFrame;
    drop(name): pl.DataFrame;
    drop(names): pl.DataFrame;
    drop(name, ...names): pl.DataFrame;
    dropNulls(column): pl.DataFrame;
    dropNulls(columns): pl.DataFrame;
    dropNulls(...columns): pl.DataFrame;
    dtypes: DataType[];
    explode(column): pl.DataFrame;
    explode(columns): pl.DataFrame;
    explode(column, ...columns): pl.DataFrame;
    extend(other): pl.DataFrame;
    fillNull(strategy): pl.DataFrame;
    filter(predicate): pl.DataFrame;
    findIdxByName(name): number;
    fold(operation): pl.Series;
    frameEqual(other): boolean;
    frameEqual(other, nullEqual): boolean;
    getColumn(name): pl.Series;
    getColumns(): pl.Series[];
    groupBy(...by): GroupBy;
    groupByDynamic(options): RollingGroupBy;
    groupByRolling(opts): RollingGroupBy;
    hashRows(k0?, k1?, k2?, k3?): pl.Series;
    hashRows(options): pl.Series;
    head(length?): pl.DataFrame;
    height: number;
    hstack(columns): pl.DataFrame;
    hstack(columns, inPlace?): void;
    insertAtIdx(index, series): void;
    interpolate(): pl.DataFrame;
    isDuplicated(): pl.Series;
    isEmpty(): boolean;
    isUnique(): pl.Series;
    join(other, options): pl.DataFrame;
    join(other, options): pl.DataFrame;
    join(other, options): pl.DataFrame;
    joinAsof(other, options): pl.DataFrame;
    lazy(): LazyDataFrame;
    limit(length?): pl.DataFrame;
    map(func): any[];
    max(): pl.DataFrame;
    max(axis): pl.DataFrame;
    max(axis): pl.Series;
    mean(): pl.DataFrame;
    mean(axis): pl.DataFrame;
    mean(axis): pl.Series;
    mean(axis, nullStrategy?): pl.Series;
    median(): pl.DataFrame;
    melt(idVars, valueVars): pl.DataFrame;
    min(): pl.DataFrame;
    min(axis): pl.DataFrame;
    min(axis): pl.Series;
    minus(other): pl.DataFrame;
    modulo(other): pl.DataFrame;
    mul(other): pl.DataFrame;
    multiplyBy(other): pl.DataFrame;
    nChunks(): number;
    nullCount(): pl.DataFrame;
    partitionBy(cols, stable?, includeKey?): pl.DataFrame[];
    partitionBy<T>(cols, stable, includeKey, mapFn): T[];
    pivot(values, options): pl.DataFrame;
    pivot(options): pl.DataFrame;
    plus(other): pl.DataFrame;
    quantile(quantile): pl.DataFrame;
    rechunk(): pl.DataFrame;
    rem(other): pl.DataFrame;
    rename(mapping): pl.DataFrame;
    replaceAtIdx(index, newColumn): void;
    row(index): any[];
    rows(): any[][];
    sample(opts?): pl.DataFrame;
    sample(opts?): pl.DataFrame;
    sample(n?, frac?, withReplacement?, seed?): pl.DataFrame;
    get schema(): Record<string, DataType>;
    select(...columns): pl.DataFrame;
    serialize(format): Buffer;
    shape: {
        height: number;
        width: number;
    };
    shift(periods): pl.DataFrame;
    shift(__namedParameters): pl.DataFrame;
    shiftAndFill(n, fillValue): pl.DataFrame;
    shiftAndFill(__namedParameters): pl.DataFrame;
    shrinkToFit(): pl.DataFrame;
    shrinkToFit(inPlace): void;
    shrinkToFit(__namedParameters): void;
    slice(opts): pl.DataFrame;
    slice(offset, length): pl.DataFrame;
    sort(by, descending?, maintain_order?): pl.DataFrame;
    sort(__namedParameters): pl.DataFrame;
    std(): pl.DataFrame;
    sub(other): pl.DataFrame;
    sum(): pl.DataFrame;
    sum(axis): pl.DataFrame;
    sum(axis): pl.Series;
    sum(axis, nullStrategy?): pl.Series;
    tail(length?): pl.DataFrame;
    toCSV(destOrOptions?, options?): any;
    toDataResource(): TabularDataResource;
    toHTML(): string;
    toIPC(destination?, options?): any;
    toJSON(): string;
    toObject(): Record<string, any[]>;
    toParquet(destination?, options?): any;
    toRecords(): Record<string, any>[];
    toSeries(index?): pl.Series;
    toString(): string;
    toStruct(name): pl.Series;
    transpose(options?): pl.DataFrame;
    unique(maintainOrder?, subset?, keep?): pl.DataFrame;
    unique(opts): pl.DataFrame;
    unnest(names): pl.DataFrame;
    upsample(timeColumn, every, offset?, by?, maintainOrder?): pl.DataFrame;
    upsample(opts): pl.DataFrame;
    var(): pl.DataFrame;
    vstack(df): pl.DataFrame;
    where(predicate): pl.DataFrame;
    width: number;
    withColumn(column): pl.DataFrame;
    withColumn(column): pl.DataFrame;
    withColumnRenamed(existing, replacement): pl.DataFrame;
    withColumnRenamed(opts): pl.DataFrame;
    withColumns(...columns): pl.DataFrame;
    withRowCount(name?): pl.DataFrame;
    writeAvro(options?): Buffer;
    writeAvro(destination, options?): void;
    writeCSV(): Buffer;
    writeCSV(options): Buffer;
    writeCSV(dest, options?): void;
    writeIPC(options?): Buffer;
    writeIPC(destination, options?): void;
    writeJSON(options?): Buffer;
    writeJSON(destination, options?): void;
    writeParquet(options?): Buffer;
    writeParquet(destination, options?): void;
}

Hierarchy

Properties

dtypes: DataType[]
height: number
shape: {
    height: number;
    width: number;
}

Type declaration

  • height: number
  • width: number
width: number

Accessors

Methods - Arithmetic

Methods - Deprecated

  • Parameters

    • Optional destOrOptions: any
    • Optional options: any

    Returns any

    Deprecated

    since 0.4.0 use writeCSV

Methods - IO

  • compat with JSON.stringify

    Returns string

  • Converts dataframe object into column oriented javascript objects

    Returns Record<string, any[]>

    Example

    > df.toObject()
    {
    "foo": [1,2,3],
    "bar": ["a", "b", "c"]
    }
  • Converts dataframe object into row oriented javascript objects

    Returns Record<string, any>[]

    Example

    > df.toRecords()
    [
    {"foo":1.0,"bar":"a"},
    {"foo":2.0,"bar":"b"},
    {"foo":3.0,"bar":"c"}
    ]
  • Write the DataFrame to disk in Avro format.

    Parameters

    Returns Buffer

  • Parameters

    Returns void

  • Write DataFrame to comma-separated values file (csv).

    If no options are specified, it will return a new string containing the contents


    Returns Buffer

    Example

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.writeCSV();
    foo,bar,ham
    1,6,a
    2,7,b
    3,8,c

    // using a file path
    > df.head(1).writeCSV("./foo.csv")
    // foo.csv
    foo,bar,ham
    1,6,a

    // using a write stream
    > const writeStream = new Stream.Writable({
    ... write(chunk, encoding, callback) {
    ... console.log("writeStream: %O", chunk.toString());
    ... callback(null);
    ... }
    ... });
    > df.head(1).writeCSV(writeStream, {includeHeader: false});
    writeStream: '1,6,a'
  • Parameters

    Returns Buffer

  • Parameters

    Returns void

  • Write to Arrow IPC binary stream, or a feather file.

    Parameters

    Returns Buffer

  • Parameters

    Returns void

  • Write Dataframe to JSON string, file, or write stream

    Parameters

    • Optional options: {
          format: "json" | "lines";
      }
      • format: "json" | "lines"

        json | lines

    Returns Buffer

    Example

    > const df = pl.DataFrame({
    ... foo: [1,2,3],
    ... bar: ['a','b','c']
    ... })

    > df.writeJSON({format:"json"})
    `[ {"foo":1.0,"bar":"a"}, {"foo":2.0,"bar":"b"}, {"foo":3.0,"bar":"c"}]`

    > df.writeJSON({format:"lines"})
    `{"foo":1.0,"bar":"a"}
    {"foo":2.0,"bar":"b"}
    {"foo":3.0,"bar":"c"}`

    // writing to a file
    > df.writeJSON("/path/to/file.json", {format:'lines'})
  • Parameters

    • destination: string | Writable
    • Optional options: {
          format: "json" | "lines";
      }
      • format: "json" | "lines"

    Returns void

  • Write the DataFrame to disk in Parquet format.

    Parameters

    Returns Buffer

  • Parameters

    Returns void

Methods - IO Deprecated

  • Parameters

    • Optional destination: any
    • Optional options: any

    Returns any

    Deprecated

    since 0.4.0 use writeIPC

  • Parameters

    • Optional destination: any
    • Optional options: any

    Returns any

    Deprecated

    since 0.4.0 use writeParquet

Methods - Math

  • Sample from this DataFrame by setting either n or frac.

    Parameters

    • Optional opts: {
          n: number;
          seed?: number | bigint;
          withReplacement?: boolean;
      }
      • n: number
      • Optional seed?: number | bigint
      • Optional withReplacement?: boolean

    Returns pl.DataFrame

    Example

    > df = pl.DataFrame({
    > "foo": [1, 2, 3],
    > "bar": [6, 7, 8],
    > "ham": ['a', 'b', 'c']
    > })
    > df.sample({n: 2})
    shape: (2, 3)
    ╭─────┬─────┬─────╮
    │ foo ┆ bar ┆ ham │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ str │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 6   ┆ "a" │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ 3   ┆ 8   ┆ "c" │
    ╰─────┴─────┴─────╯
  • Parameters

    • Optional opts: {
          frac: number;
          seed?: number | bigint;
          withReplacement?: boolean;
      }
      • frac: number
      • Optional seed?: number | bigint
      • Optional withReplacement?: boolean

    Returns pl.DataFrame

  • Parameters

    • Optional n: number
    • Optional frac: number
    • Optional withReplacement: boolean
    • Optional seed: number | bigint

    Returns pl.DataFrame

Methods - Other

  • Returns Generator<any, void, any>

  • Summary statistics for a DataFrame.

    Only summarizes numeric datatypes at the moment and returns nulls for non numeric datatypes.


    Example

    >  const df = pl.DataFrame({
    ... 'a': [1.0, 2.8, 3.0],
    ... 'b': [4, 5, 6],
    ... "c": [true, false, true]
    ... });
    > df.describe()
    shape: (5, 4)
    ╭──────────┬───────┬─────┬──────╮
    │ describe ┆ a     ┆ b   ┆ c    │
    │ ---      ┆ ---   ┆ --- ┆ ---  │
    │ str      ┆ f64   ┆ f64 ┆ f64  │
    ╞══════════╪═══════╪═════╪══════╡
    │ "mean"   ┆ 2.267 ┆ 5   ┆ null │
    ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ "std"    ┆ 1.102 ┆ 1   ┆ null │
    ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ "min"    ┆ 1     ┆ 4   ┆ 0.0  │
    ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ "max"    ┆ 3     ┆ 6   ┆ 1    │
    ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ "median" ┆ 2.8   ┆ 5   ┆ null │
    ╰──────────┴───────┴─────┴──────╯

    Returns pl.DataFrame

  • Remove column from DataFrame and return as new.


    Parameters

    • name: string

    Returns pl.DataFrame

    Example

    >  const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6.0, 7.0, 8.0],
    ... "ham": ['a', 'b', 'c'],
    ... "apple": ['a', 'b', 'c']
    ... });
    > console.log(df.drop(['ham', 'apple']).toString());
    shape: (3, 2)
    ╭─────┬─────╮
    │ foo ┆ bar │
    │ --- ┆ --- │
    │ i64 ┆ f64 │
    ╞═════╪═════╡
    │ 1   ┆ 6   │
    ├╌╌╌╌╌┼╌╌╌╌╌┤
    │ 2   ┆ 7   │
    ├╌╌╌╌╌┼╌╌╌╌╌┤
    │ 3   ┆ 8   │
    ╰─────┴─────╯
  • Parameters

    • names: string[]

    Returns pl.DataFrame

  • Parameters

    • name: string
    • Rest ...names: string[]

    Returns pl.DataFrame

  • Return a new DataFrame where the null values are dropped.

    This method only drops nulls row-wise if any single value of the row is null.


    Parameters

    • column: string

    Returns pl.DataFrame

    Example

    >  const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, null, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > console.log(df.dropNulls().toString());
    shape: (2, 3)
    ┌─────┬─────┬─────┐
    │ foo ┆ bar ┆ ham │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ str │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 6   ┆ "a" │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ 3   ┆ 8   ┆ "c" │
    └─────┴─────┴─────┘
  • Parameters

    • columns: string[]

    Returns pl.DataFrame

  • Parameters

    • Rest ...columns: string[]

    Returns pl.DataFrame

  • Explode DataFrame to long format by exploding a column with Lists.


    Parameters

    • column: ExprOrString

    Returns pl.DataFrame

    Example

    >  const df = pl.DataFrame({
    ... "letters": ["c", "c", "a", "c", "a", "b"],
    ... "nrs": [[1, 2], [1, 3], [4, 3], [5, 5, 5], [6], [2, 1, 2]]
    ... });
    > console.log(df.toString());
    shape: (6, 2)
    ╭─────────┬────────────╮
    │ letters ┆ nrs        │
    │ ---     ┆ ---        │
    │ str     ┆ list [i64] │
    ╞═════════╪════════════╡
    │ "c"     ┆ [1, 2]     │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ "c"     ┆ [1, 3]     │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ "a"     ┆ [4, 3]     │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ "c"     ┆ [5, 5, 5]  │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ "a"     ┆ [6]        │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ "b"     ┆ [2, 1, 2]  │
    ╰─────────┴────────────╯
    > df.explode("nrs")
    shape: (13, 2)
    ╭─────────┬─────╮
    │ letters ┆ nrs │
    │ ---     ┆ --- │
    │ str     ┆ i64 │
    ╞═════════╪═════╡
    │ "c"     ┆ 1   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "c"     ┆ 2   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "c"     ┆ 1   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "c"     ┆ 3   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ ...     ┆ ... │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "c"     ┆ 5   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "a"     ┆ 6   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "b"     ┆ 2   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "b"     ┆ 1   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "b"     ┆ 2   │
    ╰─────────┴─────╯
  • Parameters

    • columns: ExprOrString[]

    Returns pl.DataFrame

  • Parameters

    • column: ExprOrString
    • Rest ...columns: ExprOrString[]

    Returns pl.DataFrame

  • Extend the memory backed by this DataFrame with the values from other.


    Different from vstack, which adds the chunks from other to the chunks of this DataFrame, extend appends the data from other to the underlying memory locations and thus may cause a reallocation.

    If this does not cause a reallocation, the resulting data structure will not have any extra chunks and thus will yield faster queries.

    Prefer extend over vstack when you want to do a query after a single append. For instance during online operations where you add n rows and rerun a query.

    Prefer vstack over extend when you want to append many times before doing a query. For instance, when you read in multiple files and want to store them in a single DataFrame. In the latter case, finish the sequence of vstack operations with a rechunk.

    Parameters

    Returns pl.DataFrame

  • Fill null/missing values by a filling strategy

    Parameters

    • strategy: FillNullStrategy

      One of:

      • "backward"
      • "forward"
      • "mean"
      • "min"
      • "max"
      • "zero"
      • "one"

    Returns pl.DataFrame

    DataFrame with null values replaced according to the filling strategy.

  • Filter the rows in the DataFrame based on a predicate expression.


    Parameters

    • predicate: any

      Expression that evaluates to a boolean Series.

    Returns pl.DataFrame

    Example

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    // Filter on one condition
    > df.filter(pl.col("foo").lt(3))
    shape: (2, 3)
    ┌─────┬─────┬─────┐
    │ foo ┆ bar ┆ ham │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ str │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 6   ┆ a   │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ 2   ┆ 7   ┆ b   │
    └─────┴─────┴─────┘
    // Filter on multiple conditions
    > df.filter(
    ... pl.col("foo").lt(3)
    ... .and(pl.col("ham").eq(pl.lit("a")))
    ... )
    shape: (1, 3)
    ┌─────┬─────┬─────┐
    │ foo ┆ bar ┆ ham │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ str │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 6   ┆ a   │
    └─────┴─────┴─────┘
  • Find the index of a column by name.


    Parameters

    • name: string

      Name of the column to find.

    Returns number

    Example

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.findIdxByName("ham")
    2
  • Apply a horizontal reduction on a DataFrame.

    This can be used to effectively determine aggregations on a row level, and can be applied to any DataType that can be supercasted (casted to a similar parent type).

    An example of the supercast rules when applying an arithmetic operation on two DataTypes are for instance:

    • Int8 + Utf8 = Utf8
    • Float32 + Int64 = Float32
    • Float32 + Float64 = Float64

    Parameters

    Returns pl.Series

    Series

    Example

    > // A horizontal sum operation
    > let df = pl.DataFrame({
    ... "a": [2, 1, 3],
    ... "b": [1, 2, 3],
    ... "c": [1.0, 2.0, 3.0]
    ... });
    > df.fold((s1, s2) => s1.plus(s2))
    Series: 'a' [f64]
    [
    4
    5
    9
    ]
    > // A horizontal minimum operation
    > df = pl.DataFrame({
    ... "a": [2, 1, 3],
    ... "b": [1, 2, 3],
    ... "c": [1.0, 2.0, 3.0]
    ... });
    > df.fold((s1, s2) => s1.zipWith(s1.lt(s2), s2))
    Series: 'a' [f64]
    [
    1
    1
    3
    ]
    > // A horizontal string concatenation
    > df = pl.DataFrame({
    ... "a": ["foo", "bar", 2],
    ... "b": [1, 2, 3],
    ... "c": [1.0, 2.0, 3.0]
    ... })
    > df.fold((s1, s2) => s1.plus(s2))
    Series: '' [f64]
    [
    "foo11"
    "bar22"
    "233"
    ]
  • Check if DataFrame is equal to other.


    Parameters

    Returns boolean

    Example

    > const df1 = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6.0, 7.0, 8.0],
    ... "ham": ['a', 'b', 'c']
    ... })
    > const df2 = pl.DataFrame({
    ... "foo": [3, 2, 1],
    ... "bar": [8.0, 7.0, 6.0],
    ... "ham": ['c', 'b', 'a']
    ... })
    > df1.frameEqual(df1)
    true
    > df1.frameEqual(df2)
    false
  • Parameters

    Returns boolean

  • Start a groupby operation.


    Parameters

    • Rest ...by: ColumnSelection[]

      Column(s) to group by.

    Returns GroupBy

  • Groups based on a time value (or index value of type Int32, Int64). Time windows are calculated and rows are assigned to windows. Unlike a normal groupby, a row can be a member of multiple groups. The time/index window could be seen as a rolling window, with a window size determined by dates/times/values instead of slots in the DataFrame.

    A window is defined by:

    • every: interval of the window
    • period: length of the window
    • offset: offset of the window

    The every, period and offset arguments are created with the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 day)
    • 1w (1 week)
    • 1mo (1 calendar month)
    • 1y (1 calendar year)
    • 1i (1 index count)

    Or combine them: "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

    In case of a groupbyDynamic on an integer column, the windows are defined by:

    • "1i" # length 1
    • "10i" # length 10

    Parameters

    Parameters

    • options: {
          by?: ColumnsOrExpr;
          check_sorted?: boolean;
          closed?: "none" | "left" | "right" | "both";
          every: string;
          includeBoundaries?: boolean;
          indexColumn: string;
          offset?: string;
          period?: string;
          start_by: StartBy;
      }
      • Optional by?: ColumnsOrExpr
      • Optional check_sorted?: boolean
      • Optional closed?: "none" | "left" | "right" | "both"
      • every: string
      • Optional includeBoundaries?: boolean
      • indexColumn: string
      • Optional offset?: string
      • Optional period?: string
      • start_by: StartBy

    Returns RollingGroupBy

  • Create rolling groups based on a time column (or index value of type Int32, Int64).

    Different from groupByDynamic, the windows are now determined by the individual values and are not of constant intervals. For constant intervals use groupByDynamic.

    The period and offset arguments are created with the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 day)
    • 1w (1 week)
    • 1mo (1 calendar month)
    • 1y (1 calendar year)
    • 1i (1 index count)

    Or combine them: "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

    In case of a groupByRolling on an integer column, the windows are defined by:

    • "1i" # length 1
    • "10i" # length 10

    Parameters

    • opts: {
          by?: ColumnsOrExpr;
          check_sorted?: boolean;
          closed?: "none" | "left" | "right" | "both";
          indexColumn: ColumnsOrExpr;
          offset?: string;
          period: string;
      }
      • Optional by?: ColumnsOrExpr
      • Optional check_sorted?: boolean
      • Optional closed?: "none" | "left" | "right" | "both"
      • indexColumn: ColumnsOrExpr
      • Optional offset?: string
      • period: string

    Returns RollingGroupBy

    Example


    >dates = [
    ... "2020-01-01 13:45:48",
    ... "2020-01-01 16:42:13",
    ... "2020-01-01 16:45:09",
    ... "2020-01-02 18:12:48",
    ... "2020-01-03 19:45:32",
    ... "2020-01-08 23:16:43",
    ... ]
    >df = pl.DataFrame({"dt": dates, "a": [3, 7, 5, 9, 2, 1]}).withColumn(
    ... pl.col("dt").str.strptime(pl.Datetime)
    ... )
    >out = df.groupByRolling({indexColumn:"dt", period:"2d"}).agg(
    ... [
    ... pl.sum("a").alias("sum_a"),
    ... pl.min("a").alias("min_a"),
    ... pl.max("a").alias("max_a"),
    ... ]
    ... )
    >assert.deepStrictEqual(out["sum_a"].toArray(), [3, 10, 15, 24, 11, 1])
    >assert.deepStrictEqual(out["max_a"].toArray(), [3, 7, 7, 9, 9, 1])
    >assert.deepStrictEqual(out["min_a"].toArray(), [3, 3, 3, 3, 2, 1])
    >out
    shape: (6, 4)
    ┌─────────────────────┬───────┬───────┬───────┐
    │ dt                  ┆ a_sum ┆ a_max ┆ a_min │
    │ ---                 ┆ ---   ┆ ---   ┆ ---   │
    │ datetime[ms]        ┆ i64   ┆ i64   ┆ i64   │
    ╞═════════════════════╪═══════╪═══════╪═══════╡
    │ 2020-01-01 13:45:48 ┆ 3     ┆ 3     ┆ 3     │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
    │ 2020-01-01 16:42:13 ┆ 10    ┆ 7     ┆ 3     │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
    │ 2020-01-01 16:45:09 ┆ 15    ┆ 7     ┆ 3     │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
    │ 2020-01-02 18:12:48 ┆ 24    ┆ 9     ┆ 3     │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
    │ 2020-01-03 19:45:32 ┆ 11    ┆ 9     ┆ 2     │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
    │ 2020-01-08 23:16:43 ┆ 1     ┆ 1     ┆ 1     │
    └─────────────────────┴───────┴───────┴───────┘
  • Hash and combine the rows in this DataFrame. (Hash value is UInt64)

    Parameters

    • Optional k0: number

      seed parameter

    • Optional k1: number

      seed parameter

    • Optional k2: number

      seed parameter

    • Optional k3: number

      seed parameter

    Returns pl.Series

  • Parameters

    • options: {
          k0?: number;
          k1?: number;
          k2?: number;
          k3?: number;
      }
      • Optional k0?: number
      • Optional k1?: number
      • Optional k2?: number
      • Optional k3?: number

    Returns pl.Series

  • Get first N rows as DataFrame.


    Parameters

    • Optional length: number

      Length of the head.

    Returns pl.DataFrame

    Example

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3, 4, 5],
    ... "bar": [6, 7, 8, 9, 10],
    ... "ham": ['a', 'b', 'c', 'd','e']
    ... });
    > df.head(3)
    shape: (3, 3)
    ╭─────┬─────┬─────╮
    │ foo ┆ bar ┆ ham │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ str │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 6   ┆ "a" │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ 2   ┆ 7   ┆ "b" │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ 3   ┆ 8   ┆ "c" │
    ╰─────┴─────┴─────╯
  • Return a new DataFrame grown horizontally by stacking multiple Series to it.

    Parameters

    Returns pl.DataFrame

    Example

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > const x = pl.Series("apple", [10, 20, 30])
    > df.hstack([x])
    shape: (3, 4)
    ╭─────┬─────┬─────┬───────╮
    │ foo ┆ bar ┆ ham ┆ apple │
    │ --- ┆ --- ┆ --- ┆ ---   │
    │ i64 ┆ i64 ┆ str ┆ i64   │
    ╞═════╪═════╪═════╪═══════╡
    │ 1   ┆ 6   ┆ "a" ┆ 10    │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
    │ 2   ┆ 7   ┆ "b" ┆ 20    │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
    │ 3   ┆ 8   ┆ "c" ┆ 30    │
    ╰─────┴─────┴─────┴───────╯
  • Parameters

    Returns void

  • Insert a Series at a certain column index. This operation is in place.

    Parameters

    • index: number

      Column position to insert the new Series column.

    • series: pl.Series

      Series to insert

    Returns void

  • Check if the dataframe is empty

    Returns boolean

  • SQL like joins.

    Parameters

    Returns pl.DataFrame

    See

    JoinOptions

    Example

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6.0, 7.0, 8.0],
    ... "ham": ['a', 'b', 'c']
    ... });
    > const otherDF = pl.DataFrame({
    ... "apple": ['x', 'y', 'z'],
    ... "ham": ['a', 'b', 'd']
    ... });
    > df.join(otherDF, {on: 'ham'})
    shape: (2, 4)
    ╭─────┬─────┬─────┬───────╮
    │ foo ┆ bar ┆ ham ┆ apple │
    │ --- ┆ --- ┆ --- ┆ ---   │
    │ i64 ┆ f64 ┆ str ┆ str   │
    ╞═════╪═════╪═════╪═══════╡
    │ 1   ┆ 6   ┆ "a" ┆ "x"   │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
    │ 2   ┆ 7   ┆ "b" ┆ "y"   │
    ╰─────┴─────┴─────┴───────╯
  • Parameters

    • other: pl.DataFrame
    • options: {
          leftOn: ValueOrArray<string>;
          rightOn: ValueOrArray<string>;
      } & Omit<JoinOptions, "on">

    Returns pl.DataFrame

  • Parameters

    • other: pl.DataFrame
    • options: {
          how: "cross";
          suffix?: string;
      }
      • how: "cross"
      • Optional suffix?: string

    Returns pl.DataFrame

  • Perform an asof join. This is similar to a left-join except that we match on nearest key rather than equal keys.

    Both DataFrames must be sorted by the asofJoin key.

    For each row in the left DataFrame:

    • A "backward" search selects the last row in the right DataFrame whose 'on' key is less than or equal to the left's key.

    • A "forward" search selects the first row in the right DataFrame whose 'on' key is greater than or equal to the left's key.

    The default is "backward".

    Parameters

    • other: pl.DataFrame

      DataFrame to join with.

    • options: {
          allowParallel?: boolean;
          by?: string | string[];
          byLeft?: string | string[];
          byRight?: string | string[];
          forceParallel?: boolean;
          leftOn?: string;
          on?: string;
          rightOn?: string;
          strategy?: "backward" | "forward";
          suffix?: string;
          tolerance?: string | number;
      }
      • Optional allowParallel?: boolean

        Allow the physical plan to optionally evaluate the computation of both DataFrames up to the join in parallel.

      • Optional by?: string | string[]
      • Optional byLeft?: string | string[]

        join on these columns before doing asof join

      • Optional byRight?: string | string[]

        join on these columns before doing asof join

      • Optional forceParallel?: boolean

        Force the physical plan to evaluate the computation of both DataFrames up to the join in parallel.

      • Optional leftOn?: string

        Join column of the left DataFrame.

      • Optional on?: string

        Join column of both DataFrames. If set, leftOn and rightOn should be undefined.

      • Optional rightOn?: string

        Join column of the right DataFrame.

      • Optional strategy?: "backward" | "forward"

        One of 'forward', 'backward'

      • Optional suffix?: string

        Suffix to append to columns with a duplicate name.

      • Optional tolerance?: string | number

        Numeric tolerance. By setting this the join will only be done if the near keys are within this distance. If an asof join is done on columns of dtype "Date", "Datetime" you use the following string language:

        • 1ns (1 nanosecond)
        • 1us (1 microsecond)
        • 1ms (1 millisecond)
        • 1s (1 second)
        • 1m (1 minute)
        • 1h (1 hour)
        • 1d (1 day)
        • 1w (1 week)
        • 1mo (1 calendar month)
        • 1y (1 calendar year)
        • 1i (1 index count)

        Or combine them:

        • "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

    Returns pl.DataFrame

    Example

    > const gdp = pl.DataFrame({
    ... date: [
    ... new Date('2016-01-01'),
    ... new Date('2017-01-01'),
    ... new Date('2018-01-01'),
    ... new Date('2019-01-01'),
    ... ], // note record date: Jan 1st (sorted!)
    ... gdp: [4164, 4411, 4566, 4696],
    ... })
    > const population = pl.DataFrame({
    ... date: [
    ... new Date('2016-05-12'),
    ... new Date('2017-05-12'),
    ... new Date('2018-05-12'),
    ... new Date('2019-05-12'),
    ... ], // note record date: May 12th (sorted!)
    ... "population": [82.19, 82.66, 83.12, 83.52],
    ... })
    > population.joinAsof(
    ... gdp,
    ... {leftOn:"date", rightOn:"date", strategy:"backward"}
    ... )
    shape: (4, 3)
    ┌─────────────────────┬────────────┬──────┐
    │ date                ┆ population ┆ gdp  │
    │ ---                 ┆ ---        ┆ ---  │
    │ datetime[μs]        ┆ f64        ┆ i64  │
    ╞═════════════════════╪════════════╪══════╡
    │ 2016-05-12 00:00:00 ┆ 82.19      ┆ 4164 │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ 2017-05-12 00:00:00 ┆ 82.66      ┆ 4411 │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ 2018-05-12 00:00:00 ┆ 83.12      ┆ 4566 │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ 2019-05-12 00:00:00 ┆ 83.52      ┆ 4696 │
    └─────────────────────┴────────────┴──────┘
  • Parameters

    • func: ((...args) => any)
        • (...args): any
        • Parameters

          • Rest ...args: any[]

          Returns any

    Returns any[]

  • Aggregate the columns of this DataFrame to their maximum value.


    Returns pl.DataFrame

    Example

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.max()
    shape: (1, 3)
    ╭─────┬─────┬──────╮
    │ foo ┆ bar ┆ ham  │
    │ --- ┆ --- ┆ ---  │
    │ i64 ┆ i64 ┆ str  │
    ╞═════╪═════╪══════╡
    │ 3   ┆ 8   ┆ null │
    ╰─────┴─────┴──────╯
  • Parameters

    • axis: 0

    Returns pl.DataFrame

  • Parameters

    • axis: 1

    Returns pl.Series

  • Aggregate the columns of this DataFrame to their median value.


    Returns pl.DataFrame

    Example

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.median();
    shape: (1, 3)
    ╭─────┬─────┬──────╮
    │ foo ┆ bar ┆ ham  │
    │ --- ┆ --- ┆ ---  │
    │ f64 ┆ f64 ┆ str  │
    ╞═════╪═════╪══════╡
    │ 2   ┆ 7   ┆ null │
    ╰─────┴─────┴──────╯
  • Unpivot DataFrame to long format.


    Parameters

    • idVars: ColumnSelection

      Columns to use as identifier variables.

    • valueVars: ColumnSelection

      Values to use as value variables.

    Returns pl.DataFrame

    Example

    > const df1 = pl.DataFrame({
    ... 'id': [1],
    ... 'asset_key_1': ['123'],
    ... 'asset_key_2': ['456'],
    ... 'asset_key_3': ['abc'],
    ... });
    > df1.melt('id', ['asset_key_1', 'asset_key_2', 'asset_key_3']);
    shape: (3, 3)
    ┌─────┬─────────────┬───────┐
    │ id  ┆ variable    ┆ value │
    │ --- ┆ ---         ┆ ---   │
    │ f64 ┆ str         ┆ str   │
    ╞═════╪═════════════╪═══════╡
    │ 1   ┆ asset_key_1 ┆ 123   │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
    │ 1   ┆ asset_key_2 ┆ 456   │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
    │ 1   ┆ asset_key_3 ┆ abc   │
    └─────┴─────────────┴───────┘
  • Aggregate the columns of this DataFrame to their minimum value.


    Returns pl.DataFrame

    Example

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.min();
    shape: (1, 3)
    ╭─────┬─────┬──────╮
    │ foo ┆ bar ┆ ham  │
    │ --- ┆ --- ┆ ---  │
    │ i64 ┆ i64 ┆ str  │
    ╞═════╪═════╪══════╡
    │ 1   ┆ 6   ┆ null │
    ╰─────┴─────┴──────╯
  • Parameters

    • axis: 0

    Returns pl.DataFrame

  • Parameters

    • axis: 1

    Returns pl.Series

  • Get number of chunks used by the ChunkedArrays of this DataFrame.

    Returns number

  • Create a new DataFrame that shows the null counts per column.


    Returns pl.DataFrame

    Example

    > const df = pl.DataFrame({
    ... "foo": [1, null, 3],
    ... "bar": [6, 7, null],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.nullCount();
    shape: (1, 3)
    ┌─────┬─────┬─────┐
    │ foo ┆ bar ┆ ham │
    │ --- ┆ --- ┆ --- │
    │ u32 ┆ u32 ┆ u32 │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 1   ┆ 0   │
    └─────┴─────┴─────┘
  • Parameters

    • cols: string | string[]
    • Optional stable: boolean
    • Optional includeKey: boolean

    Returns pl.DataFrame[]

  • Type Parameters

    • T

    Parameters

    • cols: string | string[]
    • stable: boolean
    • includeKey: boolean
    • mapFn: ((df) => T)

    Returns T[]

  • Create a spreadsheet-style pivot table as a DataFrame.

    Parameters

    • values: string | string[]

      Column values to aggregate. Can be multiple columns if the columns argument contains multiple columns as well.

    • options: {
          aggregateFunc?: pl.Expr | "mean" | "min" | "max" | "first" | "last" | "count" | "median" | "sum";
          columns: string | string[];
          index: string | string[];
          maintainOrder?: boolean;
          separator?: string;
          sortColumns?: boolean;
      }
      • Optional aggregateFunc?: pl.Expr | "mean" | "min" | "max" | "first" | "last" | "count" | "median" | "sum"

        Any of: "sum", "max", "min", "mean", "median", "first", "last", "count". Defaults to "first".

      • columns: string | string[]

        Columns whose values will be used as the header of the output DataFrame

      • index: string | string[]

        One or multiple keys to group by

      • Optional maintainOrder?: boolean

        Sort the grouped keys so that the output order is predictable.

      • Optional separator?: string

        Used as separator/delimiter in generated column names.

      • Optional sortColumns?: boolean

        Sort the transposed columns by name. Default is by order of discovery.

    Returns pl.DataFrame

    Example

      > const df = pl.DataFrame(
    ... {
    ... "foo": ["one", "one", "one", "two", "two", "two"],
    ... "bar": ["A", "B", "C", "A", "B", "C"],
    ... "baz": [1, 2, 3, 4, 5, 6],
    ... }
    ... );
    > df.pivot("baz", {index:"foo", columns:"bar"});
    shape: (2, 4)
    ┌─────┬─────┬─────┬─────┐
    │ foo ┆ A   ┆ B   ┆ C   │
    │ --- ┆ --- ┆ --- ┆ --- │
    │ str ┆ f64 ┆ f64 ┆ f64 │
    ╞═════╪═════╪═════╪═════╡
    │ one ┆ 1   ┆ 2   ┆ 3   │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ two ┆ 4   ┆ 5   ┆ 6   │
    └─────┴─────┴─────┴─────┘
  • Parameters

    • options: {
          aggregateFunc?: pl.Expr | "mean" | "min" | "max" | "first" | "last" | "count" | "median" | "sum";
          columns: string | string[];
          index: string | string[];
          maintainOrder?: boolean;
          separator?: string;
          sortColumns?: boolean;
          values: string | string[];
      }
      • Optional aggregateFunc?: pl.Expr | "mean" | "min" | "max" | "first" | "last" | "count" | "median" | "sum"
      • columns: string | string[]
      • index: string | string[]
      • Optional maintainOrder?: boolean
      • Optional separator?: string
      • Optional sortColumns?: boolean
      • values: string | string[]

    Returns pl.DataFrame

  • Aggregate the columns of this DataFrame to their quantile value.

    Parameters

    • quantile: number

    Returns pl.DataFrame

    Example

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.quantile(0.5);
    shape: (1, 3)
    ╭─────┬─────┬──────╮
    │ foo ┆ bar ┆ ham  │
    │ --- ┆ --- ┆ ---  │
    │ i64 ┆ i64 ┆ str  │
    ╞═════╪═════╪══════╡
    │ 2   ┆ 7   ┆ null │
    ╰─────┴─────┴──────╯
  • Rechunk the data in this DataFrame to a contiguous allocation.

    This will make sure all subsequent operations have optimal and predictable performance.

    Returns pl.DataFrame

  • Rename column names.


    Parameters

    • mapping: Record<string, string>

      Key value pairs that map from old name to new name.

    Returns pl.DataFrame

    Example

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.rename({"foo": "apple"});
    shape: (3, 3)
    ╭───────┬─────┬─────╮
    │ apple ┆ bar ┆ ham │
    │ ---   ┆ --- ┆ --- │
    │ i64   ┆ i64 ┆ str │
    ╞═══════╪═════╪═════╡
    │ 1     ┆ 6   ┆ "a" │
    ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ 2     ┆ 7   ┆ "b" │
    ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ 3     ┆ 8   ┆ "c" │
    ╰───────┴─────┴─────╯
  • Replace a column at an index location.


    Parameters

    • index: number

      Column index

    • newColumn: pl.Series

      New column to insert

    Returns void

    Example

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > const x = pl.Series("apple", [10, 20, 30]);
    > df.replaceAtIdx(0, x);
    shape: (3, 3)
    ╭───────┬─────┬─────╮
    │ apple ┆ bar ┆ ham │
    │ ---   ┆ --- ┆ --- │
    │ i64   ┆ i64 ┆ str │
    ╞═══════╪═════╪═════╡
    │ 10    ┆ 6   ┆ "a" │
    ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ 20    ┆ 7   ┆ "b" │
    ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ 30    ┆ 8   ┆ "c" │
    ╰───────┴─────┴─────╯
  • Get a row as Array

    Parameters

    • index: number

      row index

    Returns any[]

    Example

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.row(2)
    [3, 8, 'c']
  • Convert columnar data to rows as arrays

    Returns any[][]

  • Select columns from this DataFrame.


    Parameters

    • Rest ...columns: ExprOrString[]

      Column or columns to select.

    Returns pl.DataFrame

    Example

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.select('foo');
    shape: (3, 1)
    ┌─────┐
    │ foo │
    │ --- │
    │ i64 │
    ╞═════╡
    │ 1   │
    ├╌╌╌╌╌┤
    │ 2   │
    ├╌╌╌╌╌┤
    │ 3   │
    └─────┘
  • Serializes object to desired format via serde

    Parameters

    Returns Buffer

  • Shift the values by a given period and fill the parts that will be empty due to this operation with nulls.


    Parameters

    • periods: number

      Number of places to shift (may be negative).

    Returns pl.DataFrame

    Example

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.shift(1);
    shape: (3, 3)
    ┌──────┬──────┬──────┐
    │ foo  ┆ bar  ┆ ham  │
    │ ---  ┆ ---  ┆ ---  │
    │ i64  ┆ i64  ┆ str  │
    ╞══════╪══════╪══════╡
    │ null ┆ null ┆ null │
    ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ 1    ┆ 6    ┆ "a"  │
    ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ 2    ┆ 7    ┆ "b"  │
    └──────┴──────┴──────┘
    > df.shift(-1)
    shape: (3, 3)
    ┌──────┬──────┬──────┐
    │ foo  ┆ bar  ┆ ham  │
    │ ---  ┆ ---  ┆ ---  │
    │ i64  ┆ i64  ┆ str  │
    ╞══════╪══════╪══════╡
    │ 2    ┆ 7    ┆ "b"  │
    ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ 3    ┆ 8    ┆ "c"  │
    ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ null ┆ null ┆ null │
    └──────┴──────┴──────┘
  • Parameters

    • __namedParameters: {
          periods: number;
      }
      • periods: number

    Returns pl.DataFrame

  • Shift the values by a given period and fill the parts that will be empty due to this operation with the result of the fill_value expression.


    Parameters

    • n: number
    • fillValue: number

    Returns pl.DataFrame

    Example

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.shiftAndFill({n:1, fillValue:0});
    shape: (3, 3)
    ┌─────┬─────┬─────┐
    │ foo ┆ bar ┆ ham │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ str │
    ╞═════╪═════╪═════╡
    │ 0   ┆ 0   ┆ "0" │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ 1   ┆ 6   ┆ "a" │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ 2   ┆ 7   ┆ "b" │
    └─────┴─────┴─────┘
  • Parameters

    • __namedParameters: {
          fillValue: number;
          n: number;
      }
      • fillValue: number
      • n: number

    Returns pl.DataFrame

  • Shrink memory usage of this DataFrame to fit the exact capacity needed to hold the data.

    Returns pl.DataFrame

  • Parameters

    • inPlace: true

    Returns void

  • Parameters

    • __namedParameters: {
          inPlace: true;
      }
      • inPlace: true

    Returns void

  • Slice this DataFrame over the rows direction.


    Parameters

    • opts: {
          length: number;
          offset: number;
      }
      • length: number

        Length of the slice

      • offset: number

        Offset index.

    Returns pl.DataFrame

    Example

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6.0, 7.0, 8.0],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.slice(1, 2); // Alternatively `df.slice({offset:1, length:2})`
    shape: (2, 3)
    ┌─────┬─────┬─────┐
    │ foo ┆ bar ┆ ham │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ str │
    ╞═════╪═════╪═════╡
    │ 2   ┆ 7   ┆ "b" │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ 3   ┆ 8   ┆ "c" │
    └─────┴─────┴─────┘
  • Parameters

    • offset: number
    • length: number

    Returns pl.DataFrame

  • Sort the DataFrame by column.


    Parameters

    • by: ColumnsOrExpr

      By which columns to sort. Only accepts string.

    • Optional descending: boolean
    • Optional maintain_order: boolean

    Returns pl.DataFrame

  • Parameters

    • __namedParameters: {
          by: ColumnsOrExpr;
          descending?: boolean;
          maintain_order?: boolean;
      }
      • by: ColumnsOrExpr
      • Optional descending?: boolean
      • Optional maintain_order?: boolean

    Returns pl.DataFrame

  • Aggregate the columns of this DataFrame to their standard deviation value.


    Returns pl.DataFrame

    Example

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.std();
    shape: (1, 3)
    ╭─────┬─────┬──────╮
    │ foo ┆ bar ┆ ham  │
    │ --- ┆ --- ┆ ---  │
    │ f64 ┆ f64 ┆ str  │
    ╞═════╪═════╪══════╡
    │ 1   ┆ 1   ┆ null │
    ╰─────┴─────┴──────╯
  • Parameters

    • Optional length: number

    Returns pl.DataFrame

    Example

    > const df = pl.DataFrame({
    ... "letters": ["c", "c", "a", "c", "a", "b"],
    ... "nrs": [1, 2, 3, 4, 5, 6]
    ... });
    > console.log(df.toString());
    shape: (6, 2)
    ╭─────────┬─────╮
    │ letters ┆ nrs │
    │ ---     ┆ --- │
    │ str     ┆ i64 │
    ╞═════════╪═════╡
    │ "c"     ┆ 1   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "c"     ┆ 2   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "a"     ┆ 3   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "c"     ┆ 4   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "a"     ┆ 5   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "b"     ┆ 6   │
    ╰─────────┴─────╯
    > df.groupby("letters")
    ... .tail(2)
    ... .sort("letters")
    shape: (5, 2)
    ╭─────────┬─────╮
    │ letters ┆ nrs │
    │ ---     ┆ --- │
    │ str     ┆ i64 │
    ╞═════════╪═════╡
    │ "a"     ┆ 3   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "a"     ┆ 5   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "b"     ┆ 6   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "c"     ┆ 2   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "c"     ┆ 4   │
    ╰─────────┴─────╯
  • Converts dataframe object into a TabularDataResource

    Returns TabularDataResource

  • Converts dataframe object into HTML

    Returns string

  • Convert a DataFrame to a Series of type Struct

    Parameters

    • name: string

      Name for the struct Series

    Returns pl.Series

    Example

     > const df = pl.DataFrame({
    ... "a": [1, 2, 3, 4, 5],
    ... "b": ["one", "two", "three", "four", "five"],
    ... });
    > df.toStruct("nums");
    shape: (5,)
    Series: 'nums' [struct[2]{'a': i64, 'b': str}]
    [
    {1,"one"}
    {2,"two"}
    {3,"three"}
    {4,"four"}
    {5,"five"}
    ]
  • Transpose a DataFrame over the diagonal.

    Parameters

    • Optional options: {
          columnNames?: Iterable<string>;
          headerName?: string;
          includeHeader?: boolean;
      }
      • Optional columnNames?: Iterable<string>

        Optional generator/iterator that yields column names. Will be used to replace the columns in the DataFrame.

      • Optional headerName?: string

        If includeHeader is set, this determines the name of the column that will be inserted

      • Optional includeHeader?: boolean

        If set, the column names will be added as first column.

    Returns pl.DataFrame

    Remarks

    This is a very expensive operation. Perhaps you can do it differently.

    Example

    > const df = pl.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]});
    > df.transpose({includeHeader:true})
    shape: (2, 4)
    ┌────────┬──────────┬──────────┬──────────┐
    │ column ┆ column_0 ┆ column_1 ┆ column_2 │
    │ ---    ┆ ---      ┆ ---      ┆ ---      │
    │ str    ┆ i64      ┆ i64      ┆ i64      │
    ╞════════╪══════════╪══════════╪══════════╡
    │ a      ┆ 1        ┆ 2        ┆ 3        │
    ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
    │ b      ┆ 1        ┆ 2        ┆ 3        │
    └────────┴──────────┴──────────┴──────────┘
    // replace the auto generated column names with a list
    > df.transpose({includeHeader:false, columnNames:["a", "b", "c"]})
    shape: (2, 3)
    ┌─────┬─────┬─────┐
    │ a   ┆ b   ┆ c   │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ i64 │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 2   ┆ 3   │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ 1   ┆ 2   ┆ 3   │
    └─────┴─────┴─────┘

    // Include the header as a separate column
    > df.transpose({
    ... includeHeader:true,
    ... headerName:"foo",
    ... columnNames:["a", "b", "c"]
    ... })
    shape: (2, 4)
    ┌─────┬─────┬─────┬─────┐
    │ foo ┆ a   ┆ b   ┆ c   │
    │ --- ┆ --- ┆ --- ┆ --- │
    │ str ┆ i64 ┆ i64 ┆ i64 │
    ╞═════╪═════╪═════╪═════╡
    │ a   ┆ 1   ┆ 2   ┆ 3   │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ b   ┆ 1   ┆ 2   ┆ 3   │
    └─────┴─────┴─────┴─────┘

    // Replace the auto generated column with column names from a generator function
    > function *namesGenerator() {
    ... const baseName = "my_column_";
    ... let count = 0;
    ... while (true) {
    ... yield `${baseName}${count}`;
    ... count++;
    ... }
    ... }
    > df.transpose({includeHeader:false, columnNames:namesGenerator()})
    shape: (2, 3)
    ┌─────────────┬─────────────┬─────────────┐
    │ my_column_0 ┆ my_column_1 ┆ my_column_2 │
    │ ---         ┆ ---         ┆ ---         │
    │ i64         ┆ i64         ┆ i64         │
    ╞═════════════╪═════════════╪═════════════╡
    │ 1           ┆ 2           ┆ 3           │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ 1           ┆ 2           ┆ 3           │
    └─────────────┴─────────────┴─────────────┘
  • Drop duplicate rows from this DataFrame. Note that this fails if there is a column of type List in the DataFrame.

    Parameters

    • Optional maintainOrder: boolean
    • Optional subset: ColumnSelection

      subset to drop duplicates for

    • Optional keep: "first" | "last"

      "first" | "last"

    Returns pl.DataFrame

  • Parameters

    • opts: {
          keep?: "first" | "last";
          maintainOrder?: boolean;
          subset?: ColumnSelection;
      }
      • Optional keep?: "first" | "last"
      • Optional maintainOrder?: boolean
      • Optional subset?: ColumnSelection

    Returns pl.DataFrame

  • Decompose a struct into its fields. The fields will be inserted into the DataFrame at the location of the struct column.

    Parameters

    • names: string | string[]

      Names of the struct columns that will be decomposed into their fields

    Returns pl.DataFrame

    Example

    > const df = pl.DataFrame({
    ... "int": [1, 2],
    ... "str": ["a", "b"],
    ... "bool": [true, null],
    ... "list": [[1, 2], [3]],
    ... })
    ... .toStruct("my_struct")
    ... .toFrame();
    > df
    shape: (2, 1)
    ┌─────────────────────────────┐
    │ my_struct                   │
    │ ---                         │
    │ struct[4]{'int',...,'list'} │
    ╞═════════════════════════════╡
    │ {1,"a",true,[1, 2]}         │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ {2,"b",null,[3]}            │
    └─────────────────────────────┘
    > df.unnest("my_struct")
    shape: (2, 4)
    ┌─────┬─────┬──────┬────────────┐
    │ int ┆ str ┆ bool ┆ list       │
    │ --- ┆ --- ┆ ---  ┆ ---        │
    │ i64 ┆ str ┆ bool ┆ list [i64] │
    ╞═════╪═════╪══════╪════════════╡
    │ 1   ┆ a   ┆ true ┆ [1, 2]     │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ 2   ┆ b   ┆ null ┆ [3]        │
    └─────┴─────┴──────┴────────────┘
  • Upsample a DataFrame at a regular frequency.

    The every and offset arguments are created with the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 calendar day)
    • 1w (1 calendar week)
    • 1mo (1 calendar month)
    • 1q (1 calendar quarter)
    • 1y (1 calendar year)
    • 1i (1 index count)

    Or combine them:

    • "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

    By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year".

    Parameters

    • timeColumn: string

      Time column will be used to determine a date range. Note that this column has to be sorted for the output to make sense.

    • every: string

      Interval will start 'every' duration.

    • Optional offset: string

      Change the start of the date range by this offset.

    • Optional by: string | string[]

      First group by these columns and then upsample for every group.

    • Optional maintainOrder: boolean

      Keep the ordering predictable. This is slower.

      Returns

      DataFrame Result will be sorted by timeColumn (but note that if by columns are passed, it will only be sorted within each by group).

      Examples

      Upsample a DataFrame by a certain interval.

      > const df = pl.DataFrame({
      ... "date": [
      ... new Date(2024, 1, 1),
      ... new Date(2024, 3, 1),
      ... new Date(2024, 4, 1),
      ... new Date(2024, 5, 1),
      ... ],
      ... "groups": ["A", "B", "A", "B"],
      ... "values": [0, 1, 2, 3],
      ... })
      ... .withColumn(pl.col("date").cast(pl.Date).alias("date"))
      ... .sort("date");

      > df.upsample({timeColumn: "date", every: "1mo", offset: "0ns", by: "groups", maintainOrder: true})
      ... .select(pl.col("*").forwardFill());
      shape: (7, 3)
      ┌────────────┬────────┬────────┐
      │ date       ┆ groups ┆ values │
      │ ---        ┆ ---    ┆ ---    │
      │ date       ┆ str    ┆ f64    │
      ╞════════════╪════════╪════════╡
      │ 2024-02-01 ┆ A      ┆ 0.0    │
      │ 2024-03-01 ┆ A      ┆ 0.0    │
      │ 2024-04-01 ┆ A      ┆ 0.0    │
      │ 2024-05-01 ┆ A      ┆ 2.0    │
      │ 2024-04-01 ┆ B      ┆ 1.0    │
      │ 2024-05-01 ┆ B      ┆ 1.0    │
      │ 2024-06-01 ┆ B      ┆ 3.0    │
      └────────────┴────────┴────────┘

    Returns pl.DataFrame

  • Parameters

    • opts: {
          by?: string | string[];
          every: string;
          maintainOrder?: boolean;
          offset?: string;
          timeColumn: string;
      }
      • Optional by?: string | string[]
      • every: string
      • Optional maintainOrder?: boolean
      • Optional offset?: string
      • timeColumn: string

    Returns pl.DataFrame

  • Aggregate the columns of this DataFrame to their variance value.

    Returns pl.DataFrame

    Example

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.var()
    shape: (1, 3)
    ╭─────┬─────┬──────╮
    │ foo ┆ bar ┆ ham  │
    │ --- ┆ --- ┆ ---  │
    │ f64 ┆ f64 ┆ str  │
    ╞═════╪═════╪══════╡
    │ 1   ┆ 1   ┆ null │
    ╰─────┴─────┴──────╯
  • Grow this DataFrame vertically by stacking a DataFrame to it.

    Parameters

    Returns pl.DataFrame

    Example

    > const df1 = pl.DataFrame({
    ... "foo": [1, 2],
    ... "bar": [6, 7],
    ... "ham": ['a', 'b']
    ... });
    > const df2 = pl.DataFrame({
    ... "foo": [3, 4],
    ... "bar": [8 , 9],
    ... "ham": ['c', 'd']
    ... });
    > df1.vstack(df2);
    shape: (4, 3)
    ╭─────┬─────┬─────╮
    │ foo ┆ bar ┆ ham │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ str │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 6   ┆ "a" │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ 2   ┆ 7   ┆ "b" │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ 3   ┆ 8   ┆ "c" │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ 4   ┆ 9   ┆ "d" │
    ╰─────┴─────┴─────╯