Interface DataFrame<T>

A DataFrame is a two-dimensional data structure that represents data as a table with rows and columns.

data: Object, Array, or Series — Two-dimensional data in various forms. An object must contain Arrays; an Array may contain Series or other Arrays.

columns: Array of strings, default undefined — Column labels to use for the resulting DataFrame. If specified, overrides any labels already present in the data. Must match data dimensions.

orient: 'col' | 'row', default undefined — Whether to interpret two-dimensional data as columns or as rows. If undefined, the orientation is inferred by matching the columns and data dimensions. If this does not yield conclusive results, column orientation is used.

Constructing a DataFrame from an object:

> const data = {'a': [1n, 2n], 'b': [3, 4]};
> const df = pl.DataFrame(data);
> console.log(df.toString());
shape: (2, 2)
╭─────┬─────╮
│ a   ┆ b   │
│ --- ┆ --- │
│ u64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 3   │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 2   ┆ 4   │
╰─────┴─────╯

Notice that the dtypes are automatically inferred, here as polars UInt64 and Int64:

> df.dtypes
['UInt64', 'Int64']

In order to specify dtypes for your columns, initialize the DataFrame with a list of Series instead:

> const series = [pl.Series('col1', [1, 2], pl.Float32), pl.Series('col2', [3, 4], pl.Int64)];
> const df2 = pl.DataFrame(series);
> console.log(df2.toString());
shape: (2, 2)
╭──────┬──────╮
│ col1 ┆ col2 │
│ ---  ┆ ---  │
│ f32  ┆ i64  │
╞══════╪══════╡
│ 1    ┆ 3    │
├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 2    ┆ 4    │
╰──────┴──────╯

Constructing a DataFrame from a list of lists, row orientation inferred:

> const data = [[1, 2, 3], [4, 5, 6]];
> const df4 = pl.DataFrame(data, ['a', 'b', 'c']);
> console.log(df4.toString());
shape: (2, 3)
╭─────┬─────┬─────╮
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ 1   ┆ 2   ┆ 3   │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ 4   ┆ 5   ┆ 6   │
╰─────┴─────┴─────╯
interface DataFrame<T> {
    [inspect](): string;
    [iterator](): Generator<any, void, any>;
    add(other: any): pl.DataFrame<T>;
    clone(): pl.DataFrame<T>;
    columns: string[];
    describe(): pl.DataFrame<any>;
    distinct(maintainOrder?: any, subset?: any, keep?: any): pl.DataFrame<any>;
    div(other: any): pl.DataFrame<T>;
    divideBy(other: any): pl.DataFrame<T>;
    drop<U>(name: U): pl.DataFrame<{
        [K in string | number | symbol]: Omit<T, U>[K]
    }>;
    drop<const U>(names: U): pl.DataFrame<{
        [K in string | number | symbol]: Omit<T, U[number]>[K]
    }>;
    drop<U, const V>(name: U, ...names: V): pl.DataFrame<{
        [K in string | number | symbol]: Omit<T, U | V[number]>[K]
    }>;
    dropNulls(column: keyof T): pl.DataFrame<T>;
    dropNulls(columns: (keyof T)[]): pl.DataFrame<T>;
    dropNulls(...columns: (keyof T)[]): pl.DataFrame<T>;
    dtypes: DataType[];
    explode(column: ExprOrString): pl.DataFrame<any>;
    explode(columns: ExprOrString[]): pl.DataFrame<any>;
    explode(column: ExprOrString, ...columns: ExprOrString[]): pl.DataFrame<any>;
    extend(other: pl.DataFrame<T>): pl.DataFrame<T>;
    fillNull(strategy: FillNullStrategy): pl.DataFrame<T>;
    filter(predicate: any): pl.DataFrame<T>;
    findIdxByName(name: keyof T): number;
    fold(operation: ((s1: pl.Series<any, string>, s2: pl.Series<any, string>) => pl.Series<any, string>)): pl.Series<any, string>;
    frameEqual(other: pl.DataFrame<any>): boolean;
    frameEqual(other: pl.DataFrame<any>, nullEqual: boolean): boolean;
    getColumn<U>(name: U): T[U];
    getColumn(name: string): pl.Series<any, string>;
    getColumns(): T[keyof T][];
    groupBy(...by: ColumnSelection[]): GroupBy;
    groupByDynamic(options: {
        by?: ColumnsOrExpr;
        check_sorted?: boolean;
        closed?:
            | "none"
            | "left"
            | "right"
            | "both";
        every: string;
        includeBoundaries?: boolean;
        indexColumn: string;
        offset?: string;
        period?: string;
        start_by: StartBy;
    }): RollingGroupBy;
    groupByRolling(opts: {
        by?: ColumnsOrExpr;
        check_sorted?: boolean;
        closed?:
            | "none"
            | "left"
            | "right"
            | "both";
        indexColumn: ColumnsOrExpr;
        offset?: string;
        period: string;
    }): RollingGroupBy;
    hashRows(k0?: number, k1?: number, k2?: number, k3?: number): pl.Series<any, string>;
    hashRows(options: {
        k0?: number;
        k1?: number;
        k2?: number;
        k3?: number;
    }): pl.Series<any, string>;
    head(length?: number): pl.DataFrame<T>;
    height: number;
    hstack<U>(columns: pl.DataFrame<U>): pl.DataFrame<{
        [K in string | number | symbol]: (T & U)[K]
    }>;
    hstack<U>(columns: U): pl.DataFrame<{
        [K in string | number | symbol]: (T & {
            [K in pl.Series<any, string> as K["name"]]: K
        })[K]
    }>;
    hstack(columns: pl.DataFrame<any> | pl.Series<any, string>[]): pl.DataFrame<any>;
    hstack(columns: pl.DataFrame<any> | pl.Series<any, string>[], inPlace?: boolean): void;
    insertAtIdx(index: number, series: pl.Series<any, string>): void;
    interpolate(): pl.DataFrame<T>;
    isDuplicated(): pl.Series<any, string>;
    isEmpty(): boolean;
    isUnique(): pl.Series<any, string>;
    join(other: pl.DataFrame<any>, options: {
        on: ValueOrArray<string>;
    } & Omit<JoinOptions, "leftOn" | "rightOn">): pl.DataFrame<any>;
    join(other: pl.DataFrame<any>, options: {
        leftOn: ValueOrArray<string>;
        rightOn: ValueOrArray<string>;
    } & Omit<JoinOptions, "on">): pl.DataFrame<any>;
    join(other: pl.DataFrame<any>, options: {
        how: "cross";
        suffix?: string;
    }): pl.DataFrame<any>;
    joinAsof(other: pl.DataFrame<any>, options: {
        allowParallel?: boolean;
        by?: string | string[];
        byLeft?: string | string[];
        byRight?: string | string[];
        forceParallel?: boolean;
        leftOn?: string;
        on?: string;
        rightOn?: string;
        strategy?: "backward" | "forward" | "nearest";
        suffix?: string;
        tolerance?: string | number;
    }): pl.DataFrame<any>;
    lazy(): LazyDataFrame;
    limit(length?: number): pl.DataFrame<T>;
    map<ReturnT>(func: ((row: any[], i: number, arr: any[][]) => ReturnT)): ReturnT[];
    max(): pl.DataFrame<T>;
    max(axis: 0): pl.DataFrame<T>;
    max(axis: 1): pl.Series<any, string>;
    mean(): pl.DataFrame<T>;
    mean(axis: 0): pl.DataFrame<T>;
    mean(axis: 1): pl.Series<any, string>;
    mean(axis: 1, nullStrategy?: "ignore" | "propagate"): pl.Series<any, string>;
    median(): pl.DataFrame<T>;
    melt(idVars: ColumnSelection, valueVars: ColumnSelection): pl.DataFrame<any>;
    min(): pl.DataFrame<T>;
    min(axis: 0): pl.DataFrame<T>;
    min(axis: 1): pl.Series<any, string>;
    minus(other: any): pl.DataFrame<T>;
    modulo(other: any): pl.DataFrame<T>;
    mul(other: any): pl.DataFrame<T>;
    multiplyBy(other: any): pl.DataFrame<T>;
    nChunks(): number;
    nullCount(): pl.DataFrame<{
        [K in string | number | symbol]: pl.Series<Float64, K & string>
    }>;
    partitionBy(cols: string | string[], stable?: boolean, includeKey?: boolean): pl.DataFrame<T>[];
    partitionBy<T>(cols: string | string[], stable: boolean, includeKey: boolean, mapFn: ((df: pl.DataFrame<any>) => T)): T[];
    pivot(values: string | string[], options: {
        aggregateFunc?:
            | pl.Expr
            | "mean"
            | "min"
            | "max"
            | "first"
            | "last"
            | "count"
            | "median"
            | "sum";
        index: string | string[];
        maintainOrder?: boolean;
        on: string | string[];
        separator?: string;
        sortColumns?: boolean;
    }): pl.DataFrame<any>;
    pivot(options: {
        aggregateFunc?:
            | pl.Expr
            | "mean"
            | "min"
            | "max"
            | "first"
            | "last"
            | "count"
            | "median"
            | "sum";
        index: string | string[];
        maintainOrder?: boolean;
        on: string | string[];
        separator?: string;
        sortColumns?: boolean;
        values: string | string[];
    }): pl.DataFrame<any>;
    plus(other: any): pl.DataFrame<T>;
    quantile(quantile: number): pl.DataFrame<T>;
    rechunk(): pl.DataFrame<T>;
    rem(other: any): pl.DataFrame<T>;
    rename<const U>(mapping: U): pl.DataFrame<{
        [K in string | number | symbol as U[K] extends string
            ? any[any]
            : K]: T[K]
    }>;
    rename(mapping: Record<string, string>): pl.DataFrame<any>;
    replaceAtIdx(index: number, newColumn: pl.Series<any, string>): void;
    row(index: number): any[];
    rows(): any[][];
    sample(opts?: {
        n: number;
        seed?: number | bigint;
        withReplacement?: boolean;
    }): pl.DataFrame<T>;
    sample(opts?: {
        frac: number;
        seed?: number | bigint;
        withReplacement?: boolean;
    }): pl.DataFrame<T>;
    sample(n?: number, frac?: number, withReplacement?: boolean, seed?: number | bigint): pl.DataFrame<T>;
    get schema(): {
        [K in string | number | symbol]: T[K]["dtype"]
    };
    select<U>(...columns: U[]): pl.DataFrame<{
        [P in string | number | symbol]: T[P]
    }>;
    select(...columns: ExprOrString[]): pl.DataFrame<T>;
    serialize(format: "json" | "bincode"): Buffer;
    shape: {
        height: number;
        width: number;
    };
    shift(periods: number): pl.DataFrame<T>;
    shift(__namedParameters: {
        periods: number;
    }): pl.DataFrame<T>;
    shiftAndFill(n: number, fillValue: number): pl.DataFrame<T>;
    shiftAndFill(__namedParameters: {
        fillValue: number;
        n: number;
    }): pl.DataFrame<T>;
    shrinkToFit(): pl.DataFrame<T>;
    shrinkToFit(inPlace: true): void;
    shrinkToFit(__namedParameters: {
        inPlace: true;
    }): void;
    slice(opts: {
        length: number;
        offset: number;
    }): pl.DataFrame<T>;
    slice(offset: number, length: number): pl.DataFrame<T>;
    sort(by: ColumnsOrExpr, descending?: boolean, nullsLast?: boolean, maintainOrder?: boolean): pl.DataFrame<T>;
    sort(__namedParameters: {
        by: ColumnsOrExpr;
        maintainOrder?: boolean;
        nullsLast?: boolean;
        reverse?: boolean;
    }): pl.DataFrame<T>;
    sort(__namedParameters: {
        by: ColumnsOrExpr;
        descending?: boolean;
        maintainOrder?: boolean;
        nullsLast?: boolean;
    }): pl.DataFrame<T>;
    std(): pl.DataFrame<T>;
    sub(other: any): pl.DataFrame<T>;
    sum(): pl.DataFrame<T>;
    sum(axis: 0): pl.DataFrame<T>;
    sum(axis: 1): pl.Series<any, string>;
    sum(axis: 1, nullStrategy?: "ignore" | "propagate"): pl.Series<any, string>;
    tail(length?: number): pl.DataFrame<T>;
    toCSV(destOrOptions?: any, options?: any): any;
    toDataResource(): TabularDataResource;
    toHTML(): string;
    toIPC(destination?: any, options?: any): any;
    toJSON(): string;
    toObject(): {
        [K in string | number | symbol]: DTypeToJs<T[K]["dtype"]>[]
    };
    toParquet(destination?: any, options?: any): any;
    toRecords(): {
        [K in string | number | symbol]: null | DTypeToJs<T[K]["dtype"]>
    }[];
    toSeries(index?: number): T[keyof T];
    toString(): string;
    toStruct(name: string): pl.Series<any, string>;
    transpose(options?: {
        columnNames?: Iterable<string, any, any>;
        headerName?: string;
        includeHeader?: boolean;
    }): pl.DataFrame<any>;
    unique(maintainOrder?: boolean, subset?: ColumnSelection, keep?: "first" | "last"): pl.DataFrame<T>;
    unique(opts: {
        keep?: "first" | "last";
        maintainOrder?: boolean;
        subset?: ColumnSelection;
    }): pl.DataFrame<T>;
    unnest(names: string | string[]): pl.DataFrame<any>;
    unpivot(idVars: ColumnSelection, valueVars: ColumnSelection): pl.DataFrame<any>;
    upsample(timeColumn: string, every: string, by?: string | string[], maintainOrder?: boolean): pl.DataFrame<T>;
    upsample(opts: {
        by?: string | string[];
        every: string;
        maintainOrder?: boolean;
        timeColumn: string;
    }): pl.DataFrame<T>;
    var(): pl.DataFrame<T>;
    vstack(df: pl.DataFrame<T>): pl.DataFrame<T>;
    where(predicate: any): pl.DataFrame<T>;
    width: number;
    withColumn<SeriesTypeT, SeriesNameT>(column: pl.Series<SeriesTypeT, SeriesNameT>): pl.DataFrame<{
        [K in string | number | symbol]: (T & {
            [K in string]: pl.Series<SeriesTypeT, SeriesNameT>
        })[K]
    }>;
    withColumn(column: pl.Expr | pl.Series<any, string>): pl.DataFrame<any>;
    withColumnRenamed<Existing, New>(existingName: Existing, replacement: New): pl.DataFrame<{
        [K in string | number | symbol as K extends Existing
            ? New
            : K]: T[K]
    }>;
    withColumnRenamed(existing: string, replacement: string): pl.DataFrame<any>;
    withColumnRenamed<Existing, New>(opts: {
        existingName: Existing;
        replacement: New;
    }): pl.DataFrame<{
        [K in string | number | symbol as K extends Existing
            ? New
            : K]: T[K]
    }>;
    withColumnRenamed(opts: {
        existing: string;
        replacement: string;
    }): pl.DataFrame<any>;
    withColumns(...columns: (pl.Expr | pl.Series<any, string>)[]): pl.DataFrame<any>;
    withRowCount(name?: string): pl.DataFrame<any>;
    writeAvro(options?: WriteAvroOptions): Buffer;
    writeAvro(destination: string | Writable, options?: WriteAvroOptions): void;
    writeCSV(): Buffer;
    writeCSV(options: WriteCsvOptions): Buffer;
    writeCSV(dest: string | Writable, options?: WriteCsvOptions): void;
    writeIPC(options?: WriteIPCOptions): Buffer;
    writeIPC(destination: string | Writable, options?: WriteIPCOptions): void;
    writeIPCStream(options?: WriteIPCOptions): Buffer;
    writeIPCStream(destination: string | Writable, options?: WriteIPCOptions): void;
    writeJSON(options?: {
        format: "json" | "lines";
    }): Buffer;
    writeJSON(destination: string | Writable, options?: {
        format: "json" | "lines";
    }): void;
    writeParquet(options?: WriteParquetOptions): Buffer;
    writeParquet(destination: string | Writable, options?: WriteParquetOptions): void;
}

Type Parameters

  • T extends Record<string, pl.Series> = any

Hierarchy

Properties

dtypes: DataType[]
height: number
shape: {
    height: number;
    width: number;
}
width: number

Accessors

  • get schema(): {
        [K in string | number | symbol]: T[K]["dtype"]
    }
  • Returns {
        [K in string | number | symbol]: T[K]["dtype"]
    }

    > const df: pl.DataFrame = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    // df: pl.DataFrame<{
    // foo: pl.Series<Float64, "foo">;
    // bar: pl.Series<Float64, "bar">;
    // ham: pl.Series<Utf8, "ham">;
    // }>
    > df.schema
    // {
    // foo: Float64;
    // bar: Float64;
    // ham: Utf8;
    // }

Methods - Arithmetic

Methods - Deprecated

  • Parameters

    • OptionaldestOrOptions: any
    • Optionaloptions: any

    Returns any

    Deprecated since 0.4.0; use writeCSV instead.

Methods - IO

  • compat with JSON.stringify

    Returns string

  • Converts dataframe object into column oriented javascript objects

    Returns {
        [K in string | number | symbol]: DTypeToJs<T[K]["dtype"]>[]
    }

    > df.toObject()
    {
    "foo": [1,2,3],
    "bar": ["a", "b", "c"]
    }
  • Converts dataframe object into row oriented javascript objects

    Returns {
        [K in string | number | symbol]: null | DTypeToJs<T[K]["dtype"]>
    }[]

    > df.toRecords()
    [
    {"foo":1.0,"bar":"a"},
    {"foo":2.0,"bar":"b"},
    {"foo":3.0,"bar":"c"}
    ]
  • Write the DataFrame to disk in Avro format.

    Parameters

    Returns Buffer

  • Parameters

    Returns void

  • Write DataFrame to comma-separated values file (csv).

    If no destination is specified, it returns a Buffer containing the CSV contents.


    Returns Buffer

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.writeCSV();
    foo,bar,ham
    1,6,a
    2,7,b
    3,8,c

    // using a file path
    > df.head(1).writeCSV("./foo.csv")
    // foo.csv
    foo,bar,ham
    1,6,a

    // using a write stream
    > const writeStream = new Stream.Writable({
    ... write(chunk, encoding, callback) {
    ... console.log("writeStream: %O", chunk.toString());
    ... callback(null);
    ... }
    ... });
    > df.head(1).writeCSV(writeStream, {includeHeader: false});
    writeStream: '1,6,a'
  • Parameters

    Returns Buffer

  • Parameters

    Returns void

  • Write to Arrow IPC feather file, either to a file path or to a write stream.

    Parameters

    Returns Buffer

  • Parameters

    Returns void

  • Write to Arrow IPC stream file, either to a file path or to a write stream.

    Parameters

    Returns Buffer

  • Parameters

    Returns void

  • Write Dataframe to JSON string, file, or write stream

    Parameters

    • Optionaloptions: {
          format: "json" | "lines";
      }
      • format: "json" | "lines"

        json | lines

    Returns Buffer

    > const df = pl.DataFrame({
    ... foo: [1,2,3],
    ... bar: ['a','b','c']
    ... })

    > df.writeJSON({format:"json"})
    `[ {"foo":1.0,"bar":"a"}, {"foo":2.0,"bar":"b"}, {"foo":3.0,"bar":"c"}]`

    > df.writeJSON({format:"lines"})
    `{"foo":1.0,"bar":"a"}
    {"foo":2.0,"bar":"b"}
    {"foo":3.0,"bar":"c"}`

    // writing to a file
    > df.writeJSON("/path/to/file.json", {format:'lines'})
  • Parameters

    • destination: string | Writable
    • Optionaloptions: {
          format: "json" | "lines";
      }
      • format: "json" | "lines"

    Returns void

  • Write the DataFrame to disk in Parquet format.

    Parameters

    Returns Buffer

  • Parameters

    Returns void

Methods - IO Deprecated

  • Parameters

    • Optionaldestination: any
    • Optionaloptions: any

    Returns any

    Deprecated since 0.4.0; use writeIPC instead.

  • Parameters

    • Optionaldestination: any
    • Optionaloptions: any

    Returns any

    Deprecated since 0.4.0; use writeParquet instead.

Methods - Math

  • Sample from this DataFrame by setting either n or frac.

    Parameters

    • Optionalopts: {
          n: number;
          seed?: number | bigint;
          withReplacement?: boolean;
      }
      • n: number
      • Optionalseed?: number | bigint
      • OptionalwithReplacement?: boolean

    Returns pl.DataFrame<T>

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.sample({n: 2})
    shape: (2, 3)
    ╭─────┬─────┬─────╮
    │ foo ┆ bar ┆ ham │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ str │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 6   ┆ "a" │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ 3   ┆ 8   ┆ "c" │
    ╰─────┴─────┴─────╯
  • Parameters

    • Optionalopts: {
          frac: number;
          seed?: number | bigint;
          withReplacement?: boolean;
      }
      • frac: number
      • Optionalseed?: number | bigint
      • OptionalwithReplacement?: boolean

    Returns pl.DataFrame<T>

  • Parameters

    • Optionaln: number
    • Optionalfrac: number
    • OptionalwithReplacement: boolean
    • Optionalseed: number | bigint

    Returns pl.DataFrame<T>

Methods - Other

  • Returns Generator<any, void, any>

  • Summary statistics for a DataFrame.

    Only summarizes numeric datatypes at the moment and returns nulls for non numeric datatypes.


    Example

    >  const df = pl.DataFrame({
    ... 'a': [1.0, 2.8, 3.0],
    ... 'b': [4, 5, 6],
    ... "c": [true, false, true]
    ... });
    > df.describe()
    shape: (5, 4)
    ╭──────────┬───────┬─────┬──────╮
    │ describe ┆ a     ┆ b   ┆ c    │
    │ ---      ┆ ---   ┆ --- ┆ ---  │
    │ str      ┆ f64   ┆ f64 ┆ f64  │
    ╞══════════╪═══════╪═════╪══════╡
    │ "mean"   ┆ 2.267 ┆ 5   ┆ null │
    ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ "std"    ┆ 1.102 ┆ 1   ┆ null │
    ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ "min"    ┆ 1     ┆ 4   ┆ 0.0  │
    ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ "max"    ┆ 3     ┆ 6   ┆ 1    │
    ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ "median" ┆ 2.8   ┆ 5   ┆ null │
    ╰──────────┴───────┴─────┴──────╯

    Returns pl.DataFrame<any>

  • Remove column from DataFrame and return as new.


    Type Parameters

    • U extends string

    Parameters

    • name: U

    Returns pl.DataFrame<{
        [K in string | number | symbol]: Omit<T, U>[K]
    }>

    >  const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6.0, 7.0, 8.0],
    ... "ham": ['a', 'b', 'c'],
    ... "apple": ['a', 'b', 'c']
    ... });
    // df: pl.DataFrame<{
    // foo: pl.Series<Float64, "foo">;
    // bar: pl.Series<Float64, "bar">;
    // ham: pl.Series<Utf8, "ham">;
    // apple: pl.Series<Utf8, "apple">;
    // }>
    > const df2 = df.drop(['ham', 'apple']);
    // df2: pl.DataFrame<{
    // foo: pl.Series<Float64, "foo">;
    // bar: pl.Series<Float64, "bar">;
    // }>
    > console.log(df2.toString());
    shape: (3, 2)
    ╭─────┬─────╮
    │ foo ┆ bar │
    │ --- ┆ --- │
    │ i64 ┆ f64 │
    ╞═════╪═════╡
    │ 1   ┆ 6   │
    ├╌╌╌╌╌┼╌╌╌╌╌┤
    │ 2   ┆ 7   │
    ├╌╌╌╌╌┼╌╌╌╌╌┤
    │ 3   ┆ 8   │
    ╰─────┴─────╯
  • Type Parameters

    • const U extends string[]

    Parameters

    • names: U

    Returns pl.DataFrame<{
        [K in string | number | symbol]: Omit<T, U[number]>[K]
    }>

  • Type Parameters

    • U extends string
    • const V extends string[]

    Parameters

    • name: U
    • Rest...names: V

    Returns pl.DataFrame<{
        [K in string | number | symbol]: Omit<T, U | V[number]>[K]
    }>

  • Return a new DataFrame where the null values are dropped.

    This method only drops nulls row-wise if any single value of the row is null.


    Parameters

    • column: keyof T

    Returns pl.DataFrame<T>

    >  const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, null, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > console.log(df.dropNulls().toString());
    shape: (2, 3)
    ┌─────┬─────┬─────┐
    │ foo ┆ bar ┆ ham │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ str │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 6   ┆ "a" │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ 3   ┆ 8   ┆ "c" │
    └─────┴─────┴─────┘
  • Parameters

    • columns: (keyof T)[]

    Returns pl.DataFrame<T>

  • Parameters

    • Rest...columns: (keyof T)[]

    Returns pl.DataFrame<T>

  • Explode DataFrame to long format by exploding a column with Lists.


    Parameters

    • column: ExprOrString

    Returns pl.DataFrame<any>

    >  const df = pl.DataFrame({
    ... "letters": ["c", "c", "a", "c", "a", "b"],
    ... "nrs": [[1, 2], [1, 3], [4, 3], [5, 5, 5], [6], [2, 1, 2]]
    ... });
    > console.log(df.toString());
    shape: (6, 2)
    ╭─────────┬────────────╮
    │ letters ┆ nrs        │
    │ ---     ┆ ---        │
    │ str     ┆ list [i64] │
    ╞═════════╪════════════╡
    │ "c"     ┆ [1, 2]     │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ "c"     ┆ [1, 3]     │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ "a"     ┆ [4, 3]     │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ "c"     ┆ [5, 5, 5]  │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ "a"     ┆ [6]        │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ "b"     ┆ [2, 1, 2]  │
    ╰─────────┴────────────╯
    > df.explode("nrs")
    shape: (13, 2)
    ╭─────────┬─────╮
    │ letters ┆ nrs │
    │ ---     ┆ --- │
    │ str     ┆ i64 │
    ╞═════════╪═════╡
    │ "c"     ┆ 1   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "c"     ┆ 2   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "c"     ┆ 1   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "c"     ┆ 3   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ ...     ┆ ... │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "c"     ┆ 5   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "a"     ┆ 6   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "b"     ┆ 2   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "b"     ┆ 1   │
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ "b"     ┆ 2   │
    ╰─────────┴─────╯
  • Parameters

    • columns: ExprOrString[]

    Returns pl.DataFrame<any>

  • Parameters

    • column: ExprOrString
    • Rest...columns: ExprOrString[]

    Returns pl.DataFrame<any>

  • Extend the memory backed by this DataFrame with the values from other.


    Different from vstack, which adds the chunks from other to the chunks of this DataFrame, extend appends the data from other to the underlying memory locations and thus may cause a reallocation.

    If this does not cause a reallocation, the resulting data structure will not have any extra chunks and thus will yield faster queries.

    Prefer extend over vstack when you want to do a query after a single append. For instance during online operations where you add n rows and rerun a query.

    Prefer vstack over extend when you want to append many times before doing a query. For instance when you read in multiple files and want to store them in a single DataFrame. In the latter case, finish the sequence of vstack operations with a rechunk.

    Parameters

    Returns pl.DataFrame<T>

  • Fill null/missing values by a filling strategy

    Parameters

    • strategy: FillNullStrategy

      One of:

      • "backward"
      • "forward"
      • "mean"
      • "min"
      • "max"
      • "zero"
      • "one"

    Returns pl.DataFrame<T>

    DataFrame with null values replaced according to the filling strategy.

  • Filter the rows in the DataFrame based on a predicate expression.


    Parameters

    • predicate: any

      Expression that evaluates to a boolean Series.

    Returns pl.DataFrame<T>

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    // Filter on one condition
    > df.filter(pl.col("foo").lt(3))
    shape: (2, 3)
    ┌─────┬─────┬─────┐
    │ foo ┆ bar ┆ ham │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ str │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 6   ┆ a   │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ 2   ┆ 7   ┆ b   │
    └─────┴─────┴─────┘
    // Filter on multiple conditions
    > df.filter(
    ... pl.col("foo").lt(3)
    ... .and(pl.col("ham").eq(pl.lit("a")))
    ... )
    shape: (1, 3)
    ┌─────┬─────┬─────┐
    │ foo ┆ bar ┆ ham │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ str │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 6   ┆ a   │
    └─────┴─────┴─────┘
  • Find the index of a column by name.


    Parameters

    • name: keyof T

      Name of the column to find.

    Returns number

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.findIdxByName("ham")
    2
  • Apply a horizontal reduction on a DataFrame.

    This can be used to effectively determine aggregations on a row level, and can be applied to any DataType that can be supercasted (casted to a similar parent type).

    An example of the supercast rules when applying an arithmetic operation on two DataTypes are for instance:

    • Int8 + Utf8 = Utf8
    • Float32 + Int64 = Float32
    • Float32 + Float64 = Float64

    Parameters

    Returns pl.Series<any, string>

    Series

    > // A horizontal sum operation
    > let df = pl.DataFrame({
    ... "a": [2, 1, 3],
    ... "b": [1, 2, 3],
    ... "c": [1.0, 2.0, 3.0]
    ... });
    > df.fold((s1, s2) => s1.plus(s2))
    Series: 'a' [f64]
    [
    4
    5
    9
    ]
    > // A horizontal minimum operation
    > df = pl.DataFrame({
    ... "a": [2, 1, 3],
    ... "b": [1, 2, 3],
    ... "c": [1.0, 2.0, 3.0]
    ... });
    > df.fold((s1, s2) => s1.zipWith(s1.lt(s2), s2))
    Series: 'a' [f64]
    [
    1
    1
    3
    ]
    > // A horizontal string concatenation
    > df = pl.DataFrame({
    ... "a": ["foo", "bar", 2],
    ... "b": [1, 2, 3],
    ... "c": [1.0, 2.0, 3.0]
    ... })
    > df.fold((s1, s2) => s1.plus(s2))
    Series: '' [f64]
    [
    "foo11"
    "bar22"
    "233"
    ]
  • Check if DataFrame is equal to other.


    Parameters

    Returns boolean

    > const df1 = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6.0, 7.0, 8.0],
    ... "ham": ['a', 'b', 'c']
    ... })
    > const df2 = pl.DataFrame({
    ... "foo": [3, 2, 1],
    ... "bar": [8.0, 7.0, 6.0],
    ... "ham": ['c', 'b', 'a']
    ... })
    > df1.frameEqual(df1)
    true
    > df1.frameEqual(df2)
    false
  • Parameters

    Returns boolean

  • Get a single column as Series by name.


    Type Parameters

    • U extends string | number | symbol

    Parameters

    • name: U

    Returns T[U]

    > const df = pl.DataFrame({
    ... foo: [1, 2, 3],
    ... bar: [6, null, 8],
    ... ham: ["a", "b", "c"],
    ... });
    // df: pl.DataFrame<{
    // foo: pl.Series<Float64, "foo">;
    // bar: pl.Series<Float64, "bar">;
    // ham: pl.Series<Utf8, "ham">;
    // }>
    > const column = df.getColumn("foo");
    // column: pl.Series<Float64, "foo">
  • Parameters

    • name: string

    Returns pl.Series<any, string>

  • Returns T[keyof T][]

    >  const df = pl.DataFrame({
    ... foo: [1, 2, 3],
    ... bar: [6, null, 8],
    ... ham: ["a", "b", "c"],
    ... });
    // df: pl.DataFrame<{
    // foo: pl.Series<Float64, "foo">;
    // bar: pl.Series<Float64, "bar">;
    // ham: pl.Series<Utf8, "ham">;
    // }>
    > const columns = df.getColumns();
    // columns: (pl.Series<Float64, "foo"> | pl.Series<Float64, "bar"> | pl.Series<Utf8, "ham">)[]
  • Start a groupby operation.


    Parameters

    • Rest...by: ColumnSelection[]

      Column(s) to group by.

    Returns GroupBy

  • Groups based on a time value (or index value of type Int32, Int64). Time windows are calculated and rows are assigned to windows. Unlike a normal groupby, a row can be a member of multiple groups. The time/index window can be seen as a rolling window, with a window size determined by dates/times/values instead of slots in the DataFrame.

    A window is defined by:

    • every: interval of the window
    • period: length of the window
    • offset: offset of the window

    The every, period and offset arguments are created with the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 day)
    • 1w (1 week)
    • 1mo (1 calendar month)
    • 1y (1 calendar year)
    • 1i (1 index count)

    Or combine them: "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

    In case of a groupByDynamic on an integer column, the windows are defined by:

    • "1i" # length 1
    • "10i" # length 10

    Parameters

    • options: {
          by?: ColumnsOrExpr;
          check_sorted?: boolean;
          closed?:
              | "none"
              | "left"
              | "right"
              | "both";
          every: string;
          includeBoundaries?: boolean;
          indexColumn: string;
          offset?: string;
          period?: string;
          start_by: StartBy;
      }
      • Optionalby?: ColumnsOrExpr
      • Optionalcheck_sorted?: boolean
      • Optionalclosed?:
            | "none"
            | "left"
            | "right"
            | "both"
      • every: string
      • OptionalincludeBoundaries?: boolean
      • indexColumn: string
      • Optionaloffset?: string
      • Optionalperiod?: string
      • start_by: StartBy

    Returns RollingGroupBy

  • Create rolling groups based on a time column (or index value of type Int32, Int64).

    Unlike groupByDynamic, the windows are determined by the individual values and are not of constant intervals. For constant intervals use groupByDynamic.

    The period and offset arguments are created with the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 day)
    • 1w (1 week)
    • 1mo (1 calendar month)
    • 1y (1 calendar year)
    • 1i (1 index count)

    Or combine them: "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

    In case of a groupByRolling on an integer column, the windows are defined by:

    • "1i" # length 1
    • "10i" # length 10

    Parameters

    • opts: {
          by?: ColumnsOrExpr;
          check_sorted?: boolean;
          closed?:
              | "none"
              | "left"
              | "right"
              | "both";
          indexColumn: ColumnsOrExpr;
          offset?: string;
          period: string;
      }
      • Optionalby?: ColumnsOrExpr
      • Optionalcheck_sorted?: boolean
      • Optionalclosed?:
            | "none"
            | "left"
            | "right"
            | "both"
      • indexColumn: ColumnsOrExpr
      • Optionaloffset?: string
      • period: string

    Returns RollingGroupBy


    >dates = [
    ... "2020-01-01 13:45:48",
    ... "2020-01-01 16:42:13",
    ... "2020-01-01 16:45:09",
    ... "2020-01-02 18:12:48",
    ... "2020-01-03 19:45:32",
    ... "2020-01-08 23:16:43",
    ... ]
    >df = pl.DataFrame({"dt": dates, "a": [3, 7, 5, 9, 2, 1]}).withColumn(
    ... pl.col("dt").str.strptime(pl.Datetime)
    ... )
    >out = df.groupByRolling({indexColumn:"dt", period:"2d"}).agg(
    ... [
    ... pl.sum("a").alias("sum_a"),
    ... pl.min("a").alias("min_a"),
    ... pl.max("a").alias("max_a"),
    ... ]
    ... )
    >assert(out["sum_a"].toArray() === [3, 10, 15, 24, 11, 1])
    >assert(out["max_a"].toArray() === [3, 7, 7, 9, 9, 1])
    >assert(out["min_a"].toArray() === [3, 3, 3, 3, 2, 1])
    >out
    shape: (6, 4)
    ┌─────────────────────┬───────┬───────┬───────┐
    │ dt                  ┆ a_sum ┆ a_max ┆ a_min │
    │ ---                 ┆ ---   ┆ ---   ┆ ---   │
    │ datetime[ms]        ┆ i64   ┆ i64   ┆ i64   │
    ╞═════════════════════╪═══════╪═══════╪═══════╡
    │ 2020-01-01 13:45:48 ┆ 3     ┆ 3     ┆ 3     │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
    │ 2020-01-01 16:42:13 ┆ 10    ┆ 7     ┆ 3     │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
    │ 2020-01-01 16:45:09 ┆ 15    ┆ 7     ┆ 3     │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
    │ 2020-01-02 18:12:48 ┆ 24    ┆ 9     ┆ 3     │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
    │ 2020-01-03 19:45:32 ┆ 11    ┆ 9     ┆ 2     │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
    │ 2020-01-08 23:16:43 ┆ 1     ┆ 1     ┆ 1     │
    └─────────────────────┴───────┴───────┴───────┘
  • Hash and combine the rows in this DataFrame. (Hash value is UInt64)

    Parameters

    • Optionalk0: number

      seed parameter

    • Optionalk1: number

      seed parameter

    • Optionalk2: number

      seed parameter

    • Optionalk3: number

      seed parameter

    Returns pl.Series<any, string>

  • Parameters

    • options: {
          k0?: number;
          k1?: number;
          k2?: number;
          k3?: number;
      }
      • Optionalk0?: number
      • Optionalk1?: number
      • Optionalk2?: number
      • Optionalk3?: number

    Returns pl.Series<any, string>

  • Get first N rows as DataFrame.


    Parameters

    • Optionallength: number

      Length of the head.

    Returns pl.DataFrame<T>

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3, 4, 5],
    ... "bar": [6, 7, 8, 9, 10],
    ... "ham": ['a', 'b', 'c', 'd','e']
    ... });
    > df.head(3)
    shape: (3, 3)
    ╭─────┬─────┬─────╮
    foobarham
    │ --- ┆ --- ┆ --- │
    i64i64str
    ╞═════╪═════╪═════╡
    16"a"
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    27"b"
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    38"c"
    ╰─────┴─────┴─────╯
  • Return a new DataFrame grown horizontally by stacking multiple Series to it.

    Type Parameters

    • U extends Record<string, pl.Series<any, string>> = any

    Parameters

    • columns: pl.DataFrame<U>

      array of Series or DataFrame to stack

    Returns pl.DataFrame<{
        [K in string | number | symbol]: (T & U)[K]
    }>

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    // df: pl.DataFrame<{
    // foo: pl.Series<Float64, "foo">;
    // bar: pl.Series<Float64, "bar">;
    // ham: pl.Series<Utf8, "ham">;
    // }>
    > const x = pl.Series("apple", [10, 20, 30])
    // x: pl.Series<Float64, "apple">
    > df.hstack([x])
    // pl.DataFrame<{
    // foo: pl.Series<Float64, "foo">;
    // bar: pl.Series<Float64, "bar">;
    // ham: pl.Series<Utf8, "ham">;
    // apple: pl.Series<Float64, "apple">;
    // }>
    shape: (3, 4)
    ╭─────┬─────┬─────┬───────╮
    foobarhamapple
    │ --- ┆ --- ┆ --- ┆ --- │
    i64i64stri64
    ╞═════╪═════╪═════╪═══════╡
    16"a"10
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
    27"b"20
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
    38"c"30
    ╰─────┴─────┴─────┴───────╯
  • Type Parameters

    Parameters

    • columns: U

    Returns pl.DataFrame<{
        [K in string | number | symbol]: (T & {
            [K in pl.Series<any, string> as K["name"]]: K
        })[K]
    }>

  • Parameters

    Returns pl.DataFrame<any>

  • Parameters

    Returns void

  • Insert a Series at a certain column index. This operation is in place.

    Parameters

    • index: number

      Column position to insert the new Series column.

    • series: pl.Series<any, string>

      Series to insert

    Returns void

  • Check if the dataframe is empty

    Returns boolean

  • SQL like joins.

    Parameters

    • other: pl.DataFrame<any>
    • options: {
          on: ValueOrArray<string>;
      } & Omit<JoinOptions, "leftOn" | "rightOn">

    Returns pl.DataFrame<any>

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6.0, 7.0, 8.0],
    ... "ham": ['a', 'b', 'c']
    ... });
    > const otherDF = pl.DataFrame({
    ... "apple": ['x', 'y', 'z'],
    ... "ham": ['a', 'b', 'd']
    ... });
    > df.join(otherDF, {on: 'ham'})
    shape: (2, 4)
    ╭─────┬─────┬─────┬───────╮
    foobarhamapple
    │ --- ┆ --- ┆ --- ┆ --- │
    i64f64strstr
    ╞═════╪═════╪═════╪═══════╡
    16"a""x"
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
    27"b""y"
    ╰─────┴─────┴─────┴───────╯
  • Parameters

    • other: pl.DataFrame<any>
    • options: {
          leftOn: ValueOrArray<string>;
          rightOn: ValueOrArray<string>;
      } & Omit<JoinOptions, "on">

    Returns pl.DataFrame<any>

  • Parameters

    • other: pl.DataFrame<any>
    • options: {
          how: "cross";
          suffix?: string;
      }
      • how: "cross"
      • Optionalsuffix?: string

    Returns pl.DataFrame<any>

  • Perform an asof join. This is similar to a left-join except that we match on nearest key rather than equal keys.

    Both DataFrames must be sorted by the asofJoin key.

    For each row in the left DataFrame:

    • A "backward" search selects the last row in the right DataFrame whose 'on' key is less than or equal to the left's key.

    • A "forward" search selects the first row in the right DataFrame whose 'on' key is greater than or equal to the left's key.

    • A "nearest" search selects the last row in the right DataFrame whose value is nearest to the left's key. String keys are not currently supported for a nearest search.

    The default is "backward".

    Parameters

    • other: pl.DataFrame<any>

      DataFrame to join with.

    • options: {
          allowParallel?: boolean;
          by?: string | string[];
          byLeft?: string | string[];
          byRight?: string | string[];
          forceParallel?: boolean;
          leftOn?: string;
          on?: string;
          rightOn?: string;
          strategy?: "backward" | "forward" | "nearest";
          suffix?: string;
          tolerance?: string | number;
      }
      • OptionalallowParallel?: boolean

        Allow the physical plan to optionally evaluate the computation of both DataFrames up to the join in parallel.

      • Optionalby?: string | string[]
      • OptionalbyLeft?: string | string[]

        join on these columns before doing asof join

      • OptionalbyRight?: string | string[]

        join on these columns before doing asof join

      • OptionalforceParallel?: boolean

        Force the physical plan to evaluate the computation of both DataFrames up to the join in parallel.

      • OptionalleftOn?: string

        Join column of the left DataFrame.

      • Optionalon?: string

        Join column of both DataFrames. If set, leftOn and rightOn should be undefined.

      • OptionalrightOn?: string

        Join column of the right DataFrame.

      • Optionalstrategy?: "backward" | "forward" | "nearest"

        One of 'forward', 'backward', 'nearest'

      • Optionalsuffix?: string

        Suffix to append to columns with a duplicate name.

      • Optionaltolerance?: string | number

        Numeric tolerance. By setting this, the join will only be done if the near keys are within this distance. If an asof join is done on columns of dtype "Date" or "Datetime", use the following string language:

        • 1ns (1 nanosecond)
        • 1us (1 microsecond)
        • 1ms (1 millisecond)
        • 1s (1 second)
        • 1m (1 minute)
        • 1h (1 hour)
        • 1d (1 day)
        • 1w (1 week)
        • 1mo (1 calendar month)
        • 1y (1 calendar year)
        • 1i (1 index count)

        Or combine them:

        • "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

    Returns pl.DataFrame<any>

    > const gdp = pl.DataFrame({
    ... date: [
    ... new Date('2016-01-01'),
    ... new Date('2017-01-01'),
    ... new Date('2018-01-01'),
    ... new Date('2019-01-01'),
    ... ], // note record date: Jan 1st (sorted!)
    ... gdp: [4164, 4411, 4566, 4696],
    ... })
    > const population = pl.DataFrame({
    ... date: [
    ... new Date('2016-05-12'),
    ... new Date('2017-05-12'),
    ... new Date('2018-05-12'),
    ... new Date('2019-05-12'),
    ... ], // note record date: May 12th (sorted!)
    ... "population": [82.19, 82.66, 83.12, 83.52],
    ... })
    > population.joinAsof(
    ... gdp,
    ... {leftOn:"date", rightOn:"date", strategy:"backward"}
    ... )
    shape: (4, 3)
    ┌─────────────────────┬────────────┬──────┐
    │ date                ┆ population ┆ gdp  │
    │ ---                 ┆ ---        ┆ ---  │
    │ datetime[μs]        ┆ f64        ┆ i64  │
    ╞═════════════════════╪════════════╪══════╡
    │ 2016-05-12 00:00:00 ┆ 82.19      ┆ 4164 │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ 2017-05-12 00:00:00 ┆ 82.66      ┆ 4411 │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ 2018-05-12 00:00:00 ┆ 83.12      ┆ 4566 │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ 2019-05-12 00:00:00 ┆ 83.52      ┆ 4696 │
    └─────────────────────┴────────────┴──────┘
  • Aggregate the columns of this DataFrame to their maximum value.


    Returns pl.DataFrame<T>

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.max()
    shape: (1, 3)
    ╭─────┬─────┬──────╮
    foobarham
    │ --- ┆ --- ┆ --- │
    i64i64str
    ╞═════╪═════╪══════╡
    38null
    ╰─────┴─────┴──────╯
  • Parameters

    • axis: 0

    Returns pl.DataFrame<T>

  • Parameters

    • axis: 1

    Returns pl.Series<any, string>

  • Aggregate the columns of this DataFrame to their median value.


    Returns pl.DataFrame<T>

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.median();
    shape: (1, 3)
    ╭─────┬─────┬──────╮
    foobarham
    │ --- ┆ --- ┆ --- │
    f64f64str
    ╞═════╪═════╪══════╡
    27null
    ╰─────┴─────┴──────╯
  • Unpivot a DataFrame from wide to long format.

    Parameters

    • idVars: ColumnSelection
    • valueVars: ColumnSelection

    Returns pl.DataFrame<any>

    Deprecated since 0.13.0; use unpivot instead.

  • Aggregate the columns of this DataFrame to their minimum value.


    Returns pl.DataFrame<T>

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.min();
    shape: (1, 3)
    ╭─────┬─────┬──────╮
    foobarham
    │ --- ┆ --- ┆ --- │
    i64i64str
    ╞═════╪═════╪══════╡
    16null
    ╰─────┴─────┴──────╯
  • Parameters

    • axis: 0

    Returns pl.DataFrame<T>

  • Parameters

    • axis: 1

    Returns pl.Series<any, string>

  • Get number of chunks used by the ChunkedArrays of this DataFrame.

    Returns number

  • Create a new DataFrame that shows the null counts per column.


    Returns pl.DataFrame<{
        [K in string | number | symbol]: pl.Series<Float64, K & string>
    }>

    > const df = pl.DataFrame({
    ... "foo": [1, null, 3],
    ... "bar": [6, 7, null],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.nullCount();
    shape: (1, 3)
    ┌─────┬─────┬─────┐
    foobarham
    │ --- ┆ --- ┆ --- │
    u32u32u32
    ╞═════╪═════╪═════╡
    110
    └─────┴─────┴─────┘
  • Parameters

    • cols: string | string[]
    • Optionalstable: boolean
    • OptionalincludeKey: boolean

    Returns pl.DataFrame<T>[]

  • Type Parameters

    • T

    Parameters

    • cols: string | string[]
    • stable: boolean
    • includeKey: boolean
    • mapFn: ((df: pl.DataFrame<any>) => T)

    Returns T[]

  • Create a spreadsheet-style pivot table as a DataFrame.

    Parameters

    • values: string | string[]

      The existing column(s) of values which will be moved under the new columns from index. If an aggregation is specified, these are the values on which the aggregation will be computed. If None, all remaining columns not specified in `on` and `index` will be used. At least one of `index` and `values` must be specified.

    • options: {
          aggregateFunc?:
              | pl.Expr
              | "mean"
              | "min"
              | "max"
              | "first"
              | "last"
              | "count"
              | "median"
              | "sum";
          index: string | string[];
          maintainOrder?: boolean;
          on: string | string[];
          separator?: string;
          sortColumns?: boolean;
      }
      • OptionalaggregateFunc?:
            | pl.Expr
            | "mean"
            | "min"
            | "max"
            | "first"
            | "last"
            | "count"
            | "median"
            | "sum"

        Any of: "sum", "max", "min", "mean", "median", "first", "last", "count". Defaults to "first".

      • index: string | string[]

        The column(s) that remain from the input to the output. The output DataFrame will have one row for each unique combination of the index's values. If None, all remaining columns not specified in `on` and `values` will be used. At least one of `index` and `values` must be specified.

      • OptionalmaintainOrder?: boolean

        Sort the grouped keys so that the output order is predictable.

      • on: string | string[]

        The column(s) whose values will be used as the new columns of the output DataFrame.

      • Optionalseparator?: string

        Used as separator/delimiter in generated column names.

      • OptionalsortColumns?: boolean

        Sort the transposed columns by name. Default is by order of discovery.

    Returns pl.DataFrame<any>

      > const df = pl.DataFrame(
    ... {
    ... "foo": ["one", "one", "one", "two", "two", "two"],
    ... "bar": ["A", "B", "C", "A", "B", "C"],
    ... "baz": [1, 2, 3, 4, 5, 6],
    ... }
    ... );
    > df.pivot("baz", {index:"foo", on:"bar"});
    shape: (2, 4)
    ┌─────┬─────┬─────┬─────┐
    fooABC
    │ --- ┆ --- ┆ --- ┆ --- │
    strf64f64f64
    ╞═════╪═════╪═════╪═════╡
    one123
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    two456
    └─────┴─────┴─────┴─────┘
  • Parameters

    • options: {
          aggregateFunc?:
              | pl.Expr
              | "mean"
              | "min"
              | "max"
              | "first"
              | "last"
              | "count"
              | "median"
              | "sum";
          index: string | string[];
          maintainOrder?: boolean;
          on: string | string[];
          separator?: string;
          sortColumns?: boolean;
          values: string | string[];
      }
      • OptionalaggregateFunc?:
            | pl.Expr
            | "mean"
            | "min"
            | "max"
            | "first"
            | "last"
            | "count"
            | "median"
            | "sum"
      • index: string | string[]
      • OptionalmaintainOrder?: boolean
      • on: string | string[]
      • Optionalseparator?: string
      • OptionalsortColumns?: boolean
      • values: string | string[]

    Returns pl.DataFrame<any>

  • Aggregate the columns of this DataFrame to their quantile value.

    Parameters

    • quantile: number

    Returns pl.DataFrame<T>

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.quantile(0.5);
    shape: (1, 3)
    ╭─────┬─────┬──────╮
    foobarham
    │ --- ┆ --- ┆ --- │
    i64i64str
    ╞═════╪═════╪══════╡
    27null
    ╰─────┴─────┴──────╯
  • Rechunk the data in this DataFrame to a contiguous allocation.

    This will make sure all subsequent operations have optimal and predictable performance.

    Returns pl.DataFrame<T>

  • Rename column names.


    Type Parameters

    • const U extends Partial<Record<keyof T, string>>

    Parameters

    • mapping: U

      Key value pairs that map from old name to new name.

    Returns pl.DataFrame<{
        [K in string | number | symbol as U[K] extends string
            ? any[any]
            : K]: T[K]
    }>

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    // df: pl.DataFrame<{
    // foo: pl.Series<Float64, "foo">;
    // bar: pl.Series<Float64, "bar">;
    // ham: pl.Series<Utf8, "ham">;
    // }>
    > df.rename({"foo": "apple"});
    ╭───────┬─────┬─────╮
    applebarham
    │ --- ┆ --- ┆ --- │
    i64i64str
    ╞═══════╪═════╪═════╡
    16"a"
    ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    27"b"
    ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    38"c"
    ╰───────┴─────┴─────╯
  • Parameters

    • mapping: Record<string, string>

    Returns pl.DataFrame<any>

  • Replace a column at an index location.

    Parameters

    • index: number

      Column index

    • newColumn: pl.Series<any, string>

      New column to insert

    Returns void

    TypeScript cannot encode type mutation, so the static type of the DataFrame will be incorrect after this call. Cast the type of the DataFrame manually.


    > const df: pl.DataFrame = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    // df: pl.DataFrame<{
    // foo: pl.Series<Float64, "foo">;
    // bar: pl.Series<Float64, "bar">;
    // ham: pl.Series<Utf8, "ham">;
    // }>
    > const x = pl.Series("apple", [10, 20, 30]);
    // x: pl.Series<Float64, "apple">
    > df.replaceAtIdx(0, x);
    // df: pl.DataFrame<{
    // foo: pl.Series<Float64, "foo">; <- notice how the type is still the same!
    // bar: pl.Series<Float64, "bar">;
    // ham: pl.Series<Utf8, "ham">;
    // }>
    shape: (3, 3)
    ╭───────┬─────┬─────╮
    applebarham
    │ --- ┆ --- ┆ --- │
    i64i64str
    ╞═══════╪═════╪═════╡
    106"a"
    ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    207"b"
    ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    308"c"
    ╰───────┴─────┴─────╯
  • Get a row as Array

    Parameters

    • index: number

      row index

    Returns any[]

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.row(2)
    [3, 8, 'c']
  • Convert columnar data to rows as arrays

    Returns any[][]

  • Select columns from this DataFrame.


    Type Parameters

    • U extends string | number | symbol

    Parameters

    • Rest...columns: U[]

      Column or columns to select.

    Returns pl.DataFrame<{
        [P in string | number | symbol]: T[P]
    }>

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    // df: pl.DataFrame<{
    // foo: pl.Series<Float64, "foo">;
    // bar: pl.Series<Float64, "bar">;
    // ham: pl.Series<Utf8, "ham">;
    // }>
    > df.select('foo');
    // pl.DataFrame<{
    // foo: pl.Series<Float64, "foo">;
    // }>
    shape: (3, 1)
    ┌─────┐
    foo
    │ --- │
    i64
    ╞═════╡
    1
    ├╌╌╌╌╌┤
    2
    ├╌╌╌╌╌┤
    3
    └─────┘
  • Parameters

    • Rest...columns: ExprOrString[]

    Returns pl.DataFrame<T>

  • Serializes object to desired format via serde

    Parameters

    Returns Buffer

  • Shift the values by a given period and fill the parts that will be empty due to this operation with nulls.


    Parameters

    • periods: number

      Number of places to shift (may be negative).

    Returns pl.DataFrame<T>

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.shift(1);
    shape: (3, 3)
    ┌──────┬──────┬──────┐
    foobarham
    │ --- ┆ --- ┆ --- │
    i64i64str
    ╞══════╪══════╪══════╡
    nullnullnull
    ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    16"a"
    ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    27"b"
    └──────┴──────┴──────┘
    > df.shift(-1)
    shape: (3, 3)
    ┌──────┬──────┬──────┐
    foobarham
    │ --- ┆ --- ┆ --- │
    i64i64str
    ╞══════╪══════╪══════╡
    27"b"
    ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    38"c"
    ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    nullnullnull
    └──────┴──────┴──────┘
  • Parameters

    • __namedParameters: {
          periods: number;
      }
      • periods: number

    Returns pl.DataFrame<T>

  • Shift the values by a given period and fill the parts that will be empty due to this operation with the result of the fillValue expression.


    Parameters

    • n: number
    • fillValue: number

    Returns pl.DataFrame<T>

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.shiftAndFill({n:1, fillValue:0});
    shape: (3, 3)
    ┌─────┬─────┬─────┐
    foobarham
    │ --- ┆ --- ┆ --- │
    i64i64str
    ╞═════╪═════╪═════╡
    00"0"
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    16"a"
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    27"b"
    └─────┴─────┴─────┘
  • Parameters

    • __namedParameters: {
          fillValue: number;
          n: number;
      }
      • fillValue: number
      • n: number

    Returns pl.DataFrame<T>

  • Shrink memory usage of this DataFrame to fit the exact capacity needed to hold the data.

    Returns pl.DataFrame<T>

  • Parameters

    • inPlace: true

    Returns void

  • Parameters

    • __namedParameters: {
          inPlace: true;
      }
      • inPlace: true

    Returns void

  • Slice this DataFrame over the rows direction.


    Parameters

    • opts: {
          length: number;
          offset: number;
      }
      • length: number

        Length of the slice

      • offset: number

        Offset index.

    Returns pl.DataFrame<T>

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6.0, 7.0, 8.0],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.slice(1, 2); // Alternatively `df.slice({offset:1, length:2})`
    shape: (2, 3)
    ┌─────┬─────┬─────┐
    foobarham
    │ --- ┆ --- ┆ --- │
    i64i64str
    ╞═════╪═════╪═════╡
    27"b"
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    38"c"
    └─────┴─────┴─────┘
  • Parameters

    • offset: number
    • length: number

    Returns pl.DataFrame<T>

  • Sort the DataFrame by column.


    Parameters

    • by: ColumnsOrExpr

      Column(s) to sort by. Accepts expression input, including selectors. Strings are parsed as column names.

    • Optionaldescending: boolean

      Sort in descending order. When sorting by multiple columns, can be specified per column by passing a sequence of booleans.

    • OptionalnullsLast: boolean

      Place null values last; can specify a single boolean applying to all columns or a sequence of booleans for per-column control.

    • OptionalmaintainOrder: boolean

      Whether the order should be maintained if elements are equal.

    Returns pl.DataFrame<T>

  • Parameters

    • __namedParameters: {
          by: ColumnsOrExpr;
          maintainOrder?: boolean;
          nullsLast?: boolean;
          reverse?: boolean;
      }
      • by: ColumnsOrExpr
      • OptionalmaintainOrder?: boolean
      • OptionalnullsLast?: boolean
      • Optionalreverse?: boolean

        Deprecated since 0.16.0; use descending instead.

    Returns pl.DataFrame<T>

  • Parameters

    • __namedParameters: {
          by: ColumnsOrExpr;
          descending?: boolean;
          maintainOrder?: boolean;
          nullsLast?: boolean;
      }
      • by: ColumnsOrExpr
      • Optionaldescending?: boolean
      • OptionalmaintainOrder?: boolean
      • OptionalnullsLast?: boolean

    Returns pl.DataFrame<T>

  • Aggregate the columns of this DataFrame to their standard deviation value.


    Returns pl.DataFrame<T>

    > const df = pl.DataFrame({
    ... "foo": [1, 2, 3],
    ... "bar": [6, 7, 8],
    ... "ham": ['a', 'b', 'c']
    ... });
    > df.std();
    shape: (1, 3)
    ╭─────┬─────┬──────╮
    foobarham
    │ --- ┆ --- ┆ --- │
    f64f64str
    ╞═════╪═════╪══════╡
    11null
    ╰─────┴─────┴──────╯
  • Parameters

    • Optionallength: number

    Returns pl.DataFrame<T>

    > const df = pl.DataFrame({
    ... "letters": ["c", "c", "a", "c", "a", "b"],
    ... "nrs": [1, 2, 3, 4, 5, 6]
    ... });
    > console.log(df.toString());
    shape: (6, 2)
    ╭─────────┬─────╮
    lettersnrs
    │ --- ┆ --- │
    stri64
    ╞═════════╪═════╡
    "c"1
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    "c"2
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    "a"3
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    "c"4
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    "a"5
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    "b"6
    ╰─────────┴─────╯
    > df.groupby("letters")
    ... .tail(2)
    ... .sort("letters")
    shape: (5, 2)
    ╭─────────┬─────╮
    lettersnrs
    │ --- ┆ --- │
    stri64
    ╞═════════╪═════╡
    "a"3
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    "a"5
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    "b"6
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    "c"2
    ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    "c"4
    ╰─────────┴─────╯
  • Converts dataframe object into a TabularDataResource

    Returns TabularDataResource

  • Converts dataframe object into HTML

    Returns string

  • Parameters

    • Optionalindex: number

    Returns T[keyof T]

  • Returns a string representation of an object.

    Returns string

  • Convert a DataFrame to a Series of type Struct

    Parameters

    • name: string

      Name for the struct Series

    Returns pl.Series<any, string>

     > const df = pl.DataFrame({
    ... "a": [1, 2, 3, 4, 5],
    ... "b": ["one", "two", "three", "four", "five"],
    ... });
    > df.toStruct("nums");
    shape: (5,)
    Series: 'nums' [struct[2]{'a': i64, 'b': str}]
    [
    {1,"one"}
    {2,"two"}
    {3,"three"}
    {4,"four"}
    {5,"five"}
    ]
  • Transpose a DataFrame over the diagonal.

    Parameters

    • Optionaloptions: {
          columnNames?: Iterable<string, any, any>;
          headerName?: string;
          includeHeader?: boolean;
      }
      • OptionalcolumnNames?: Iterable<string, any, any>

        Optional generator/iterator that yields column names. Will be used to replace the columns in the DataFrame.

      • OptionalheaderName?: string

        If includeHeader is set, this determines the name of the column that will be inserted

      • OptionalincludeHeader?: boolean

        If set, the column names will be added as first column.

    Returns pl.DataFrame<any>

    This is a very expensive operation. Perhaps you can do it differently.

    > const df = pl.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]});
    > df.transpose({includeHeader:true})
    shape: (2, 4)
    ┌────────┬──────────┬──────────┬──────────┐
    columncolumn_0column_1column_2
    │ --- ┆ --- ┆ --- ┆ --- │
    stri64i64i64
    ╞════════╪══════════╪══════════╪══════════╡
    a123
    ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
    b123
    └────────┴──────────┴──────────┴──────────┘
    // replace the auto generated column names with a list
    > df.transpose({includeHeader:false, columnNames:["a", "b", "c"]})
    shape: (2, 3)
    ┌─────┬─────┬─────┐
    abc
    │ --- ┆ --- ┆ --- │
    i64i64i64
    ╞═════╪═════╪═════╡
    123
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    123
    └─────┴─────┴─────┘

    // Include the header as a separate column
    > df.transpose({
    ... includeHeader:true,
    ... headerName:"foo",
    ... columnNames:["a", "b", "c"]
    ... })
    shape: (2, 4)
    ┌─────┬─────┬─────┬─────┐
    fooabc
    │ --- ┆ --- ┆ --- ┆ --- │
    stri64i64i64
    ╞═════╪═════╪═════╪═════╡
    a123
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    b123
    └─────┴─────┴─────┴─────┘

    // Replace the auto generated column with column names from a generator function
    > function *namesGenerator() {
    ... const baseName = "my_column_";
    ... let count = 0;
    ... while (true) {
    ... yield `${baseName}${count}`;
    ... count++;
    ... }
    ... }
    > df.transpose({includeHeader:false, columnNames:namesGenerator()})
    shape: (2, 3)
    ┌─────────────┬─────────────┬─────────────┐
    my_column_0my_column_1my_column_2
    │ --- ┆ --- ┆ --- │
    i64i64i64
    ╞═════════════╪═════════════╪═════════════╡
    123
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    123
    └─────────────┴─────────────┴─────────────┘
  • Drop duplicate rows from this DataFrame. Note that this fails if there is a column of type List in the DataFrame.

    Parameters

    • OptionalmaintainOrder: boolean
    • Optionalsubset: ColumnSelection

      subset to drop duplicates for

    • Optionalkeep: "first" | "last"

      "first" | "last"

    Returns pl.DataFrame<T>

  • Parameters

    • opts: {
          keep?: "first" | "last";
          maintainOrder?: boolean;
          subset?: ColumnSelection;
      }
      • Optionalkeep?: "first" | "last"
      • OptionalmaintainOrder?: boolean
      • Optionalsubset?: ColumnSelection

    Returns pl.DataFrame<T>

  • Decompose a struct into its fields. The fields will be inserted into the DataFrame at the location of the struct column.

    Parameters

    • names: string | string[]

      Names of the struct columns that will be decomposed into their fields

    Returns pl.DataFrame<any>

    > const df = pl.DataFrame({
    ... "int": [1, 2],
    ... "str": ["a", "b"],
    ... "bool": [true, null],
    ... "list": [[1, 2], [3]],
    ... })
    ... .toStruct("my_struct")
    ... .toFrame();
    > df
    shape: (2, 1)
    ┌─────────────────────────────┐
    my_struct
    │ --- │
    struct[4]{'int',...,'list'} │
    ╞═════════════════════════════╡
    │ {1,"a",true,[1, 2]} │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ {2,"b",null,[3]} │
    └─────────────────────────────┘
    > df.unnest("my_struct")
    shape: (2, 4)
    ┌─────┬─────┬──────┬────────────┐
    intstrboollist
    │ --- ┆ --- ┆ --- ┆ --- │
    i64strboollist [i64] │
    ╞═════╪═════╪══════╪════════════╡
    1atrue ┆ [1, 2] │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
    2bnull ┆ [3] │
    └─────┴─────┴──────┴────────────┘
  • Unpivot a DataFrame from wide to long format.


    Parameters

    • idVars: ColumnSelection

      Columns to use as identifier variables.

    • valueVars: ColumnSelection

      Values to use as value variables.

    Returns pl.DataFrame<any>

    > const df1 = pl.DataFrame({
    ... 'id': [1],
    ... 'asset_key_1': ['123'],
    ... 'asset_key_2': ['456'],
    ... 'asset_key_3': ['abc'],
    ... });
    > df1.unpivot('id', ['asset_key_1', 'asset_key_2', 'asset_key_3']);
    shape: (3, 3)
    ┌─────┬─────────────┬───────┐
    │ id  ┆ variable    ┆ value │
    │ --- ┆ ---         ┆ ---   │
    │ f64 ┆ str         ┆ str   │
    ╞═════╪═════════════╪═══════╡
    │ 1   ┆ asset_key_1 ┆ 123   │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
    │ 1   ┆ asset_key_2 ┆ 456   │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
    │ 1   ┆ asset_key_3 ┆ abc   │
    └─────┴─────────────┴───────┘
  • Upsample a DataFrame at a regular frequency.

    The every and offset arguments are created with the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 calendar day)
    • 1w (1 calendar week)
    • 1mo (1 calendar month)
    • 1q (1 calendar quarter)
    • 1y (1 calendar year)
    • 1i (1 index count)

    Or combine them:

    • "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

    By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year".

    Parameters

    • timeColumn: string

      Time column will be used to determine a date range. Note that this column has to be sorted for the output to make sense.

    • every: string

      Interval will start 'every' duration.

    • Optionalby: string | string[]

      First group by these columns and then upsample for every group.

    • OptionalmaintainOrder: boolean

      Keep the ordering predictable. This is slower.

      DataFrame Result will be sorted by timeColumn (but note that if by columns are passed, it will only be sorted within each by group).

      Upsample a DataFrame by a certain interval.

      const df = pl.DataFrame({
        "date": [
          new Date(2024, 1, 1),
          new Date(2024, 3, 1),
          new Date(2024, 4, 1),
          new Date(2024, 5, 1),
        ],
        "groups": ["A", "B", "A", "B"],
        "values": [0, 1, 2, 3],
      })
        .withColumn(pl.col("date").cast(pl.Date).alias("date"))
        .sort("date");

      df.upsample({timeColumn: "date", every: "1mo", by: "groups", maintainOrder: true})
        .select(pl.col("*").forwardFill());
      shape: (7, 3)
      ┌────────────┬────────┬────────┐
      │ date       ┆ groups ┆ values │
      │ ---        ┆ ---    ┆ ---    │
      │ date       ┆ str    ┆ f64    │
      ╞════════════╪════════╪════════╡
      │ 2024-02-01 ┆ A      ┆ 0.0    │
      │ 2024-03-01 ┆ A      ┆ 0.0    │
      │ 2024-04-01 ┆ A      ┆ 0.0    │
      │ 2024-05-01 ┆ A      ┆ 2.0    │
      │ 2024-04-01 ┆ B      ┆ 1.0    │
      │ 2024-05-01 ┆ B      ┆ 1.0    │
      │ 2024-06-01 ┆ B      ┆ 3.0    │
      └────────────┴────────┴────────┘

    Returns pl.DataFrame<T>

  • Parameters

    • opts: {
          by?: string | string[];
          every: string;
          maintainOrder?: boolean;
          timeColumn: string;
      }
      • Optional by?: string | string[]
      • every: string
      • Optional maintainOrder?: boolean
      • timeColumn: string

    Returns pl.DataFrame<T>

  • Aggregate the columns of this DataFrame to their variance value.

    Returns pl.DataFrame<T>

    > const df = pl.DataFrame({
    > "foo": [1, 2, 3],
    > "bar": [6, 7, 8],
    > "ham": ['a', 'b', 'c']
    > });
    > df.var()
    shape: (1, 3)
    ╭─────┬─────┬──────╮
    │ foo ┆ bar ┆ ham  │
    │ --- ┆ --- ┆ ---  │
    │ f64 ┆ f64 ┆ str  │
    ╞═════╪═════╪══════╡
    │ 1   ┆ 1   ┆ null │
    ╰─────┴─────┴──────╯
  • Grow this DataFrame vertically by stacking a DataFrame to it.

    Parameters

    • df: pl.DataFrame<T>

      DataFrame to stack.

    Returns pl.DataFrame<T>

    > const df1 = pl.DataFrame({
    ... "foo": [1, 2],
    ... "bar": [6, 7],
    ... "ham": ['a', 'b']
    ... });
    > const df2 = pl.DataFrame({
    ... "foo": [3, 4],
    ... "bar": [8 , 9],
    ... "ham": ['c', 'd']
    ... });
    > df1.vstack(df2);
    shape: (4, 3)
    ╭─────┬─────┬─────╮
    │ foo ┆ bar ┆ ham │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ str │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 6   ┆ "a" │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ 2   ┆ 7   ┆ "b" │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ 3   ┆ 8   ┆ "c" │
    ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    │ 4   ┆ 9   ┆ "d" │
    ╰─────┴─────┴─────╯
  • Return a new DataFrame with the column renamed.

    Type Parameters

    • Existing extends string | number | symbol
    • New extends string

    Parameters

    • existing: Existing
    • replacement: New

    Returns pl.DataFrame<{
        [K in string | number | symbol as K extends Existing
            ? New
            : K]: T[K]
    }>

  • Parameters

    • existing: string
    • replacement: string

    Returns pl.DataFrame<any>

  • Type Parameters

    • Existing extends string | number | symbol
    • New extends string

    Parameters

    • opts: {
          existing: Existing;
          replacement: New;
      }
      • existing: Existing
      • replacement: New

    Returns pl.DataFrame<{
        [K in string | number | symbol as K extends Existing
            ? New
            : K]: T[K]
    }>

  • Parameters

    • opts: {
          existing: string;
          replacement: string;
      }
      • existing: string
      • replacement: string

    Returns pl.DataFrame<any>