nodejs-polars
    Preparing search index...

    Interface LazyDataFrame<S>

    Representation of a Lazy computation graph / query.

    interface LazyDataFrame<S extends Schema = any> {
        "[inspect]"(): string;
        "[toStringTag]": string;
        cache(): LazyDataFrame<S>;
        clone(): LazyDataFrame<S>;
        collect(opts?: LazyOptions): Promise<pl.DataFrame<S>>;
        collectSync(opts?: LazyOptions): pl.DataFrame<S>;
        get columns(): string[];
        describeOptimizedPlan(opts?: LazyOptions): string;
        describePlan(): string;
        drop<U extends string>(
            name: U,
        ): LazyDataFrame<{ [K in string | number | symbol]: Omit<S, U>[K] }>;
        drop<const U extends string[]>(
            names: U,
        ): LazyDataFrame<
            { [K in string
            | number
            | symbol]: Omit<S, U[number]>[K] },
        >;
        drop<U extends string, const V extends string[]>(
            name: U,
            ...names: V,
        ): LazyDataFrame<
            { [K in string
            | number
            | symbol]: Omit<S, U | V[number]>[K] },
        >;
        dropNulls(column: string): LazyDataFrame<S>;
        dropNulls(columns: string[]): LazyDataFrame<S>;
        dropNulls(...columns: string[]): LazyDataFrame<S>;
        explode(column: ExprOrString): LazyDataFrame;
        explode(columns: ExprOrString[]): LazyDataFrame;
        explode(column: ExprOrString, ...columns: ExprOrString[]): LazyDataFrame;
        fetch(numRows: number, opts: LazyOptions): Promise<pl.DataFrame<S>>;
        fetch(numRows?: number): Promise<pl.DataFrame<S>>;
        fetchSync(numRows?: number): pl.DataFrame<S>;
        fetchSync(numRows: number, opts: LazyOptions): pl.DataFrame<S>;
        fillNull(fillValue: string | number | pl.Expr): LazyDataFrame<S>;
        filter(predicate: string | pl.Expr): LazyDataFrame<S>;
        first(): pl.DataFrame<S>;
        groupBy(by: ColumnsOrExpr, maintainOrder?: boolean): LazyGroupBy;
        groupBy(by: ColumnsOrExpr, opts: { maintainOrder: boolean }): LazyGroupBy;
        groupByDynamic(
            options: {
                by?: ColumnsOrExpr;
                closed?: "none" | "left" | "right" | "both";
                every: string;
                includeBoundaries?: boolean;
                indexColumn: string;
                label?: string;
                offset?: string;
                period?: string;
                startBy?: StartBy;
            },
        ): LazyGroupBy;
        groupByRolling(
            opts: {
                by?: ColumnsOrExpr;
                closed?: "none" | "left" | "right" | "both";
                indexColumn: ColumnsOrExpr;
                offset?: string;
                period: string;
            },
        ): LazyGroupBy;
        head(length?: number): LazyDataFrame<S>;
        inner(): any;
        join<
            S2 extends Schema,
            const Opts extends
                LazySameNameColumnJoinOptions<
                    Extract<keyof S, string>,
                    Extract<keyof S2, string>,
                >,
        >(
            other: LazyDataFrame<S2>,
            joinOptions: Opts & SameNameColumnJoinOptions<string, string> & LazyJoinBase,
        ): LazyDataFrame<
            {
                [K in string
                | number
                | symbol]: (
                    { [K1 in string
                    | number
                    | symbol]: S[K1] } & {
                        [K2 in string | number | symbol]: K2 extends keyof S
                            ? never
                            : S2[K2]
                    } & {
                        [K_SUFFIXED in string
                        | number
                        | symbol as `${K_SUFFIXED extends string
                            ? K_SUFFIXED<K_SUFFIXED>
                            : never}${ExtractSuffix<Opts>}`]: K_SUFFIXED extends string
                            ? S2[K_SUFFIXED<K_SUFFIXED>]
                            : never
                    }
                )[K]
            },
        >;
        join<
            S2 extends Schema,
            const Opts extends
                LazyDifferentNameColumnJoinOptions<
                    Extract<keyof S, string>,
                    Extract<keyof S2, string>,
                >,
        >(
            other: LazyDataFrame<S2>,
            joinOptions: Opts & DifferentNameColumnJoinOptions<string, string> & LazyJoinBase,
        ): LazyDataFrame<
            {
                [K in string
                | number
                | symbol]: (
                    { [K1 in string
                    | number
                    | symbol]: S[K1] } & {
                        [K2 in string | number | symbol]: K2 extends keyof S
                            ? never
                            : S2[K2]
                    } & {
                        [K_SUFFIXED in string
                        | number
                        | symbol as `${K_SUFFIXED extends string
                            ? K_SUFFIXED<K_SUFFIXED>
                            : never}${ExtractSuffix<Opts>}`]: K_SUFFIXED extends string
                            ? S2[K_SUFFIXED<K_SUFFIXED>]
                            : never
                    }
                )[K]
            },
        >;
        join<S2 extends Schema, const Opts extends LazyCrossJoinOptions>(
            other: LazyDataFrame<S2>,
            joinOptions: Opts & CrossJoinOptions & LazyJoinBase,
        ): LazyDataFrame<
            {
                [K in string
                | number
                | symbol]: (
                    { [K1 in string
                    | number
                    | symbol]: S[K1] } & {
                        [K2 in string | number | symbol]: K2 extends keyof S
                            ? never
                            : S2[K2]
                    } & {
                        [K_SUFFIXED in string
                        | number
                        | symbol as `${K_SUFFIXED extends string
                            ? K_SUFFIXED<K_SUFFIXED>
                            : never}${ExtractSuffix<Opts>}`]: K_SUFFIXED extends string
                            ? S2[K_SUFFIXED<K_SUFFIXED>]
                            : never
                    }
                )[K]
            },
        >;
        joinAsof(
            other: LazyDataFrame,
            options: {
                allowParallel?: boolean;
                by?: string | string[];
                byLeft?: string | string[];
                byRight?: string | string[];
                forceParallel?: boolean;
                leftOn?: string;
                on?: string;
                rightOn?: string;
                strategy?: "backward" | "forward" | "nearest";
                suffix?: string;
                tolerance?: string | number;
            },
        ): LazyDataFrame;
        last(): LazyDataFrame<S>;
        limit(n?: number): LazyDataFrame<S>;
        max(): LazyDataFrame<S>;
        mean(): LazyDataFrame<S>;
        median(): LazyDataFrame<S>;
        melt(idVars: ColumnSelection, valueVars: ColumnSelection): LazyDataFrame;
        min(): LazyDataFrame<S>;
        quantile(quantile: number): LazyDataFrame<S>;
        rename<const U extends Partial<Record<keyof S, string>>>(
            mapping: U,
        ): LazyDataFrame<
            {
                [K in string
                | number
                | symbol as U[K] extends string ? any[any] : K]: S[K]
            },
        >;
        rename(mapping: Record<string, string>): LazyDataFrame;
        reverse(): LazyDataFrame<S>;
        select<U extends string | number | symbol>(
            ...columns: U[],
        ): LazyDataFrame<{ [P in string | number | symbol]: S[P] }>;
        select(column: ExprOrString | pl.Series<any, string>): LazyDataFrame;
        select(columns: (ExprOrString | pl.Series<any, string>)[]): LazyDataFrame;
        select(
            ...columns: (ExprOrString | pl.Series<any, string>)[],
        ): LazyDataFrame;
        serialize(format: "json" | "bincode"): Buffer;
        shift(periods: number): LazyDataFrame<S>;
        shift(opts: { periods: number }): LazyDataFrame<S>;
        shiftAndFill(n: number, fillValue: number): LazyDataFrame<S>;
        shiftAndFill(opts: { fillValue: number; n: number }): LazyDataFrame<S>;
        sinkCSV(path: string, options?: CsvWriterOptions): LazyDataFrame;
        sinkIpc(path: string, options?: SinkIpcOptions): LazyDataFrame;
        sinkNdJson(path: string, options?: SinkJsonOptions): LazyDataFrame;
        sinkParquet(path: string, options?: SinkParquetOptions): LazyDataFrame;
        slice(offset: number, length: number): LazyDataFrame<S>;
        slice(opts: { length: number; offset: number }): LazyDataFrame<S>;
        sort(
            by: ColumnsOrExpr,
            descending?: ValueOrArray<boolean>,
            nullsLast?: boolean,
            maintainOrder?: boolean,
        ): LazyDataFrame<S>;
        sort(
            opts: {
                by: ColumnsOrExpr;
                descending?: ValueOrArray<boolean>;
                maintainOrder?: boolean;
                nullsLast?: boolean;
            },
        ): LazyDataFrame<S>;
        std(): LazyDataFrame<S>;
        sum(): LazyDataFrame<S>;
        tail(length?: number): LazyDataFrame<S>;
        toJSON(): string;
        unique(
            maintainOrder?: boolean,
            subset?: ColumnSelection,
            keep?: "first" | "last",
        ): LazyDataFrame<S>;
        unique(
            opts: {
                keep?: "first" | "last";
                maintainOrder?: boolean;
                subset?: ColumnSelection;
            },
        ): LazyDataFrame<S>;
        unpivot(
            idVars: ColumnSelection,
            valueVars: ColumnSelection,
            options?: { valueName?: null | string; variableName?: null | string },
        ): LazyDataFrame;
        var(): LazyDataFrame<S>;
        withColumn(expr: pl.Expr | pl.Series<any, string>): LazyDataFrame;
        withColumnRenamed<
            Existing extends string
            | number
            | symbol,
            New extends string,
        >(
            existing: Existing,
            replacement: New,
        ): LazyDataFrame<
            {
                [K in string
                | number
                | symbol as K extends Existing ? New : K]: S[K]
            },
        >;
        withColumnRenamed(existing: string, replacement: string): LazyDataFrame;
        withColumns(...exprs: (pl.Expr | pl.Series<any, string>)[]): LazyDataFrame;
        withRowCount(): LazyDataFrame;
    }

    Type Parameters

    Hierarchy

    • Serialize
    • GroupByOps<LazyGroupBy>
      • LazyDataFrame
    Index

    Properties

    "[toStringTag]": string

    Accessors

    Methods

    • Collect into a DataFrame. Note: use fetch if you want to run this query on the first n rows only. This can be a huge time saver in debugging queries.

      Parameters

      • Optionalopts: LazyOptions

        options for lazy operations

        • OptionalcommSubexprElim?: boolean
        • OptionalcommSubplanElim?: boolean
        • OptionalnoOptimization?: boolean
        • OptionalpredicatePushdown?: boolean
        • OptionalprojectionPushdown?: boolean
        • OptionalsimplifyExpression?: boolean
        • OptionalslicePushdown?: boolean
        • Optionalstreaming?: boolean
        • OptionaltypeCoercion?: boolean

      Returns Promise<pl.DataFrame<S>>

      DataFrame

    • Remove one or multiple columns from a DataFrame.

      Type Parameters

      • U extends string

      Parameters

      • name: U

        column or list of columns to be removed

      Returns LazyDataFrame<{ [K in string | number | symbol]: Omit<S, U>[K] }>

    • Type Parameters

      • const U extends string[]

      Parameters

      • names: U

      Returns LazyDataFrame<{ [K in string | number | symbol]: Omit<S, U[number]>[K] }>

    • Type Parameters

      • U extends string
      • const V extends string[]

      Parameters

      • name: U
      • ...names: V

      Returns LazyDataFrame<{ [K in string | number | symbol]: Omit<S, U | V[number]>[K] }>

    • Fetch is like a collect operation, but it overwrites the number of rows read by every scan

      Note that the fetch does not guarantee the final number of rows in the DataFrame. Filter, join operations and a lower number of rows available in the scanned file influence the final number of rows.

      Parameters

      • numRows: number

        collect 'n' number of rows from data source

      • opts: LazyOptions

        options for lazy operations

        • OptionalcommSubexprElim?: boolean
        • OptionalcommSubplanElim?: boolean
        • OptionalnoOptimization?: boolean
        • OptionalpredicatePushdown?: boolean
        • OptionalprojectionPushdown?: boolean
        • OptionalsimplifyExpression?: boolean
        • OptionalslicePushdown?: boolean
        • Optionalstreaming?: boolean
        • OptionaltypeCoercion?: boolean

      Returns Promise<pl.DataFrame<S>>

    • Parameters

      • OptionalnumRows: number

      Returns Promise<pl.DataFrame<S>>

    • Filter the rows in the DataFrame based on a predicate expression.

      Parameters

      • predicate: string | pl.Expr

        Expression that evaluates to a boolean Series.

      Returns LazyDataFrame<S>

      > lf = pl.DataFrame({
      > "foo": [1, 2, 3],
      > "bar": [6, 7, 8],
      > "ham": ['a', 'b', 'c']
      > }).lazy()
      > // Filter on one condition
      > lf.filter(pl.col("foo").lt(3)).collect()
      shape: (2, 3)
      ┌─────┬─────┬─────┐
      │ foo ┆ bar ┆ ham │
      │ --- ┆ --- ┆ --- │
      │ i64 ┆ i64 ┆ str │
      ╞═════╪═════╪═════╡
      │ 1   ┆ 6   ┆ a   │
      ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
      │ 2   ┆ 7   ┆ b   │
      └─────┴─────┴─────┘
    • Start a groupby operation.

      Parameters

      • by: ColumnsOrExpr
      • OptionalmaintainOrder: boolean

      Returns LazyGroupBy

    • Parameters

      • by: ColumnsOrExpr
      • opts: { maintainOrder: boolean }

      Returns LazyGroupBy

    • Groups based on a time value (or index value of type Int32, Int64). Time windows are calculated and rows are assigned to windows. Different from a normal groupby is that a row can be member of multiple groups. The time/index window could be seen as a rolling window, with a window size determined by dates/times/values instead of slots in the DataFrame.

      A window is defined by:

      • every: interval of the window
      • period: length of the window
      • offset: offset of the window

      The every, period and offset arguments are created with the following string language:

      • 1ns (1 nanosecond)
      • 1us (1 microsecond)
      • 1ms (1 millisecond)
      • 1s (1 second)
      • 1m (1 minute)
      • 1h (1 hour)
      • 1d (1 day)
      • 1w (1 week)
      • 1mo (1 calendar month)
      • 1y (1 calendar year)
      • 1i (1 index count)

      Or combine them: "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

      In case of a groupByDynamic on an integer column, the windows are defined by:

      • "1i" # length 1
      • "10i" # length 10

      Parameters

      • options: {
            by?: ColumnsOrExpr;
            closed?: "none" | "left" | "right" | "both";
            every: string;
            includeBoundaries?: boolean;
            indexColumn: string;
            label?: string;
            offset?: string;
            period?: string;
            startBy?: StartBy;
        }
        • Optionalby?: ColumnsOrExpr

          Also group by this column/these columns

        • Optionalclosed?: "none" | "left" | "right" | "both"

          Defines if the window interval is closed or not. Any of {"left", "right", "both", "none"}

        • every: string

          interval of the window

        • OptionalincludeBoundaries?: boolean

          add the lower and upper bound of the window to the "_lower_bound" and "_upper_bound" columns. This will impact performance because it's harder to parallelize

        • indexColumn: string

          Column used to group based on the time window. Often of type Date/Datetime. This column must be sorted in ascending order. If not, the output will not make sense.

          In case of a dynamic groupby on indices, dtype needs to be one of {Int32, Int64}. Note that
          Int32 gets temporarily cast to Int64, so if performance matters use an Int64 column.
          
        • Optionallabel?: string

          Define which label to use for the window: Any of {'left', 'right', 'datapoint'}

        • Optionaloffset?: string

          offset of the window if None and period is None it will be equal to negative every

        • Optionalperiod?: string

          length of the window, if None it is equal to 'every'

        • OptionalstartBy?: StartBy

          The strategy to determine the start of the first window by. Any of {'window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'}

      Returns LazyGroupBy

    • Create rolling groups based on a time column (or index value of type Int32, Int64).

      Different from a rolling groupby the windows are now determined by the individual values and are not of constant intervals. For constant intervals use groupByDynamic

      The period and offset arguments are created with the following string language:

      • 1ns (1 nanosecond)
      • 1us (1 microsecond)
      • 1ms (1 millisecond)
      • 1s (1 second)
      • 1m (1 minute)
      • 1h (1 hour)
      • 1d (1 day)
      • 1w (1 week)
      • 1mo (1 calendar month)
      • 1y (1 calendar year)
      • 1i (1 index count)

      Or combine them: "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

      In case of a groupby_rolling on an integer column, the windows are defined by:

      • "1i" # length 1
      • "10i" # length 10

      Parameters

      • opts: {
            by?: ColumnsOrExpr;
            closed?: "none" | "left" | "right" | "both";
            indexColumn: ColumnsOrExpr;
            offset?: string;
            period: string;
        }
        • Optionalby?: ColumnsOrExpr

          Also group by this column/these columns

        • Optionalclosed?: "none" | "left" | "right" | "both"

          Defines if the window interval is closed or not. Any of {"left", "right", "both", "none"}

        • indexColumn: ColumnsOrExpr

          Column used to group based on the time window. Often of type Date/Datetime. This column must be sorted in ascending order. If not, the output will not make sense.

          In case of a rolling groupby on indices, dtype needs to be one of {Int32, Int64}. Note that Int32 gets temporarily cast to Int64, so if performance matters use an Int64 column.

        • Optionaloffset?: string

          offset of the window. Default is -period

        • period: string

          length of the window

      Returns LazyGroupBy


      >dates = [
      ... "2020-01-01 13:45:48",
      ... "2020-01-01 16:42:13",
      ... "2020-01-01 16:45:09",
      ... "2020-01-02 18:12:48",
      ... "2020-01-03 19:45:32",
      ... "2020-01-08 23:16:43",
      ... ]
      >df = pl.DataFrame({"dt": dates, "a": [3, 7, 5, 9, 2, 1]}).withColumn(
      ... pl.col("dt").str.strptime(pl.Datetime)
      ... )
      >out = df.groupByRolling({indexColumn:"dt", period:"2d"}).agg(
      ... [
      ... pl.sum("a").alias("sum_a"),
      ... pl.min("a").alias("min_a"),
      ... pl.max("a").alias("max_a"),
      ... ]
      ... )
      >assert(out["sum_a"].toArray() === [3, 10, 15, 24, 11, 1])
      >assert(out["max_a"].toArray() === [3, 7, 7, 9, 9, 1])
      >assert(out["min_a"].toArray() === [3, 3, 3, 3, 2, 1])
      >out
      shape: (6, 4)
      ┌─────────────────────┬───────┬───────┬───────┐
      │ dt                  ┆ a_sum ┆ a_max ┆ a_min │
      │ ---                 ┆ ---   ┆ ---   ┆ ---   │
      │ datetime[ms]        ┆ i64   ┆ i64   ┆ i64   │
      ╞═════════════════════╪═══════╪═══════╪═══════╡
      │ 2020-01-01 13:45:48 ┆ 3     ┆ 3     ┆ 3     │
      ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
      │ 2020-01-01 16:42:13 ┆ 10    ┆ 7     ┆ 3     │
      ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
      │ 2020-01-01 16:45:09 ┆ 15    ┆ 7     ┆ 3     │
      ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
      │ 2020-01-02 18:12:48 ┆ 24    ┆ 9     ┆ 3     │
      ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
      │ 2020-01-03 19:45:32 ┆ 11    ┆ 9     ┆ 2     │
      ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
      │ 2020-01-08 23:16:43 ┆ 1     ┆ 1     ┆ 1     │
      └─────────────────────┴───────┴───────┴───────┘
    • Gets the first n rows of the DataFrame. You probably don't want to use this!

      Consider using the fetch operation. The fetch operation will truly load the first n rows lazily.

      Parameters

      • Optionallength: number

      Returns LazyDataFrame<S>

    • SQL like joins.

      Type Parameters

      Parameters

      • other: LazyDataFrame<S2>

        DataFrame to join with.

      • joinOptions: Opts & SameNameColumnJoinOptions<string, string> & LazyJoinBase

        options for same named column join

        • Optionalhow?: Exclude<JoinType, "cross">

          Join strategy

        • on: ValueOrArray<L & R>

          Name(s) of the join columns in both DataFrames.

        • Optionalsuffix?: string

          Suffix to append to columns with a duplicate name.

        • on

          Name(s) of the join columns in both DataFrames.

        • how

          Join strategy

        • suffix

          Suffix to append to columns with a duplicate name.

        • allowParallel

          Allow the physical plan to optionally evaluate the computation of both DataFrames up to the join in parallel.

        • forceParallel

          Force the physical plan to evaluate the computation of both DataFrames up to the join in parallel.

      Returns LazyDataFrame<
          {
              [K in string
              | number
              | symbol]: (
                  { [K1 in string
                  | number
                  | symbol]: S[K1] } & {
                      [K2 in string | number | symbol]: K2 extends keyof S ? never : S2[K2]
                  } & {
                      [K_SUFFIXED in string
                      | number
                      | symbol as `${K_SUFFIXED extends string
                          ? K_SUFFIXED<K_SUFFIXED>
                          : never}${ExtractSuffix<Opts>}`]: K_SUFFIXED extends string
                          ? S2[K_SUFFIXED<K_SUFFIXED>]
                          : never
                  }
              )[K]
          },
      >

      >>> const df = pl.DataFrame({
      >>> foo: [1, 2, 3],
      >>> bar: [6.0, 7.0, 8.0],
      >>> ham: ['a', 'b', 'c'],
      >>> }).lazy()
      >>>
      >>> const otherDF = pl.DataFrame({
      >>> apple: ['x', 'y', 'z'],
      >>> ham: ['a', 'b', 'd'],
      >>> }).lazy();
      >>> const result = await df.join(otherDF, { on: 'ham', how: 'inner' }).collect();
      shape: (2, 4)
      ╭─────┬─────┬─────┬───────╮
      │ foo ┆ bar ┆ ham ┆ apple │
      │ --- ┆ --- ┆ --- ┆ ---   │
      │ i64 ┆ f64 ┆ str ┆ str   │
      ╞═════╪═════╪═════╪═══════╡
      │ 1   ┆ 6   ┆ "a" ┆ "x"   │
      ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
      │ 2   ┆ 7   ┆ "b" ┆ "y"   │
      ╰─────┴─────┴─────┴───────╯
    • SQL like joins with different names for left and right dataframes.

      Type Parameters

      Parameters

      • other: LazyDataFrame<S2>

        DataFrame to join with.

      • joinOptions: Opts & DifferentNameColumnJoinOptions<string, string> & LazyJoinBase

        options for differently named column join

        • Optionalhow?: Exclude<JoinType, "cross">

          Join strategy

        • leftOn: ValueOrArray<L>

          Name(s) of the left join column(s).

        • rightOn: ValueOrArray<R>

          Name(s) of the right join column(s).

        • Optionalsuffix?: string

          Suffix to append to columns with a duplicate name.

        • leftOn

          Name(s) of the left join column(s).

        • rightOn

          Name(s) of the right join column(s).

        • how

          Join strategy

        • suffix

          Suffix to append to columns with a duplicate name.

        • allowParallel

          Allow the physical plan to optionally evaluate the computation of both DataFrames up to the join in parallel.

        • forceParallel

          Force the physical plan to evaluate the computation of both DataFrames up to the join in parallel.

      Returns LazyDataFrame<
          {
              [K in string
              | number
              | symbol]: (
                  { [K1 in string
                  | number
                  | symbol]: S[K1] } & {
                      [K2 in string | number | symbol]: K2 extends keyof S ? never : S2[K2]
                  } & {
                      [K_SUFFIXED in string
                      | number
                      | symbol as `${K_SUFFIXED extends string
                          ? K_SUFFIXED<K_SUFFIXED>
                          : never}${ExtractSuffix<Opts>}`]: K_SUFFIXED extends string
                          ? S2[K_SUFFIXED<K_SUFFIXED>]
                          : never
                  }
              )[K]
          },
      >

      >>> const df = pl.DataFrame({
      >>> foo: [1, 2, 3],
      >>> bar: [6.0, 7.0, 8.0],
      >>> ham: ['a', 'b', 'c'],
      >>> }).lazy()
      >>>
      >>> const otherDF = pl.DataFrame({
      >>> apple: ['x', 'y', 'z'],
      >>> ham: ['a', 'b', 'd'],
      >>> }).lazy();
      >>> const result = await df.join(otherDF, { leftOn: 'ham', rightOn: 'ham', how: 'inner' }).collect();
      shape: (2, 4)
      ╭─────┬─────┬─────┬───────╮
      │ foo ┆ bar ┆ ham ┆ apple │
      │ --- ┆ --- ┆ --- ┆ ---   │
      │ i64 ┆ f64 ┆ str ┆ str   │
      ╞═════╪═════╪═════╪═══════╡
      │ 1   ┆ 6   ┆ "a" ┆ "x"   │
      ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
      │ 2   ┆ 7   ┆ "b" ┆ "y"   │
      ╰─────┴─────┴─────┴───────╯
    • SQL like cross joins.

      Type Parameters

      Parameters

      • other: LazyDataFrame<S2>

        DataFrame to join with.

      • joinOptions: Opts & CrossJoinOptions & LazyJoinBase

        options for cross join

        • how: "cross"

          Join strategy

        • Optionalsuffix?: string

          Suffix to append to columns with a duplicate name.

        • how

          Join strategy

        • suffix

          Suffix to append to columns with a duplicate name.

        • allowParallel

          Allow the physical plan to optionally evaluate the computation of both DataFrames up to the join in parallel.

        • forceParallel

          Force the physical plan to evaluate the computation of both DataFrames up to the join in parallel.

      Returns LazyDataFrame<
          {
              [K in string
              | number
              | symbol]: (
                  { [K1 in string
                  | number
                  | symbol]: S[K1] } & {
                      [K2 in string | number | symbol]: K2 extends keyof S ? never : S2[K2]
                  } & {
                      [K_SUFFIXED in string
                      | number
                      | symbol as `${K_SUFFIXED extends string
                          ? K_SUFFIXED<K_SUFFIXED>
                          : never}${ExtractSuffix<Opts>}`]: K_SUFFIXED extends string
                          ? S2[K_SUFFIXED<K_SUFFIXED>]
                          : never
                  }
              )[K]
          },
      >

      >>> const df = pl.DataFrame({
      >>> foo: [1, 2],
      >>> bar: [6.0, 7.0],
      >>> ham: ['a', 'b'],
      >>> }).lazy()
      >>>
      >>> const otherDF = pl.DataFrame({
      >>> apple: ['x', 'y'],
      >>> ham: ['a', 'b'],
      >>> }).lazy();
      >>> const result = await df.join(otherDF, { how: 'cross' }).collect();
      shape: (4, 5)
      ╭─────┬─────┬─────┬───────┬───────────╮
      │ foo ┆ bar ┆ ham ┆ apple ┆ ham_right │
      │ --- ┆ --- ┆ --- ┆ ---   ┆ ---       │
      │ f64 ┆ f64 ┆ str ┆ str   ┆ str       │
      ╞═════╪═════╪═════╪═══════╪═══════════╡
      │ 1.0 ┆ 6.0 ┆ a   ┆ x     ┆ a         │
      │ 1.0 ┆ 6.0 ┆ a   ┆ y     ┆ b         │
      │ 2.0 ┆ 7.0 ┆ b   ┆ x     ┆ a         │
      │ 2.0 ┆ 7.0 ┆ b   ┆ y     ┆ b         │
      ╰─────┴─────┴─────┴───────┴───────────╯
    • Perform an asof join. This is similar to a left-join except that we match on nearest key rather than equal keys.

      Both DataFrames must be sorted by the asof_join key.

      For each row in the left DataFrame:

      • A "backward" search selects the last row in the right DataFrame whose 'on' key is less than or equal to the left's key.

      • A "forward" search selects the first row in the right DataFrame whose 'on' key is greater than or equal to the left's key.

      • A "nearest" search selects the last row in the right DataFrame whose value is nearest to the left's key. String keys are not currently supported for a nearest search.

      The default is "backward".

      Parameters

      • other: LazyDataFrame

        DataFrame to join with.

      • options: {
            allowParallel?: boolean;
            by?: string | string[];
            byLeft?: string | string[];
            byRight?: string | string[];
            forceParallel?: boolean;
            leftOn?: string;
            on?: string;
            rightOn?: string;
            strategy?: "backward" | "forward" | "nearest";
            suffix?: string;
            tolerance?: string | number;
        }
        • OptionalallowParallel?: boolean

          Allow the physical plan to optionally evaluate the computation of both DataFrames up to the join in parallel.

        • Optionalby?: string | string[]
        • OptionalbyLeft?: string | string[]

          join on these columns before doing asof join

        • OptionalbyRight?: string | string[]

          join on these columns before doing asof join

        • OptionalforceParallel?: boolean

          Force the physical plan to evaluate the computation of both DataFrames up to the join in parallel.

        • OptionalleftOn?: string

          Join column of the left DataFrame.

        • Optionalon?: string

          Join column of both DataFrames. If set, leftOn and rightOn should be undefined.

        • OptionalrightOn?: string

          Join column of the right DataFrame.

        • Optionalstrategy?: "backward" | "forward" | "nearest"

          One of {'forward', 'backward', 'nearest'}

        • Optionalsuffix?: string

          Suffix to append to columns with a duplicate name.

        • Optionaltolerance?: string | number

          Numeric tolerance. By setting this the join will only be done if the near keys are within this distance. If an asof join is done on columns of dtype "Date", "Datetime" you use the following string language:

          • 1ns (1 nanosecond)
          • 1us (1 microsecond)
          • 1ms (1 millisecond)
          • 1s (1 second)
          • 1m (1 minute)
          • 1h (1 hour)
          • 1d (1 day)
          • 1w (1 week)
          • 1mo (1 calendar month)
          • 1y (1 calendar year)
          • 1i (1 index count)

          Or combine them:

          • "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

      Returns LazyDataFrame

       >const gdp = pl.DataFrame({
      ... date: [
      ... new Date('2016-01-01'),
      ... new Date('2017-01-01'),
      ... new Date('2018-01-01'),
      ... new Date('2019-01-01'),
      ... ], // note record date: Jan 1st (sorted!)
      ... gdp: [4164, 4411, 4566, 4696],
      ... })
      >const population = pl.DataFrame({
      ... date: [
      ... new Date('2016-05-12'),
      ... new Date('2017-05-12'),
      ... new Date('2018-05-12'),
      ... new Date('2019-05-12'),
      ... ], // note record date: May 12th (sorted!)
      ... "population": [82.19, 82.66, 83.12, 83.52],
      ... })
      >population.joinAsof(
      ... gdp,
      ... {leftOn:"date", rightOn:"date", strategy:"backward"}
      ... )
      shape: (4, 3)
      ┌─────────────────────┬────────────┬──────┐
      │ date                ┆ population ┆ gdp  │
      │ ---                 ┆ ---        ┆ ---  │
      │ datetime[μs]        ┆ f64        ┆ i64  │
      ╞═════════════════════╪════════════╪══════╡
      │ 2016-05-12 00:00:00 ┆ 82.19      ┆ 4164 │
      ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
      │ 2017-05-12 00:00:00 ┆ 82.66      ┆ 4411 │
      ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
      │ 2018-05-12 00:00:00 ┆ 83.12      ┆ 4566 │
      ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
      │ 2019-05-12 00:00:00 ┆ 83.52      ┆ 4696 │
      └─────────────────────┴────────────┴──────┘
    • Type Parameters

      • const U extends Partial<Record<keyof S, string>>

      Parameters

      • mapping: U

      Returns LazyDataFrame<
          {
              [K in string
              | number
              | symbol as U[K] extends string ? any[any] : K]: S[K]
          },
      >

    • Parameters

      • mapping: Record<string, string>

      Returns LazyDataFrame

    • Serializes object to desired format via serde

      Parameters

      Returns Buffer

    • Evaluate the query in streaming mode and write to a CSV file.

      .. warning:: Streaming mode is considered unstable. It may be changed at any point without it being considered a breaking change.

      This allows streaming results that are larger than RAM to be written to disk.

      Parameters

      • path: string

        File path to which the file should be written.

      • Optional options: CsvWriterOptions

        Options for writing the CSV file.

        • Optional batchSize?: number
        • Optional dateFormat?: string
        • Optional datetimeFormat?: string
        • Optional floatPrecision?: number
        • Optional includeBom?: boolean
        • Optional includeHeader?: boolean
        • Optional lineTerminator?: string
        • Optional maintainOrder?: boolean
        • Optional nullValue?: string
        • Optional quoteChar?: string
        • Optional separator?: string
        • Optional timeFormat?: string

      Returns LazyDataFrame

      const lf = pl.scanCsv("/path/to/my_larger_than_ram_file.csv"); lf.sinkCsv("out.csv").collect()

    • Evaluate the query in streaming mode and write to an IPC file. This allows streaming results that are larger than RAM to be written to disk.

      Parameters

      • path: string

        File path to which the file should be written.

      • Optional options: SinkIpcOptions

        Options for writing the IPC file.

        • Optional cloudOptions?: Map<string, string>
        • Optional compatLevel?: string
        • Optional compression?: string
        • Optional maintainOrder?: boolean
        • Optional mkdir?: boolean
        • Optional retries?: number
        • Optional syncOnClose?: string

      Returns LazyDataFrame

      const lf = pl.scanCsv("/path/to/my_larger_than_ram_file.csv"); lf.sinkIpc("out.arrow").collect()

    • Evaluate the query in streaming mode and write to an NDJSON file. This allows streaming results that are larger than RAM to be written to disk.

      Parameters

      • path: string

        File path to which the file should be written.

      • Optional options: SinkJsonOptions

        Options for writing the NDJSON file.

        • Optional cloudOptions?: Map<string, string>
        • Optional maintainOrder?: boolean
        • Optional mkdir?: boolean
        • Optional retries?: number
        • Optional syncOnClose?: string

      Returns LazyDataFrame

      const lf = pl.scanCsv("/path/to/my_larger_than_ram_file.csv"); lf.sinkNdJson("out.ndjson").collect()

    • Evaluate the query in streaming mode and write to a Parquet file.

      This allows streaming results that are larger than RAM to be written to disk.

      Parameters

      • path: string

        File path to which the file should be written.

      • Optional options: SinkParquetOptions

        Options for writing the Parquet file.

        • Optional cloudOptions?: Map<string, string>
        • Optional compression?: string
        • Optional compressionLevel?: number
        • Optional dataPagesizeLimit?: number
        • Optional maintainOrder?: boolean
        • Optional noOptimization?: boolean
        • Optional predicatePushdown?: boolean
        • Optional projectionPushdown?: boolean
        • Optional retries?: number
        • Optional rowGroupSize?: number
        • Optional simplifyExpression?: boolean
        • Optional sinkOptions?: SinkOptions
        • Optional slicePushdown?: boolean
        • Optional statistics?: boolean
        • Optional typeCoercion?: boolean

      Returns LazyDataFrame

      const lf = pl.scanCsv("/path/to/my_larger_than_ram_file.csv"); lf.sinkParquet("out.parquet").collect()

    • Parameters

      • by: ColumnsOrExpr
      • Optional descending: ValueOrArray<boolean>
      • Optional nullsLast: boolean
      • Optional maintainOrder: boolean

      Returns LazyDataFrame<S>

    • Parameters

      • opts: {
            by: ColumnsOrExpr;
            descending?: ValueOrArray<boolean>;
            maintainOrder?: boolean;
            nullsLast?: boolean;
        }

      Returns LazyDataFrame<S>

    • Drop duplicate rows from this DataFrame. Note that this fails if there is a column of type List in the DataFrame.

      Parameters

      • Optional maintainOrder: boolean
      • Optional subset: ColumnSelection

        subset to drop duplicates for

      • Optional keep: "first" | "last"

        "first" | "last"

      Returns LazyDataFrame<S>

    • Parameters

      • opts: { keep?: "first" | "last"; maintainOrder?: boolean; subset?: ColumnSelection }

      Returns LazyDataFrame<S>

    • Parameters

      • idVars: ColumnSelection
      • valueVars: ColumnSelection
      • Optional options: { valueName?: null | string; variableName?: null | string }

      Returns LazyDataFrame