Skip to content

Replace all values by different values.

Source code

Description

This changes all the values in a column, either using a specific replacement or a default one. See $replace() to replace only a subset of values.

Usage

<Expr>$replace_strict(old, new, default = NULL, return_dtype = NULL)

Arguments

old Can be several things:
  • a vector indicating the values to recode;
  • if new is missing, this can be a named list, e.g. list(old = "new"), where the names are the old values and the values are the replacements. Note that if the old values are numeric, the names must be wrapped in backticks;
  • an Expr
new Either a vector of length 1, a vector of same length as old or an Expr. If missing, old must be a named list.
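default The default replacement if the value is not in old. Can be an Expr. If NULL (default), no fallback is used and an error is raised if any value is not found in old; use $replace() instead to leave unmatched values unchanged.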
return_dtype The data type of the resulting expression. If set to NULL (default), the data type is determined automatically based on the other inputs.

Value

Expr

Examples

library("polars")

df = pl$DataFrame(a = c(1, 2, 2, 3))

# "old" and "new" can take vectors of length 1 or of same length
df$with_columns(replaced = pl$col("a")$replace_strict(2, 100, default = 1))
#> shape: (4, 2)
#> ┌─────┬──────────┐
#> │ a   ┆ replaced │
#> │ --- ┆ ---      │
#> │ f64 ┆ f64      │
#> ╞═════╪══════════╡
#> │ 1.0 ┆ 1.0      │
#> │ 2.0 ┆ 100.0    │
#> │ 2.0 ┆ 100.0    │
#> │ 3.0 ┆ 1.0      │
#> └─────┴──────────┘
df$with_columns(
  replaced = pl$col("a")$replace_strict(c(2, 3), c(100, 200), default = 1)
)
#> shape: (4, 2)
#> ┌─────┬──────────┐
#> │ a   ┆ replaced │
#> │ --- ┆ ---      │
#> │ f64 ┆ f64      │
#> ╞═════╪══════════╡
#> │ 1.0 ┆ 1.0      │
#> │ 2.0 ┆ 100.0    │
#> │ 2.0 ┆ 100.0    │
#> │ 3.0 ┆ 200.0    │
#> └─────┴──────────┘
# "old" can be a named list where names are values to replace, and values are
# the replacements
mapping = list(`2` = 100, `3` = 200)
df$with_columns(replaced = pl$col("a")$replace_strict(mapping, default = -1))
#> shape: (4, 2)
#> ┌─────┬──────────┐
#> │ a   ┆ replaced │
#> │ --- ┆ ---      │
#> │ f64 ┆ f64      │
#> ╞═════╪══════════╡
#> │ 1.0 ┆ -1.0     │
#> │ 2.0 ┆ 100.0    │
#> │ 2.0 ┆ 100.0    │
#> │ 3.0 ┆ 200.0    │
#> └─────┴──────────┘
# one can specify the data type to return instead of automatically
# inferring it
df$with_columns(
  replaced = pl$col("a")$replace_strict(mapping, default = 1, return_dtype = pl$Int32)
)
#> shape: (4, 2)
#> ┌─────┬──────────┐
#> │ a   ┆ replaced │
#> │ --- ┆ ---      │
#> │ f64 ┆ i32      │
#> ╞═════╪══════════╡
#> │ 1.0 ┆ 1        │
#> │ 2.0 ┆ 100      │
#> │ 2.0 ┆ 100      │
#> │ 3.0 ┆ 200      │
#> └─────┴──────────┘
# "old", "new", and "default" can take Expr
df = pl$DataFrame(a = c(1, 2, 2, 3), b = c(1.5, 2.5, 5, 1))
df$with_columns(
  replaced = pl$col("a")$replace_strict(
    old = pl$col("a")$max(),
    new = pl$col("b")$sum(),
    default = pl$col("b"),
  )
)
#> shape: (4, 3)
#> ┌─────┬─────┬──────────┐
#> │ a   ┆ b   ┆ replaced │
#> │ --- ┆ --- ┆ ---      │
#> │ f64 ┆ f64 ┆ f64      │
#> ╞═════╪═════╪══════════╡
#> │ 1.0 ┆ 1.5 ┆ 1.5      │
#> │ 2.0 ┆ 2.5 ┆ 2.5      │
#> │ 2.0 ┆ 5.0 ┆ 5.0      │
#> │ 3.0 ┆ 1.0 ┆ 10.0     │
#> └─────┴─────┴──────────┘