Perform joins on nearest keys
Description
This is similar to a left-join except that we match on nearest key rather than equal keys.
Usage
<DataFrame>$join_asof(
other,
...,
left_on = NULL,
right_on = NULL,
on = NULL,
by_left = NULL,
by_right = NULL,
by = NULL,
strategy = c("backward", "forward", "nearest"),
suffix = "_right",
tolerance = NULL,
allow_parallel = TRUE,
force_parallel = FALSE,
coalesce = TRUE
)
Arguments
other
|
DataFrame or LazyFrame |
…
|
Not used, blocks use of further positional arguments |
left_on , right_on
|
Same as on but only for the left or the right DataFrame.
They must have the same length.
|
on
|
Either a vector of column names or a list of expressions and/or strings.
Use left_on and right_on if the column names
to match on are different between the two DataFrames.
|
by_left , by_right
|
Same as by but only for the left or the right table. They
must have the same length.
|
by
|
Join on these columns before performing asof join. Either a vector of
column names or a list of expressions and/or strings. Use
left_by and right_by if the column names to
match on are different between the two tables.
|
strategy
|
Strategy for where to find match:
|
suffix
|
Suffix to add to duplicated column names. |
tolerance
|
Numeric tolerance. By setting this the join will only be done if the
near keys are within this distance. If an asof join is done on columns
of dtype "Date", "Datetime", "Duration" or "Time", use the Polars
duration string language. About the language, see the
Polars duration string language
section for details.
There may be a circumstance where R types are not sufficient to express
a numeric tolerance. In that case, you can use the expression syntax
like tolerance = pl$lit(42)$cast(pl$Uint64)
|
allow_parallel
|
Allow the physical plan to optionally evaluate the computation of both DataFrames up to the join in parallel. |
force_parallel
|
Force the physical plan to evaluate the computation of both DataFrames up to the join in parallel. |
coalesce
|
Coalescing behavior (merging of on / left_on /
right_on columns):
|
Details
Both tables (DataFrames or LazyFrames) must be sorted by the asof_join key.
Value
New joined DataFrame
Polars duration string language
Polars duration string language is a simple representation of durations. It is used in many Polars functions that accept durations.
It has the following format:
- 1ns (1 nanosecond)
- 1us (1 microsecond)
- 1ms (1 millisecond)
- 1s (1 second)
- 1m (1 minute)
- 1h (1 hour)
- 1d (1 calendar day)
- 1w (1 calendar week)
- 1mo (1 calendar month)
- 1q (1 calendar quarter)
- 1y (1 calendar year)
Or combine them: “3d12h4m25s”
# 3 days, 12 hours, 4
minutes, and 25 seconds
By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year".
Examples
library("polars")
# create two DataFrames to join asof
gdp = pl$DataFrame(
date = as.Date(c("2015-1-1", "2016-1-1", "2017-5-1", "2018-1-1", "2019-1-1")),
gdp = c(4321, 4164, 4411, 4566, 4696),
group = c("b", "a", "a", "b", "b")
)
pop = pl$DataFrame(
date = as.Date(c("2016-5-12", "2017-5-12", "2018-5-12", "2019-5-12")),
population = c(82.19, 82.66, 83.12, 83.52),
group = c("b", "b", "a", "a")
)
# optional make sure tables are already sorted with "on" join-key
gdp = gdp$sort("date")
pop = pop$sort("date")
# Left-join_asof DataFrame pop with gdp on "date"
# Look backward in gdp to find closest matching date
pop$join_asof(gdp, on = "date", strategy = "backward")
#> shape: (4, 5)
#> ┌────────────┬────────────┬───────┬────────┬─────────────┐
#> │ date ┆ population ┆ group ┆ gdp ┆ group_right │
#> │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
#> │ date ┆ f64 ┆ str ┆ f64 ┆ str │
#> ╞════════════╪════════════╪═══════╪════════╪═════════════╡
#> │ 2016-05-12 ┆ 82.19 ┆ b ┆ 4164.0 ┆ a │
#> │ 2017-05-12 ┆ 82.66 ┆ b ┆ 4411.0 ┆ a │
#> │ 2018-05-12 ┆ 83.12 ┆ a ┆ 4566.0 ┆ b │
#> │ 2019-05-12 ┆ 83.52 ┆ a ┆ 4696.0 ┆ b │
#> └────────────┴────────────┴───────┴────────┴─────────────┘
#> shape: (4, 5)
#> ┌────────────┬────────────┬───────┬────────┬─────────────┐
#> │ date ┆ population ┆ group ┆ gdp ┆ group_right │
#> │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
#> │ date ┆ f64 ┆ str ┆ f64 ┆ str │
#> ╞════════════╪════════════╪═══════╪════════╪═════════════╡
#> │ 2016-05-12 ┆ 82.19 ┆ b ┆ 4411.0 ┆ a │
#> │ 2017-05-12 ┆ 82.66 ┆ b ┆ 4566.0 ┆ b │
#> │ 2018-05-12 ┆ 83.12 ┆ a ┆ 4696.0 ┆ b │
#> │ 2019-05-12 ┆ 83.52 ┆ a ┆ null ┆ null │
#> └────────────┴────────────┴───────┴────────┴─────────────┘
# join by a group: "only look within within group"
pop$join_asof(gdp, on = "date", by = "group", strategy = "backward")
#> shape: (4, 4)
#> ┌────────────┬────────────┬───────┬────────┐
#> │ date ┆ population ┆ group ┆ gdp │
#> │ --- ┆ --- ┆ --- ┆ --- │
#> │ date ┆ f64 ┆ str ┆ f64 │
#> ╞════════════╪════════════╪═══════╪════════╡
#> │ 2016-05-12 ┆ 82.19 ┆ b ┆ 4321.0 │
#> │ 2017-05-12 ┆ 82.66 ┆ b ┆ 4321.0 │
#> │ 2018-05-12 ┆ 83.12 ┆ a ┆ 4411.0 │
#> │ 2019-05-12 ┆ 83.52 ┆ a ┆ 4411.0 │
#> └────────────┴────────────┴───────┴────────┘
# only look 2 weeks and 2 days back
pop$join_asof(gdp, on = "date", strategy = "backward", tolerance = "2w2d")
#> shape: (4, 5)
#> ┌────────────┬────────────┬───────┬────────┬─────────────┐
#> │ date ┆ population ┆ group ┆ gdp ┆ group_right │
#> │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
#> │ date ┆ f64 ┆ str ┆ f64 ┆ str │
#> ╞════════════╪════════════╪═══════╪════════╪═════════════╡
#> │ 2016-05-12 ┆ 82.19 ┆ b ┆ null ┆ null │
#> │ 2017-05-12 ┆ 82.66 ┆ b ┆ 4411.0 ┆ a │
#> │ 2018-05-12 ┆ 83.12 ┆ a ┆ null ┆ null │
#> │ 2019-05-12 ┆ 83.52 ┆ a ┆ null ┆ null │
#> └────────────┴────────────┴───────┴────────┴─────────────┘
# only look 11 days back (numeric tolerance depends on polars type, <date> is in days)
pop$join_asof(gdp, on = "date", strategy = "backward", tolerance = 11)
#> shape: (4, 5)
#> ┌────────────┬────────────┬───────┬────────┬─────────────┐
#> │ date ┆ population ┆ group ┆ gdp ┆ group_right │
#> │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
#> │ date ┆ f64 ┆ str ┆ f64 ┆ str │
#> ╞════════════╪════════════╪═══════╪════════╪═════════════╡
#> │ 2016-05-12 ┆ 82.19 ┆ b ┆ null ┆ null │
#> │ 2017-05-12 ┆ 82.66 ┆ b ┆ 4411.0 ┆ a │
#> │ 2018-05-12 ┆ 83.12 ┆ a ┆ null ┆ null │
#> │ 2019-05-12 ┆ 83.52 ┆ a ┆ null ┆ null │
#> └────────────┴────────────┴───────┴────────┴─────────────┘