Convert a String column into a Date/Datetime/Time column.
Description
Similar to the strptime()
function.
Usage
<Expr>$str$strptime(
dtype,
format = NULL,
...,
strict = TRUE,
exact = TRUE,
cache = TRUE,
ambiguous = "raise"
)
Arguments
dtype
|
The data type to convert into. Can be pl$Date, pl$Datetime(), or
pl$Time.
|
format
|
Format to use for conversion. Refer to the chrono crate documentation
for the full specification. Example: "%Y-%m-%d %H:%M:%S". If NULL
(default), the format is inferred from the data. Note that the time
zone specifier %Z is not supported: time zones written that way are
simply ignored. Numeric time zones like %z or %:z are supported.
|
…
|
Not used. |
strict
|
If TRUE (default), raise an error if any single string cannot be
parsed. If FALSE, produce a polars null for strings that cannot be parsed.
|
exact
|
If TRUE (default), require an exact format match. If FALSE, allow the
format to match anywhere in the target string. Conversion to the Time
type is always exact. Note that using exact = FALSE introduces a
performance penalty - cleaning your data beforehand will almost
certainly be more performant.
|
cache
|
Use a cache of unique, converted dates to apply the datetime conversion. |
ambiguous
|
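Determine how to deal with ambiguous datetimes:
- "raise" (default): throw an error
- "earliest": use the earliest datetime
- "latest": use the latest datetime
- "null": return a null value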
|
Details
When parsing a Datetime, the column precision is inferred from the
format string, if given; e.g. "%F %T%.3f" => pl$Datetime("ms"). If no
fractional second component is found, the default is "us"
(microsecond).
Value
Expr of Date, Datetime or Time type
See Also
- <Expr>$str$to_date()
- <Expr>$str$to_datetime()
- <Expr>$str$to_time()
Examples
library("polars")
# Dealing with a consistent format
s = as_polars_series(c("2020-01-01 01:00Z", "2020-01-01 02:00Z"))
s$str$strptime(pl$Datetime(), "%Y-%m-%d %H:%M%#z")
#> polars Series: shape: (2,)
#> Series: '' [datetime[μs, UTC]]
#> [
#> 2020-01-01 01:00:00 UTC
#> 2020-01-01 02:00:00 UTC
#> ]
#> polars Series: shape: (2,)
#> Series: '' [datetime[μs, UTC]]
#> [
#> 2020-01-01 01:00:00 UTC
#> 2020-01-01 02:00:00 UTC
#> ]
# Datetime with timezone is interpreted as UTC timezone
as_polars_series("2020-01-01T01:00:00+09:00")$str$strptime(pl$Datetime())
#> polars Series: shape: (1,)
#> Series: '' [datetime[μs, UTC]]
#> [
#> 2019-12-31 16:00:00 UTC
#> ]
# Dealing with different formats.
s = as_polars_series(
c(
"2021-04-22",
"2022-01-04 00:00:00",
"01/31/22",
"Sun Jul 8 00:34:60 2001"
),
"date"
)
s$to_frame()$select(
pl$coalesce(
pl$col("date")$str$strptime(pl$Date, "%F", strict = FALSE),
pl$col("date")$str$strptime(pl$Date, "%F %T", strict = FALSE),
pl$col("date")$str$strptime(pl$Date, "%D", strict = FALSE),
pl$col("date")$str$strptime(pl$Date, "%c", strict = FALSE)
)
)
#> shape: (4, 1)
#> ┌────────────┐
#> │ date │
#> │ --- │
#> │ date │
#> ╞════════════╡
#> │ 2021-04-22 │
#> │ 2022-01-04 │
#> │ 2022-01-31 │
#> │ 2001-07-08 │
#> └────────────┘
# Ignore invalid time
s = as_polars_series(
c(
"2023-01-01 11:22:33 -0100",
"2023-01-01 11:22:33 +0300",
"invalid time"
)
)
s$str$strptime(
pl$Datetime("ns"),
format = "%Y-%m-%d %H:%M:%S %z",
strict = FALSE
)
#> polars Series: shape: (3,)
#> Series: '' [datetime[ns, UTC]]
#> [
#> 2023-01-01 12:22:33 UTC
#> 2023-01-01 08:22:33 UTC
#> null
#> ]