Read into a DataFrame from Arrow IPC (Feather v2) file
Description
Read into a DataFrame from Arrow IPC (Feather v2) file
Usage
pl_read_ipc(
source,
...,
n_rows = NULL,
memory_map = TRUE,
row_index_name = NULL,
row_index_offset = 0L,
rechunk = FALSE,
cache = TRUE
)
Arguments
source
|
A single character or a raw vector of Apache Arrow IPC file. You can use
globbing with \* to scan/read multiple files in the same
directory (see examples).
|
…
|
Ignored. |
n_rows
|
Maximum number of rows to read. |
memory_map
|
A logical. If TRUE , try to memory map the file. This can
greatly improve performance on repeated queries as the OS may cache
pages. Only uncompressed Arrow IPC files can be memory mapped.
|
row_index_name
|
If not NULL , this will insert a row index column with the
given name into the DataFrame.
|
row_index_offset
|
Offset to start the row index column (only used if the name is set). |
rechunk
|
In case of reading multiple files via a glob pattern, rechunk the final DataFrame into contiguous memory chunks. |
cache
|
Cache the result after reading. |
Value
DataFrame
Examples
library("polars")
temp_dir = tempfile()
# Write a hive-style partitioned arrow file dataset
arrow::write_dataset(
mtcars,
temp_dir,
partitioning = c("cyl", "gear"),
format = "arrow",
hive_style = TRUE
)
list.files(temp_dir, recursive = TRUE)
#> [1] "cyl=4/gear=3/part-0.arrow" "cyl=4/gear=4/part-0.arrow"
#> [3] "cyl=4/gear=5/part-0.arrow" "cyl=6/gear=3/part-0.arrow"
#> [5] "cyl=6/gear=4/part-0.arrow" "cyl=6/gear=5/part-0.arrow"
#> [7] "cyl=8/gear=3/part-0.arrow" "cyl=8/gear=5/part-0.arrow"
# Read the dataset
# Sinse hive-style partitioning is not supported,
# the `cyl` and `gear` columns are not contained in the result
pl$read_ipc(
file.path(temp_dir, "**/*.arrow")
)
#> shape: (32, 9)
#> ┌──────┬───────┬───────┬──────┬───┬───────┬─────┬─────┬──────┐
#> │ mpg ┆ disp ┆ hp ┆ drat ┆ … ┆ qsec ┆ vs ┆ am ┆ carb │
#> │ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ f64 ┆ f64 ┆ ┆ f64 ┆ f64 ┆ f64 ┆ f64 │
#> ╞══════╪═══════╪═══════╪══════╪═══╪═══════╪═════╪═════╪══════╡
#> │ 21.5 ┆ 120.1 ┆ 97.0 ┆ 3.7 ┆ … ┆ 20.01 ┆ 1.0 ┆ 0.0 ┆ 1.0 │
#> │ 22.8 ┆ 108.0 ┆ 93.0 ┆ 3.85 ┆ … ┆ 18.61 ┆ 1.0 ┆ 1.0 ┆ 1.0 │
#> │ 24.4 ┆ 146.7 ┆ 62.0 ┆ 3.69 ┆ … ┆ 20.0 ┆ 1.0 ┆ 0.0 ┆ 2.0 │
#> │ 22.8 ┆ 140.8 ┆ 95.0 ┆ 3.92 ┆ … ┆ 22.9 ┆ 1.0 ┆ 0.0 ┆ 2.0 │
#> │ 32.4 ┆ 78.7 ┆ 66.0 ┆ 4.08 ┆ … ┆ 19.47 ┆ 1.0 ┆ 1.0 ┆ 1.0 │
#> │ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │
#> │ 15.2 ┆ 304.0 ┆ 150.0 ┆ 3.15 ┆ … ┆ 17.3 ┆ 0.0 ┆ 0.0 ┆ 2.0 │
#> │ 13.3 ┆ 350.0 ┆ 245.0 ┆ 3.73 ┆ … ┆ 15.41 ┆ 0.0 ┆ 0.0 ┆ 4.0 │
#> │ 19.2 ┆ 400.0 ┆ 175.0 ┆ 3.08 ┆ … ┆ 17.05 ┆ 0.0 ┆ 0.0 ┆ 2.0 │
#> │ 15.8 ┆ 351.0 ┆ 264.0 ┆ 4.22 ┆ … ┆ 14.5 ┆ 0.0 ┆ 1.0 ┆ 4.0 │
#> │ 15.0 ┆ 301.0 ┆ 335.0 ┆ 3.54 ┆ … ┆ 14.6 ┆ 0.0 ┆ 1.0 ┆ 8.0 │
#> └──────┴───────┴───────┴──────┴───┴───────┴─────┴─────┴──────┘
# Read a raw vector
arrow::arrow_table(
foo = 1:5,
bar = 6:10,
ham = letters[1:5]
) |>
arrow::write_to_raw(format = "file") |>
pl$read_ipc()
#> shape: (5, 3)
#> ┌─────┬─────┬─────┐
#> │ foo ┆ bar ┆ ham │
#> │ --- ┆ --- ┆ --- │
#> │ i32 ┆ i32 ┆ str │
#> ╞═════╪═════╪═════╡
#> │ 1 ┆ 6 ┆ a │
#> │ 2 ┆ 7 ┆ b │
#> │ 3 ┆ 8 ┆ c │
#> │ 4 ┆ 9 ┆ d │
#> │ 5 ┆ 10 ┆ e │
#> └─────┴─────┴─────┘