From 7811771affd62fdf1557a6d45abbd5d7f26fb8c2 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:39:28 +0200 Subject: [PATCH 01/33] Bump rust-polars to 0.43.0 [skip ci] --- src/rust/Cargo.lock | 130 ++++++++++++++++++++++++++++---------------- src/rust/Cargo.toml | 6 +- 2 files changed, 87 insertions(+), 49 deletions(-) diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index abe741fbc..68f4f319a 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -302,6 +302,15 @@ version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" +[[package]] +name = "castaway" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0abae9be0aaf9ea96a3b1b8b1b55c602ca751eba1b1500220cea4ecbafe7c0d5" +dependencies = [ + "rustversion", +] + [[package]] name = "cc" version = "1.1.11" @@ -375,6 +384,21 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "compact_str" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6050c3a16ddab2e412160b31f2c871015704239bca62f72f6e5f0be631d3f644" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "serde", + "static_assertions", +] + [[package]] name = "constant_time_eq" version = "0.3.0" @@ -1683,8 +1707,8 @@ dependencies = [ [[package]] name = "polars" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "getrandom", "polars-arrow", @@ -1703,8 +1727,8 @@ dependencies = [ [[package]] name = "polars-arrow" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "ahash", "atoi", @@ -1728,6 +1752,7 @@ dependencies = [ "parking_lot", "polars-arrow-format", "polars-error", + "polars-schema", "polars-utils", "ryu", "serde", @@ -1750,8 +1775,8 @@ dependencies = [ [[package]] name = "polars-compute" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "bytemuck", "either", @@ -1765,8 +1790,8 @@ dependencies = [ [[package]] name = "polars-core" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "ahash", "bitflags", @@ -1784,6 +1809,7 @@ dependencies = [ "polars-compute", "polars-error", "polars-row", + "polars-schema", "polars-utils", "rand", "rand_distr", @@ -1791,7 +1817,6 @@ dependencies = [ "regex", "serde", "serde_json", - "smartstring", "thiserror", "version_check", "xxhash-rust", @@ -1799,8 +1824,8 @@ dependencies = [ [[package]] name = "polars-error" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "avro-schema", "object_store", @@ -1812,13 +1837,14 @@ dependencies = [ [[package]] name = "polars-expr" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "ahash", "bitflags", "once_cell", "polars-arrow", + "polars-compute", "polars-core", "polars-io", "polars-ops", @@ -1826,13 +1852,12 @@ dependencies = [ "polars-time", "polars-utils", "rayon", - "smartstring", ] [[package]] name = "polars-io" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "ahash", "async-trait", @@ -1846,6 +1871,7 @@ dependencies = [ "fs4", "futures", "glob", + "hashbrown", "home", "itoa", "memchr", @@ -1859,6 +1885,7 @@ dependencies = [ "polars-error", "polars-json", "polars-parquet", + "polars-schema", "polars-time", "polars-utils", "rayon", @@ -1869,7 +1896,6 @@ dependencies = [ "serde_json", "simd-json", "simdutf8", - "smartstring", "tokio", "tokio-util", "url", @@ -1878,8 +1904,8 @@ dependencies = [ [[package]] name = "polars-json" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "ahash", "chrono", @@ -1899,8 +1925,8 @@ dependencies = [ [[package]] name = "polars-lazy" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "ahash", "bitflags", @@ -1919,15 +1945,14 @@ dependencies = [ "polars-time", "polars-utils", "rayon", - "smartstring", "tokio", "version_check", ] [[package]] name = "polars-mem-engine" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "futures", "memmap2", @@ -1947,8 +1972,8 @@ dependencies = [ [[package]] name = "polars-ops" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "ahash", "aho-corasick", @@ -1969,6 +1994,7 @@ dependencies = [ "polars-core", "polars-error", "polars-json", + "polars-schema", "polars-utils", "rand", "rand_distr", @@ -1976,15 +2002,14 @@ dependencies = [ "regex", "serde", "serde_json", - "smartstring", "unicode-reverse", "version_check", ] [[package]] name = "polars-parquet" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "ahash", "async-stream", @@ -1994,6 +2019,7 @@ dependencies = [ "ethnum", "flate2", "futures", + "hashbrown", "lz4", "num-traits", "parquet-format-safe", @@ -2010,8 +2036,8 @@ dependencies = [ [[package]] name = "polars-pipe" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "crossbeam-channel", "crossbeam-queue", @@ -2029,7 +2055,6 @@ dependencies = [ "polars-row", "polars-utils", "rayon", - "smartstring", "tokio", "uuid", "version_check", @@ -2037,12 +2062,13 @@ dependencies = [ [[package]] name = "polars-plan" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "ahash", "bitflags", "bytemuck", + "bytes", "chrono", "chrono-tz", "either", @@ -2063,15 +2089,14 @@ dependencies = [ "recursive", "regex", "serde", - "smartstring", "strum_macros", "version_check", ] [[package]] name = "polars-row" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "bytemuck", "polars-arrow", @@ -2079,10 +2104,22 @@ dependencies = [ "polars-utils", ] +[[package]] +name = "polars-schema" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +dependencies = [ + "indexmap", + "polars-error", + "polars-utils", + "serde", + "version_check", +] + [[package]] name = "polars-sql" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "hex", "once_cell", @@ -2093,6 +2130,7 @@ dependencies = [ "polars-ops", "polars-plan", "polars-time", + "polars-utils", "rand", "serde", "serde_json", @@ -2101,8 +2139,8 @@ dependencies = [ [[package]] name = "polars-time" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "atoi", "bytemuck", @@ -2117,26 +2155,27 @@ dependencies = [ "polars-utils", "regex", "serde", - "smartstring", ] [[package]] name = "polars-utils" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "ahash", "bytemuck", "bytes", + "compact_str", "hashbrown", "indexmap", + "libc", "memmap2", "num-traits", "once_cell", "polars-error", "raw-cpuid", "rayon", - "smartstring", + "serde", "stacker", "sysinfo", "version_check", @@ -2749,7 +2788,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" dependencies = [ "autocfg", - "serde", "static_assertions", "version_check", ] diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml index 698233bd0..3b68df956 100644 --- a/src/rust/Cargo.toml +++ b/src/rust/Cargo.toml @@ -50,8 +50,8 @@ serde_json = "*" smartstring = "1.0.1" state = "0.6.0" thiserror = "1.0.63" -polars-core = { git = "https://github.com/pola-rs/polars.git", rev = "67551b6594c581731f0e9ca814ff7c39377bd324", default-features = false } -polars-lazy = { git = "https://github.com/pola-rs/polars.git", rev = "67551b6594c581731f0e9ca814ff7c39377bd324", default-features = false } +polars-core = { git = "https://github.com/pola-rs/polars.git", rev = "d8acacfadc7059f6acc363a68839ec312910751e", default-features = false } +polars-lazy = { git = "https://github.com/pola-rs/polars.git", rev = "d8acacfadc7059f6acc363a68839ec312910751e", default-features = false } either = "1" [dependencies.polars] @@ -160,4 +160,4 @@ features = [ "zip_with", ] git = "https://github.com/pola-rs/polars.git" -rev = "67551b6594c581731f0e9ca814ff7c39377bd324" +rev = "d8acacfadc7059f6acc363a68839ec312910751e" From b43b404ecba4de9c10d58d6af29fb90ae9ff894e Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Thu, 12 Sep 2024 18:43:56 +0200 Subject: [PATCH 02/33] 0.43.1 [skip ci] --- src/rust/Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml index 3b68df956..d0deaccbb 100644 --- a/src/rust/Cargo.toml +++ b/src/rust/Cargo.toml @@ -50,8 +50,8 @@ serde_json = "*" smartstring = "1.0.1" state = "0.6.0" thiserror = "1.0.63" -polars-core = { git = "https://github.com/pola-rs/polars.git", rev = "d8acacfadc7059f6acc363a68839ec312910751e", default-features = false } -polars-lazy = { git = "https://github.com/pola-rs/polars.git", rev = "d8acacfadc7059f6acc363a68839ec312910751e", default-features = false } +polars-core = { git = "https://github.com/pola-rs/polars.git", rev = "54218e7e35e3defd4b0801e820c56eea6b91e525", default-features = false } +polars-lazy = { git = "https://github.com/pola-rs/polars.git", rev = "54218e7e35e3defd4b0801e820c56eea6b91e525", default-features = false } either = "1" [dependencies.polars] @@ -160,4 +160,4 @@ features = [ "zip_with", ] git = "https://github.com/pola-rs/polars.git" -rev = "d8acacfadc7059f6acc363a68839ec312910751e" +rev = "54218e7e35e3defd4b0801e820c56eea6b91e525" From f2a88d2359680030d9b85dceaa5a03565246290b Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Fri, 13 Sep 2024 18:17:49 +0200 Subject: [PATCH 03/33] start fixing [skip ci] --- src/rust/Cargo.lock | 76 ++++++++++++------------- src/rust/src/arrow_interop/to_rust.rs | 4 +- src/rust/src/lazy/dataframe.rs | 2 +- src/rust/src/lazy/dsl.rs | 40 +++++++------ src/rust/src/rdataframe/mod.rs | 4 +- src/rust/src/rdataframe/read_csv.rs | 4 +- src/rust/src/rdataframe/read_ipc.rs | 4 +- src/rust/src/rdataframe/read_parquet.rs | 2 +- src/rust/src/rdatatype.rs | 2 +- 9 files changed, 72 insertions(+), 66 deletions(-) diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 68f4f319a..ff448c5e0 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -1707,8 +1707,8 @@ dependencies = [ [[package]] name = "polars" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "getrandom", "polars-arrow", @@ -1727,8 +1727,8 @@ dependencies = [ [[package]] name = "polars-arrow" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "atoi", @@ -1775,8 +1775,8 @@ dependencies = [ [[package]] name = "polars-compute" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "bytemuck", "either", @@ -1790,8 +1790,8 @@ dependencies = [ [[package]] name = "polars-core" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "bitflags", @@ -1824,8 +1824,8 @@ dependencies = [ [[package]] name = "polars-error" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "avro-schema", "object_store", @@ -1837,8 +1837,8 @@ dependencies = [ [[package]] name = "polars-expr" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "bitflags", @@ -1856,8 +1856,8 @@ dependencies = [ [[package]] name = "polars-io" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "async-trait", @@ -1904,8 +1904,8 @@ dependencies = [ [[package]] name = "polars-json" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "chrono", @@ -1925,8 +1925,8 @@ dependencies = [ [[package]] name = "polars-lazy" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "bitflags", @@ -1951,8 +1951,8 @@ dependencies = [ [[package]] name = "polars-mem-engine" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "futures", "memmap2", @@ -1972,8 +1972,8 @@ dependencies = [ [[package]] name = "polars-ops" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "aho-corasick", @@ -2008,8 +2008,8 @@ dependencies = [ [[package]] name = "polars-parquet" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "async-stream", @@ -2036,8 +2036,8 @@ dependencies = [ [[package]] name = "polars-pipe" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "crossbeam-channel", "crossbeam-queue", @@ -2062,8 +2062,8 @@ dependencies = [ [[package]] name = "polars-plan" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "bitflags", @@ -2095,8 +2095,8 @@ dependencies = [ [[package]] name = "polars-row" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "bytemuck", "polars-arrow", @@ -2106,8 +2106,8 @@ dependencies = [ [[package]] name = "polars-schema" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "indexmap", "polars-error", @@ -2118,8 +2118,8 @@ dependencies = [ [[package]] name = "polars-sql" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "hex", "once_cell", @@ -2139,8 +2139,8 @@ dependencies = [ [[package]] name = "polars-time" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "atoi", "bytemuck", @@ -2159,8 +2159,8 @@ dependencies = [ [[package]] name = "polars-utils" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "bytemuck", diff --git a/src/rust/src/arrow_interop/to_rust.rs b/src/rust/src/arrow_interop/to_rust.rs index 2dfe6ca1d..43ac5556f 100644 --- a/src/rust/src/arrow_interop/to_rust.rs +++ b/src/rust/src/arrow_interop/to_rust.rs @@ -25,7 +25,7 @@ pub fn arrow_array_to_rust(arrow_array: Robj) -> Result { let array = unsafe { let field = ffi::import_field_from_c(schema.as_ref()).map_err(|err| err.to_string())?; - ffi::import_array_from_c(*array, field.data_type).map_err(|err| err.to_string())? + ffi::import_array_from_c(*array, field.dtype).map_err(|err| err.to_string())? }; Ok(array) } @@ -65,7 +65,7 @@ pub unsafe fn to_rust_df(rb: Robj) -> Result { let array_iter = columns_list.into_iter().map(|(_, column)| { let arr = arrow_array_to_rust(column)?; run_parallel |= matches!( - arr.data_type(), + arr.dtype(), ArrowDataType::Utf8 | ArrowDataType::Dictionary(_, _, _) ); let list_res: Result<_, String> = Ok(arr); diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index 22c2e60b0..9d42b09a2 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -721,7 +721,7 @@ impl RPolarsLazyFrame { #[derive(Clone)] pub struct RPolarsLazyGroupBy { pub lgb: pl::LazyGroupBy, - opt_state: pl::OptState, + opt_state: pl::OptFlags, } #[extendr] diff --git a/src/rust/src/lazy/dsl.rs b/src/rust/src/lazy/dsl.rs index 8cd203ad1..bb2151f0d 100644 --- a/src/rust/src/lazy/dsl.rs +++ b/src/rust/src/lazy/dsl.rs @@ -1030,7 +1030,7 @@ impl RPolarsExpr { } } - pub fn value_counts(&self, sort: bool, parallel: bool, name: String, normalize: bool) -> Self { + pub fn value_counts(&self, sort: bool, parallel: bool, name: &str, normalize: bool) -> Self { self.0 .clone() .value_counts(sort, parallel, name, normalize) @@ -1255,12 +1255,12 @@ impl RPolarsExpr { let width_strat = robj_to!(ListToStructWidthStrategy, n_field_strategy)?; let fields = robj_to!(Option, Robj, fields)?.map(|robj| { let par_fn: ParRObj = robj.into(); - let f: Arc<(dyn Fn(usize) -> SmartString + Send + Sync + 'static)> = + let f: Arc<(dyn Fn(usize) -> pl::PlSmallStr + Send + Sync + 'static)> = pl::Arc::new(move |idx: usize| { let thread_com = ThreadCom::from_global(&CONFIG); thread_com.send(RFnSignature::FnF64ToString(par_fn.clone(), idx as f64)); let s = thread_com.recv().unwrap_string(); - let s: SmartString = s.into(); + let s: pl::PlSmallStr = s.into(); s }); f @@ -1443,12 +1443,12 @@ impl RPolarsExpr { fn arr_to_struct(&self, fields: Robj) -> RResult { let fields = robj_to!(Option, Robj, fields)?.map(|robj| { let par_fn: ParRObj = robj.into(); - let f: Arc<(dyn Fn(usize) -> SmartString + Send + Sync + 'static)> = + let f: Arc<(dyn Fn(usize) -> pl::PlSmallStr + Send + Sync + 'static)> = pl::Arc::new(move |idx: usize| { let thread_com = ThreadCom::from_global(&CONFIG); thread_com.send(RFnSignature::FnF64ToString(par_fn.clone(), idx as f64)); let s = thread_com.recv().unwrap_string(); - let s: SmartString = s.into(); + let s: pl::PlSmallStr = s.into(); s }); f @@ -1583,22 +1583,23 @@ impl RPolarsExpr { .0 .clone() .dt() - .convert_time_zone(robj_to!(String, time_zone)?) + .convert_time_zone(robj_to!(String, time_zone)?.into()) .into()) } pub fn dt_replace_time_zone( &self, - time_zone: Nullable, + time_zone: Robj, ambiguous: Robj, non_existent: Robj, ) -> RResult { + let time_zone = robj_to!(Option, String, time_zone)?.map(|x| x.into()); Ok(self .0 .clone() .dt() .replace_time_zone( - time_zone.into_option(), + time_zone, robj_to!(PLExpr, ambiguous)?, robj_to!(NonExistent, non_existent)?, ) @@ -1985,7 +1986,7 @@ impl RPolarsExpr { // set expected type of output from R function let ot = robj_to!(Option, PLPolarsDataType, output_type)?; let output_map = pl::GetOutput::map_field(move |fld| match ot { - Some(ref dt) => Ok(pl::Field::new(fld.name(), dt.clone())), + Some(ref dt) => Ok(pl::Field::new(fld.name().clone(), dt.clone())), None => Ok(fld.clone()), }); @@ -2019,7 +2020,7 @@ impl RPolarsExpr { let ot = robj_to!(Option, PLPolarsDataType, output_type)?; let output_map = pl::GetOutput::map_field(move |fld| match ot { - Some(ref dt) => Ok(pl::Field::new(fld.name(), dt.clone())), + Some(ref dt) => Ok(pl::Field::new(fld.name().clone(), dt.clone())), None => Ok(fld.clone()), }); @@ -2052,7 +2053,7 @@ impl RPolarsExpr { let ot = null_to_opt(output_type).map(|rdt| rdt.0.clone()); let output_map = pl::GetOutput::map_field(move |fld| match ot { - Some(ref dt) => Ok(pl::Field::new(fld.name(), dt.clone())), + Some(ref dt) => Ok(pl::Field::new(fld.name().clone(), dt.clone())), None => Ok(fld.clone()), }); @@ -2321,12 +2322,13 @@ impl RPolarsExpr { exact: Robj, cache: Robj, ) -> RResult { + let format = robj_to!(Option, String, format)?.map(|x| x.into()); Ok(self .0 .clone() .str() .to_date(pl::StrptimeOptions { - format: robj_to!(Option, String, format)?, + format, strict: robj_to!(bool, strict)?, exact: robj_to!(bool, exact)?, cache: robj_to!(bool, cache)?, @@ -2345,15 +2347,19 @@ impl RPolarsExpr { cache: Robj, ambiguous: Robj, ) -> RResult { + let format = robj_to!(Option, String, format)?.map(|x| x.into()); + let time_unit = robj_to!(Option, timeunit, time_unit)?.map(|x| x.into()); + let time_zone = robj_to!(Option, String, time_zone)?.map(|x| x.into()); + Ok(self .0 .clone() .str() .to_datetime( - robj_to!(Option, timeunit, time_unit)?, - robj_to!(Option, String, time_zone)?, + time_unit, + time_zone, pl::StrptimeOptions { - format: robj_to!(Option, String, format)?, + format: format, strict: robj_to!(bool, strict)?, exact: robj_to!(bool, exact)?, cache: robj_to!(bool, cache)?, @@ -2364,12 +2370,14 @@ impl RPolarsExpr { } pub fn str_to_time(&self, format: Robj, strict: Robj, cache: Robj) -> RResult { + let format = robj_to!(Option, String, format)?.map(|x| x.into()); + Ok(self .0 .clone() .str() .to_time(pl::StrptimeOptions { - format: robj_to!(Option, String, format)?, + format, strict: robj_to!(bool, strict)?, cache: robj_to!(bool, cache)?, exact: true, diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs index db8df6b61..7d11a1da5 100644 --- a/src/rust/src/rdataframe/mod.rs +++ b/src/rust/src/rdataframe/mod.rs @@ -164,7 +164,7 @@ impl RPolarsDataFrame { .0 .clone() .with_row_index( - robj_to!(String, name)?.as_str(), + robj_to!(String, name)?.into(), robj_to!(Option, u32, offset)?, ) .map_err(polars_to_rpolars_err)? @@ -327,7 +327,7 @@ impl RPolarsDataFrame { pub fn to_struct(&self, name: Robj) -> RResult { use pl::IntoSeries; - let name = robj_to!(Option, str, name)?.unwrap_or(""); + let name = robj_to!(str, name)?.into(); let s = self.0.clone().into_struct(name); Ok(s.into_series().into()) } diff --git a/src/rust/src/rdataframe/read_csv.rs b/src/rust/src/rdataframe/read_csv.rs index 218245d1b..bc83425dc 100644 --- a/src/rust/src/rdataframe/read_csv.rs +++ b/src/rust/src/rdataframe/read_csv.rs @@ -22,7 +22,7 @@ use polars::prelude::LazyFileListReader; #[extendr] impl RPolarsRNullValues { pub fn new_all_columns(x: String) -> Self { - RPolarsRNullValues(pl::NullValues::AllColumnsSingle(x)) + RPolarsRNullValues(pl::NullValues::AllColumnsSingle(x.into())) } pub fn new_columns(x: Vec) -> Self { RPolarsRNullValues(pl::NullValues::AllColumns(x)) @@ -95,7 +95,7 @@ pub fn new_from_csv( let schema = dtv.map(|some_od| { let fields = some_od.0.iter().map(|(name, dtype)| { if let Some(sname) = name { - pl::Field::new(sname, dtype.clone()) + pl::Field::new(sname.into(), dtype.clone()) } else { todo!("missing column name for dtype not implemented"); } diff --git a/src/rust/src/rdataframe/read_ipc.rs b/src/rust/src/rdataframe/read_ipc.rs index 5185be59a..543e41c82 100644 --- a/src/rust/src/rdataframe/read_ipc.rs +++ b/src/rust/src/rdataframe/read_ipc.rs @@ -13,7 +13,6 @@ pub fn import_arrow_ipc( rechunk: Robj, row_name: Robj, row_index: Robj, - memory_map: Robj, hive_partitioning: Robj, hive_schema: Robj, try_parse_hive_dates: Robj, @@ -38,10 +37,9 @@ pub fn import_arrow_ipc( }) }) .transpose()?, - memory_map: robj_to!(bool, memory_map)?, cloud_options: None, hive_options, - include_file_paths: robj_to!(Option, String, include_file_paths)?.map(Arc::from), + include_file_paths: robj_to!(Option, String, include_file_paths)?.map(|x| x.into()), }; let lf = LazyFrame::scan_ipc(robj_to!(String, path)?, args) .map_err(crate::rpolarserr::polars_to_rpolars_err)?; diff --git a/src/rust/src/rdataframe/read_parquet.rs b/src/rust/src/rdataframe/read_parquet.rs index 8cb03b6c4..f147d8d48 100644 --- a/src/rust/src/rdataframe/read_parquet.rs +++ b/src/rust/src/rdataframe/read_parquet.rs @@ -52,7 +52,7 @@ pub fn new_from_parquet( use_statistics: robj_to!(bool, use_statistics)?, hive_options, glob: robj_to!(bool, glob)?, - include_file_paths: robj_to!(Option, String, include_file_paths)?.map(Arc::from), + include_file_paths: robj_to!(Option, String, include_file_paths)?.map(|x| x.into()), }; pl::LazyFrame::scan_parquet(path, args) diff --git a/src/rust/src/rdatatype.rs b/src/rust/src/rdatatype.rs index bdf70764e..cab767c0b 100644 --- a/src/rust/src/rdatatype.rs +++ b/src/rust/src/rdatatype.rs @@ -97,7 +97,7 @@ impl RPolarsDataType { let s = robjname2series(categories, "").unwrap(); let ca = s.str()?; let categories = ca.downcast_iter().next().unwrap().clone(); - Ok(RPolarsDataType(pl::datatypes::create_enum_data_type( + Ok(RPolarsDataType(pl::datatypes::create_enum_dtype( categories, ))) } From eea3fdf4b14230abae6cab3e99ad7efeefd32feb Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Fri, 13 Sep 2024 18:39:06 +0200 Subject: [PATCH 04/33] some fixes for series [skip ci] --- src/rust/src/rdataframe/mod.rs | 2 +- src/rust/src/series.rs | 32 ++++++++++++++++---------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs index 7d11a1da5..2b19f4e42 100644 --- a/src/rust/src/rdataframe/mod.rs +++ b/src/rust/src/rdataframe/mod.rs @@ -355,7 +355,7 @@ impl RPolarsDataFrame { let compat_level = robj_to!(CompatLevel, compat_level).unwrap(); let schema = self.0.schema().to_arrow(compat_level); let data_type = ArrowDataType::Struct(schema.fields); - let field = ArrowField::new("", data_type, false); + let field = ArrowField::new("".into(), data_type, false); let iter_boxed = Box::new(OwnedDataFrameIterator::new(self.0.clone(), compat_level)); let mut stream = arrow::ffi::export_iterator(iter_boxed, field); diff --git a/src/rust/src/series.rs b/src/rust/src/series.rs index 29e9f4f0c..81152cd65 100644 --- a/src/rust/src/series.rs +++ b/src/rust/src/series.rs @@ -81,7 +81,7 @@ impl From<&RPolarsExpr> for pl::PolarsResult { .map(|df| { df.select_at_idx(0) .cloned() - .unwrap_or_else(|| pl::Series::new_empty("", &pl::DataType::Null)) + .unwrap_or_else(|| pl::Series::new_empty("".into(), &pl::DataType::Null)) .into() }) } @@ -118,7 +118,7 @@ impl RPolarsSeries { } //any mut method exposed in R suffixed _mut pub fn rename_mut(&mut self, name: &str) { - self.0.rename(name); + self.0.rename(name.into()); } //any other method or trait method in alphabetical order @@ -169,7 +169,7 @@ impl RPolarsSeries { normalize: bool, ) -> std::result::Result { self.0 - .value_counts(sort, parallel, name, normalize) + .value_counts(sort, parallel, name.into(), normalize) .map(RPolarsDataFrame) .map_err(|err| format!("in value_counts: {:?}", err)) } @@ -315,7 +315,7 @@ impl RPolarsSeries { pub fn alias(&self, name: &str) -> RPolarsSeries { let mut s = self.0.clone(); - s.rename(name); + s.rename(name.into()); RPolarsSeries(s) } @@ -512,11 +512,11 @@ impl RPolarsSeries { .cast(&DataType::UInt8) .map_err(polars_to_rpolars_err)? .mean_reduce() - .into_series(""); + .into_series("".into()); RPolarsSeries(s).to_r("double") } DataType::Datetime(_, _) | DataType::Duration(_) | DataType::Time => { - let s = self.0.mean_reduce().into_series(""); + let s = self.0.mean_reduce().into_series("".into()); RPolarsSeries(s).to_r("double") } _ => Ok(self.0.mean().into()), @@ -532,7 +532,7 @@ impl RPolarsSeries { .map_err(polars_to_rpolars_err)? .median_reduce() .map_err(polars_to_rpolars_err)? - .into_series(""); + .into_series("".into()); RPolarsSeries(s).to_r("double") } DataType::Datetime(_, _) | DataType::Duration(_) | DataType::Time => { @@ -540,7 +540,7 @@ impl RPolarsSeries { .0 .median_reduce() .map_err(polars_to_rpolars_err)? - .into_series(""); + .into_series("".into()); RPolarsSeries(s).to_r("double") } _ => Ok(self.0.median().into()), @@ -552,7 +552,7 @@ impl RPolarsSeries { self.0 .min_reduce() .map_err(polars_to_rpolars_err)? - .into_series(""), + .into_series("".into()), ) .to_r("double") } @@ -562,7 +562,7 @@ impl RPolarsSeries { self.0 .max_reduce() .map_err(polars_to_rpolars_err)? - .into_series(""), + .into_series("".into()), ) .to_r("double") } @@ -572,7 +572,7 @@ impl RPolarsSeries { self.0 .sum_reduce() .map_err(polars_to_rpolars_err)? - .into_series(""), + .into_series("".into()), ) .to_r("double") } @@ -584,7 +584,7 @@ impl RPolarsSeries { self.0 .std_reduce(ddof) .map_err(polars_to_rpolars_err)? - .into_series(""), + .into_series("".into()), ) .to_r("double") } @@ -596,7 +596,7 @@ impl RPolarsSeries { self.0 .var_reduce(ddof) .map_err(polars_to_rpolars_err)? - .into_series(""), + .into_series("".into()), ) .to_r("double") } @@ -631,7 +631,7 @@ impl RPolarsSeries { pub fn export_stream(&self, stream_ptr: &str, compat_level: Robj) { let compat_level = robj_to!(CompatLevel, compat_level).unwrap(); let data_type = self.0.dtype().to_arrow(compat_level); - let field = pl::ArrowField::new("", data_type, false); + let field = pl::ArrowField::new("".into(), data_type, false); let iter_boxed = Box::new(OwnedSeriesIterator::new(self.0.clone(), compat_level)); let mut stream = arrow::ffi::export_iterator(iter_boxed, field); @@ -668,7 +668,7 @@ impl RPolarsSeries { let name = robj_to!(str, name)?; let arr = crate::arrow_interop::to_rust::arrow_array_to_rust(array)?; - match arr.data_type() { + match arr.dtype() { ArrowDataType::LargeList(_) => { let array = arr.as_any().downcast_ref::().unwrap(); @@ -681,7 +681,7 @@ impl RPolarsSeries { } previous = o; } - let mut out = unsafe { ListChunked::from_chunks(name, vec![arr]) }; + let mut out = unsafe { ListChunked::from_chunks(name.into(), vec![arr]) }; if fast_explode { out.set_fast_explode() } From 16ce3154228f2d27360cdf2741b28c682284f52a Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Fri, 13 Sep 2024 18:45:11 +0200 Subject: [PATCH 05/33] more [skip ci] --- src/rust/src/rlib.rs | 105 ++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 62 deletions(-) diff --git a/src/rust/src/rlib.rs b/src/rust/src/rlib.rs index 10df4c0e7..86190b638 100644 --- a/src/rust/src/rlib.rs +++ b/src/rust/src/rlib.rs @@ -6,61 +6,50 @@ use crate::RFnSignature; use crate::CONFIG; use extendr_api::prelude::*; use polars::chunked_array::ops::SortMultipleOptions; +use polars::lazy::dsl; use polars::prelude as pl; use std::result::Result; #[extendr] fn min_horizontal(dotdotdot: Robj) -> RResult { - Ok( - polars::lazy::dsl::min_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) - .map_err(polars_to_rpolars_err)? - .into(), - ) + Ok(dsl::min_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) + .map_err(polars_to_rpolars_err)? + .into()) } #[extendr] fn max_horizontal(dotdotdot: Robj) -> RResult { - Ok( - polars::lazy::dsl::max_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) - .map_err(polars_to_rpolars_err)? - .into(), - ) + Ok(dsl::max_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) + .map_err(polars_to_rpolars_err)? + .into()) } #[extendr] fn sum_horizontal(dotdotdot: Robj) -> RResult { - Ok( - polars::lazy::dsl::sum_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) - .map_err(polars_to_rpolars_err)? - .into(), - ) + Ok(dsl::sum_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) + .map_err(polars_to_rpolars_err)? + .into()) } #[extendr] fn mean_horizontal(dotdotdot: Robj) -> RResult { - Ok( - polars::lazy::dsl::mean_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) - .map_err(polars_to_rpolars_err)? - .into(), - ) + Ok(dsl::mean_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) + .map_err(polars_to_rpolars_err)? + .into()) } #[extendr] fn all_horizontal(dotdotdot: Robj) -> RResult { - Ok( - polars::lazy::dsl::all_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) - .map_err(polars_to_rpolars_err)? - .into(), - ) + Ok(dsl::all_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) + .map_err(polars_to_rpolars_err)? + .into()) } #[extendr] fn any_horizontal(dotdotdot: Robj) -> RResult { - Ok( - polars::lazy::dsl::any_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) - .map_err(polars_to_rpolars_err)? - .into(), - ) + Ok(dsl::any_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) + .map_err(polars_to_rpolars_err)? + .into()) } #[extendr] @@ -87,7 +76,7 @@ fn concat_str(dotdotdot: Robj, separator: Robj, ignore_nulls: Robj) -> RResult RResult { - Ok(RPolarsExpr(polars::lazy::prelude::date_range( + Ok(RPolarsExpr(dsl::date_range( robj_to!(PLExprCol, start)?, robj_to!(PLExprCol, end)?, pl::Duration::parse(interval), @@ -97,7 +86,7 @@ fn date_range(start: Robj, end: Robj, interval: &str, closed: Robj) -> RResult RResult { - Ok(RPolarsExpr(polars::lazy::prelude::date_ranges( + Ok(RPolarsExpr(dsl::date_ranges( robj_to!(PLExprCol, start)?, robj_to!(PLExprCol, end)?, pl::Duration::parse(interval), @@ -114,13 +103,13 @@ fn datetime_range( time_unit: Robj, time_zone: Robj, ) -> RResult { - Ok(RPolarsExpr(polars::lazy::prelude::datetime_range( + Ok(RPolarsExpr(dsl::datetime_range( robj_to!(PLExprCol, start)?, robj_to!(PLExprCol, end)?, pl::Duration::parse(interval), robj_to!(ClosedWindow, closed)?, robj_to!(Option, timeunit, time_unit)?, - robj_to!(Option, String, time_zone)?, + robj_to!(Option, String, time_zone)?.map(|x| x.into()), ))) } @@ -133,13 +122,13 @@ fn datetime_ranges( time_unit: Robj, time_zone: Robj, ) -> RResult { - Ok(RPolarsExpr(polars::lazy::prelude::datetime_ranges( + Ok(RPolarsExpr(dsl::datetime_ranges( robj_to!(PLExprCol, start)?, robj_to!(PLExprCol, end)?, pl::Duration::parse(interval), robj_to!(ClosedWindow, closed)?, robj_to!(Option, timeunit, time_unit)?, - robj_to!(Option, String, time_zone)?, + robj_to!(Option, String, time_zone)?.map(|x| x.into()), ))) } @@ -284,19 +273,17 @@ pub fn duration( time_unit: Robj, ) -> RResult { let args = pl::DurationArgs { - weeks: robj_to!(Option, PLExprCol, weeks)?.unwrap_or(polars::lazy::dsl::lit(0)), - days: robj_to!(Option, PLExprCol, days)?.unwrap_or(polars::lazy::dsl::lit(0)), - hours: robj_to!(Option, PLExprCol, hours)?.unwrap_or(polars::lazy::dsl::lit(0)), - minutes: robj_to!(Option, PLExprCol, minutes)?.unwrap_or(polars::lazy::dsl::lit(0)), - seconds: robj_to!(Option, PLExprCol, seconds)?.unwrap_or(polars::lazy::dsl::lit(0)), - milliseconds: robj_to!(Option, PLExprCol, milliseconds)? - .unwrap_or(polars::lazy::dsl::lit(0)), - microseconds: robj_to!(Option, PLExprCol, microseconds)? - .unwrap_or(polars::lazy::dsl::lit(0)), - nanoseconds: robj_to!(Option, PLExprCol, nanoseconds)?.unwrap_or(polars::lazy::dsl::lit(0)), + weeks: robj_to!(Option, PLExprCol, weeks)?.unwrap_or(dsl::lit(0)), + days: robj_to!(Option, PLExprCol, days)?.unwrap_or(dsl::lit(0)), + hours: robj_to!(Option, PLExprCol, hours)?.unwrap_or(dsl::lit(0)), + minutes: robj_to!(Option, PLExprCol, minutes)?.unwrap_or(dsl::lit(0)), + seconds: robj_to!(Option, PLExprCol, seconds)?.unwrap_or(dsl::lit(0)), + milliseconds: robj_to!(Option, PLExprCol, milliseconds)?.unwrap_or(dsl::lit(0)), + microseconds: robj_to!(Option, PLExprCol, microseconds)?.unwrap_or(dsl::lit(0)), + nanoseconds: robj_to!(Option, PLExprCol, nanoseconds)?.unwrap_or(dsl::lit(0)), time_unit: robj_to!(timeunit, time_unit)?, }; - Ok(polars::lazy::dsl::duration(args).into()) + Ok(dsl::duration(args).into()) } #[extendr] @@ -317,15 +304,15 @@ pub fn datetime( year: robj_to!(PLExprCol, year)?, month: robj_to!(PLExprCol, month)?, day: robj_to!(PLExprCol, day)?, - hour: robj_to!(Option, PLExprCol, hour)?.unwrap_or(polars::lazy::dsl::lit(0)), - minute: robj_to!(Option, PLExprCol, minute)?.unwrap_or(polars::lazy::dsl::lit(0)), - second: robj_to!(Option, PLExprCol, second)?.unwrap_or(polars::lazy::dsl::lit(0)), - microsecond: robj_to!(Option, PLExprCol, microsecond)?.unwrap_or(polars::lazy::dsl::lit(0)), + hour: robj_to!(Option, PLExprCol, hour)?.unwrap_or(dsl::lit(0)), + minute: robj_to!(Option, PLExprCol, minute)?.unwrap_or(dsl::lit(0)), + second: robj_to!(Option, PLExprCol, second)?.unwrap_or(dsl::lit(0)), + microsecond: robj_to!(Option, PLExprCol, microsecond)?.unwrap_or(dsl::lit(0)), time_unit: robj_to!(timeunit, time_unit)?, - time_zone: robj_to!(Option, String, time_zone)?, + time_zone: robj_to!(Option, String, time_zone)?.map(|x| x.into()), ambiguous: robj_to!(PLExpr, ambiguous)?, }; - Ok(polars::lazy::dsl::datetime(args).into()) + Ok(dsl::datetime(args).into()) } #[extendr] @@ -363,7 +350,7 @@ pub fn int_range(start: Robj, end: Robj, step: i64, dtype: Robj) -> RResult RResult RResult RResult { let names = robj_to!(Vec, String, names)?; - Ok(pl::Expr::Field( - names - .into_iter() - .map(|name| pl::Arc::from(name.as_str())) - .collect(), - ) - .into()) + Ok(pl::Expr::Field(names.into_iter().map(|name| name.into()).collect()).into()) } extendr_module! { From 52fa1b0aaf6cbc4040f3caa4c2a06cc904d66942 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Fri, 13 Sep 2024 18:56:56 +0200 Subject: [PATCH 06/33] more [skip ci] --- src/rust/src/conversion_r_to_s.rs | 33 +++++++++++++---------------- src/rust/src/conversion_s_to_r.rs | 2 +- src/rust/src/rdataframe/read_csv.rs | 10 +++++---- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/src/rust/src/conversion_r_to_s.rs b/src/rust/src/conversion_r_to_s.rs index 6335fb3ee..3c758fa63 100644 --- a/src/rust/src/conversion_r_to_s.rs +++ b/src/rust/src/conversion_r_to_s.rs @@ -75,7 +75,7 @@ fn recursive_robjname2series_tree(x: &Robj, name: &str) -> pl::PolarsResult(real_slice) }; - Ok(SeriesTree::Series(pl::Series::new(name, i64_slice))) + Ok(SeriesTree::Series(pl::Series::new(name.into(), i64_slice))) } else { let mut s: pl::Series = rdouble //convert R NAs to rust options .iter() @@ -89,7 +89,7 @@ fn recursive_robjname2series_tree(x: &Robj, name: &str) -> pl::PolarsResult pl::PolarsResult pl::PolarsResult pl::PolarsResult Ok(SeriesTree::Series( @@ -138,14 +138,14 @@ fn recursive_robjname2series_tree(x: &Robj, name: &str) -> pl::PolarsResult { let rints = x.as_integers().expect("as matched"); let s = if rints.no_na().is_true() { - pl::Series::new(name, x.as_integer_slice().expect("as matched")) + pl::Series::new(name.into(), x.as_integer_slice().expect("as matched")) } else { //convert R NAs to rust options let mut s: pl::Series = rints .iter() .map(|x| if x.is_na() { None } else { Some(x.inner()) }) .collect(); - s.rename(name); + s.rename(name.into()); s }; @@ -175,7 +175,7 @@ fn recursive_robjname2series_tree(x: &Robj, name: &str) -> pl::PolarsResult pl::PolarsResult { Ok(SeriesTree::Series( (s * 1_000f64).cast(&pl::DataType::Int64)?.cast( - &pl::DataType::Datetime(pl::TimeUnit::Milliseconds, Some(tz)), + &pl::DataType::Datetime(pl::TimeUnit::Milliseconds, Some(tz.into())), )?, )) } @@ -235,17 +235,14 @@ fn recursive_robjname2series_tree(x: &Robj, name: &str) -> pl::PolarsResult Ok(s), // SeriesTree is just a regular Series, return as is SeriesTree::SeriesEmptyVec => { // Create Series of empty array and cast to the found leaf_dtype. use polars::prelude::ListBuilderTrait; - let empty_list_series = pl::ListBinaryChunkedBuilder::new(name, 0,0).finish().into_series(); + let empty_list_series = pl::ListBinaryChunkedBuilder::new(name.into(), 0,0).finish().into_series(); //cast to any discovered leaftype to allow concatenation without Error if let Some(leaf_dt_ref) = leaf_dtype { @@ -342,7 +339,7 @@ fn concat_series_tree( } // use polars new method to concat concatenated series - Ok(pl::Series::new(name, series_vec)) + Ok(pl::Series::new(name.into(), series_vec)) } } } @@ -350,7 +347,7 @@ fn concat_series_tree( //handle R character/strings to utf8 fn robj_to_utf8_series(rstrings: Strings, name: &str) -> pl::Series { if rstrings.no_na().is_true() { - pl::Series::new(name, rstrings.as_robj().as_str_vector().unwrap()) + pl::Series::new(name.into(), rstrings.as_robj().as_str_vector().unwrap()) } else { //convert R NAs to rust options let mut s: Vec> = Vec::with_capacity(rstrings.len()); @@ -360,6 +357,6 @@ fn robj_to_utf8_series(rstrings: Strings, name: &str) -> pl::Series { .map(|x| if x.is_na() { None } else { Some(x.as_str()) }), ); - pl::Series::new(name, s) + pl::Series::new(name.into(), s) } } diff --git a/src/rust/src/conversion_s_to_r.rs b/src/rust/src/conversion_s_to_r.rs index 8199148e1..c08e15cb3 100644 --- a/src/rust/src/conversion_s_to_r.rs +++ b/src/rust/src/conversion_s_to_r.rs @@ -226,7 +226,7 @@ pub fn pl_series_to_list( pl::DataFrame::new(vec![s.clone()])? .lazy() .select([col(s_name).dt().replace_time_zone( - Some(sys_tz), + Some(sys_tz.into()), pl::lit("raise"), pl::NonExistent::Raise, )]) diff --git a/src/rust/src/rdataframe/read_csv.rs b/src/rust/src/rdataframe/read_csv.rs index bc83425dc..32072a524 100644 --- a/src/rust/src/rdataframe/read_csv.rs +++ b/src/rust/src/rdataframe/read_csv.rs @@ -25,15 +25,17 @@ impl RPolarsRNullValues { RPolarsRNullValues(pl::NullValues::AllColumnsSingle(x.into())) } pub fn new_columns(x: Vec) -> Self { - RPolarsRNullValues(pl::NullValues::AllColumns(x)) + RPolarsRNullValues(pl::NullValues::AllColumns( + x.into_iter().map(|xi| xi.into()).collect(), + )) } pub fn new_named(robj: Robj) -> Self { let null_markers = robj.as_str_iter().expect("must be str"); let column_names = robj.names().expect("names were missing"); - let key_val_pair: Vec<(String, String)> = column_names + let key_val_pair: Vec<(pl::PlSmallStr, pl::PlSmallStr)> = column_names .zip(null_markers) - .map(|(k, v)| (k.to_owned(), v.to_owned())) + .map(|(k, v)| (k.into(), v.into())) .collect(); RPolarsRNullValues(pl::NullValues::Named(key_val_pair)) } @@ -113,7 +115,7 @@ pub fn new_from_csv( .with_cache(robj_to!(bool, cache)?) .with_dtype_overwrite(schema.map(|schema| std::sync::Arc::new(schema))) .with_low_memory(robj_to!(bool, low_memory)?) - .with_comment_prefix(robj_to!(Option, str, comment_prefix)?) + .with_comment_prefix(robj_to!(Option, str, comment_prefix)?.map(|x| x.into())) .with_quote_char(robj_to!(Option, Utf8Byte, quote_char)?) .with_eol_char(robj_to!(Utf8Byte, eol_char)?) .with_rechunk(robj_to!(bool, rechunk)?) From 5bb55fd2c9a7c9418830115bb972a7cfb8061f3c Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sat, 14 Sep 2024 13:31:29 +0200 Subject: [PATCH 07/33] more [skip ci] --- src/rust/src/lazy/dataframe.rs | 9 ++++++--- src/rust/src/rdataframe/mod.rs | 23 ++++++++++++++++------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index 9d42b09a2..04857e68a 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -342,7 +342,10 @@ impl RPolarsLazyFrame { let maintain_order = robj_to!(bool, maintain_order)?; let subset = robj_to!(Option, Vec, String, subset)?; let lf = if maintain_order { - self.0.clone().unique_stable(subset, ke) + self.0.clone().unique_stable( + subset.map(|x| x.into_iter().map(|y| y.into()).collect()), + ke, + ) } else { self.0.clone().unique(subset, ke) }; @@ -699,14 +702,14 @@ impl RPolarsLazyFrame { .iter() .map(|(k, v)| { let data_type = robj_to!(RPolarsDataType, v)?; - Ok(pl::Field::new(k, data_type.0)) + Ok(pl::Field::new(k.into(), data_type.0)) }) .collect::>>()?; let mut cast_map = PlHashMap::with_capacity(dtypes.len()); cast_map.extend( dtypes .iter() - .map(|f| (f.name().as_ref(), f.data_type().clone())), + .map(|f| (f.name().as_ref(), f.dtype().clone())), ); Ok(self.0.clone().cast(cast_map, strict).into()) } diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs index 2b19f4e42..06fe8ef7a 100644 --- a/src/rust/src/rdataframe/mod.rs +++ b/src/rust/src/rdataframe/mod.rs @@ -18,13 +18,13 @@ pub use lazy::dataframe::*; use crate::conversion_s_to_r::pl_series_to_list; pub use crate::series::*; +use crate::utils::{collect_hinted_result, r_result_list}; use arrow::datatypes::ArrowDataType; use polars::prelude::ArrowField; +use polars::prelude::SchemaExt; use polars_core::error::PolarsError; use polars_core::utils::arrow; -use crate::utils::{collect_hinted_result, r_result_list}; - use crate::conversion::strings_to_smartstrings; use polars::frame::explode::UnpivotArgsIR; use polars::prelude::pivot::{pivot, pivot_stable}; @@ -40,7 +40,8 @@ pub struct OwnedDataFrameIterator { impl OwnedDataFrameIterator { pub fn new(df: polars::frame::DataFrame, compat_level: CompatLevel) -> Self { let schema = df.schema().to_arrow(compat_level); - let data_type = ArrowDataType::Struct(schema.fields); + // TODO: changed when bumping to 0.43.1, might need refactor + let data_type = ArrowDataType::Struct(schema.iter_values().map(|x| x.clone()).collect()); let vs = df.get_columns().to_vec(); Self { columns: vs, @@ -221,7 +222,8 @@ impl RPolarsDataFrame { pub fn schema(&self) -> List { let mut l = self.dtypes(); - l.set_names(self.0.get_column_names()).unwrap(); + let nms = self.0.get_column_names().into_iter().map(|x| x.as_str()); + l.set_names(nms).unwrap(); l } @@ -354,7 +356,8 @@ impl RPolarsDataFrame { pub fn export_stream(&self, stream_ptr: &str, compat_level: Robj) { let compat_level = robj_to!(CompatLevel, compat_level).unwrap(); let schema = self.0.schema().to_arrow(compat_level); - let data_type = ArrowDataType::Struct(schema.fields); + // TODO: changed when bumping to 0.43.1, might need refactor + let data_type = ArrowDataType::Struct(schema.iter_values().map(|x| x.clone()).collect()); let field = ArrowField::new("".into(), data_type, false); let iter_boxed = Box::new(OwnedDataFrameIterator::new(self.0.clone(), compat_level)); @@ -393,8 +396,14 @@ impl RPolarsDataFrame { ) -> RResult { use polars::prelude::UnpivotDF; let args = UnpivotArgsIR { - on: strings_to_smartstrings(robj_to!(Vec, String, on)?), - index: strings_to_smartstrings(robj_to!(Vec, String, index)?), + on: robj_to!(Vec, String, on)? + .into_iter() + .map(|x| x.into()) + .collect(), + index: robj_to!(Vec, String, index)? + .into_iter() + .map(|x| x.into()) + .collect(), value_name: robj_to!(Option, String, value_name)?.map(|s| s.into()), variable_name: robj_to!(Option, String, variable_name)?.map(|s| s.into()), }; From 1329c66d77b03ebac08968e6a6ba38a2ace0eb60 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sat, 14 Sep 2024 13:47:28 +0200 Subject: [PATCH 08/33] more [skip ci] --- src/rust/src/conversion_r_to_s.rs | 2 +- src/rust/src/conversion_s_to_r.rs | 4 ++-- src/rust/src/rdatatype.rs | 15 +++++---------- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/src/rust/src/conversion_r_to_s.rs b/src/rust/src/conversion_r_to_s.rs index 3c758fa63..cc070fbf6 100644 --- a/src/rust/src/conversion_r_to_s.rs +++ b/src/rust/src/conversion_r_to_s.rs @@ -240,7 +240,7 @@ fn recursive_robjname2series_tree(x: &Robj, name: &str) -> pl::PolarsResult { - let df = s.clone().into_frame().unnest([s.name()]).unwrap(); + let df = s.clone().into_frame().unnest([s.name().clone()]).unwrap(); let mut l = RPolarsDataFrame(df).to_list_result(int64_conversion)?; //TODO contribute extendr_api set_attrib mutates &self, change signature to surprise anyone @@ -225,7 +225,7 @@ pub fn pl_series_to_list( let s_name = s.name(); pl::DataFrame::new(vec![s.clone()])? .lazy() - .select([col(s_name).dt().replace_time_zone( + .select([col(s_name.clone()).dt().replace_time_zone( Some(sys_tz.into()), pl::lit("raise"), pl::NonExistent::Raise, diff --git a/src/rust/src/rdatatype.rs b/src/rust/src/rdatatype.rs index cab767c0b..5ba69414e 100644 --- a/src/rust/src/rdatatype.rs +++ b/src/rust/src/rdatatype.rs @@ -102,9 +102,9 @@ impl RPolarsDataType { ))) } - pub fn new_datetime(tu: Robj, tz: Nullable) -> RResult { - robj_to!(timeunit, tu) - .map(|dt| RPolarsDataType(pl::DataType::Datetime(dt, null_to_opt(tz)))) + pub fn new_datetime(tu: Robj, tz: Robj) -> RResult { + let tz = robj_to!(Option, String, tz)?.map(|x| x.into()); + robj_to!(timeunit, tu).map(|dt| RPolarsDataType(pl::DataType::Datetime(dt, tz))) } pub fn new_duration(tu: Robj) -> RResult { @@ -475,12 +475,7 @@ pub fn literal_to_any_value(litval: pl::LiteralValue) -> RResult Ok(av::UInt64(x)), lv::UInt8(x) => Ok(av::UInt8(x)), // lv::Utf8(x) => Ok(av::Utf8(x.as_str())), - lv::String(x) => { - let mut s = SString::new(); - - s.push_str(x.as_str()); - Ok(av::StringOwned(s)) - } + lv::String(x) => Ok(av::StringOwned(x)), x => rerr().notachoice(format!("cannot convert LiteralValue {:?} to AnyValue", x)), } } @@ -741,8 +736,8 @@ pub fn robj_to_statistics_options(robj: Robj) -> RResult pub fn robj_to_wrap_schema(robj: Robj) -> RResult> { use pl::Schema; - let mut schema = Schema::new(); let hm = robj.as_list().unwrap().into_hashmap(); + let mut schema = Schema::with_capacity(hm.capacity()); for (key, value) in hm.into_iter() { let dt = crate::utils::robj_to_datatype(value)?; From a8ffb213e8b6c2e12980b1fbcdc9fd6d35012cf7 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sat, 14 Sep 2024 14:05:58 +0200 Subject: [PATCH 09/33] more [skip ci] --- src/rust/src/lazy/dsl.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rust/src/lazy/dsl.rs b/src/rust/src/lazy/dsl.rs index bb2151f0d..c6bcc4ced 100644 --- a/src/rust/src/lazy/dsl.rs +++ b/src/rust/src/lazy/dsl.rs @@ -2118,13 +2118,13 @@ impl RPolarsExpr { // //wrap as series // }; - let f = move |name: &str| -> pl::PolarsResult { + let f = move |name: &pl::PlSmallStr| -> pl::PolarsResult { let robj = probj.clone().0; let rfun = robj .as_function() .expect("internal error: this is not an R function"); - let newname_robj = rfun.call(pairlist!(name)).map_err(|err| { + let newname_robj = rfun.call(pairlist!(name.as_str())).map_err(|err| { let es = format!("in $name$map(): user function raised this error: {:?}", err).into(); pl_error::ComputeError(es) @@ -2138,7 +2138,7 @@ impl RPolarsExpr { .into(); pl_error::ComputeError(es) }) - .map(|str| str.to_string()) + .map(|str| str.into()) }; Ok(self.clone().0.name().map(f).into()) From 79da8b3d31560c741b84b9d544bbcf19551be4c5 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sat, 14 Sep 2024 14:12:27 +0200 Subject: [PATCH 10/33] remove some unused imports [skip ci] --- src/rust/src/lazy/dsl.rs | 1 - src/rust/src/rdatatype.rs | 2 -- 2 files changed, 3 deletions(-) diff --git a/src/rust/src/lazy/dsl.rs b/src/rust/src/lazy/dsl.rs index c6bcc4ced..622595e9b 100644 --- a/src/rust/src/lazy/dsl.rs +++ b/src/rust/src/lazy/dsl.rs @@ -25,7 +25,6 @@ pub type NameGenerator = pl::Arc String + Send + Sync>; use crate::rdatatype::robjs_to_ewm_options; use crate::utils::r_expr_to_rust_expr; use crate::utils::unpack_r_eval; -use smartstring::{LazyCompact, SmartString}; use std::sync::Arc; #[derive(Clone, Debug)] diff --git a/src/rust/src/rdatatype.rs b/src/rust/src/rdatatype.rs index 5ba69414e..81678433e 100644 --- a/src/rust/src/rdatatype.rs +++ b/src/rust/src/rdatatype.rs @@ -8,7 +8,6 @@ use polars_core::prelude::QuantileInterpolOptions; use crate::rpolarserr::{polars_to_rpolars_err, rerr, RPolarsErr, RResult, WithRctx}; use crate::utils::collect_hinted_result; use crate::utils::robj_to_rchoice; -use crate::utils::wrappers::null_to_opt; use pl::UniqueKeepStrategy; use polars::prelude::AsofStrategy; use std::num::NonZeroUsize; @@ -452,7 +451,6 @@ pub fn robj_to_window_mapping(robj: Robj) -> RResult { pub fn literal_to_any_value(litval: pl::LiteralValue) -> RResult> { use pl::AnyValue as av; use pl::LiteralValue as lv; - use smartstring::alias::String as SString; match litval { lv::Boolean(x) => Ok(av::Boolean(x)), //lv::Datetime(datetime, unit) => Ok(av::Datetime(datetime, unit, &None)), #check how to convert From 45a03e889c190df21779b7435690832388b9abe2 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Sat, 14 Sep 2024 15:06:54 +0000 Subject: [PATCH 11/33] fix: fix all rust lib compile errors --- R/extendr-wrappers.R | 2 +- src/rust/src/arrow_interop/to_rust.rs | 10 +++++++--- src/rust/src/conversion.rs | 8 -------- src/rust/src/lib.rs | 1 - src/rust/src/rdataframe/mod.rs | 1 - src/rust/src/series.rs | 4 ++-- 6 files changed, 10 insertions(+), 16 deletions(-) delete mode 100644 src/rust/src/conversion.rs diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 166d790a9..0eea4d007 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -94,7 +94,7 @@ concat_series <- function(l, rechunk, to_supertypes) .Call(wrap__concat_series, new_from_csv <- function(path, has_header, separator, comment_prefix, quote_char, skip_rows, dtypes, null_values, ignore_errors, cache, infer_schema_length, n_rows, encoding, low_memory, rechunk, skip_rows_after_header, row_index_name, row_index_offset, try_parse_dates, eol_char, raise_if_empty, truncate_ragged_lines) .Call(wrap__new_from_csv, path, has_header, separator, comment_prefix, quote_char, skip_rows, dtypes, null_values, ignore_errors, cache, infer_schema_length, n_rows, encoding, low_memory, rechunk, skip_rows_after_header, row_index_name, row_index_offset, try_parse_dates, eol_char, raise_if_empty, truncate_ragged_lines) -import_arrow_ipc <- function(path, n_rows, cache, rechunk, row_name, row_index, memory_map, hive_partitioning, hive_schema, try_parse_hive_dates, include_file_paths) .Call(wrap__import_arrow_ipc, path, n_rows, cache, rechunk, row_name, row_index, memory_map, hive_partitioning, hive_schema, try_parse_hive_dates, include_file_paths) +import_arrow_ipc <- function(path, n_rows, cache, rechunk, row_name, row_index, hive_partitioning, hive_schema, try_parse_hive_dates, include_file_paths) .Call(wrap__import_arrow_ipc, path, n_rows, cache, rechunk, row_name, row_index, hive_partitioning, hive_schema, try_parse_hive_dates, include_file_paths) new_from_ndjson <- function(path, infer_schema_length, batch_size, n_rows, low_memory, rechunk, row_index_name, row_index_offset, ignore_errors) .Call(wrap__new_from_ndjson, path, infer_schema_length, batch_size, n_rows, low_memory, rechunk, row_index_name, row_index_offset, ignore_errors) diff --git a/src/rust/src/arrow_interop/to_rust.rs b/src/rust/src/arrow_interop/to_rust.rs index 43ac5556f..949b5e269 100644 --- a/src/rust/src/arrow_interop/to_rust.rs +++ b/src/rust/src/arrow_interop/to_rust.rs @@ -46,7 +46,10 @@ pub unsafe fn to_rust_df(rb: Robj) -> Result { }; let names = robj_record_batch_names .as_str_vector() - .ok_or_else(|| "internal error: Robj$schema$names is not a char vec".to_string())?; + .ok_or_else(|| "internal error: Robj$schema$names is not a char vec".to_string())? + .into_iter() + .map(PlSmallStr::from_str) + .collect::>(); //iterate over record batches let rb_len = rb.len(); @@ -83,14 +86,15 @@ pub unsafe fn to_rust_df(rb: Robj) -> Result { .into_par_iter() .zip(names.par_iter()) .map(|(arr, name)| { - let s = Series::try_from((*name, arr)).map_err(|err| err.to_string())?; + let s = + Series::try_from((name.clone(), arr)).map_err(|err| err.to_string())?; Ok(s) }) .collect::, String>>() }) } else { let iter = arrays_vec.into_iter().zip(names.iter()).map(|(arr, name)| { - let s = Series::try_from((*name, arr)).map_err(|err| err.to_string())?; + let s = Series::try_from((name.clone(), arr)).map_err(|err| err.to_string())?; Ok(s) }); crate::utils::collect_hinted_result(n_columns, iter) diff --git a/src/rust/src/conversion.rs b/src/rust/src/conversion.rs deleted file mode 100644 index 62a656579..000000000 --- a/src/rust/src/conversion.rs +++ /dev/null @@ -1,8 +0,0 @@ -use smartstring::alias::String as SmartString; -pub(crate) fn strings_to_smartstrings(container: I) -> Vec -where - I: IntoIterator, - S: AsRef, -{ - container.into_iter().map(|s| s.as_ref().into()).collect() -} diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index f87968b3a..54a191ec1 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -14,7 +14,6 @@ pub mod lazy; pub mod arrow_interop; pub mod concat; -pub mod conversion; pub mod conversion_r_to_s; pub mod conversion_s_to_r; pub mod info; diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs index 06fe8ef7a..916164675 100644 --- a/src/rust/src/rdataframe/mod.rs +++ b/src/rust/src/rdataframe/mod.rs @@ -25,7 +25,6 @@ use polars::prelude::SchemaExt; use polars_core::error::PolarsError; use polars_core::utils::arrow; -use crate::conversion::strings_to_smartstrings; use polars::frame::explode::UnpivotArgsIR; use polars::prelude::pivot::{pivot, pivot_stable}; diff --git a/src/rust/src/series.rs b/src/rust/src/series.rs index 81152cd65..461683025 100644 --- a/src/rust/src/series.rs +++ b/src/rust/src/series.rs @@ -647,7 +647,7 @@ impl RPolarsSeries { } pub fn import_stream(name: Robj, stream_ptr: Robj) -> RResult { - let name = robj_to!(str, name)?; + let name: PlSmallStr = robj_to!(str, name)?.into(); let stream_in_ptr_addr = robj_to!(usize, stream_ptr)?; let stream_in_ptr = unsafe { Box::from_raw(stream_in_ptr_addr as *mut arrow::ffi::ArrowArrayStream) }; @@ -665,7 +665,7 @@ impl RPolarsSeries { } pub fn from_arrow_array_robj(name: Robj, array: Robj) -> Result { - let name = robj_to!(str, name)?; + let name: PlSmallStr = robj_to!(str, name)?.into(); let arr = crate::arrow_interop::to_rust::arrow_array_to_rust(array)?; match arr.dtype() { From cd57d8c9d1d06cdef333ba71d2086751b0e6a9fb Mon Sep 17 00:00:00 2001 From: eitsupi Date: Sat, 14 Sep 2024 15:10:59 +0000 Subject: [PATCH 12/33] chore: bump rust toolchain --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ff4c34d76..c7b6d385f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -119,4 +119,4 @@ Collate: Config/rextendr/version: 0.3.1 VignetteBuilder: knitr Config/polars/LibVersion: 0.42.2 -Config/polars/RustToolchainVersion: nightly-2024-07-26 +Config/polars/RustToolchainVersion: nightly-2024-08-26 From 1443412fe488b73ab0c9c4f67e7181248d915a76 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Sat, 14 Sep 2024 15:12:34 +0000 Subject: [PATCH 13/33] chore: bump lib version --- src/rust/Cargo.lock | 2 +- src/rust/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index ff448c5e0..06fe4547f 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -2277,7 +2277,7 @@ dependencies = [ [[package]] name = "r-polars" -version = "0.42.2" +version = "0.43.0" dependencies = [ "either", "extendr-api", diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml index d0deaccbb..934c5149e 100644 --- a/src/rust/Cargo.toml +++ b/src/rust/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "r-polars" -version = "0.42.2" +version = "0.43.0" edition = "2021" rust-version = "1.80.0" publish = false From b6804086b55eb52fb1d525a5fe215179e933cd0a Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sat, 14 Sep 2024 18:37:24 +0200 Subject: [PATCH 14/33] fix most failures [skip ci] --- R/as_polars.R | 4 ++-- R/io_ipc.R | 1 - tests/testthat/_snaps/lazy.md | 8 +++----- tests/testthat/test-ipc.R | 1 - tests/testthat/test-lazy.R | 4 ++-- tests/testthat/test-sink_stream.R | 8 ++++---- 6 files changed, 11 insertions(+), 15 deletions(-) diff --git a/R/as_polars.R b/R/as_polars.R index 697f6dcd5..40ede5aa6 100644 --- a/R/as_polars.R +++ b/R/as_polars.R @@ -383,14 +383,14 @@ as_polars_series.POSIXlt = function(x, name = NULL, ...) { #' @rdname as_polars_series #' @export as_polars_series.data.frame = function(x, name = NULL, ...) { - as_polars_df(x)$to_struct(name = name) + as_polars_df(x)$to_struct(name = name %||% "") } #' @rdname as_polars_series #' @export as_polars_series.vctrs_rcrd = function(x, name = NULL, ...) { - pl$select(unclass(x))$to_struct(name = name) + pl$select(unclass(x))$to_struct(name = name %||% "") } diff --git a/R/io_ipc.R b/R/io_ipc.R index 1ced8b429..dbb5d6725 100644 --- a/R/io_ipc.R +++ b/R/io_ipc.R @@ -60,7 +60,6 @@ pl_scan_ipc = function( rechunk = rechunk, row_name = row_index_name, row_index = row_index_offset, - memory_map = memory_map, hive_partitioning = hive_partitioning, hive_schema = hive_schema, try_parse_hive_dates = try_parse_hive_dates, diff --git a/tests/testthat/_snaps/lazy.md b/tests/testthat/_snaps/lazy.md index 7d1810ac5..8591cf0d8 100644 --- a/tests/testthat/_snaps/lazy.md +++ b/tests/testthat/_snaps/lazy.md @@ -10,7 +10,7 @@ FILTER [(col("a")) == (2)] FROM DF ["a", "b"]; PROJECT */2 COLUMNS; SELECTION: None -# LazyFrame serialize/deseialize +# LazyFrame serialize/deserialize Code jsonlite::prettify(json) @@ -51,13 +51,11 @@ ] }, "schema": { - "inner": { + "fields": { "a": "Int32", "b": "String" } - }, - "output_schema": null, - "filter": null + } } }, "predicate": { diff --git a/tests/testthat/test-ipc.R b/tests/testthat/test-ipc.R index 9034e81e3..f2db59fd3 100644 --- a/tests/testthat/test-ipc.R +++ b/tests/testthat/test-ipc.R @@ -34,7 +34,6 @@ test_that("Test reading data from Apache Arrow IPC", { expect_grepl_error(pl$scan_ipc(tmpf, rechunk = list())) expect_grepl_error(pl$scan_ipc(tmpf, row_index_name = c("x", "y"))) expect_grepl_error(pl$scan_ipc(tmpf, row_index_name = "name", row_index_offset = data.frame())) - expect_grepl_error(pl$scan_ipc(tmpf, memory_map = NULL)) }) diff --git a/tests/testthat/test-lazy.R b/tests/testthat/test-lazy.R index 94c596ca3..ce42b9f88 100644 --- a/tests/testthat/test-lazy.R +++ b/tests/testthat/test-lazy.R @@ -27,7 +27,7 @@ test_that("create LazyFrame", { }) -test_that("LazyFrame serialize/deseialize", { +test_that("LazyFrame serialize/deserialize", { skip_if_not_installed("jsonlite") df = pl$DataFrame( @@ -48,7 +48,7 @@ test_that("LazyFrame serialize/deseialize", { df$lazy()$select( pl$col("a")$map_elements(\(x) -abs(x)) )$serialize(), - "serialize not supported for this 'opaque' function" + "serialization not supported for this 'opaque' function" ) }) diff --git a/tests/testthat/test-sink_stream.R b/tests/testthat/test-sink_stream.R index 9427ab15a..c49335b10 100644 --- a/tests/testthat/test-sink_stream.R +++ b/tests/testthat/test-sink_stream.R @@ -49,27 +49,27 @@ test_that("Test sinking data to IPC file", { on.exit(unlink(tmpf)) lf$sink_ipc(tmpf) expect_grepl_error(lf$sink_ipc(tmpf, compression = "rar")) - expect_identical(pl$scan_ipc(tmpf, memory_map = FALSE)$collect()$to_data_frame(), rdf) + expect_identical(pl$scan_ipc(tmpf)$collect()$to_data_frame(), rdf) # update with new data lf$slice(5, 5)$sink_ipc(tmpf) expect_equal( - pl$scan_ipc(tmpf, memory_map = FALSE)$collect()$to_data_frame(), + pl$scan_ipc(tmpf)$collect()$to_data_frame(), lf$slice(5, 5)$collect()$to_data_frame() ) lf$sink_ipc(tmpf) # from another process via rcall rdf_callr = callr::r(\(tmpf) { - polars::pl$scan_ipc(tmpf, memory_map = FALSE)$collect()$to_data_frame() + polars::pl$scan_ipc(tmpf)$collect()$to_data_frame() }, args = list(tmpf = tmpf)) expect_identical(rdf_callr, rdf) # from another process via rpool f_ipc_to_s = \(s) { - polars::pl$scan_ipc(s$to_r(), memory_map = FALSE)$ + polars::pl$scan_ipc(s$to_r())$ select(polars::pl$struct(polars::pl$all()))$ collect()$ to_series() From 57d64c23d1a1f122f51bbafef4dd2c1f7f11f32f Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:39:28 +0200 Subject: [PATCH 15/33] Bump rust-polars to 0.43.0 [skip ci] --- src/rust/Cargo.lock | 130 ++++++++++++++++++++++++++++---------------- src/rust/Cargo.toml | 6 +- 2 files changed, 87 insertions(+), 49 deletions(-) diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index fd7a9d5ab..cec7b167e 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -302,6 +302,15 @@ version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" +[[package]] +name = "castaway" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0abae9be0aaf9ea96a3b1b8b1b55c602ca751eba1b1500220cea4ecbafe7c0d5" +dependencies = [ + "rustversion", +] + [[package]] name = "cc" version = "1.1.11" @@ -375,6 +384,21 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "compact_str" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6050c3a16ddab2e412160b31f2c871015704239bca62f72f6e5f0be631d3f644" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "serde", + "static_assertions", +] + [[package]] name = "constant_time_eq" version = "0.3.0" @@ -1683,8 +1707,8 @@ dependencies = [ [[package]] name = "polars" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "getrandom", "polars-arrow", @@ -1703,8 +1727,8 @@ dependencies = [ [[package]] name = "polars-arrow" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "ahash", "atoi", @@ -1728,6 +1752,7 @@ dependencies = [ "parking_lot", "polars-arrow-format", "polars-error", + "polars-schema", "polars-utils", "ryu", "serde", @@ -1750,8 +1775,8 @@ dependencies = [ [[package]] name = "polars-compute" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "bytemuck", "either", @@ -1765,8 +1790,8 @@ dependencies = [ [[package]] name = "polars-core" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "ahash", "bitflags", @@ -1784,6 +1809,7 @@ dependencies = [ "polars-compute", "polars-error", "polars-row", + "polars-schema", "polars-utils", "rand", "rand_distr", @@ -1791,7 +1817,6 @@ dependencies = [ "regex", "serde", "serde_json", - "smartstring", "thiserror", "version_check", "xxhash-rust", @@ -1799,8 +1824,8 @@ dependencies = [ [[package]] name = "polars-error" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "avro-schema", "object_store", @@ -1812,13 +1837,14 @@ dependencies = [ [[package]] name = "polars-expr" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "ahash", "bitflags", "once_cell", "polars-arrow", + "polars-compute", "polars-core", "polars-io", "polars-ops", @@ -1826,13 +1852,12 @@ dependencies = [ "polars-time", "polars-utils", "rayon", - "smartstring", ] [[package]] name = "polars-io" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "ahash", "async-trait", @@ -1846,6 +1871,7 @@ dependencies = [ "fs4", "futures", "glob", + "hashbrown", "home", "itoa", "memchr", @@ -1859,6 +1885,7 @@ dependencies = [ "polars-error", "polars-json", "polars-parquet", + "polars-schema", "polars-time", "polars-utils", "rayon", @@ -1869,7 +1896,6 @@ dependencies = [ "serde_json", "simd-json", "simdutf8", - "smartstring", "tokio", "tokio-util", "url", @@ -1878,8 +1904,8 @@ dependencies = [ [[package]] name = "polars-json" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "ahash", "chrono", @@ -1899,8 +1925,8 @@ dependencies = [ [[package]] name = "polars-lazy" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "ahash", "bitflags", @@ -1919,15 +1945,14 @@ dependencies = [ "polars-time", "polars-utils", "rayon", - "smartstring", "tokio", "version_check", ] [[package]] name = "polars-mem-engine" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "futures", "memmap2", @@ -1947,8 +1972,8 @@ dependencies = [ [[package]] name = "polars-ops" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "ahash", "aho-corasick", @@ -1969,6 +1994,7 @@ dependencies = [ "polars-core", "polars-error", "polars-json", + "polars-schema", "polars-utils", "rand", "rand_distr", @@ -1976,15 +2002,14 @@ dependencies = [ "regex", "serde", "serde_json", - "smartstring", "unicode-reverse", "version_check", ] [[package]] name = "polars-parquet" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "ahash", "async-stream", @@ -1994,6 +2019,7 @@ dependencies = [ "ethnum", "flate2", "futures", + "hashbrown", "lz4", "num-traits", "parquet-format-safe", @@ -2010,8 +2036,8 @@ dependencies = [ [[package]] name = "polars-pipe" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "crossbeam-channel", "crossbeam-queue", @@ -2029,7 +2055,6 @@ dependencies = [ "polars-row", "polars-utils", "rayon", - "smartstring", "tokio", "uuid", "version_check", @@ -2037,12 +2062,13 @@ dependencies = [ [[package]] name = "polars-plan" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "ahash", "bitflags", "bytemuck", + "bytes", "chrono", "chrono-tz", "either", @@ -2063,15 +2089,14 @@ dependencies = [ "recursive", "regex", "serde", - "smartstring", "strum_macros", "version_check", ] [[package]] name = "polars-row" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "bytemuck", "polars-arrow", @@ -2079,10 +2104,22 @@ dependencies = [ "polars-utils", ] +[[package]] +name = "polars-schema" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +dependencies = [ + "indexmap", + "polars-error", + "polars-utils", + "serde", + "version_check", +] + [[package]] name = "polars-sql" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "hex", "once_cell", @@ -2093,6 +2130,7 @@ dependencies = [ "polars-ops", "polars-plan", "polars-time", + "polars-utils", "rand", "serde", "serde_json", @@ -2101,8 +2139,8 @@ dependencies = [ [[package]] name = "polars-time" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "atoi", "bytemuck", @@ -2117,26 +2155,27 @@ dependencies = [ "polars-utils", "regex", "serde", - "smartstring", ] [[package]] name = "polars-utils" -version = "0.42.0" -source = "git+https://github.com/pola-rs/polars.git?rev=67551b6594c581731f0e9ca814ff7c39377bd324#67551b6594c581731f0e9ca814ff7c39377bd324" +version = "0.43.0" +source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" dependencies = [ "ahash", "bytemuck", "bytes", + "compact_str", "hashbrown", "indexmap", + "libc", "memmap2", "num-traits", "once_cell", "polars-error", "raw-cpuid", "rayon", - "smartstring", + "serde", "stacker", "sysinfo", "version_check", @@ -2749,7 +2788,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" dependencies = [ "autocfg", - "serde", "static_assertions", "version_check", ] diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml index c6f16225c..9b22d1854 100644 --- a/src/rust/Cargo.toml +++ b/src/rust/Cargo.toml @@ -50,8 +50,8 @@ serde_json = "*" smartstring = "1.0.1" state = "0.6.0" thiserror = "1.0.63" -polars-core = { git = "https://github.com/pola-rs/polars.git", rev = "67551b6594c581731f0e9ca814ff7c39377bd324", default-features = false } -polars-lazy = { git = "https://github.com/pola-rs/polars.git", rev = "67551b6594c581731f0e9ca814ff7c39377bd324", default-features = false } +polars-core = { git = "https://github.com/pola-rs/polars.git", rev = "d8acacfadc7059f6acc363a68839ec312910751e", default-features = false } +polars-lazy = { git = "https://github.com/pola-rs/polars.git", rev = "d8acacfadc7059f6acc363a68839ec312910751e", default-features = false } either = "1" [dependencies.polars] @@ -160,4 +160,4 @@ features = [ "zip_with", ] git = "https://github.com/pola-rs/polars.git" -rev = "67551b6594c581731f0e9ca814ff7c39377bd324" +rev = "d8acacfadc7059f6acc363a68839ec312910751e" From 71c91492a1cb2bb9b34e48eec59c4ba872259c1f Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Thu, 12 Sep 2024 18:43:56 +0200 Subject: [PATCH 16/33] 0.43.1 [skip ci] --- src/rust/Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml index 9b22d1854..1cd2f1474 100644 --- a/src/rust/Cargo.toml +++ b/src/rust/Cargo.toml @@ -50,8 +50,8 @@ serde_json = "*" smartstring = "1.0.1" state = "0.6.0" thiserror = "1.0.63" -polars-core = { git = "https://github.com/pola-rs/polars.git", rev = "d8acacfadc7059f6acc363a68839ec312910751e", default-features = false } -polars-lazy = { git = "https://github.com/pola-rs/polars.git", rev = "d8acacfadc7059f6acc363a68839ec312910751e", default-features = false } +polars-core = { git = "https://github.com/pola-rs/polars.git", rev = "54218e7e35e3defd4b0801e820c56eea6b91e525", default-features = false } +polars-lazy = { git = "https://github.com/pola-rs/polars.git", rev = "54218e7e35e3defd4b0801e820c56eea6b91e525", default-features = false } either = "1" [dependencies.polars] @@ -160,4 +160,4 @@ features = [ "zip_with", ] git = "https://github.com/pola-rs/polars.git" -rev = "d8acacfadc7059f6acc363a68839ec312910751e" +rev = "54218e7e35e3defd4b0801e820c56eea6b91e525" From 7bf4e98d1e38f587fd45fbc2ab343e57e3de7874 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Fri, 13 Sep 2024 18:17:49 +0200 Subject: [PATCH 17/33] start fixing [skip ci] --- src/rust/Cargo.lock | 76 ++++++++++++------------- src/rust/src/arrow_interop/to_rust.rs | 4 +- src/rust/src/lazy/dataframe.rs | 2 +- src/rust/src/lazy/dsl.rs | 40 +++++++------ src/rust/src/rdataframe/mod.rs | 4 +- src/rust/src/rdataframe/read_csv.rs | 4 +- src/rust/src/rdataframe/read_ipc.rs | 4 +- src/rust/src/rdataframe/read_parquet.rs | 2 +- src/rust/src/rdatatype.rs | 2 +- 9 files changed, 72 insertions(+), 66 deletions(-) diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index cec7b167e..4a64d4016 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -1707,8 +1707,8 @@ dependencies = [ [[package]] name = "polars" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "getrandom", "polars-arrow", @@ -1727,8 +1727,8 @@ dependencies = [ [[package]] name = "polars-arrow" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "atoi", @@ -1775,8 +1775,8 @@ dependencies = [ [[package]] name = "polars-compute" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "bytemuck", "either", @@ -1790,8 +1790,8 @@ dependencies = [ [[package]] name = "polars-core" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "bitflags", @@ -1824,8 +1824,8 @@ dependencies = [ [[package]] name = "polars-error" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "avro-schema", "object_store", @@ -1837,8 +1837,8 @@ dependencies = [ [[package]] name = "polars-expr" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "bitflags", @@ -1856,8 +1856,8 @@ dependencies = [ [[package]] name = "polars-io" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "async-trait", @@ -1904,8 +1904,8 @@ dependencies = [ [[package]] name = "polars-json" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "chrono", @@ -1925,8 +1925,8 @@ dependencies = [ [[package]] name = "polars-lazy" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "bitflags", @@ -1951,8 +1951,8 @@ dependencies = [ [[package]] name = "polars-mem-engine" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "futures", "memmap2", @@ -1972,8 +1972,8 @@ dependencies = [ [[package]] name = "polars-ops" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "aho-corasick", @@ -2008,8 +2008,8 @@ dependencies = [ [[package]] name = "polars-parquet" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "async-stream", @@ -2036,8 +2036,8 @@ dependencies = [ [[package]] name = "polars-pipe" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "crossbeam-channel", "crossbeam-queue", @@ -2062,8 +2062,8 @@ dependencies = [ [[package]] name = "polars-plan" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "bitflags", @@ -2095,8 +2095,8 @@ dependencies = [ [[package]] name = "polars-row" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "bytemuck", "polars-arrow", @@ -2106,8 +2106,8 @@ dependencies = [ [[package]] name = "polars-schema" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "indexmap", "polars-error", @@ -2118,8 +2118,8 @@ dependencies = [ [[package]] name = "polars-sql" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "hex", "once_cell", @@ -2139,8 +2139,8 @@ dependencies = [ [[package]] name = "polars-time" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "atoi", "bytemuck", @@ -2159,8 +2159,8 @@ dependencies = [ [[package]] name = "polars-utils" -version = "0.43.0" -source = "git+https://github.com/pola-rs/polars.git?rev=d8acacfadc7059f6acc363a68839ec312910751e#d8acacfadc7059f6acc363a68839ec312910751e" +version = "0.43.1" +source = "git+https://github.com/pola-rs/polars.git?rev=54218e7e35e3defd4b0801e820c56eea6b91e525#54218e7e35e3defd4b0801e820c56eea6b91e525" dependencies = [ "ahash", "bytemuck", diff --git a/src/rust/src/arrow_interop/to_rust.rs b/src/rust/src/arrow_interop/to_rust.rs index 2dfe6ca1d..43ac5556f 100644 --- a/src/rust/src/arrow_interop/to_rust.rs +++ b/src/rust/src/arrow_interop/to_rust.rs @@ -25,7 +25,7 @@ pub fn arrow_array_to_rust(arrow_array: Robj) -> Result { let array = unsafe { let field = ffi::import_field_from_c(schema.as_ref()).map_err(|err| err.to_string())?; - ffi::import_array_from_c(*array, field.data_type).map_err(|err| err.to_string())? + ffi::import_array_from_c(*array, field.dtype).map_err(|err| err.to_string())? }; Ok(array) } @@ -65,7 +65,7 @@ pub unsafe fn to_rust_df(rb: Robj) -> Result { let array_iter = columns_list.into_iter().map(|(_, column)| { let arr = arrow_array_to_rust(column)?; run_parallel |= matches!( - arr.data_type(), + arr.dtype(), ArrowDataType::Utf8 | ArrowDataType::Dictionary(_, _, _) ); let list_res: Result<_, String> = Ok(arr); diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index 22c2e60b0..9d42b09a2 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -721,7 +721,7 @@ impl RPolarsLazyFrame { #[derive(Clone)] pub struct RPolarsLazyGroupBy { pub lgb: pl::LazyGroupBy, - opt_state: pl::OptState, + opt_state: pl::OptFlags, } #[extendr] diff --git a/src/rust/src/lazy/dsl.rs b/src/rust/src/lazy/dsl.rs index 8cd203ad1..bb2151f0d 100644 --- a/src/rust/src/lazy/dsl.rs +++ b/src/rust/src/lazy/dsl.rs @@ -1030,7 +1030,7 @@ impl RPolarsExpr { } } - pub fn value_counts(&self, sort: bool, parallel: bool, name: String, normalize: bool) -> Self { + pub fn value_counts(&self, sort: bool, parallel: bool, name: &str, normalize: bool) -> Self { self.0 .clone() .value_counts(sort, parallel, name, normalize) @@ -1255,12 +1255,12 @@ impl RPolarsExpr { let width_strat = robj_to!(ListToStructWidthStrategy, n_field_strategy)?; let fields = robj_to!(Option, Robj, fields)?.map(|robj| { let par_fn: ParRObj = robj.into(); - let f: Arc<(dyn Fn(usize) -> SmartString + Send + Sync + 'static)> = + let f: Arc<(dyn Fn(usize) -> pl::PlSmallStr + Send + Sync + 'static)> = pl::Arc::new(move |idx: usize| { let thread_com = ThreadCom::from_global(&CONFIG); thread_com.send(RFnSignature::FnF64ToString(par_fn.clone(), idx as f64)); let s = thread_com.recv().unwrap_string(); - let s: SmartString = s.into(); + let s: pl::PlSmallStr = s.into(); s }); f @@ -1443,12 +1443,12 @@ impl RPolarsExpr { fn arr_to_struct(&self, fields: Robj) -> RResult { let fields = robj_to!(Option, Robj, fields)?.map(|robj| { let par_fn: ParRObj = robj.into(); - let f: Arc<(dyn Fn(usize) -> SmartString + Send + Sync + 'static)> = + let f: Arc<(dyn Fn(usize) -> pl::PlSmallStr + Send + Sync + 'static)> = pl::Arc::new(move |idx: usize| { let thread_com = ThreadCom::from_global(&CONFIG); thread_com.send(RFnSignature::FnF64ToString(par_fn.clone(), idx as f64)); let s = thread_com.recv().unwrap_string(); - let s: SmartString = s.into(); + let s: pl::PlSmallStr = s.into(); s }); f @@ -1583,22 +1583,23 @@ impl RPolarsExpr { .0 .clone() .dt() - .convert_time_zone(robj_to!(String, time_zone)?) + .convert_time_zone(robj_to!(String, time_zone)?.into()) .into()) } pub fn dt_replace_time_zone( &self, - time_zone: Nullable, + time_zone: Robj, ambiguous: Robj, non_existent: Robj, ) -> RResult { + let time_zone = robj_to!(Option, String, time_zone)?.map(|x| x.into()); Ok(self .0 .clone() .dt() .replace_time_zone( - time_zone.into_option(), + time_zone, robj_to!(PLExpr, ambiguous)?, robj_to!(NonExistent, non_existent)?, ) @@ -1985,7 +1986,7 @@ impl RPolarsExpr { // set expected type of output from R function let ot = robj_to!(Option, PLPolarsDataType, output_type)?; let output_map = pl::GetOutput::map_field(move |fld| match ot { - Some(ref dt) => Ok(pl::Field::new(fld.name(), dt.clone())), + Some(ref dt) => Ok(pl::Field::new(fld.name().clone(), dt.clone())), None => Ok(fld.clone()), }); @@ -2019,7 +2020,7 @@ impl RPolarsExpr { let ot = robj_to!(Option, PLPolarsDataType, output_type)?; let output_map = pl::GetOutput::map_field(move |fld| match ot { - Some(ref dt) => Ok(pl::Field::new(fld.name(), dt.clone())), + Some(ref dt) => Ok(pl::Field::new(fld.name().clone(), dt.clone())), None => Ok(fld.clone()), }); @@ -2052,7 +2053,7 @@ impl RPolarsExpr { let ot = null_to_opt(output_type).map(|rdt| rdt.0.clone()); let output_map = pl::GetOutput::map_field(move |fld| match ot { - Some(ref dt) => Ok(pl::Field::new(fld.name(), dt.clone())), + Some(ref dt) => Ok(pl::Field::new(fld.name().clone(), dt.clone())), None => Ok(fld.clone()), }); @@ -2321,12 +2322,13 @@ impl RPolarsExpr { exact: Robj, cache: Robj, ) -> RResult { + let format = robj_to!(Option, String, format)?.map(|x| x.into()); Ok(self .0 .clone() .str() .to_date(pl::StrptimeOptions { - format: robj_to!(Option, String, format)?, + format, strict: robj_to!(bool, strict)?, exact: robj_to!(bool, exact)?, cache: robj_to!(bool, cache)?, @@ -2345,15 +2347,19 @@ impl RPolarsExpr { cache: Robj, ambiguous: Robj, ) -> RResult { + let format = robj_to!(Option, String, format)?.map(|x| x.into()); + let time_unit = robj_to!(Option, timeunit, time_unit)?.map(|x| x.into()); + let time_zone = robj_to!(Option, String, time_zone)?.map(|x| x.into()); + Ok(self .0 .clone() .str() .to_datetime( - robj_to!(Option, timeunit, time_unit)?, - robj_to!(Option, String, time_zone)?, + time_unit, + time_zone, pl::StrptimeOptions { - format: robj_to!(Option, String, format)?, + format: format, strict: robj_to!(bool, strict)?, exact: robj_to!(bool, exact)?, cache: robj_to!(bool, cache)?, @@ -2364,12 +2370,14 @@ impl RPolarsExpr { } pub fn str_to_time(&self, format: Robj, strict: Robj, cache: Robj) -> RResult { + let format = robj_to!(Option, String, format)?.map(|x| x.into()); + Ok(self .0 .clone() .str() .to_time(pl::StrptimeOptions { - format: robj_to!(Option, String, format)?, + format, strict: robj_to!(bool, strict)?, cache: robj_to!(bool, cache)?, exact: true, diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs index db8df6b61..7d11a1da5 100644 --- a/src/rust/src/rdataframe/mod.rs +++ b/src/rust/src/rdataframe/mod.rs @@ -164,7 +164,7 @@ impl RPolarsDataFrame { .0 .clone() .with_row_index( - robj_to!(String, name)?.as_str(), + robj_to!(String, name)?.into(), robj_to!(Option, u32, offset)?, ) .map_err(polars_to_rpolars_err)? @@ -327,7 +327,7 @@ impl RPolarsDataFrame { pub fn to_struct(&self, name: Robj) -> RResult { use pl::IntoSeries; - let name = robj_to!(Option, str, name)?.unwrap_or(""); + let name = robj_to!(str, name)?.into(); let s = self.0.clone().into_struct(name); Ok(s.into_series().into()) } diff --git a/src/rust/src/rdataframe/read_csv.rs b/src/rust/src/rdataframe/read_csv.rs index 218245d1b..bc83425dc 100644 --- a/src/rust/src/rdataframe/read_csv.rs +++ b/src/rust/src/rdataframe/read_csv.rs @@ -22,7 +22,7 @@ use polars::prelude::LazyFileListReader; #[extendr] impl RPolarsRNullValues { pub fn new_all_columns(x: String) -> Self { - RPolarsRNullValues(pl::NullValues::AllColumnsSingle(x)) + RPolarsRNullValues(pl::NullValues::AllColumnsSingle(x.into())) } pub fn new_columns(x: Vec) -> Self { RPolarsRNullValues(pl::NullValues::AllColumns(x)) @@ -95,7 +95,7 @@ pub fn new_from_csv( let schema = dtv.map(|some_od| { let fields = some_od.0.iter().map(|(name, dtype)| { if let Some(sname) = name { - pl::Field::new(sname, dtype.clone()) + pl::Field::new(sname.into(), dtype.clone()) } else { todo!("missing column name for dtype not implemented"); } diff --git a/src/rust/src/rdataframe/read_ipc.rs b/src/rust/src/rdataframe/read_ipc.rs index 5185be59a..543e41c82 100644 --- a/src/rust/src/rdataframe/read_ipc.rs +++ b/src/rust/src/rdataframe/read_ipc.rs @@ -13,7 +13,6 @@ pub fn import_arrow_ipc( rechunk: Robj, row_name: Robj, row_index: Robj, - memory_map: Robj, hive_partitioning: Robj, hive_schema: Robj, try_parse_hive_dates: Robj, @@ -38,10 +37,9 @@ pub fn import_arrow_ipc( }) }) .transpose()?, - memory_map: robj_to!(bool, memory_map)?, cloud_options: None, hive_options, - include_file_paths: robj_to!(Option, String, include_file_paths)?.map(Arc::from), + include_file_paths: robj_to!(Option, String, include_file_paths)?.map(|x| x.into()), }; let lf = LazyFrame::scan_ipc(robj_to!(String, path)?, args) .map_err(crate::rpolarserr::polars_to_rpolars_err)?; diff --git a/src/rust/src/rdataframe/read_parquet.rs b/src/rust/src/rdataframe/read_parquet.rs index 8cb03b6c4..f147d8d48 100644 --- a/src/rust/src/rdataframe/read_parquet.rs +++ b/src/rust/src/rdataframe/read_parquet.rs @@ -52,7 +52,7 @@ pub fn new_from_parquet( use_statistics: robj_to!(bool, use_statistics)?, hive_options, glob: robj_to!(bool, glob)?, - include_file_paths: robj_to!(Option, String, include_file_paths)?.map(Arc::from), + include_file_paths: robj_to!(Option, String, include_file_paths)?.map(|x| x.into()), }; pl::LazyFrame::scan_parquet(path, args) diff --git a/src/rust/src/rdatatype.rs b/src/rust/src/rdatatype.rs index bdf70764e..cab767c0b 100644 --- a/src/rust/src/rdatatype.rs +++ b/src/rust/src/rdatatype.rs @@ -97,7 +97,7 @@ impl RPolarsDataType { let s = robjname2series(categories, "").unwrap(); let ca = s.str()?; let categories = ca.downcast_iter().next().unwrap().clone(); - Ok(RPolarsDataType(pl::datatypes::create_enum_data_type( + Ok(RPolarsDataType(pl::datatypes::create_enum_dtype( categories, ))) } From 2b1f015908e7e9fd8178aca966b87e2c5e4997a1 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Fri, 13 Sep 2024 18:39:06 +0200 Subject: [PATCH 18/33] some fixes for series [skip ci] --- src/rust/src/rdataframe/mod.rs | 2 +- src/rust/src/series.rs | 32 ++++++++++++++++---------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs index 7d11a1da5..2b19f4e42 100644 --- a/src/rust/src/rdataframe/mod.rs +++ b/src/rust/src/rdataframe/mod.rs @@ -355,7 +355,7 @@ impl RPolarsDataFrame { let compat_level = robj_to!(CompatLevel, compat_level).unwrap(); let schema = self.0.schema().to_arrow(compat_level); let data_type = ArrowDataType::Struct(schema.fields); - let field = ArrowField::new("", data_type, false); + let field = ArrowField::new("".into(), data_type, false); let iter_boxed = Box::new(OwnedDataFrameIterator::new(self.0.clone(), compat_level)); let mut stream = arrow::ffi::export_iterator(iter_boxed, field); diff --git a/src/rust/src/series.rs b/src/rust/src/series.rs index 29e9f4f0c..81152cd65 100644 --- a/src/rust/src/series.rs +++ b/src/rust/src/series.rs @@ -81,7 +81,7 @@ impl From<&RPolarsExpr> for pl::PolarsResult { .map(|df| { df.select_at_idx(0) .cloned() - .unwrap_or_else(|| pl::Series::new_empty("", &pl::DataType::Null)) + .unwrap_or_else(|| pl::Series::new_empty("".into(), &pl::DataType::Null)) .into() }) } @@ -118,7 +118,7 @@ impl RPolarsSeries { } //any mut method exposed in R suffixed _mut pub fn rename_mut(&mut self, name: &str) { - self.0.rename(name); + self.0.rename(name.into()); } //any other method or trait method in alphabetical order @@ -169,7 +169,7 @@ impl RPolarsSeries { normalize: bool, ) -> std::result::Result { self.0 - .value_counts(sort, parallel, name, normalize) + .value_counts(sort, parallel, name.into(), normalize) .map(RPolarsDataFrame) .map_err(|err| format!("in value_counts: {:?}", err)) } @@ -315,7 +315,7 @@ impl RPolarsSeries { pub fn alias(&self, name: &str) -> RPolarsSeries { let mut s = self.0.clone(); - s.rename(name); + s.rename(name.into()); RPolarsSeries(s) } @@ -512,11 +512,11 @@ impl RPolarsSeries { .cast(&DataType::UInt8) .map_err(polars_to_rpolars_err)? .mean_reduce() - .into_series(""); + .into_series("".into()); RPolarsSeries(s).to_r("double") } DataType::Datetime(_, _) | DataType::Duration(_) | DataType::Time => { - let s = self.0.mean_reduce().into_series(""); + let s = self.0.mean_reduce().into_series("".into()); RPolarsSeries(s).to_r("double") } _ => Ok(self.0.mean().into()), @@ -532,7 +532,7 @@ impl RPolarsSeries { .map_err(polars_to_rpolars_err)? .median_reduce() .map_err(polars_to_rpolars_err)? - .into_series(""); + .into_series("".into()); RPolarsSeries(s).to_r("double") } DataType::Datetime(_, _) | DataType::Duration(_) | DataType::Time => { @@ -540,7 +540,7 @@ impl RPolarsSeries { .0 .median_reduce() .map_err(polars_to_rpolars_err)? - .into_series(""); + .into_series("".into()); RPolarsSeries(s).to_r("double") } _ => Ok(self.0.median().into()), @@ -552,7 +552,7 @@ impl RPolarsSeries { self.0 .min_reduce() .map_err(polars_to_rpolars_err)? - .into_series(""), + .into_series("".into()), ) .to_r("double") } @@ -562,7 +562,7 @@ impl RPolarsSeries { self.0 .max_reduce() .map_err(polars_to_rpolars_err)? - .into_series(""), + .into_series("".into()), ) .to_r("double") } @@ -572,7 +572,7 @@ impl RPolarsSeries { self.0 .sum_reduce() .map_err(polars_to_rpolars_err)? - .into_series(""), + .into_series("".into()), ) .to_r("double") } @@ -584,7 +584,7 @@ impl RPolarsSeries { self.0 .std_reduce(ddof) .map_err(polars_to_rpolars_err)? - .into_series(""), + .into_series("".into()), ) .to_r("double") } @@ -596,7 +596,7 @@ impl RPolarsSeries { self.0 .var_reduce(ddof) .map_err(polars_to_rpolars_err)? - .into_series(""), + .into_series("".into()), ) .to_r("double") } @@ -631,7 +631,7 @@ impl RPolarsSeries { pub fn export_stream(&self, stream_ptr: &str, compat_level: Robj) { let compat_level = robj_to!(CompatLevel, compat_level).unwrap(); let data_type = self.0.dtype().to_arrow(compat_level); - let field = pl::ArrowField::new("", data_type, false); + let field = pl::ArrowField::new("".into(), data_type, false); let iter_boxed = Box::new(OwnedSeriesIterator::new(self.0.clone(), compat_level)); let mut stream = arrow::ffi::export_iterator(iter_boxed, field); @@ -668,7 +668,7 @@ impl RPolarsSeries { let name = robj_to!(str, name)?; let arr = crate::arrow_interop::to_rust::arrow_array_to_rust(array)?; - match arr.data_type() { + match arr.dtype() { ArrowDataType::LargeList(_) => { let array = arr.as_any().downcast_ref::().unwrap(); @@ -681,7 +681,7 @@ impl RPolarsSeries { } previous = o; } - let mut out = unsafe { ListChunked::from_chunks(name, vec![arr]) }; + let mut out = unsafe { ListChunked::from_chunks(name.into(), vec![arr]) }; if fast_explode { out.set_fast_explode() } From 74198c5a01610dc70f36ab95e038b271af5c89e8 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Fri, 13 Sep 2024 18:45:11 +0200 Subject: [PATCH 19/33] more [skip ci] --- src/rust/src/rlib.rs | 105 ++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 62 deletions(-) diff --git a/src/rust/src/rlib.rs b/src/rust/src/rlib.rs index 10df4c0e7..86190b638 100644 --- a/src/rust/src/rlib.rs +++ b/src/rust/src/rlib.rs @@ -6,61 +6,50 @@ use crate::RFnSignature; use crate::CONFIG; use extendr_api::prelude::*; use polars::chunked_array::ops::SortMultipleOptions; +use polars::lazy::dsl; use polars::prelude as pl; use std::result::Result; #[extendr] fn min_horizontal(dotdotdot: Robj) -> RResult { - Ok( - polars::lazy::dsl::min_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) - .map_err(polars_to_rpolars_err)? - .into(), - ) + Ok(dsl::min_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) + .map_err(polars_to_rpolars_err)? + .into()) } #[extendr] fn max_horizontal(dotdotdot: Robj) -> RResult { - Ok( - polars::lazy::dsl::max_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) - .map_err(polars_to_rpolars_err)? - .into(), - ) + Ok(dsl::max_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) + .map_err(polars_to_rpolars_err)? + .into()) } #[extendr] fn sum_horizontal(dotdotdot: Robj) -> RResult { - Ok( - polars::lazy::dsl::sum_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) - .map_err(polars_to_rpolars_err)? - .into(), - ) + Ok(dsl::sum_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) + .map_err(polars_to_rpolars_err)? + .into()) } #[extendr] fn mean_horizontal(dotdotdot: Robj) -> RResult { - Ok( - polars::lazy::dsl::mean_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) - .map_err(polars_to_rpolars_err)? - .into(), - ) + Ok(dsl::mean_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) + .map_err(polars_to_rpolars_err)? + .into()) } #[extendr] fn all_horizontal(dotdotdot: Robj) -> RResult { - Ok( - polars::lazy::dsl::all_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) - .map_err(polars_to_rpolars_err)? - .into(), - ) + Ok(dsl::all_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) + .map_err(polars_to_rpolars_err)? + .into()) } #[extendr] fn any_horizontal(dotdotdot: Robj) -> RResult { - Ok( - polars::lazy::dsl::any_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) - .map_err(polars_to_rpolars_err)? - .into(), - ) + Ok(dsl::any_horizontal(robj_to!(VecPLExprCol, dotdotdot)?) + .map_err(polars_to_rpolars_err)? + .into()) } #[extendr] @@ -87,7 +76,7 @@ fn concat_str(dotdotdot: Robj, separator: Robj, ignore_nulls: Robj) -> RResult RResult { - Ok(RPolarsExpr(polars::lazy::prelude::date_range( + Ok(RPolarsExpr(dsl::date_range( robj_to!(PLExprCol, start)?, robj_to!(PLExprCol, end)?, pl::Duration::parse(interval), @@ -97,7 +86,7 @@ fn date_range(start: Robj, end: Robj, interval: &str, closed: Robj) -> RResult RResult { - Ok(RPolarsExpr(polars::lazy::prelude::date_ranges( + Ok(RPolarsExpr(dsl::date_ranges( robj_to!(PLExprCol, start)?, robj_to!(PLExprCol, end)?, pl::Duration::parse(interval), @@ -114,13 +103,13 @@ fn datetime_range( time_unit: Robj, time_zone: Robj, ) -> RResult { - Ok(RPolarsExpr(polars::lazy::prelude::datetime_range( + Ok(RPolarsExpr(dsl::datetime_range( robj_to!(PLExprCol, start)?, robj_to!(PLExprCol, end)?, pl::Duration::parse(interval), robj_to!(ClosedWindow, closed)?, robj_to!(Option, timeunit, time_unit)?, - robj_to!(Option, String, time_zone)?, + robj_to!(Option, String, time_zone)?.map(|x| x.into()), ))) } @@ -133,13 +122,13 @@ fn datetime_ranges( time_unit: Robj, time_zone: Robj, ) -> RResult { - Ok(RPolarsExpr(polars::lazy::prelude::datetime_ranges( + Ok(RPolarsExpr(dsl::datetime_ranges( robj_to!(PLExprCol, start)?, robj_to!(PLExprCol, end)?, pl::Duration::parse(interval), robj_to!(ClosedWindow, closed)?, robj_to!(Option, timeunit, time_unit)?, - robj_to!(Option, String, time_zone)?, + robj_to!(Option, String, time_zone)?.map(|x| x.into()), ))) } @@ -284,19 +273,17 @@ pub fn duration( time_unit: Robj, ) -> RResult { let args = pl::DurationArgs { - weeks: robj_to!(Option, PLExprCol, weeks)?.unwrap_or(polars::lazy::dsl::lit(0)), - days: robj_to!(Option, PLExprCol, days)?.unwrap_or(polars::lazy::dsl::lit(0)), - hours: robj_to!(Option, PLExprCol, hours)?.unwrap_or(polars::lazy::dsl::lit(0)), - minutes: robj_to!(Option, PLExprCol, minutes)?.unwrap_or(polars::lazy::dsl::lit(0)), - seconds: robj_to!(Option, PLExprCol, seconds)?.unwrap_or(polars::lazy::dsl::lit(0)), - milliseconds: robj_to!(Option, PLExprCol, milliseconds)? - .unwrap_or(polars::lazy::dsl::lit(0)), - microseconds: robj_to!(Option, PLExprCol, microseconds)? - .unwrap_or(polars::lazy::dsl::lit(0)), - nanoseconds: robj_to!(Option, PLExprCol, nanoseconds)?.unwrap_or(polars::lazy::dsl::lit(0)), + weeks: robj_to!(Option, PLExprCol, weeks)?.unwrap_or(dsl::lit(0)), + days: robj_to!(Option, PLExprCol, days)?.unwrap_or(dsl::lit(0)), + hours: robj_to!(Option, PLExprCol, hours)?.unwrap_or(dsl::lit(0)), + minutes: robj_to!(Option, PLExprCol, minutes)?.unwrap_or(dsl::lit(0)), + seconds: robj_to!(Option, PLExprCol, seconds)?.unwrap_or(dsl::lit(0)), + milliseconds: robj_to!(Option, PLExprCol, milliseconds)?.unwrap_or(dsl::lit(0)), + microseconds: robj_to!(Option, PLExprCol, microseconds)?.unwrap_or(dsl::lit(0)), + nanoseconds: robj_to!(Option, PLExprCol, nanoseconds)?.unwrap_or(dsl::lit(0)), time_unit: robj_to!(timeunit, time_unit)?, }; - Ok(polars::lazy::dsl::duration(args).into()) + Ok(dsl::duration(args).into()) } #[extendr] @@ -317,15 +304,15 @@ pub fn datetime( year: robj_to!(PLExprCol, year)?, month: robj_to!(PLExprCol, month)?, day: robj_to!(PLExprCol, day)?, - hour: robj_to!(Option, PLExprCol, hour)?.unwrap_or(polars::lazy::dsl::lit(0)), - minute: robj_to!(Option, PLExprCol, minute)?.unwrap_or(polars::lazy::dsl::lit(0)), - second: robj_to!(Option, PLExprCol, second)?.unwrap_or(polars::lazy::dsl::lit(0)), - microsecond: robj_to!(Option, PLExprCol, microsecond)?.unwrap_or(polars::lazy::dsl::lit(0)), + hour: robj_to!(Option, PLExprCol, hour)?.unwrap_or(dsl::lit(0)), + minute: robj_to!(Option, PLExprCol, minute)?.unwrap_or(dsl::lit(0)), + second: robj_to!(Option, PLExprCol, second)?.unwrap_or(dsl::lit(0)), + microsecond: robj_to!(Option, PLExprCol, microsecond)?.unwrap_or(dsl::lit(0)), time_unit: robj_to!(timeunit, time_unit)?, - time_zone: robj_to!(Option, String, time_zone)?, + time_zone: robj_to!(Option, String, time_zone)?.map(|x| x.into()), ambiguous: robj_to!(PLExpr, ambiguous)?, }; - Ok(polars::lazy::dsl::datetime(args).into()) + Ok(dsl::datetime(args).into()) } #[extendr] @@ -363,7 +350,7 @@ pub fn int_range(start: Robj, end: Robj, step: i64, dtype: Robj) -> RResult RResult RResult RResult { let names = robj_to!(Vec, String, names)?; - Ok(pl::Expr::Field( - names - .into_iter() - .map(|name| pl::Arc::from(name.as_str())) - .collect(), - ) - .into()) + Ok(pl::Expr::Field(names.into_iter().map(|name| name.into()).collect()).into()) } extendr_module! { From 6e617a17edddac56893dbce3f581ff6bc636732c Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Fri, 13 Sep 2024 18:56:56 +0200 Subject: [PATCH 20/33] more [skip ci] --- src/rust/src/conversion_r_to_s.rs | 33 +++++++++++++---------------- src/rust/src/conversion_s_to_r.rs | 2 +- src/rust/src/rdataframe/read_csv.rs | 10 +++++---- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/src/rust/src/conversion_r_to_s.rs b/src/rust/src/conversion_r_to_s.rs index 6335fb3ee..3c758fa63 100644 --- a/src/rust/src/conversion_r_to_s.rs +++ b/src/rust/src/conversion_r_to_s.rs @@ -75,7 +75,7 @@ fn recursive_robjname2series_tree(x: &Robj, name: &str) -> pl::PolarsResult(real_slice) }; - Ok(SeriesTree::Series(pl::Series::new(name, i64_slice))) + Ok(SeriesTree::Series(pl::Series::new(name.into(), i64_slice))) } else { let mut s: pl::Series = rdouble //convert R NAs to rust options .iter() @@ -89,7 +89,7 @@ fn recursive_robjname2series_tree(x: &Robj, name: &str) -> pl::PolarsResult pl::PolarsResult pl::PolarsResult pl::PolarsResult Ok(SeriesTree::Series( @@ -138,14 +138,14 @@ fn recursive_robjname2series_tree(x: &Robj, name: &str) -> pl::PolarsResult { let rints = x.as_integers().expect("as matched"); let s = if rints.no_na().is_true() { - pl::Series::new(name, x.as_integer_slice().expect("as matched")) + pl::Series::new(name.into(), x.as_integer_slice().expect("as matched")) } else { //convert R NAs to rust options let mut s: pl::Series = rints .iter() .map(|x| if x.is_na() { None } else { Some(x.inner()) }) .collect(); - s.rename(name); + s.rename(name.into()); s }; @@ -175,7 +175,7 @@ fn recursive_robjname2series_tree(x: &Robj, name: &str) -> pl::PolarsResult pl::PolarsResult { Ok(SeriesTree::Series( (s * 1_000f64).cast(&pl::DataType::Int64)?.cast( - &pl::DataType::Datetime(pl::TimeUnit::Milliseconds, Some(tz)), + &pl::DataType::Datetime(pl::TimeUnit::Milliseconds, Some(tz.into())), )?, )) } @@ -235,17 +235,14 @@ fn recursive_robjname2series_tree(x: &Robj, name: &str) -> pl::PolarsResult Ok(s), // SeriesTree is just a regular Series, return as is SeriesTree::SeriesEmptyVec => { // Create Series of empty array and cast to the found leaf_dtype. use polars::prelude::ListBuilderTrait; - let empty_list_series = pl::ListBinaryChunkedBuilder::new(name, 0,0).finish().into_series(); + let empty_list_series = pl::ListBinaryChunkedBuilder::new(name.into(), 0,0).finish().into_series(); //cast to any discovered leaftype to allow concatenation without Error if let Some(leaf_dt_ref) = leaf_dtype { @@ -342,7 +339,7 @@ fn concat_series_tree( } // use polars new method to concat concatenated series - Ok(pl::Series::new(name, series_vec)) + Ok(pl::Series::new(name.into(), series_vec)) } } } @@ -350,7 +347,7 @@ fn concat_series_tree( //handle R character/strings to utf8 fn robj_to_utf8_series(rstrings: Strings, name: &str) -> pl::Series { if rstrings.no_na().is_true() { - pl::Series::new(name, rstrings.as_robj().as_str_vector().unwrap()) + pl::Series::new(name.into(), rstrings.as_robj().as_str_vector().unwrap()) } else { //convert R NAs to rust options let mut s: Vec> = Vec::with_capacity(rstrings.len()); @@ -360,6 +357,6 @@ fn robj_to_utf8_series(rstrings: Strings, name: &str) -> pl::Series { .map(|x| if x.is_na() { None } else { Some(x.as_str()) }), ); - pl::Series::new(name, s) + pl::Series::new(name.into(), s) } } diff --git a/src/rust/src/conversion_s_to_r.rs b/src/rust/src/conversion_s_to_r.rs index 8199148e1..c08e15cb3 100644 --- a/src/rust/src/conversion_s_to_r.rs +++ b/src/rust/src/conversion_s_to_r.rs @@ -226,7 +226,7 @@ pub fn pl_series_to_list( pl::DataFrame::new(vec![s.clone()])? .lazy() .select([col(s_name).dt().replace_time_zone( - Some(sys_tz), + Some(sys_tz.into()), pl::lit("raise"), pl::NonExistent::Raise, )]) diff --git a/src/rust/src/rdataframe/read_csv.rs b/src/rust/src/rdataframe/read_csv.rs index bc83425dc..32072a524 100644 --- a/src/rust/src/rdataframe/read_csv.rs +++ b/src/rust/src/rdataframe/read_csv.rs @@ -25,15 +25,17 @@ impl RPolarsRNullValues { RPolarsRNullValues(pl::NullValues::AllColumnsSingle(x.into())) } pub fn new_columns(x: Vec) -> Self { - RPolarsRNullValues(pl::NullValues::AllColumns(x)) + RPolarsRNullValues(pl::NullValues::AllColumns( + x.into_iter().map(|xi| xi.into()).collect(), + )) } pub fn new_named(robj: Robj) -> Self { let null_markers = robj.as_str_iter().expect("must be str"); let column_names = robj.names().expect("names were missing"); - let key_val_pair: Vec<(String, String)> = column_names + let key_val_pair: Vec<(pl::PlSmallStr, pl::PlSmallStr)> = column_names .zip(null_markers) - .map(|(k, v)| (k.to_owned(), v.to_owned())) + .map(|(k, v)| (k.into(), v.into())) .collect(); RPolarsRNullValues(pl::NullValues::Named(key_val_pair)) } @@ -113,7 +115,7 @@ pub fn new_from_csv( .with_cache(robj_to!(bool, cache)?) .with_dtype_overwrite(schema.map(|schema| std::sync::Arc::new(schema))) .with_low_memory(robj_to!(bool, low_memory)?) - .with_comment_prefix(robj_to!(Option, str, comment_prefix)?) + .with_comment_prefix(robj_to!(Option, str, comment_prefix)?.map(|x| x.into())) .with_quote_char(robj_to!(Option, Utf8Byte, quote_char)?) .with_eol_char(robj_to!(Utf8Byte, eol_char)?) .with_rechunk(robj_to!(bool, rechunk)?) From a587270a05061881915ef31bf83f6e53c07fa1fa Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sat, 14 Sep 2024 13:31:29 +0200 Subject: [PATCH 21/33] more [skip ci] --- src/rust/src/lazy/dataframe.rs | 9 ++++++--- src/rust/src/rdataframe/mod.rs | 23 ++++++++++++++++------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index 9d42b09a2..04857e68a 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -342,7 +342,10 @@ impl RPolarsLazyFrame { let maintain_order = robj_to!(bool, maintain_order)?; let subset = robj_to!(Option, Vec, String, subset)?; let lf = if maintain_order { - self.0.clone().unique_stable(subset, ke) + self.0.clone().unique_stable( + subset.map(|x| x.into_iter().map(|y| y.into()).collect()), + ke, + ) } else { self.0.clone().unique(subset, ke) }; @@ -699,14 +702,14 @@ impl RPolarsLazyFrame { .iter() .map(|(k, v)| { let data_type = robj_to!(RPolarsDataType, v)?; - Ok(pl::Field::new(k, data_type.0)) + Ok(pl::Field::new(k.into(), data_type.0)) }) .collect::>>()?; let mut cast_map = PlHashMap::with_capacity(dtypes.len()); cast_map.extend( dtypes .iter() - .map(|f| (f.name().as_ref(), f.data_type().clone())), + .map(|f| (f.name().as_ref(), f.dtype().clone())), ); Ok(self.0.clone().cast(cast_map, strict).into()) } diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs index 2b19f4e42..06fe8ef7a 100644 --- a/src/rust/src/rdataframe/mod.rs +++ b/src/rust/src/rdataframe/mod.rs @@ -18,13 +18,13 @@ pub use lazy::dataframe::*; use crate::conversion_s_to_r::pl_series_to_list; pub use crate::series::*; +use crate::utils::{collect_hinted_result, r_result_list}; use arrow::datatypes::ArrowDataType; use polars::prelude::ArrowField; +use polars::prelude::SchemaExt; use polars_core::error::PolarsError; use polars_core::utils::arrow; -use crate::utils::{collect_hinted_result, r_result_list}; - use crate::conversion::strings_to_smartstrings; use polars::frame::explode::UnpivotArgsIR; use polars::prelude::pivot::{pivot, pivot_stable}; @@ -40,7 +40,8 @@ pub struct OwnedDataFrameIterator { impl OwnedDataFrameIterator { pub fn new(df: polars::frame::DataFrame, compat_level: CompatLevel) -> Self { let schema = df.schema().to_arrow(compat_level); - let data_type = ArrowDataType::Struct(schema.fields); + // TODO: changed when bumping to 0.43.1, might need refactor + let data_type = ArrowDataType::Struct(schema.iter_values().map(|x| x.clone()).collect()); let vs = df.get_columns().to_vec(); Self { columns: vs, @@ -221,7 +222,8 @@ impl RPolarsDataFrame { pub fn schema(&self) -> List { let mut l = self.dtypes(); - l.set_names(self.0.get_column_names()).unwrap(); + let nms = self.0.get_column_names().into_iter().map(|x| x.as_str()); + l.set_names(nms).unwrap(); l } @@ -354,7 +356,8 @@ impl RPolarsDataFrame { pub fn export_stream(&self, stream_ptr: &str, compat_level: Robj) { let compat_level = robj_to!(CompatLevel, compat_level).unwrap(); let schema = self.0.schema().to_arrow(compat_level); - let data_type = ArrowDataType::Struct(schema.fields); + // TODO: changed when bumping to 0.43.1, might need refactor + let data_type = ArrowDataType::Struct(schema.iter_values().map(|x| x.clone()).collect()); let field = ArrowField::new("".into(), data_type, false); let iter_boxed = Box::new(OwnedDataFrameIterator::new(self.0.clone(), compat_level)); @@ -393,8 +396,14 @@ impl RPolarsDataFrame { ) -> RResult { use polars::prelude::UnpivotDF; let args = UnpivotArgsIR { - on: strings_to_smartstrings(robj_to!(Vec, String, on)?), - index: strings_to_smartstrings(robj_to!(Vec, String, index)?), + on: robj_to!(Vec, String, on)? + .into_iter() + .map(|x| x.into()) + .collect(), + index: robj_to!(Vec, String, index)? + .into_iter() + .map(|x| x.into()) + .collect(), value_name: robj_to!(Option, String, value_name)?.map(|s| s.into()), variable_name: robj_to!(Option, String, variable_name)?.map(|s| s.into()), }; From 410c41a57c4a51c08be9fb8a75031eec242b8f01 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sat, 14 Sep 2024 13:47:28 +0200 Subject: [PATCH 22/33] more [skip ci] --- src/rust/src/conversion_r_to_s.rs | 2 +- src/rust/src/conversion_s_to_r.rs | 4 ++-- src/rust/src/rdatatype.rs | 15 +++++---------- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/src/rust/src/conversion_r_to_s.rs b/src/rust/src/conversion_r_to_s.rs index 3c758fa63..cc070fbf6 100644 --- a/src/rust/src/conversion_r_to_s.rs +++ b/src/rust/src/conversion_r_to_s.rs @@ -240,7 +240,7 @@ fn recursive_robjname2series_tree(x: &Robj, name: &str) -> pl::PolarsResult { - let df = s.clone().into_frame().unnest([s.name()]).unwrap(); + let df = s.clone().into_frame().unnest([s.name().clone()]).unwrap(); let mut l = RPolarsDataFrame(df).to_list_result(int64_conversion)?; //TODO contribute extendr_api set_attrib mutates &self, change signature to surprise anyone @@ -225,7 +225,7 @@ pub fn pl_series_to_list( let s_name = s.name(); pl::DataFrame::new(vec![s.clone()])? .lazy() - .select([col(s_name).dt().replace_time_zone( + .select([col(s_name.clone()).dt().replace_time_zone( Some(sys_tz.into()), pl::lit("raise"), pl::NonExistent::Raise, diff --git a/src/rust/src/rdatatype.rs b/src/rust/src/rdatatype.rs index cab767c0b..5ba69414e 100644 --- a/src/rust/src/rdatatype.rs +++ b/src/rust/src/rdatatype.rs @@ -102,9 +102,9 @@ impl RPolarsDataType { ))) } - pub fn new_datetime(tu: Robj, tz: Nullable) -> RResult { - robj_to!(timeunit, tu) - .map(|dt| RPolarsDataType(pl::DataType::Datetime(dt, null_to_opt(tz)))) + pub fn new_datetime(tu: Robj, tz: Robj) -> RResult { + let tz = robj_to!(Option, String, tz)?.map(|x| x.into()); + robj_to!(timeunit, tu).map(|dt| RPolarsDataType(pl::DataType::Datetime(dt, tz))) } pub fn new_duration(tu: Robj) -> RResult { @@ -475,12 +475,7 @@ pub fn literal_to_any_value(litval: pl::LiteralValue) -> RResult Ok(av::UInt64(x)), lv::UInt8(x) => Ok(av::UInt8(x)), // lv::Utf8(x) => Ok(av::Utf8(x.as_str())), - lv::String(x) => { - let mut s = SString::new(); - - s.push_str(x.as_str()); - Ok(av::StringOwned(s)) - } + lv::String(x) => Ok(av::StringOwned(x)), x => rerr().notachoice(format!("cannot convert LiteralValue {:?} to AnyValue", x)), } } @@ -741,8 +736,8 @@ pub fn robj_to_statistics_options(robj: Robj) -> RResult pub fn robj_to_wrap_schema(robj: Robj) -> RResult> { use pl::Schema; - let mut schema = Schema::new(); let hm = robj.as_list().unwrap().into_hashmap(); + let mut schema = Schema::with_capacity(hm.capacity()); for (key, value) in hm.into_iter() { let dt = crate::utils::robj_to_datatype(value)?; From a93b5ec10111a977195b73c865898f8e4fabde53 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sat, 14 Sep 2024 14:05:58 +0200 Subject: [PATCH 23/33] more [skip ci] --- src/rust/src/lazy/dsl.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rust/src/lazy/dsl.rs b/src/rust/src/lazy/dsl.rs index bb2151f0d..c6bcc4ced 100644 --- a/src/rust/src/lazy/dsl.rs +++ b/src/rust/src/lazy/dsl.rs @@ -2118,13 +2118,13 @@ impl RPolarsExpr { // //wrap as series // }; - let f = move |name: &str| -> pl::PolarsResult { + let f = move |name: &pl::PlSmallStr| -> pl::PolarsResult { let robj = probj.clone().0; let rfun = robj .as_function() .expect("internal error: this is not an R function"); - let newname_robj = rfun.call(pairlist!(name)).map_err(|err| { + let newname_robj = rfun.call(pairlist!(name.as_str())).map_err(|err| { let es = format!("in $name$map(): user function raised this error: {:?}", err).into(); pl_error::ComputeError(es) @@ -2138,7 +2138,7 @@ impl RPolarsExpr { .into(); pl_error::ComputeError(es) }) - .map(|str| str.to_string()) + .map(|str| str.into()) }; Ok(self.clone().0.name().map(f).into()) From 05984971f6a9ae1b9ab6d260f16d76842f7ad12f Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sat, 14 Sep 2024 14:12:27 +0200 Subject: [PATCH 24/33] remove some unused imports [skip ci] --- src/rust/src/lazy/dsl.rs | 1 - src/rust/src/rdatatype.rs | 2 -- 2 files changed, 3 deletions(-) diff --git a/src/rust/src/lazy/dsl.rs b/src/rust/src/lazy/dsl.rs index c6bcc4ced..622595e9b 100644 --- a/src/rust/src/lazy/dsl.rs +++ b/src/rust/src/lazy/dsl.rs @@ -25,7 +25,6 @@ pub type NameGenerator = pl::Arc String + Send + Sync>; use crate::rdatatype::robjs_to_ewm_options; use crate::utils::r_expr_to_rust_expr; use crate::utils::unpack_r_eval; -use smartstring::{LazyCompact, SmartString}; use std::sync::Arc; #[derive(Clone, Debug)] diff --git a/src/rust/src/rdatatype.rs b/src/rust/src/rdatatype.rs index 5ba69414e..81678433e 100644 --- a/src/rust/src/rdatatype.rs +++ b/src/rust/src/rdatatype.rs @@ -8,7 +8,6 @@ use polars_core::prelude::QuantileInterpolOptions; use crate::rpolarserr::{polars_to_rpolars_err, rerr, RPolarsErr, RResult, WithRctx}; use crate::utils::collect_hinted_result; use crate::utils::robj_to_rchoice; -use crate::utils::wrappers::null_to_opt; use pl::UniqueKeepStrategy; use polars::prelude::AsofStrategy; use std::num::NonZeroUsize; @@ -452,7 +451,6 @@ pub fn robj_to_window_mapping(robj: Robj) -> RResult { pub fn literal_to_any_value(litval: pl::LiteralValue) -> RResult> { use pl::AnyValue as av; use pl::LiteralValue as lv; - use smartstring::alias::String as SString; match litval { lv::Boolean(x) => Ok(av::Boolean(x)), //lv::Datetime(datetime, unit) => Ok(av::Datetime(datetime, unit, &None)), #check how to convert From d2735d3dc671dc9b1faac4841ab6fc2041533f1a Mon Sep 17 00:00:00 2001 From: eitsupi Date: Sat, 14 Sep 2024 15:06:54 +0000 Subject: [PATCH 25/33] fix: fix all rust lib compile errors --- R/extendr-wrappers.R | 2 +- src/rust/src/arrow_interop/to_rust.rs | 10 +++++++--- src/rust/src/conversion.rs | 8 -------- src/rust/src/lib.rs | 1 - src/rust/src/rdataframe/mod.rs | 1 - src/rust/src/series.rs | 4 ++-- 6 files changed, 10 insertions(+), 16 deletions(-) delete mode 100644 src/rust/src/conversion.rs diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 166d790a9..0eea4d007 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -94,7 +94,7 @@ concat_series <- function(l, rechunk, to_supertypes) .Call(wrap__concat_series, new_from_csv <- function(path, has_header, separator, comment_prefix, quote_char, skip_rows, dtypes, null_values, ignore_errors, cache, infer_schema_length, n_rows, encoding, low_memory, rechunk, skip_rows_after_header, row_index_name, row_index_offset, try_parse_dates, eol_char, raise_if_empty, truncate_ragged_lines) .Call(wrap__new_from_csv, path, has_header, separator, comment_prefix, quote_char, skip_rows, dtypes, null_values, ignore_errors, cache, infer_schema_length, n_rows, encoding, low_memory, rechunk, skip_rows_after_header, row_index_name, row_index_offset, try_parse_dates, eol_char, raise_if_empty, truncate_ragged_lines) -import_arrow_ipc <- function(path, n_rows, cache, rechunk, row_name, row_index, memory_map, hive_partitioning, hive_schema, try_parse_hive_dates, include_file_paths) .Call(wrap__import_arrow_ipc, path, n_rows, cache, rechunk, row_name, row_index, memory_map, hive_partitioning, hive_schema, try_parse_hive_dates, include_file_paths) +import_arrow_ipc <- function(path, n_rows, cache, rechunk, row_name, row_index, hive_partitioning, hive_schema, try_parse_hive_dates, include_file_paths) .Call(wrap__import_arrow_ipc, path, n_rows, cache, rechunk, row_name, row_index, hive_partitioning, hive_schema, try_parse_hive_dates, include_file_paths) new_from_ndjson <- function(path, infer_schema_length, batch_size, n_rows, low_memory, rechunk, row_index_name, row_index_offset, ignore_errors) .Call(wrap__new_from_ndjson, path, infer_schema_length, batch_size, n_rows, low_memory, rechunk, row_index_name, row_index_offset, ignore_errors) diff --git a/src/rust/src/arrow_interop/to_rust.rs b/src/rust/src/arrow_interop/to_rust.rs index 43ac5556f..949b5e269 100644 --- a/src/rust/src/arrow_interop/to_rust.rs +++ b/src/rust/src/arrow_interop/to_rust.rs @@ -46,7 +46,10 @@ pub unsafe fn to_rust_df(rb: Robj) -> Result { }; let names = robj_record_batch_names .as_str_vector() - .ok_or_else(|| "internal error: Robj$schema$names is not a char vec".to_string())?; + .ok_or_else(|| "internal error: Robj$schema$names is not a char vec".to_string())? + .into_iter() + .map(PlSmallStr::from_str) + .collect::>(); //iterate over record batches let rb_len = rb.len(); @@ -83,14 +86,15 @@ pub unsafe fn to_rust_df(rb: Robj) -> Result { .into_par_iter() .zip(names.par_iter()) .map(|(arr, name)| { - let s = Series::try_from((*name, arr)).map_err(|err| err.to_string())?; + let s = + Series::try_from((name.clone(), arr)).map_err(|err| err.to_string())?; Ok(s) }) .collect::, String>>() }) } else { let iter = arrays_vec.into_iter().zip(names.iter()).map(|(arr, name)| { - let s = Series::try_from((*name, arr)).map_err(|err| err.to_string())?; + let s = Series::try_from((name.clone(), arr)).map_err(|err| err.to_string())?; Ok(s) }); crate::utils::collect_hinted_result(n_columns, iter) diff --git a/src/rust/src/conversion.rs b/src/rust/src/conversion.rs deleted file mode 100644 index 62a656579..000000000 --- a/src/rust/src/conversion.rs +++ /dev/null @@ -1,8 +0,0 @@ -use smartstring::alias::String as SmartString; -pub(crate) fn strings_to_smartstrings(container: I) -> Vec -where - I: IntoIterator, - S: AsRef, -{ - container.into_iter().map(|s| s.as_ref().into()).collect() -} diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index f87968b3a..54a191ec1 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -14,7 +14,6 @@ pub mod lazy; pub mod arrow_interop; pub mod concat; -pub mod conversion; pub mod conversion_r_to_s; pub mod conversion_s_to_r; pub mod info; diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs index 06fe8ef7a..916164675 100644 --- a/src/rust/src/rdataframe/mod.rs +++ b/src/rust/src/rdataframe/mod.rs @@ -25,7 +25,6 @@ use polars::prelude::SchemaExt; use polars_core::error::PolarsError; use polars_core::utils::arrow; -use crate::conversion::strings_to_smartstrings; use polars::frame::explode::UnpivotArgsIR; use polars::prelude::pivot::{pivot, pivot_stable}; diff --git a/src/rust/src/series.rs b/src/rust/src/series.rs index 81152cd65..461683025 100644 --- a/src/rust/src/series.rs +++ b/src/rust/src/series.rs @@ -647,7 +647,7 @@ impl RPolarsSeries { } pub fn import_stream(name: Robj, stream_ptr: Robj) -> RResult { - let name = robj_to!(str, name)?; + let name: PlSmallStr = robj_to!(str, name)?.into(); let stream_in_ptr_addr = robj_to!(usize, stream_ptr)?; let stream_in_ptr = unsafe { Box::from_raw(stream_in_ptr_addr as *mut arrow::ffi::ArrowArrayStream) }; @@ -665,7 +665,7 @@ impl RPolarsSeries { } pub fn from_arrow_array_robj(name: Robj, array: Robj) -> Result { - let name = robj_to!(str, name)?; + let name: PlSmallStr = robj_to!(str, name)?.into(); let arr = crate::arrow_interop::to_rust::arrow_array_to_rust(array)?; match arr.dtype() { From a3cf492fefa8e0328520c0f055af2680439bb8ad Mon Sep 17 00:00:00 2001 From: eitsupi Date: Sat, 14 Sep 2024 15:10:59 +0000 Subject: [PATCH 26/33] chore: bump rust toolchain --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ff4c34d76..c7b6d385f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -119,4 +119,4 @@ Collate: Config/rextendr/version: 0.3.1 VignetteBuilder: knitr Config/polars/LibVersion: 0.42.2 -Config/polars/RustToolchainVersion: nightly-2024-07-26 +Config/polars/RustToolchainVersion: nightly-2024-08-26 From 12b821d800ef76a174a804196c0cb2d58776557c Mon Sep 17 00:00:00 2001 From: eitsupi Date: Sat, 14 Sep 2024 15:12:34 +0000 Subject: [PATCH 27/33] chore: bump lib version --- src/rust/Cargo.lock | 2 +- src/rust/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 4a64d4016..89f7017aa 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -2277,7 +2277,7 @@ dependencies = [ [[package]] name = "r-polars" -version = "0.42.2" +version = "0.43.0" dependencies = [ "either", "extendr-api", diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml index 1cd2f1474..53d9e50ef 100644 --- a/src/rust/Cargo.toml +++ b/src/rust/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "r-polars" -version = "0.42.2" +version = "0.43.0" edition = "2021" rust-version = "1.80.0" publish = false From 0febcba5c946f60fc9dd30b6a2eff3b21fff9187 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sat, 14 Sep 2024 18:37:24 +0200 Subject: [PATCH 28/33] fix most failures [skip ci] --- R/as_polars.R | 4 ++-- R/io_ipc.R | 1 - tests/testthat/_snaps/lazy.md | 8 +++----- tests/testthat/test-ipc.R | 1 - tests/testthat/test-lazy.R | 4 ++-- tests/testthat/test-sink_stream.R | 8 ++++---- 6 files changed, 11 insertions(+), 15 deletions(-) diff --git a/R/as_polars.R b/R/as_polars.R index 697f6dcd5..40ede5aa6 100644 --- a/R/as_polars.R +++ b/R/as_polars.R @@ -383,14 +383,14 @@ as_polars_series.POSIXlt = function(x, name = NULL, ...) { #' @rdname as_polars_series #' @export as_polars_series.data.frame = function(x, name = NULL, ...) { - as_polars_df(x)$to_struct(name = name) + as_polars_df(x)$to_struct(name = name %||% "") } #' @rdname as_polars_series #' @export as_polars_series.vctrs_rcrd = function(x, name = NULL, ...) { - pl$select(unclass(x))$to_struct(name = name) + pl$select(unclass(x))$to_struct(name = name %||% "") } diff --git a/R/io_ipc.R b/R/io_ipc.R index 1ced8b429..dbb5d6725 100644 --- a/R/io_ipc.R +++ b/R/io_ipc.R @@ -60,7 +60,6 @@ pl_scan_ipc = function( rechunk = rechunk, row_name = row_index_name, row_index = row_index_offset, - memory_map = memory_map, hive_partitioning = hive_partitioning, hive_schema = hive_schema, try_parse_hive_dates = try_parse_hive_dates, diff --git a/tests/testthat/_snaps/lazy.md b/tests/testthat/_snaps/lazy.md index 7d1810ac5..8591cf0d8 100644 --- a/tests/testthat/_snaps/lazy.md +++ b/tests/testthat/_snaps/lazy.md @@ -10,7 +10,7 @@ FILTER [(col("a")) == (2)] FROM DF ["a", "b"]; PROJECT */2 COLUMNS; SELECTION: None -# LazyFrame serialize/deseialize +# LazyFrame serialize/deserialize Code jsonlite::prettify(json) @@ -51,13 +51,11 @@ ] }, "schema": { - "inner": { + "fields": { "a": "Int32", "b": "String" } - }, - "output_schema": null, - "filter": null + } } }, "predicate": { diff --git a/tests/testthat/test-ipc.R b/tests/testthat/test-ipc.R index 9034e81e3..f2db59fd3 100644 --- a/tests/testthat/test-ipc.R +++ b/tests/testthat/test-ipc.R @@ -34,7 +34,6 @@ test_that("Test reading data from Apache Arrow IPC", { expect_grepl_error(pl$scan_ipc(tmpf, rechunk = list())) expect_grepl_error(pl$scan_ipc(tmpf, row_index_name = c("x", "y"))) expect_grepl_error(pl$scan_ipc(tmpf, row_index_name = "name", row_index_offset = data.frame())) - expect_grepl_error(pl$scan_ipc(tmpf, memory_map = NULL)) }) diff --git a/tests/testthat/test-lazy.R b/tests/testthat/test-lazy.R index 94c596ca3..ce42b9f88 100644 --- a/tests/testthat/test-lazy.R +++ b/tests/testthat/test-lazy.R @@ -27,7 +27,7 @@ test_that("create LazyFrame", { }) -test_that("LazyFrame serialize/deseialize", { +test_that("LazyFrame serialize/deserialize", { skip_if_not_installed("jsonlite") df = pl$DataFrame( @@ -48,7 +48,7 @@ test_that("LazyFrame serialize/deseialize", { df$lazy()$select( pl$col("a")$map_elements(\(x) -abs(x)) )$serialize(), - "serialize not supported for this 'opaque' function" + "serialization not supported for this 'opaque' function" ) }) diff --git a/tests/testthat/test-sink_stream.R b/tests/testthat/test-sink_stream.R index 9427ab15a..c49335b10 100644 --- a/tests/testthat/test-sink_stream.R +++ b/tests/testthat/test-sink_stream.R @@ -49,27 +49,27 @@ test_that("Test sinking data to IPC file", { on.exit(unlink(tmpf)) lf$sink_ipc(tmpf) expect_grepl_error(lf$sink_ipc(tmpf, compression = "rar")) - expect_identical(pl$scan_ipc(tmpf, memory_map = FALSE)$collect()$to_data_frame(), rdf) + expect_identical(pl$scan_ipc(tmpf)$collect()$to_data_frame(), rdf) # update with new data lf$slice(5, 5)$sink_ipc(tmpf) expect_equal( - pl$scan_ipc(tmpf, memory_map = FALSE)$collect()$to_data_frame(), + pl$scan_ipc(tmpf)$collect()$to_data_frame(), lf$slice(5, 5)$collect()$to_data_frame() ) lf$sink_ipc(tmpf) # from another process via rcall rdf_callr = callr::r(\(tmpf) { - polars::pl$scan_ipc(tmpf, memory_map = FALSE)$collect()$to_data_frame() + polars::pl$scan_ipc(tmpf)$collect()$to_data_frame() }, args = list(tmpf = tmpf)) expect_identical(rdf_callr, rdf) # from another process via rpool f_ipc_to_s = \(s) { - polars::pl$scan_ipc(s$to_r(), memory_map = FALSE)$ + polars::pl$scan_ipc(s$to_r())$ select(polars::pl$struct(polars::pl$all()))$ collect()$ to_series() From 6b7f9b61201ae7a5b9f2f40aa5a17664ea1f5a1e Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sat, 21 Sep 2024 11:35:23 +0200 Subject: [PATCH 29/33] fix for literal of length 1 --- src/rust/src/lazy/dsl.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/rust/src/lazy/dsl.rs b/src/rust/src/lazy/dsl.rs index 622595e9b..52cfae57f 100644 --- a/src/rust/src/lazy/dsl.rs +++ b/src/rust/src/lazy/dsl.rs @@ -87,8 +87,12 @@ impl RPolarsExpr { (Rtype::Raw, _) => Ok(dsl::lit(robj_to_binary_vec(robj)?)), // Raw in R is seen as a vector of bytes, in polars it is a Literal, not wrapped in a Series. (_, rlen) if rlen != 1 => to_series_then_lit(robj), (Rtype::List, _) => to_series_then_lit(robj), - (_, _) if robj_inherits(&robj, ["POSIXct", "PTime", "Date"]) => { - to_series_then_lit(robj) + (_, rlen) if robj_inherits(&robj, ["POSIXct", "PTime", "Date"]) => { + if rlen == 1 { + Ok(to_series_then_lit(robj)?.first()) + } else { + to_series_then_lit(robj) + } } (Rtype::Integers, 1) => { From a8d23821b0244a909205922921745db09e777784 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sat, 21 Sep 2024 11:43:14 +0200 Subject: [PATCH 30/33] news [skip ci] --- NEWS.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/NEWS.md b/NEWS.md index e77f59daa..dff1e7c99 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,15 @@ ## Polars R Package (development version) +- Updated rust-polars to 0.43.1 (#1230). + +### Breaking changes + +- In `pl$scan_ipc()` and `pl$read_ipc()`, the argument `memory_map` is removed + (#1230). +- In `$serialize()`, in the field `schema`, the field `inner` is renamed `fields`, + and the fields `output_schema` and `filter` are removed (#1230). + ### New features - New method `$cast()` for `DataFrame` and `LazyFrame` (#1219). From 1ed06e3dea1caa701ef953b4379f9a19db99feb6 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sat, 21 Sep 2024 12:24:05 +0200 Subject: [PATCH 31/33] fix failing examples --- R/dataframe__frame.R | 2 +- R/expr__expr.R | 2 +- R/lazyframe__lazy.R | 2 +- man/Expr_slice.Rd | 2 +- man/pl_DataFrame.Rd | 2 +- man/pl_LazyFrame.Rd | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index 80d692ebb..6e02c2dad 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -245,7 +245,7 @@ DataFrame_width = method_as_active_binding(\() .pr$DataFrame$shape(self)[2L]) #' #' @examples #' pl$DataFrame( -#' a = list(c(1, 2, 3, 4, 5)), # NB if first column should be a list, wrap it in a Series +#' a = c(1, 2, 3, 4, 5), #' b = 1:5, #' c = letters[1:5], #' d = list(1:1, 1:2, 1:3, 1:4, 1:5) diff --git a/R/expr__expr.R b/R/expr__expr.R index 52afcc79e..2aaecda54 100644 --- a/R/expr__expr.R +++ b/R/expr__expr.R @@ -1181,7 +1181,7 @@ Expr_is_not_nan = use_extendr_wrapper #' ) #' #' # recycling -#' pl$DataFrame(mtcars)$with_columns(pl$col("mpg")$slice(0, 1)) +#' pl$DataFrame(mtcars)$with_columns(pl$col("mpg")$slice(0, 1)$first()) Expr_slice = function(offset, length = NULL) { .pr$Expr$slice(self, offset, wrap_e(length)) |> unwrap("in $slice():") diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index d9dc74c6e..35ab42575 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -173,7 +173,7 @@ LazyFrame_width = method_as_active_binding(\() length(self$schema)) #' #' @examples #' pl$LazyFrame( -#' a = list(c(1, 2, 3, 4, 5)), +#' a = c(1, 2, 3, 4, 5), #' b = 1:5, #' c = letters[1:5], #' d = list(1:1, 1:2, 1:3, 1:4, 1:5) diff --git a/man/Expr_slice.Rd b/man/Expr_slice.Rd index 28fa0a8a1..1f5bd0cfe 100644 --- a/man/Expr_slice.Rd +++ b/man/Expr_slice.Rd @@ -37,5 +37,5 @@ pl$DataFrame(list(a = 0:100))$select( ) # recycling -pl$DataFrame(mtcars)$with_columns(pl$col("mpg")$slice(0, 1)) +pl$DataFrame(mtcars)$with_columns(pl$col("mpg")$slice(0, 1)$first()) } diff --git a/man/pl_DataFrame.Rd b/man/pl_DataFrame.Rd index 93cb4e334..a644e38b3 100644 --- a/man/pl_DataFrame.Rd +++ b/man/pl_DataFrame.Rd @@ -32,7 +32,7 @@ Create a new polars DataFrame } \examples{ pl$DataFrame( - a = list(c(1, 2, 3, 4, 5)), # NB if first column should be a list, wrap it in a Series + a = c(1, 2, 3, 4, 5), b = 1:5, c = letters[1:5], d = list(1:1, 1:2, 1:3, 1:4, 1:5) diff --git a/man/pl_LazyFrame.Rd b/man/pl_LazyFrame.Rd index 11efc24fc..829f3c961 100644 --- a/man/pl_LazyFrame.Rd +++ b/man/pl_LazyFrame.Rd @@ -19,7 +19,7 @@ be used for making examples and quick demonstrations. } \examples{ pl$LazyFrame( - a = list(c(1, 2, 3, 4, 5)), + a = c(1, 2, 3, 4, 5), b = 1:5, c = letters[1:5], d = list(1:1, 1:2, 1:3, 1:4, 1:5) From a313e10039c24f7a1c01312d52a97cb0d8974960 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sat, 21 Sep 2024 13:30:19 +0200 Subject: [PATCH 32/33] remove memory_map test --- tests/testthat/test-ipc.R | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/tests/testthat/test-ipc.R b/tests/testthat/test-ipc.R index f2db59fd3..7273494a2 100644 --- a/tests/testthat/test-ipc.R +++ b/tests/testthat/test-ipc.R @@ -88,27 +88,6 @@ patrick::with_parameters_test_that("input/output DataFrame as raw vector", ) -test_that("memory_map", { - tmpf = tempfile(fileext = ".arrow") - on.exit(unlink(tmpf)) - pl$DataFrame(x = 1)$write_ipc(tmpf, compression = "uncompressed") - - df = pl$read_ipc(tmpf, memory_map = TRUE) - - expect_true( - df$equals(pl$DataFrame(x = 1)) - ) - - # On Windows, the file is still open so overwriting it is not allowed - skip_on_os("windows") - pl$DataFrame(y = 2)$write_ipc(tmpf, compression = "uncompressed") - - expect_true( - df$equals(pl$DataFrame(x = 2)) - ) -}) - - test_that("scanning from hive partition works", { skip_if_not_installed("arrow") skip_if_not_installed("withr") From 9ae97ec0fe16cc2239b1fc29b4a858a83cd3fc45 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sat, 21 Sep 2024 14:10:20 +0200 Subject: [PATCH 33/33] bump toolchain to try fixing compilation error on macos --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index c7b6d385f..47a1b2cf9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -119,4 +119,4 @@ Collate: Config/rextendr/version: 0.3.1 VignetteBuilder: knitr Config/polars/LibVersion: 0.42.2 -Config/polars/RustToolchainVersion: nightly-2024-08-26 +Config/polars/RustToolchainVersion: nightly-2024-09-19