diff --git a/Cargo.lock b/Cargo.lock index bc4d96d..26caa58 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -56,9 +56,9 @@ dependencies = [ [[package]] name = "allocator-api2" -version = "0.2.18" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" +checksum = "45862d1c77f2228b9e10bc609d5bc203d86ebc9b87ad8d5d5167a6c9abf739d9" [[package]] name = "android-tzdata" @@ -121,7 +121,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -132,7 +132,7 @@ checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -258,7 +258,7 @@ checksum = "bcfcc3cd946cb52f0bbfdbbcfa2f4e24f75ebb6c0e1002f7c25904fada18b9ec" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -284,9 +284,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.31" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2e7962b54006dcfcc61cb72735f4d89bb97061dd6a7ed882ec6b8ee53714c6f" +checksum = "fd9de9f2205d5ef3fd67e685b0df337994ddd4495e2a28d185500d0e1edfea47" dependencies = [ "jobserver", "libc", @@ -313,8 +313,10 @@ checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" dependencies = [ "android-tzdata", "iana-time-zone", + "js-sys", "num-traits", "serde", + "wasm-bindgen", "windows-targets", ] @@ -369,9 +371,9 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.1.1" +version = "7.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" +checksum = "24f165e7b643266ea80cb858aed492ad9280e3e05ce24d4a99d7d7b889b6a4d9" dependencies = [ "crossterm", "strum", @@ -483,14 +485,14 @@ checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" [[package]] name = "crossterm" -version = "0.27.0" +version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" +checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" dependencies = [ "bitflags", "crossterm_winapi", - "libc", "parking_lot", + "rustix", "winapi", ] @@ -529,6 +531,17 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "doc-comment" version = "0.3.3" @@ -568,7 +581,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -607,15 +620,15 @@ checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" [[package]] name = "fastrand" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" +checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" [[package]] name = "flate2" -version = "1.0.34" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" +checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" dependencies = [ "crc32fast", "miniz_oxide", @@ -732,7 +745,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -853,9 +866,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.0" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" +checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" dependencies = [ "allocator-api2", "equivalent", @@ -1040,13 +1053,142 @@ dependencies = [ ] [[package]] -name = "idna" -version = "0.5.0" +name = "icu_collections" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "idna" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", ] [[package]] @@ -1056,7 +1198,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" dependencies = [ "equivalent", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "serde", ] @@ -1112,10 +1254,21 @@ dependencies = [ ] [[package]] -name = "libc" -version = "0.2.161" +name = "jsonpath_lib_polars_vendor" +version = "0.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" +checksum = "f4bd9354947622f7471ff713eacaabdb683ccb13bba4edccaab9860abf480b7d" +dependencies = [ + "log", + "serde", + "serde_json", +] + +[[package]] +name = "libc" +version = "0.2.162" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398" [[package]] name = "libm" @@ -1129,6 +1282,12 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "litemap" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" + [[package]] name = "lock_api" version = "0.4.12" @@ -1164,6 +1323,16 @@ dependencies = [ "libc", ] +[[package]] +name = "matrixmultiply" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9380b911e3e96d10c1f415da0876389aaf1b56759054eeb0de7df940c456ba1a" +dependencies = [ + "autocfg", + "rawpointer", +] + [[package]] name = "md-5" version = "0.10.6" @@ -1229,6 +1398,7 @@ dependencies = [ name = "msyrs" version = "0.0.1" dependencies = [ + "chrono", "crossbeam", "futures", "log", @@ -1282,6 +1452,19 @@ dependencies = [ "tempfile", ] +[[package]] +name = "ndarray" +version = "0.15.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" +dependencies = [ + "matrixmultiply", + "num-complex", + "num-integer", + "num-traits", + "rawpointer", +] + [[package]] name = "now" version = "0.1.3" @@ -1300,6 +1483,24 @@ dependencies = [ "winapi", ] +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -1388,7 +1589,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -1549,7 +1750,7 @@ dependencies = [ "ethnum", "fast-float", "getrandom", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "itoa", "itoap", "lz4", @@ -1610,8 +1811,9 @@ dependencies = [ "comfy-table", "either", "hashbrown 0.14.5", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "indexmap", + "ndarray", "num-traits", "once_cell", "polars-arrow", @@ -1627,7 +1829,7 @@ dependencies = [ "serde", "serde_json", "strum_macros", - "thiserror", + "thiserror 1.0.69", "version_check", "xxhash-rust", ] @@ -1642,7 +1844,7 @@ dependencies = [ "polars-arrow-format", "regex", "simdutf8", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -1653,7 +1855,7 @@ checksum = "ea1b431ed816cba1120cff200f06b962748001bbb2e615ce53cfbbdf701cc136" dependencies = [ "ahash", "bitflags", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "num-traits", "once_cell", "polars-arrow", @@ -1681,11 +1883,12 @@ dependencies = [ "blake3", "bytes", "chrono", + "chrono-tz", "fast-float", "fs4", "futures", "glob", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "home", "itoa", "memchr", @@ -1724,8 +1927,9 @@ checksum = "d5c8c057ef04feaf34b6ce52096bdea3a766fa4725f50442078c8a4ee86397bf" dependencies = [ "ahash", "chrono", + "chrono-tz", "fallible-streaming-iterator", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "indexmap", "itoa", "num-traits", @@ -1751,6 +1955,7 @@ dependencies = [ "polars-core", "polars-expr", "polars-io", + "polars-json", "polars-mem-engine", "polars-ops", "polars-pipe", @@ -1798,21 +2003,25 @@ dependencies = [ "chrono", "chrono-tz", "either", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "hex", "indexmap", + "jsonpath_lib_polars_vendor", "memchr", "num-traits", "polars-arrow", "polars-compute", "polars-core", "polars-error", + "polars-json", "polars-schema", "polars-utils", + "rand", "rayon", "regex", "regex-syntax", "serde", + "serde_json", "strum_macros", "unicode-reverse", "version_check", @@ -1832,7 +2041,7 @@ dependencies = [ "ethnum", "flate2", "futures", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "lz4", "num-traits", "polars-arrow", @@ -1866,7 +2075,8 @@ dependencies = [ "crossbeam-channel", "crossbeam-queue", "enum_dispatch", - "hashbrown 0.15.0", + "futures", + "hashbrown 0.15.1", "num-traits", "polars-arrow", "polars-compute", @@ -1897,7 +2107,7 @@ dependencies = [ "ciborium", "either", "futures", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "memmap2", "num-traits", "once_cell", @@ -2027,7 +2237,7 @@ dependencies = [ "bytemuck", "bytes", "compact_str", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "indexmap", "libc", "memmap2", @@ -2069,9 +2279,9 @@ dependencies = [ [[package]] name = "psm" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa37f80ca58604976033fae9515a8a2989fc13797d953f7c04fb8fa36a11f205" +checksum = "200b9ff220857e53e184257720a14553b2f4aa02577d2ed9842d45d4b9654810" dependencies = [ "cc", ] @@ -2123,7 +2333,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -2136,7 +2346,7 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -2151,9 +2361,9 @@ dependencies = [ [[package]] name = "quinn" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c7c5fdde3cdae7203427dc4f0a68fe0ed09833edc525a03456b153b79828684" +checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef" dependencies = [ "bytes", "pin-project-lite", @@ -2162,33 +2372,36 @@ dependencies = [ "rustc-hash", "rustls", "socket2", - "thiserror", + "thiserror 2.0.3", "tokio", "tracing", ] [[package]] name = "quinn-proto" -version = "0.11.8" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fadfaed2cd7f389d0161bb73eeb07b7b78f8691047a6f3e73caaeae55310a4a6" +checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d" dependencies = [ "bytes", + "getrandom", "rand", "ring", "rustc-hash", "rustls", + "rustls-pki-types", "slab", - "thiserror", + "thiserror 2.0.3", "tinyvec", "tracing", + "web-time", ] [[package]] name = "quinn-udp" -version = "0.5.6" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e346e016eacfff12233c243718197ca12f148c84e1e84268a896699b41c71780" +checksum = "7d5a626c6807713b15cac82a6acaccd6043c9a5408c24baae07611fec3f243da" dependencies = [ "cfg_aliases", "libc", @@ -2256,6 +2469,12 @@ dependencies = [ "bitflags", ] +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + [[package]] name = "rayon" version = "1.10.0" @@ -2293,7 +2512,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -2322,7 +2541,7 @@ checksum = "bcc303e793d3734489387d205e9b186fac9c6cfacedd98cbb2e8a5943595f3e6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -2339,9 +2558,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", @@ -2434,9 +2653,9 @@ checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" [[package]] name = "rustix" -version = "0.38.38" +version = "0.38.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa260229e6538e52293eeb577aabd09945a09d6d9cc0fc550ed7529056c2e32a" +checksum = "99e4ea3e1cdc4b559b8e5650f9c8e5998e3e5c1343b4eaf034565f32318d63c0" dependencies = [ "bitflags", "errno", @@ -2486,6 +2705,9 @@ name = "rustls-pki-types" version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" +dependencies = [ + "web-time", +] [[package]] name = "rustls-webpki" @@ -2549,9 +2771,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.12.0" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea4a292869320c0272d7bc55a5a6aafaff59b4f63404a003887b679a2e05b4b6" +checksum = "fa39c7303dc58b5543c94d22c1766b0d31f2ee58306363ea622b10bbc075eaa2" dependencies = [ "core-foundation-sys", "libc", @@ -2574,7 +2796,7 @@ checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -2583,6 +2805,7 @@ version = "1.0.132" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" dependencies = [ + "indexmap", "itoa", "memchr", "ryu", @@ -2713,6 +2936,12 @@ dependencies = [ "log", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "stacker" version = "0.1.17" @@ -2769,7 +2998,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -2791,9 +3020,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.86" +version = "2.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89275301d38033efb81a6e60e3497e734dfcc62571f2854bf4b16690398824c" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" dependencies = [ "proc-macro2", "quote", @@ -2809,6 +3038,17 @@ dependencies = [ "futures-core", ] +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "sysinfo" version = "0.31.4" @@ -2857,9 +3097,9 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "tempfile" -version = "3.13.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" +checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" dependencies = [ "cfg-if", "fastrand", @@ -2870,22 +3110,42 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.66" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d171f59dbaa811dbbb1aee1e73db92ec2b122911a48e1390dfe327a821ddede" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa" +dependencies = [ + "thiserror-impl 2.0.3", ] [[package]] name = "thiserror-impl" -version = "1.0.66" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b08be0f17bd307950653ce45db00cd31200d82b624b36e181337d9c7d92765b5" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", ] [[package]] @@ -2897,6 +3157,16 @@ dependencies = [ "num_cpus", ] +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tinyvec" version = "1.8.0" @@ -2936,7 +3206,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -2998,7 +3268,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -3022,27 +3292,12 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" -[[package]] -name = "unicode-bidi" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ab17db44d7388991a428b2ee655ce0c212e862eff1768a455c58f9aad6e7893" - [[package]] name = "unicode-ident" version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" -[[package]] -name = "unicode-normalization" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" -dependencies = [ - "tinyvec", -] - [[package]] name = "unicode-reverse" version = "1.0.9" @@ -3060,9 +3315,9 @@ checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = "unicode-width" -version = "0.1.14" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" [[package]] name = "unindent" @@ -3078,15 +3333,27 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.2" +version = "2.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada" dependencies = [ "form_urlencoded", "idna", "percent-encoding", ] +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "uuid" version = "1.11.0" @@ -3167,7 +3434,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", "wasm-bindgen-shared", ] @@ -3201,7 +3468,7 @@ checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3235,6 +3502,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "winapi" version = "0.3.9" @@ -3305,7 +3582,7 @@ checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -3316,7 +3593,7 @@ checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", ] [[package]] @@ -3440,12 +3717,48 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + [[package]] name = "xxhash-rust" version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a5cbf750400958819fb6178eaa83bee5cd9c29a26a40cc241df8c70fdd46984" +[[package]] +name = "yoke" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.7.35" @@ -3464,7 +3777,28 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.86", + "syn 2.0.87", +] + +[[package]] +name = "zerofrom" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "synstructure", ] [[package]] @@ -3473,6 +3807,28 @@ version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "zstd" version = "0.13.2" diff --git a/Cargo.toml b/Cargo.toml index 272fe76..2e8ed48 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,8 @@ reqwest = { version = "0.12.9", features = ["blocking", "json"] } serde_json = "1.0" serde_urlencoded = "0.7" serde = { version = "1.0.215", features = ["derive"] } -polars = { version = "0.44.2", features = ["lazy"] } +# polars = { version = "0.44.2", features = ["lazy"] } +chrono = "0.4.38" rand = "0.8" threadpool = "1.8.1" log = "0.4.22" @@ -24,3 +25,63 @@ crossbeam = "0.8" rayon = "1.5" tokio = "1.41.1" futures = "0.3" +polars = { version = "^0.44.0", features = [ + "lazy", + "temporal", + "describe", + "json", + "parquet", + "dtype-datetime", + "strings", + "timezones", + "ndarray", + "concat_str", + + # "serde-lazy", + # "parquet", + # "decompress", + # "zip", + # "gzip", + "dynamic_group_by", + "rows", + "cross_join", + "semi_anti_join", + "row_hash", + "diagonal_concat", + "dataframe_arithmetic", + "partition_by", + "is_in", + "zip_with", + "round_series", + "repeat_by", + "is_first_distinct", + "is_last_distinct", + "checked_arithmetic", + "dot_product", + "concat_str", + "reinterpret", + "take_opt_iter", + "mode", + "cum_agg", + "rolling_window", + "interpolate", + "rank", + "moment", + "ewma", + "abs", + "product", + "diff", + "pct_change", + "unique_counts", + "log", + "list_to_struct", + "list_count", + "list_eval", + "cumulative_eval", + "arg_where", + "search_sorted", + "offset_by", + "trigonometry", + "sign", + "propagate_nans", +] } diff --git a/src/main.rs b/src/main.rs index 85017af..f6510e3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,8 @@ use msyrs::download::jpmaqsdownload::{JPMaQSDownload, JPMaQSDownloadGetIndicatorArgs}; -use msyrs::utils::dftools::is_quantamental_dataframe; -fn main() { +use msyrs::utils::dftools as msyrs_dftools; + +#[allow(dead_code)] +fn download_stuff() { println!("Authentication to DataQuery API"); let mut jpamqs_download = JPMaQSDownload::default(); @@ -25,28 +27,6 @@ fn main() { // let mut df_deets = Vec::new(); println!("Retrieving indicators for {} tickers", sel_tickers.len()); - // start = std::time::Instant::now(); - // let all_metrics: Vec = ["value", "grading", "eop_lag", "mop_lag"] - // .iter() - // .map(|x| x.to_string()) - // .collect(); - // let res = jpamqs_download.save_indicators_as_csv( - // JPMaQSDownloadGetIndicatorArgs { - // tickers: sel_tickers.clone(), - // metrics: all_metrics, - // ..Default::default() - // }, - // "./data/", - // ); - - // match res { - // Ok(_) => println!( - // "Saved indicators for {} tickers in {:?}", - // sel_tickers.len(), - // start.elapsed() - // ), - // Err(e) => println!("Error saving indicators: {:?}", e), - // } let res_df = jpamqs_download .get_indicators_qdf(JPMaQSDownloadGetIndicatorArgs { @@ -62,9 +42,16 @@ fn main() { start.elapsed() ); - if !is_quantamental_dataframe(&res_df) { + if !msyrs_dftools::is_quantamental_dataframe(&res_df) { println!("DataFrame is not a quantamental DataFrame"); } else { println!("DataFrame is a quantamental DataFrame"); } } + +fn main() { + // E:\Work\ruzt\msyrs\data\JPMaQSData\ALLIFCDSGDP\AUD_ALLIFCDSGDP_NSA.csv + let pth = "E:/Work/ruzt/msyrs/data/JPMaQSData/ALLIFCDSGDP/AUD_ALLIFCDSGDP_NSA.csv"; + let df = msyrs_dftools::load_quantamental_dataframe(pth).unwrap(); + println!("{:?}", df); +} diff --git a/src/utils/dftools.rs b/src/utils/dftools.rs index 83c68c9..c7ca3f5 100644 --- a/src/utils/dftools.rs +++ b/src/utils/dftools.rs @@ -1,4 +1,7 @@ +use crate::utils::misc::*; +use polars::datatypes::DataType; use polars::prelude::*; +use std::error::Error; /// The standard metrics provided by JPMaQS (`value`, `grading`, `eop_lag`, `mop_lag`). pub const DEFAULT_JPMAQS_METRICS: [&str; 4] = ["value", "grading", "eop_lag", "mop_lag"]; @@ -6,7 +9,6 @@ pub const DEFAULT_JPMAQS_METRICS: [&str; 4] = ["value", "grading", "eop_lag", "m /// The required columns for a Quantamental DataFrame. pub const QDF_INDEX_COLUMNS: [&str; 3] = ["real_date", "cid", "xcat"]; - /// Check if a DataFrame is a quantamental DataFrame. /// A standard Quantamental DataFrame has the following columns: /// - `real_date`: Date column as a date type @@ -15,49 +17,302 @@ pub const QDF_INDEX_COLUMNS: [&str; 3] = ["real_date", "cid", "xcat"]; /// /// Additionally, the DataFrame should have atleast 1 more column. /// Typically, this is one (or more) of the default JPMaQS metics. -pub fn is_quantamental_dataframe(df: &DataFrame) -> bool { - let columns = df - .get_column_names() - .iter() - .map(|s| s.as_str()) - .collect::>(); - let has_idx_columns = QDF_INDEX_COLUMNS.iter().all(|col| columns.contains(col)); - if !has_idx_columns { - return false; +pub fn check_quantamental_dataframe(df: &DataFrame) -> Result<(), Box> { + let expected_cols = ["real_date", "cid", "xcat"]; + let expected_dtype = [DataType::Date, DataType::String, DataType::String]; + for (col, dtype) in expected_cols.iter().zip(expected_dtype.iter()) { + let col = df.column(col); + if col.is_err() { + return Err(format!("Column {:?} not found", col).into()); + } + let col = col?; + if col.dtype() != dtype { + return Err(format!("Column {:?} has wrong dtype", col).into()); + } } - let real_date_col = df.select(["real_date"]); - match real_date_col { - Ok(_) => {} - Err(_) => return false, - }; - - let is_date_dtype = real_date_col - .unwrap() - .dtypes() - .iter() - .all(|dtype| dtype == &DataType::Date); - - if !is_date_dtype { - return false; - } - - let cid_col = df.select(["cid"]); - match cid_col { - Ok(_) => {} - Err(_) => return false, - }; - - let xcat_col = df.select(["xcat"]); - match xcat_col { - Ok(_) => {} - Err(_) => return false, - }; - - // has atleast 1 more column - let has_other_columns = columns.len() > 3; - if !has_other_columns { - return false; - } - - return true; + Ok(()) +} + +/// Check if a DataFrame is a quantamental DataFrame. +/// Returns true if the DataFrame is a quantamental DataFrame, false otherwise. +/// Uses the `check_quantamental_dataframe` function to check if the DataFrame is a quantamental DataFrame. +pub fn is_quantamental_dataframe(df: &DataFrame) -> bool { + check_quantamental_dataframe(df).is_ok() +} + +pub fn sort_qdf_columns(qdf: &mut DataFrame) -> Result<(), Box> { + let index_columns = ["real_date", "cid", "xcat"]; + let known_metrics = ["value", "grading", "eop_lag", "mop_lag"]; + + let df_columns = qdf + .get_column_names() + .into_iter() + .map(|s| s.clone().into_string()) + .collect::>(); + + let mut unknown_metrics: Vec = df_columns + .iter() + .filter(|&m| !known_metrics.contains(&m.as_str())) + .filter(|&m| !index_columns.contains(&m.as_str())) + .cloned() + .collect(); + + let mut new_columns: Vec = vec![]; + new_columns.extend(index_columns.iter().map(|s| s.to_string())); + for &colname in &known_metrics { + if df_columns.contains(&colname.into()) { + new_columns.push(colname.to_string()); + } + } + + unknown_metrics.sort(); + new_columns.extend(unknown_metrics); + *qdf = qdf + .select(new_columns.clone()) + .expect("Failed to select columns"); + + Ok(()) +} + +pub fn load_quantamental_dataframe( + file_path: &str, +) -> Result> { + // get the file base name + let file_name = std::path::Path::new(file_path) + .file_stem() + .unwrap() + .to_str() + .unwrap() + .to_string(); + + // if filename does not have _ then it is not a Quantamental DataFrame + if !file_name.contains('_') { + return Err("The file name must be in the format `cid_xcat.csv` (`ticker.csv`)".into()); + } + + let ticker = file_name.split('.').collect::>()[0]; + let (cid, xcat) = split_ticker(ticker)?; + + let mut df = CsvReadOptions::default() + .try_into_reader_with_file_path(Some(file_path.into())) + .unwrap() + .finish() + .unwrap(); + + let err = "The dataframe must have a `real_date` column and atleast 1 additional value column"; + if df.column("real_date").is_err() || df.width() < 2 { + return Err(err.into()); + } + let real_date_col = df + .column("real_date".into()) + .unwrap() + .cast(&DataType::Date)?; + + df.with_column(real_date_col)?; + df.with_column(Series::new("cid".into(), vec![cid; df.height()]))?; + df.with_column(Series::new("xcat".into(), vec![xcat; df.height()]))?; + + sort_qdf_columns(&mut df)?; + + Ok(df) +} + +/// Get intersecting cross-sections from a DataFrame. +pub fn get_intersecting_cids( + df: &DataFrame, + xcats: &Option>, +) -> Result, Box> { + let rel_xcats = xcats + .clone() + .unwrap_or_else(|| get_unique_xcats(df).unwrap()); + let found_tickers = get_unique_tickers(df)?; + let found_cids = get_unique_cids(df)?; + let keep_cids = get_intersecting_cids_str_func(&found_cids, &rel_xcats, &found_tickers); + Ok(keep_cids) +} + +/// Get intersecting tickers from a DataFrame. +#[allow(dead_code)] +fn get_tickers_interesecting_on_xcat( + df: &DataFrame, + xcats: &Option>, +) -> Result, Box> { + let rel_cids = get_intersecting_cids(df, xcats)?; + let rel_xcats = xcats + .clone() + .unwrap_or_else(|| get_unique_xcats(df).unwrap()); + let rel_cids_str: Vec<&str> = rel_cids.iter().map(AsRef::as_ref).collect(); + let rel_xcats_str: Vec<&str> = rel_xcats.iter().map(AsRef::as_ref).collect(); + Ok(create_interesecting_tickers(&rel_cids_str, &rel_xcats_str)) +} + +/// Get the unique tickers from a Quantamental DataFrame. +pub fn get_ticker_column_for_quantamental_dataframe( + df: &DataFrame, +) -> Result> { + check_quantamental_dataframe(df)?; + let mut ticker_df = + DataFrame::new(vec![df.column("cid")?.clone(), df.column("xcat")?.clone()])? + .lazy() + .select([concat_str([col("cid"), col("xcat")], "_", true)]) + .collect()?; + + Ok(ticker_df + .rename("cid", "ticker".into()) + .unwrap() + .column("ticker") + .unwrap() + .clone()) +} + +/// Get the unique tickers from a DataFrame. +/// Returns a Vec of unique tickers. +pub fn get_unique_tickers(df: &DataFrame) -> Result, Box> { + let ticker_col = get_ticker_column_for_quantamental_dataframe(df)?; + _get_unique_strs_from_str_column_object(&ticker_col) +} + +/// Get the unique cross-sectional identifiers (`cids`) from a DataFrame. +pub fn get_unique_cids(df: &DataFrame) -> Result, Box> { + check_quantamental_dataframe(df)?; + get_unique_from_str_column(df, "cid") +} + +/// Get the unique extended categories (`xcats`) from a DataFrame. +pub fn get_unique_xcats(df: &DataFrame) -> Result, Box> { + check_quantamental_dataframe(df)?; + get_unique_from_str_column(df, "xcat") +} + +/// Filter a dataframe based on the given parameters. +/// - `cids`: Filter by cross-sectional identifiers +/// - `xcats`: Filter by extended categories +/// - `metrics`: Filter by metrics +/// - `start`: Filter by start date +/// - `end`: Filter by end date +/// - `intersect`: If true, intersect only return `cids` that are present for all `xcats`. +/// Returns a new DataFrame with the filtered data, without modifying the original DataFrame. +/// If no filters are provided, the original DataFrame is returned. +pub fn reduce_dataframe( + df: &DataFrame, + cids: Option>, + xcats: Option>, + metrics: Option>, + start: Option<&str>, + end: Option<&str>, + intersect: bool, +) -> Result> { + check_quantamental_dataframe(df)?; + + let mut new_df: DataFrame = df.clone(); + + let ticker_col: Column = get_ticker_column_for_quantamental_dataframe(&new_df)?; + + // if cids is not provided, get all unique cids + let u_cids: Vec = get_unique_cids(&new_df)?; + let u_xcats: Vec = get_unique_xcats(&new_df)?; + let u_tickers: Vec = _get_unique_strs_from_str_column_object(&ticker_col)?; + + let specified_cids: Vec = cids.unwrap_or_else(|| u_cids.clone()); + let specified_xcats: Vec = xcats.unwrap_or_else(|| u_xcats.clone()); + let specified_metrics: Vec = metrics.unwrap_or_else(|| { + DEFAULT_JPMAQS_METRICS + .iter() + .map(|&s| s.to_string()) + .collect() + }); + let specified_tickers: Vec = create_interesecting_tickers( + &specified_cids + .iter() + .map(AsRef::as_ref) + .collect::>(), + &specified_xcats + .iter() + .map(AsRef::as_ref) + .collect::>(), + ); + + let keep_tickers: Vec = match intersect { + true => get_intersecting_cids_str_func(&u_cids, &u_xcats, &u_tickers), + false => specified_tickers.clone(), + }; + let kticks: Vec<&str> = keep_tickers + .iter() + .map(AsRef::as_ref) + .collect::>(); + + // Create a boolean mask to filter rows based on the tickers + let mut mask = vec![false; ticker_col.len()]; + for (i, ticker) in ticker_col.str()?.iter().enumerate() { + if let Some(t) = ticker { + if kticks.contains(&t) { + mask[i] = true; + } + } + } + let mask = BooleanChunked::from_slice("mask".into(), &mask); + new_df = new_df.filter(&mask)?; + + // Apply date filtering if `start` or `end` is provided + if let Some(start_date) = start { + new_df = new_df + .lazy() + .filter(col("real_date").gt_eq(start_date)) + .collect()?; + } + if let Some(end_date) = end { + new_df = new_df + .lazy() + .filter(col("real_date").lt_eq(end_date)) + .collect()?; + } + + // Filter based on metrics if provided + assert!(specified_metrics.len() > 0); + + // remove columns that are not in the specified metrics + let mut cols_to_remove = Vec::new(); + for col in new_df.get_column_names() { + if !specified_metrics.contains(&col.to_string()) { + cols_to_remove.push(col); + } + } + new_df = new_df.drop_many( + cols_to_remove + .iter() + .map(|s| s.to_string()) + .collect::>(), + ); + + Ok(new_df) +} + +/// Update a Quantamental DataFrame with new data. +/// - `df`: The original DataFrame +/// - `df_add`: The new DataFrame to add +/// +pub fn update_dataframe( + df: &DataFrame, + df_add: &DataFrame, + // xcat_replace: Option<&str>, +) -> Result> { + check_quantamental_dataframe(df)?; + check_quantamental_dataframe(df_add)?; + if df.is_empty() { + return Ok(df_add.clone()); + } else if df_add.is_empty() { + return Ok(df.clone()); + }; + + // vstack and drop duplicates keeping last + let mut new_df = df.vstack(df_add)?; + // help? + let idx_cols_vec = QDF_INDEX_COLUMNS + .iter() + .map(|s| s.to_string()) + .collect::>(); + + new_df = new_df.unique_stable(Some(&idx_cols_vec), UniqueKeepStrategy::Last, None)?; + + Ok(new_df) } diff --git a/src/utils/misc.rs b/src/utils/misc.rs new file mode 100644 index 0000000..d1491a3 --- /dev/null +++ b/src/utils/misc.rs @@ -0,0 +1,83 @@ +use polars::prelude::*; +use std::collections::HashMap; +use std::error::Error; + +pub fn split_ticker(ticker: &str) -> Result<(&str, &str), Box> { + // split by the first underscore character. return the first and second parts. + let parts: Vec<&str> = ticker.splitn(2, '_').collect(); + if parts.len() != 2 { + return Err("Invalid ticker format".into()); + } + Ok((parts[0], parts[1])) +} + +#[allow(dead_code)] +pub fn get_cid(ticker: &str) -> Result<&str, Box> { + split_ticker(ticker).map(|(cid, _)| cid) +} + +#[allow(dead_code)] +pub fn get_xcat(ticker: &str) -> Result<&str, Box> { + split_ticker(ticker).map(|(_, xcat)| xcat) +} + +pub fn create_ticker(cid: &str, xcat: &str) -> String { + format!("{}_{}", cid, xcat) +} + +pub fn create_interesecting_tickers(cids: &[&str], xcats: &[&str]) -> Vec { + let mut tickers = Vec::new(); + for cid in cids { + for xcat in xcats { + tickers.push(create_ticker(cid, xcat)); + } + } + tickers +} + +/// Backed function to get unique strings from a string column object. +pub fn _get_unique_strs_from_str_column_object( + col: &Column, +) -> Result, Box> { + let res = col + .unique()? + .sort(SortOptions::default())? + .drop_nulls() + .str()? + .iter() + .map(|x| x.unwrap_or_default().to_string()) + .collect(); + + Ok(res) +} + +/// Get the unique values from a string column in a DataFrame. +pub fn get_unique_from_str_column( + df: &DataFrame, + col: &str, +) -> Result, Box> { + _get_unique_strs_from_str_column_object(&df.column(col).unwrap()) +} +pub fn get_intersecting_cids_str_func( + cids: &Vec, + xcats: &Vec, + found_tickers: &Vec, +) -> Vec { + let mut keep_cids = cids.clone(); + // make a hashmap of cids to xcats + let mut cid_xcat_map = HashMap::new(); + for ticker in found_tickers { + let (cid, xcat) = split_ticker(&ticker).unwrap(); + cid_xcat_map.insert(cid.to_string(), xcat.to_string()); + } + + // filter out cids that are not present in all xcats + for (cid, xcats_for_cid) in cid_xcat_map.iter() { + // if the all xcats are not present, remove the cid + if !xcats.iter().all(|xcat| xcats_for_cid.contains(xcat)) { + keep_cids.retain(|x| x != cid); + } + } + + keep_cids +} diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 51ba6a5..7c7a0b2 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -1 +1,2 @@ -pub mod dftools; \ No newline at end of file +pub mod dftools; +pub mod misc; \ No newline at end of file