diff --git a/.github/workflows/pr_ci.yml b/.github/workflows/pr_ci.yml index cce01a5..b374502 100644 --- a/.github/workflows/pr_ci.yml +++ b/.github/workflows/pr_ci.yml @@ -27,7 +27,7 @@ jobs: run: cargo test working-directory: crates - name: run doc build - run: cargo doc + run: cargo doc --document-private-items working-directory: crates - name: check formatting run: cargo fmt -- --check diff --git a/README.md b/README.md index 7e7bcc1..48ad7e5 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,11 @@ ## What is this? An embedded, in-memory database that is generated by a schema compiler, and can be embedded in a rust application, with rust embedded inside of it. +This project is an experiment: while functional, it is not fully tested and its interface is unstable. + ## Project Structure ### [`./crates` → Contains the libraries developed for this project](./crates) +### [`./bench` → Benchmarks against other systems](./bench) ### [`./book` → The emDB book](./book) [→ hosted here](https://oliverkillane.github.io/emDB/) ### [`./papers` → Academic works developed alongside this project](./papers/) diff --git a/bench/.gitignore b/bench/.gitignore new file mode 100644 index 0000000..9f97022 --- /dev/null +++ b/bench/.gitignore @@ -0,0 +1 @@ +target/ \ No newline at end of file diff --git a/bench/Cargo.lock b/bench/Cargo.lock new file mode 100644 index 0000000..17f763f --- /dev/null +++ b/bench/Cargo.lock @@ -0,0 +1,1961 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "ahash" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if 1.0.0", + "const-random", + "getrandom", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstyle" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" + +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + +[[package]] +name = "arrow" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219d05930b81663fd3b32e3bde8ce5bff3c4d23052a99f11a8fa50a3b47b2658" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0272150200c07a86a390be651abdd320a2d12e84535f0837566ca87ecd8f95e0" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "num", +] + +[[package]] +name = "arrow-array" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8010572cf8c745e242d1b632bd97bd6d4f40fefed5ed1290a8f433abaa686fea" +dependencies = [ + "ahash 0.8.11", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "hashbrown 0.14.5", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d0a2432f0cba5692bf4cb757469c66791394bac9ec7ce63c1afe74744c37b27" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9abc10cd7995e83505cc290df9384d6e5412b207b79ce6bdff89a10505ed2cba" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-data" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2742ac1f6650696ab08c88f6dd3f0eb68ce10f8c253958a18c943a68cd04aec5" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-ord" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3e6b61e3dc468f503181dccc2fc705bdcc5f2f146755fa5b56d0a6c5943f412" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "half", + "num", +] + +[[package]] +name = "arrow-row" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "848ee52bb92eb459b811fb471175ea3afcf620157674c8794f539838920f9228" +dependencies = [ + "ahash 0.8.11", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", + "hashbrown 0.14.5", +] + +[[package]] +name = "arrow-schema" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d9483aaabe910c4781153ae1b6ae0393f72d9ef757d38d09d450070cf2e528" +dependencies = [ + "bitflags 2.5.0", +] + +[[package]] +name = "arrow-select" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "849524fa70e0e3c5ab58394c770cb8f514d0122d20de08475f7b472ed8075830" +dependencies = [ + "ahash 0.8.11", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "arrow-string" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9373cb5a021aee58863498c37eb484998ef13377f69989c6c5ccfbd258236cdb" +dependencies = [ + "arrow-array", + "arrow-buffer", + 
"arrow-data", + "arrow-schema", + "arrow-select", + "memchr", + "num", + "regex", + "regex-syntax", +] + +[[package]] +name = "assume" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d6f9ca11400f14ef046700eb6401c706c587871303453a5e7586efb82340c3d" + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bimap" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "230c5f1ca6a325a32553f8640d31ac9b49f2411e901e427570154868b46da4f7" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" + +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] +name = "borsh" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6362ed55def622cddc70a4746a68554d7b687713770de539e59a739b249f8ed" +dependencies = [ + "borsh-derive", + "cfg_aliases", +] + +[[package]] +name = "borsh-derive" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3ef8005764f53cd4dca619f5bf64cafd4664dada50ece25e4d81de54c80cc0b" +dependencies = [ + "once_cell", + "proc-macro-crate", + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 2.0.66", + "syn_derive", +] + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "bytecheck" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" +dependencies = [ + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 1.0.109", +] + +[[package]] +name = "bytes" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + +[[package]] +name = "cc" +version = "1.0.99" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96c51067fd44124faa7f870b4b1c969379ad32b2ba805aa959430ceaa384f695" +dependencies = [ + "jobserver", + "libc", + "once_cell", +] + +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "num-traits", + "windows-targets 0.52.5", +] + +[[package]] +name = "clap" +version = "4.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5db83dced34638ad474f39f250d7fea9598bdd239eaced1bdf45d597da0f433f" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7e204572485eb3fbf28f871612191521df159bc3e15a9f5064c66dba3a8c05f" +dependencies = [ + "anstyle", + "clap_lex", + "terminal_size", +] + +[[package]] +name = "clap_lex" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" + +[[package]] +name = "combi" +version = "0.2.0" +dependencies = [ + "derive-where", + "proc-macro-error", + "proc-macro2 1.0.85", + "quote 1.0.36", + "rustc_version", + "syn 2.0.66", +] + +[[package]] +name = "comfy-table" +version = "7.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" +dependencies = [ + "strum 0.26.2", + "strum_macros 0.26.4", + "unicode-width", +] + +[[package]] +name = "condtype" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf0a07a401f374238ab8e2f11a104d2851bf9ce711ec69804834de8af45c7af" + +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom", + "once_cell", + "tiny-keccak", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "derivative" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c6d883546668a3e2011b6a716a7330b82eabb0151b138217f632c8243e17135" +dependencies = [ + "proc-macro2 0.4.30", + "quote 0.6.13", + "syn 0.15.44", +] + +[[package]] +name = "derive-where" +version = "1.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62d671cc41a825ebabc75757b62d3d168c577f9149b2d49ece1dad1f72119d25" +dependencies = [ + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 2.0.66", +] + +[[package]] +name = "divan" +version = "0.1.14" +source = "git+https://github.com/OliverKillane/divan.git?branch=enh/file-output#c54ac74a8b85e3862a4bcbaea9e08d9e5095caa5" +dependencies = [ + "cfg-if 1.0.0", + "clap", + "condtype", + "divan-macros", + "libc", + "regex-lite", + "serde_json", +] + +[[package]] +name = "divan-macros" +version = "0.1.14" +source = "git+https://github.com/OliverKillane/divan.git?branch=enh/file-output#c54ac74a8b85e3862a4bcbaea9e08d9e5095caa5" +dependencies = [ + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 2.0.66", +] + +[[package]] +name = "dot" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a74b6c4d4a1cff5f454164363c16b72fa12463ca6b31f4b5f2035a65fa3d5906" + +[[package]] +name = "duckdb" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "424ede399a5d1084e65c0888fda71e407e5809400c92ff2cf510bfd1697b9c76" +dependencies = [ + "arrow", + "cast", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink 0.8.4", + "libduckdb-sys", + "memchr", + "rust_decimal", + "smallvec", + "strum 0.25.0", +] + +[[package]] +name = "either" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" + +[[package]] +name = "embedded_db_comparisons" +version = "0.1.0" +dependencies = [ + "divan", + "duckdb", + "emdb", + "rand", + "rusqlite", +] + +[[package]] +name = "emdb" +version = "0.1.0" +dependencies = [ + "emdb_core", + "minister", + "pulpit", +] + +[[package]] +name = "emdb_core" +version = "0.1.0" +dependencies = [ + "combi", + "dot", + "enumtrait", + "itertools", + "prettyplease", + "proc-macro-error", + "proc-macro2 1.0.85", + "pulpit", + "quote 1.0.36", + "quote_debug", + "rand", + "syn 2.0.66", + "typed-arena", + "typed-generational-arena", +] + +[[package]] +name = "enumtrait" +version = "0.1.0" +dependencies = [ + "combi", + "proc-macro-error", + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 2.0.66", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + +[[package]] +name = "filetime" +version = "0.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "redox_syscall", + "windows-sys 0.52.0", +] + +[[package]] +name = "flate2" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "wasi", +] + +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if 1.0.0", + "crunchy", + "num-traits", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash 0.7.8", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash 0.8.11", + "allocator-api2", +] + +[[package]] +name = "hashlink" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7" +dependencies = [ + "hashbrown 0.14.5", +] + +[[package]] +name = "hashlink" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" +dependencies = [ + "hashbrown 0.14.5", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "iana-time-zone" +version = "0.1.60" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "indexmap" +version = "2.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +dependencies = [ + "equivalent", + "hashbrown 0.14.5", +] + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "jobserver" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +dependencies = [ + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lexical-core" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "libduckdb-sys" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b3f02cecc430f61561bde538d42af4be2d9d5a8b058f74883e460bc1055461" +dependencies = [ + "autocfg", + "cc", + "flate2", + "pkg-config", + "serde", + "serde_json", + "tar", + "vcpkg", +] + +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + +[[package]] +name = "libsqlite3-sys" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c10584274047cb335c23d3e61bcef8e323adae7c5c8c760540f73610177fc3f" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = "memchr" +version = "2.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" + +[[package]] +name = "minister" +version = "0.1.0" +dependencies = [ + "rayon", +] + +[[package]] +name = "miniz_oxide" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" +dependencies = [ + "adler", +] + +[[package]] +name = "nonzero_ext" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db1b4163932b207be6e3a06412aed4d84cca40dc087419f231b3a38cba2ca8e9" + +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c165a9ab64cf766f73521c0dd2cfdff64f488b8f0b3e621face3462d3db536d7" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] 
+name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "pkg-config" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "prettyplease" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" +dependencies = [ + "proc-macro2 1.0.85", + "syn 2.0.66", +] + +[[package]] +name = "proc-macro-crate" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d37c51ca738a55da99dc0c4a34860fd675453b8b36209178c2249bb13651284" +dependencies = [ + "toml_edit", +] + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 1.0.109", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2 1.0.85", + "quote 1.0.36", + "version_check", +] + +[[package]] +name = "proc-macro2" +version = "0.4.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "proc-macro2" +version = "1.0.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 1.0.109", +] + +[[package]] +name = "pulpit" +version = "0.1.0" +dependencies = [ + "assume", + "combi", + "enumtrait", + "proc-macro-error", + "proc-macro2 1.0.85", + "pulpit_gen", + "pulpit_macro", + "quote 1.0.36", + "syn 2.0.66", + "thunderdome", + "typed-generational-arena", +] + +[[package]] +name = "pulpit_gen" +version = "0.1.0" +dependencies = [ + "bimap", + "combi", + "enumtrait", + "proc-macro-error", + "proc-macro2 1.0.85", + "quote 1.0.36", + "quote_debug", + "syn 2.0.66", +] + +[[package]] +name = "pulpit_macro" +version = "0.1.0" +dependencies = [ + "proc-macro-error", + 
"proc-macro2 1.0.85", + "pulpit_gen", + "quote 1.0.36", + "syn 2.0.66", +] + +[[package]] +name = "quote" +version = "0.6.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" +dependencies = [ + "proc-macro2 0.4.30", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2 1.0.85", +] + +[[package]] +name = "quote_debug" +version = "0.1.0" +dependencies = [ + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 2.0.66", +] + +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "regex" +version = "1.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-lite" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + +[[package]] +name = "rend" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" +dependencies 
= [ + "bytecheck", +] + +[[package]] +name = "rkyv" +version = "0.7.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cba464629b3394fc4dbc6f940ff8f5b4ff5c7aef40f29166fd4ad12acbc99c0" +dependencies = [ + "bitvec", + "bytecheck", + "bytes", + "hashbrown 0.12.3", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7dddfff8de25e6f62b9d64e6e432bf1c6736c57d20323e15ee10435fbda7c65" +dependencies = [ + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 1.0.109", +] + +[[package]] +name = "rusqlite" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b838eba278d213a8beaf485bd313fd580ca4505a00d5871caeb1457c55322cae" +dependencies = [ + "bitflags 2.5.0", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink 0.9.1", + "libsqlite3-sys", + "smallvec", +] + +[[package]] +name = "rust_decimal" +version = "1.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1790d1c4c0ca81211399e0e0af16333276f375209e71a37b67698a373db5b47a" +dependencies = [ + "arrayvec", + "borsh", + "bytes", + "num-traits", + "rand", + "rkyv", + "serde", + "serde_json", +] + +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "0.38.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +dependencies = [ + "bitflags 2.5.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustversion" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + +[[package]] +name = "semver" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" + +[[package]] +name = "serde" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +dependencies = [ + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 2.0.66", +] + +[[package]] +name = "serde_json" +version = "1.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "simdutf8" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strum" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" +dependencies = [ + "strum_macros 0.25.3", +] + +[[package]] +name = "strum" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29" + +[[package]] +name = "strum_macros" +version = "0.25.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" +dependencies = [ + "heck 0.4.1", + "proc-macro2 1.0.85", + "quote 1.0.36", + "rustversion", + "syn 2.0.66", +] + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck 0.5.0", + "proc-macro2 1.0.85", + "quote 1.0.36", + "rustversion", + "syn 2.0.66", +] + +[[package]] +name = "syn" +version = "0.15.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5" +dependencies = [ + "proc-macro2 0.4.30", + "quote 0.6.13", + "unicode-xid", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2 1.0.85", + "quote 1.0.36", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +dependencies = [ + "proc-macro2 1.0.85", + "quote 1.0.36", + "unicode-ident", +] + +[[package]] +name = "syn_derive" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1329189c02ff984e9736652b1631330da25eaa6bc639089ed4915d25446cbe7b" +dependencies = [ + "proc-macro-error", + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 2.0.66", +] + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + +[[package]] +name = "tar" +version = "0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb797dad5fb5b76fcf519e702f4a589483b5ef06567f160c392832c1f5e44909" +dependencies = [ + "filetime", + "libc", + "xattr", +] + +[[package]] +name = "terminal_size" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7" +dependencies = [ + "rustix", + "windows-sys 0.48.0", +] + +[[package]] +name = "thunderdome" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"92e170f93360bf9ae6fe3c31116bbf27adb1d054cedd6bc3d7857e34f2d98d0b" + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "toml_datetime" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf" + +[[package]] +name = "toml_edit" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow", +] + +[[package]] +name = "typed-arena" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" + +[[package]] +name = "typed-generational-arena" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bf29f9e3fa0ef5fa0fccf55a1c4347c032a196324e152611d5af93641ed64c0" +dependencies = [ + "cfg-if 0.1.10", + "derivative", + "nonzero_ext", + "num-traits", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-width" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" + +[[package]] +name = "unicode-xid" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" + +[[package]] +name = "uuid" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +dependencies = [ + "cfg-if 1.0.0", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 2.0.66", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +dependencies = [ + "quote 1.0.36", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +dependencies = [ + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 2.0.66", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets 0.52.5", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.5", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm 0.52.5", + "windows_aarch64_msvc 0.52.5", + "windows_i686_gnu 0.52.5", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.5", + "windows_x86_64_gnu 0.52.5", + "windows_x86_64_gnullvm 0.52.5", + "windows_x86_64_msvc 0.52.5", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" + +[[package]] +name = "winnow" +version = "0.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +dependencies = [ + "memchr", +] + +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + +[[package]] +name = "xattr" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f" +dependencies = [ + "libc", + "linux-raw-sys", + "rustix", +] + +[[package]] +name = "zerocopy" +version = "0.7.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" +dependencies = [ + 
"proc-macro2 1.0.85", + "quote 1.0.36", + "syn 2.0.66", +] diff --git a/bench/Cargo.toml b/bench/Cargo.toml new file mode 100644 index 0000000..27227f2 --- /dev/null +++ b/bench/Cargo.toml @@ -0,0 +1,8 @@ +[workspace] +resolver = "2" +members = ["embedded_db_comparisons"] + +[workspace.package] +homepage = "https://github.com/OliverKillane/emDB" +repository = "https://github.com/OliverKillane/emDB" +license-file = "LICENSE" diff --git a/bench/README.md b/bench/README.md new file mode 100644 index 0000000..223d92b --- /dev/null +++ b/bench/README.md @@ -0,0 +1,4 @@ +## Benchmarks for the emDB project +This workspace is for benchmarks comparing emDb with other projects. +- Ensures one can work on [the crates](./../crates) without having to rebuild large crates (duckdb, rusqlite). +- Should contain any benchmarks used for papers. diff --git a/bench/embedded_db_comparisons/.gitignore b/bench/embedded_db_comparisons/.gitignore new file mode 100644 index 0000000..9f97022 --- /dev/null +++ b/bench/embedded_db_comparisons/.gitignore @@ -0,0 +1 @@ +target/ \ No newline at end of file diff --git a/bench/embedded_db_comparisons/Cargo.lock b/bench/embedded_db_comparisons/Cargo.lock new file mode 100644 index 0000000..5a0575c --- /dev/null +++ b/bench/embedded_db_comparisons/Cargo.lock @@ -0,0 +1,1961 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "ahash" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if 1.0.0", + "const-random", + "getrandom", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstyle" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" + +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + +[[package]] +name = "arrow" +version = "51.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "219d05930b81663fd3b32e3bde8ce5bff3c4d23052a99f11a8fa50a3b47b2658" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0272150200c07a86a390be651abdd320a2d12e84535f0837566ca87ecd8f95e0" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "num", +] + +[[package]] +name = "arrow-array" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8010572cf8c745e242d1b632bd97bd6d4f40fefed5ed1290a8f433abaa686fea" +dependencies = [ + "ahash 0.8.11", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "hashbrown 0.14.5", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d0a2432f0cba5692bf4cb757469c66791394bac9ec7ce63c1afe74744c37b27" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9abc10cd7995e83505cc290df9384d6e5412b207b79ce6bdff89a10505ed2cba" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-data" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2742ac1f6650696ab08c88f6dd3f0eb68ce10f8c253958a18c943a68cd04aec5" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-ord" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3e6b61e3dc468f503181dccc2fc705bdcc5f2f146755fa5b56d0a6c5943f412" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "half", + "num", +] + +[[package]] +name = "arrow-row" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "848ee52bb92eb459b811fb471175ea3afcf620157674c8794f539838920f9228" +dependencies = [ + "ahash 0.8.11", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", + "hashbrown 0.14.5", +] + +[[package]] +name = "arrow-schema" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d9483aaabe910c4781153ae1b6ae0393f72d9ef757d38d09d450070cf2e528" +dependencies = [ + "bitflags 2.5.0", +] + +[[package]] +name = "arrow-select" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "849524fa70e0e3c5ab58394c770cb8f514d0122d20de08475f7b472ed8075830" +dependencies = [ + "ahash 0.8.11", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "arrow-string" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9373cb5a021aee58863498c37eb484998ef13377f69989c6c5ccfbd258236cdb" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "memchr", + "num", + "regex", + "regex-syntax", +] + +[[package]] +name = "assume" 
+version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d6f9ca11400f14ef046700eb6401c706c587871303453a5e7586efb82340c3d" + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bimap" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "230c5f1ca6a325a32553f8640d31ac9b49f2411e901e427570154868b46da4f7" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" + +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] +name = "borsh" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6362ed55def622cddc70a4746a68554d7b687713770de539e59a739b249f8ed" +dependencies = [ + "borsh-derive", + "cfg_aliases", +] + +[[package]] +name = "borsh-derive" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3ef8005764f53cd4dca619f5bf64cafd4664dada50ece25e4d81de54c80cc0b" +dependencies = [ + "once_cell", + "proc-macro-crate", + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 2.0.66", + "syn_derive", +] + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "bytecheck" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" +dependencies = [ + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 1.0.109", +] + +[[package]] +name = "bytes" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + +[[package]] +name = "cc" +version = "1.0.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"96c51067fd44124faa7f870b4b1c969379ad32b2ba805aa959430ceaa384f695" +dependencies = [ + "jobserver", + "libc", + "once_cell", +] + +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "num-traits", + "windows-targets 0.52.5", +] + +[[package]] +name = "clap" +version = "4.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5db83dced34638ad474f39f250d7fea9598bdd239eaced1bdf45d597da0f433f" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7e204572485eb3fbf28f871612191521df159bc3e15a9f5064c66dba3a8c05f" +dependencies = [ + "anstyle", + "clap_lex", + "terminal_size", +] + +[[package]] +name = "clap_lex" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" + +[[package]] +name = "combi" +version = "0.2.0" +dependencies = [ + "derive-where", + "proc-macro-error", + "proc-macro2 1.0.85", + "quote 1.0.36", + "rustc_version", + "syn 2.0.66", +] + +[[package]] +name = "comfy-table" +version = "7.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" +dependencies = [ + "strum 0.26.2", + "strum_macros 0.26.4", + "unicode-width", +] + +[[package]] +name = "condtype" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf0a07a401f374238ab8e2f11a104d2851bf9ce711ec69804834de8af45c7af" + +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom", + "once_cell", + "tiny-keccak", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "derivative" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c6d883546668a3e2011b6a716a7330b82eabb0151b138217f632c8243e17135" +dependencies = [ + "proc-macro2 0.4.30", + "quote 0.6.13", + "syn 0.15.44", +] + +[[package]] +name = "derive-where" +version = "1.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62d671cc41a825ebabc75757b62d3d168c577f9149b2d49ece1dad1f72119d25" +dependencies = [ + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 2.0.66", +] + +[[package]] +name = "divan" +version = "0.1.14" +source = "git+https://github.com/OliverKillane/divan.git?branch=enh/file-output#c54ac74a8b85e3862a4bcbaea9e08d9e5095caa5" +dependencies = [ + "cfg-if 1.0.0", + "clap", + "condtype", + "divan-macros", + "libc", + "regex-lite", + "serde_json", +] + +[[package]] +name = "divan-macros" +version = "0.1.14" +source = "git+https://github.com/OliverKillane/divan.git?branch=enh/file-output#c54ac74a8b85e3862a4bcbaea9e08d9e5095caa5" +dependencies = [ + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 2.0.66", +] + +[[package]] +name = "dot" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a74b6c4d4a1cff5f454164363c16b72fa12463ca6b31f4b5f2035a65fa3d5906" + +[[package]] +name = "duckdb" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "424ede399a5d1084e65c0888fda71e407e5809400c92ff2cf510bfd1697b9c76" +dependencies = [ + "arrow", + "cast", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink 0.8.4", + "libduckdb-sys", + "memchr", + "rust_decimal", + "smallvec", + "strum 0.25.0", +] + +[[package]] +name = "either" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" + +[[package]] +name = "emdb" +version = "0.1.0" +dependencies = [ + "emdb_core", + "minister", + "pulpit", +] + +[[package]] +name = "emdb_core" +version = "0.1.0" +dependencies = [ + "combi", + "dot", + "enumtrait", + "itertools", + "prettyplease", + "proc-macro-error", + "proc-macro2 1.0.85", + "pulpit", + "quote 1.0.36", + "quote_debug", + "rand", + "syn 2.0.66", + "typed-arena", + "typed-generational-arena", +] + +[[package]] +name = "enumtrait" +version = "0.1.0" +dependencies = [ + "combi", + "proc-macro-error", + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 2.0.66", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.3.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "experiments2" +version = "0.1.0" +dependencies = [ + "divan", + "duckdb", + "emdb", + "rand", + "rusqlite", +] + +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + +[[package]] +name = "filetime" +version = "0.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "redox_syscall", + "windows-sys 0.52.0", +] + +[[package]] +name = "flate2" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "wasi", +] + +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if 1.0.0", + "crunchy", + "num-traits", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash 0.7.8", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash 0.8.11", + "allocator-api2", +] + +[[package]] +name = "hashlink" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7" +dependencies = [ + "hashbrown 0.14.5", +] + +[[package]] +name = "hashlink" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" +dependencies = [ + "hashbrown 0.14.5", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "iana-time-zone" +version = "0.1.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "indexmap" +version = "2.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +dependencies = [ + "equivalent", + "hashbrown 0.14.5", +] + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "jobserver" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +dependencies = [ + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lexical-core" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] 
+name = "libduckdb-sys" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b3f02cecc430f61561bde538d42af4be2d9d5a8b058f74883e460bc1055461" +dependencies = [ + "autocfg", + "cc", + "flate2", + "pkg-config", + "serde", + "serde_json", + "tar", + "vcpkg", +] + +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + +[[package]] +name = "libsqlite3-sys" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c10584274047cb335c23d3e61bcef8e323adae7c5c8c760540f73610177fc3f" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = "memchr" +version = "2.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" + +[[package]] +name = "minister" +version = "0.1.0" +dependencies = [ + "rayon", +] + +[[package]] +name = "miniz_oxide" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" +dependencies = [ + "adler", +] + +[[package]] +name = "nonzero_ext" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db1b4163932b207be6e3a06412aed4d84cca40dc087419f231b3a38cba2ca8e9" + +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c165a9ab64cf766f73521c0dd2cfdff64f488b8f0b3e621face3462d3db536d7" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "pkg-config" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "prettyplease" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" +dependencies = [ + "proc-macro2 1.0.85", + "syn 2.0.66", +] + +[[package]] +name = "proc-macro-crate" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d37c51ca738a55da99dc0c4a34860fd675453b8b36209178c2249bb13651284" +dependencies = [ + "toml_edit", +] + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 1.0.109", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2 1.0.85", + "quote 1.0.36", + "version_check", +] + +[[package]] +name = "proc-macro2" +version = "0.4.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "proc-macro2" +version = "1.0.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 1.0.109", +] + +[[package]] +name = "pulpit" +version = "0.1.0" +dependencies = [ + "assume", + "combi", + "enumtrait", + "proc-macro-error", + "proc-macro2 1.0.85", + "pulpit_gen", + "pulpit_macro", + "quote 1.0.36", + "syn 2.0.66", + "thunderdome", + "typed-generational-arena", +] + +[[package]] +name = "pulpit_gen" +version = "0.1.0" +dependencies = [ + "bimap", + "combi", + "enumtrait", + "proc-macro-error", + "proc-macro2 1.0.85", + "quote 1.0.36", + "quote_debug", + "syn 2.0.66", +] + +[[package]] +name = "pulpit_macro" +version = "0.1.0" +dependencies = [ + "proc-macro-error", + "proc-macro2 1.0.85", + "pulpit_gen", + "quote 1.0.36", 
+ "syn 2.0.66", +] + +[[package]] +name = "quote" +version = "0.6.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" +dependencies = [ + "proc-macro2 0.4.30", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2 1.0.85", +] + +[[package]] +name = "quote_debug" +version = "0.1.0" +dependencies = [ + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 2.0.66", +] + +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "regex" +version = "1.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-lite" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + +[[package]] +name = "rend" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" +dependencies = [ + "bytecheck", +] + +[[package]] +name = "rkyv" 
+version = "0.7.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cba464629b3394fc4dbc6f940ff8f5b4ff5c7aef40f29166fd4ad12acbc99c0" +dependencies = [ + "bitvec", + "bytecheck", + "bytes", + "hashbrown 0.12.3", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7dddfff8de25e6f62b9d64e6e432bf1c6736c57d20323e15ee10435fbda7c65" +dependencies = [ + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 1.0.109", +] + +[[package]] +name = "rusqlite" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b838eba278d213a8beaf485bd313fd580ca4505a00d5871caeb1457c55322cae" +dependencies = [ + "bitflags 2.5.0", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink 0.9.1", + "libsqlite3-sys", + "smallvec", +] + +[[package]] +name = "rust_decimal" +version = "1.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1790d1c4c0ca81211399e0e0af16333276f375209e71a37b67698a373db5b47a" +dependencies = [ + "arrayvec", + "borsh", + "bytes", + "num-traits", + "rand", + "rkyv", + "serde", + "serde_json", +] + +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "0.38.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +dependencies = [ + "bitflags 2.5.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustversion" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + +[[package]] +name = "semver" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" + +[[package]] +name = "serde" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +dependencies = [ + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 2.0.66", +] + +[[package]] +name = "serde_json" +version = "1.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "simdutf8" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strum" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" +dependencies = [ + "strum_macros 0.25.3", +] + +[[package]] +name = "strum" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29" + +[[package]] +name = "strum_macros" +version = "0.25.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" +dependencies = [ + "heck 0.4.1", + "proc-macro2 1.0.85", + "quote 1.0.36", + "rustversion", + "syn 2.0.66", +] + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck 0.5.0", + "proc-macro2 1.0.85", + "quote 1.0.36", + "rustversion", + "syn 2.0.66", +] + +[[package]] +name = "syn" +version = "0.15.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5" +dependencies = [ + "proc-macro2 0.4.30", + "quote 0.6.13", + "unicode-xid", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2 1.0.85", + "quote 1.0.36", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +dependencies = [ + "proc-macro2 1.0.85", + "quote 1.0.36", + "unicode-ident", +] + +[[package]] +name = "syn_derive" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1329189c02ff984e9736652b1631330da25eaa6bc639089ed4915d25446cbe7b" +dependencies = [ + "proc-macro-error", + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 2.0.66", +] + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + +[[package]] +name = "tar" +version = "0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb797dad5fb5b76fcf519e702f4a589483b5ef06567f160c392832c1f5e44909" +dependencies = [ + "filetime", + "libc", + "xattr", +] + +[[package]] +name = "terminal_size" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7" +dependencies = [ + "rustix", + "windows-sys 0.48.0", +] + +[[package]] +name = "thunderdome" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"92e170f93360bf9ae6fe3c31116bbf27adb1d054cedd6bc3d7857e34f2d98d0b" + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "toml_datetime" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf" + +[[package]] +name = "toml_edit" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow", +] + +[[package]] +name = "typed-arena" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" + +[[package]] +name = "typed-generational-arena" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bf29f9e3fa0ef5fa0fccf55a1c4347c032a196324e152611d5af93641ed64c0" +dependencies = [ + "cfg-if 0.1.10", + "derivative", + "nonzero_ext", + "num-traits", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-width" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" + +[[package]] +name = "unicode-xid" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" + +[[package]] +name = "uuid" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +dependencies = [ + "cfg-if 1.0.0", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 2.0.66", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +dependencies = [ + "quote 1.0.36", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +dependencies = [ + "proc-macro2 1.0.85", + "quote 1.0.36", + "syn 2.0.66", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets 0.52.5", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.5", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm 0.52.5", + "windows_aarch64_msvc 0.52.5", + "windows_i686_gnu 0.52.5", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.5", + "windows_x86_64_gnu 0.52.5", + "windows_x86_64_gnullvm 0.52.5", + "windows_x86_64_msvc 0.52.5", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" + +[[package]] +name = "winnow" +version = "0.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +dependencies = [ + "memchr", +] + +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + +[[package]] +name = "xattr" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f" +dependencies = [ + "libc", + "linux-raw-sys", + "rustix", +] + +[[package]] +name = "zerocopy" +version = "0.7.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" +dependencies = [ + 
"proc-macro2 1.0.85", + "quote 1.0.36", + "syn 2.0.66", +] diff --git a/bench/embedded_db_comparisons/Cargo.toml b/bench/embedded_db_comparisons/Cargo.toml new file mode 100644 index 0000000..deedd02 --- /dev/null +++ b/bench/embedded_db_comparisons/Cargo.toml @@ -0,0 +1,34 @@ +[package] +name = "embedded_db_comparisons" +version = "0.1.0" +edition = "2021" + +readme = "README.md" +description = "A collection of experiments to demonstrate emDB performance" +keywords = ["experiment"] +categories = ["experiment"] + +repository.workspace = true +homepage.workspace = true +license-file.workspace = true + +[dependencies] +duckdb = { version = "0.10.2", features = ["bundled"] } +rusqlite = { version = "0.31.0", features = ["bundled"] } +emdb = { path = "../../crates/emdb" } +rand = "0.8" + +[dev-dependencies] +divan = { git = "https://github.com/OliverKillane/divan.git", branch = "enh/file-output" } + +[[bench]] +name = "user_details" +harness = false + +[[bench]] +name = "sales_analytics" +harness = false + +[[bench]] +name = "data_logs" +harness = false diff --git a/bench/embedded_db_comparisons/README.md b/bench/embedded_db_comparisons/README.md new file mode 100644 index 0000000..2124578 --- /dev/null +++ b/bench/embedded_db_comparisons/README.md @@ -0,0 +1,28 @@ +## Embedded Databases Comparisons +The duckdb and sqlite dependencies are very large as they include the respective databases. + +- For duckDB compilation may fail if there is insufficient memory and swap. +- Both the duckDB and sqlite benchmarks take a very long time compared to the rust implemented comparisons. + +Consider increasing swap to avoid build failures. + +```bash +cargo bench +``` + +#### On WSL: +```toml +# In your windows home directory, in .wslconfig +# settings apply across all Linux distros running on WSL 2 +# Can see memory in wsl2 with "free -m" + +[wsl2] +# Limits VM memory to use no more than 48 GB, defaults to 50% of ram +memory=8GB + +# Sets the VM to use 8 virtual processors +processors=8 + +# Sets the amount of swap storage space to 8GB, default is 25% of available RAM +swap=16GB +``` diff --git a/bench/embedded_db_comparisons/benches/data_logs.rs b/bench/embedded_db_comparisons/benches/data_logs.rs new file mode 100644 index 0000000..17125d8 --- /dev/null +++ b/bench/embedded_db_comparisons/benches/data_logs.rs @@ -0,0 +1,64 @@ +use divan::{self, black_box_drop, Bencher}; +use embedded_db_comparisons::data_logs::{ + data_logs::{Database, Datastore}, + duckdb_impl::DuckDB, + emdb_table_thunderdome_impl::EmDBThunderdome, + emdb_iter_impl::EmDBIter, + populate_table, + sqlite_impl::SQLite, +}; + +const TABLE_SIZES: [usize; 1] = [32384]; // [524288, 1048576, 2097152]; + +#[divan::bench( + name = "demote_errors_data_cleaning", + types = [EmDBIter, EmDBThunderdome, SQLite, DuckDB], + consts = TABLE_SIZES, + sample_size = 5, + sample_count = 3, +)] +fn demote_errors_data_cleaning(bencher: Bencher) { + bencher + .with_inputs(|| populate_table(&mut rand::thread_rng(), SIZE)) + .bench_local_values(|mut ds: DS| { + let mut db = ds.db(); + black_box_drop(db.demote_error_logs()); + }) +} + +#[divan::bench( + name = "get_errors_per_minute", + types = [EmDBIter, EmDBThunderdome, SQLite, DuckDB], + consts = TABLE_SIZES, + sample_size = 5, + sample_count = 3, +)] +fn get_errors_per_minute(bencher: Bencher) { + bencher + .with_inputs(|| populate_table(&mut rand::thread_rng(), SIZE)) + .bench_local_values(|mut ds: DS| { + let db = ds.db(); + black_box_drop(db.get_errors_per_minute()); + }) +} + +#[divan::bench( + name = 
"get_comment_summaries", + types = [EmDBIter, EmDBThunderdome, SQLite, DuckDB], + consts = TABLE_SIZES, + sample_size = 5, + sample_count = 3, +)] +fn get_comment_summaries(bencher: Bencher) { + bencher + .with_inputs(|| populate_table(&mut rand::thread_rng(), SIZE)) + .bench_local_values(|mut ds: DS| { + let db = ds.db(); + // for the entire database + black_box_drop(db.get_comment_summaries(0, SIZE)); + }) +} + +fn main() { + divan::main() +} diff --git a/bench/embedded_db_comparisons/benches/sales_analytics.rs b/bench/embedded_db_comparisons/benches/sales_analytics.rs new file mode 100644 index 0000000..569ef60 --- /dev/null +++ b/bench/embedded_db_comparisons/benches/sales_analytics.rs @@ -0,0 +1,41 @@ +use divan::{black_box_drop, Bencher}; +use embedded_db_comparisons::{ + sales_analytics::{ + duckdb_impl::DuckDB, + emdb_iter_impl::EmDBIter, + sales_analytics::{Database, Datastore}, + sqlite_impl::SQLite, + TableConfig, + }, + utils::{choose, choose_internal, total}, +}; +use rand::{rngs::ThreadRng, Rng}; + +#[divan::bench( + name = "random_workloads", + types = [EmDBIter, SQLite, DuckDB], + consts = [1024, 8192, 16384], + max_time = 10 +)] +fn mixed_workload(bencher: Bencher) { + bencher + .with_inputs(|| { + let mut rng = rand::thread_rng(); + let config = TableConfig::from_size(SIZE); + (TableConfig::populate_database(&config, &mut rng), rng, config) + }) + .bench_local_values(|(mut ds, mut rng, config): (DS, ThreadRng, TableConfig)| { + let db = ds.db(); + for _ in 0..1000 { + choose! { rng + 1 => black_box_drop(db.category_sales(0.2, 2.3)), + 1 => black_box_drop(db.product_customers(rng.gen_range(0..config.products), 0.9, 1.2)), + 1 => black_box_drop(db.customer_value(1.5, 8.8, rng.gen_range(0..config.customers))), + } + } + }) +} + +fn main() { + divan::main() +} diff --git a/bench/embedded_db_comparisons/benches/user_details.rs b/bench/embedded_db_comparisons/benches/user_details.rs new file mode 100644 index 0000000..685b33a --- /dev/null +++ b/bench/embedded_db_comparisons/benches/user_details.rs @@ -0,0 +1,157 @@ +use divan::{black_box, black_box_drop, Bencher}; +use embedded_db_comparisons::{ + user_details::{ + duckdb_impl::DuckDB, + emdb_iter_impl::EmDBIter, + random_table, random_user, + sqlite_impl::SQLite, + user_details::{Database, Datastore}, + GetNewUserKey, + }, + utils::{choose, choose_internal, total}, +}; +use rand::Rng; + +// const TABLE_SIZES: [usize; 9] = [1, 8, 64, 128, 512, 4096, 16384, 65536, 262144]; +const TABLE_SIZES: [usize; 4] = [1, 8, 16, 512]; + +fn main() { + divan::main(); +} + +/// Time taken for a number of inserts of random premium/non-premium +#[divan::bench( + name = "random_inserts", + types = [EmDBIter, SQLite, DuckDB], + consts = TABLE_SIZES +)] +fn inserts(bencher: Bencher) +where + T: Datastore, +{ + bencher + .with_inputs(|| { + let db = T::new(); + let mut rng = rand::thread_rng(); + + ( + (0..N).map(|i| random_user(&mut rng, i)).collect::>(), + db, + ) + }) + .bench_local_values(|(users, mut ds)| { + let mut db = ds.db(); + for (name, prem, initial) in users { + black_box_drop(db.new_user(name, prem, initial)); + } + }) +} + +/// Time taken to get ids in random order +#[divan::bench( + name = "random_get_ids", + types = [EmDBIter, SQLite, DuckDB], + consts = TABLE_SIZES +)] +fn gets(bencher: Bencher) +where + T: Datastore + GetNewUserKey, +{ + bencher + .with_inputs(random_table::) + .bench_local_refs(|(ids, ds)| { + let db = ds.db(); + for id in ids { + black_box_drop(db.get_info(*id)); + } + }) +} + +/// Time taken to get a snapshot 
+#[divan::bench( + name = "snapshot", + types = [EmDBIter, SQLite, DuckDB], + consts = TABLE_SIZES +)] +fn snapshot<T, const N: usize>(bencher: Bencher) +where + T: Datastore + GetNewUserKey, +{ + bencher + .with_inputs(random_table::<T, N>) + .bench_local_refs(|(_, ds)| { + let db = ds.db(); + black_box_drop(db.get_snapshot()) + }) +} + +/// Time taken to get the total credits of premium users +#[divan::bench( + name = "get_total_prem_credits", + types = [EmDBIter, SQLite, DuckDB], + consts = TABLE_SIZES, + max_time = 1 +)] +fn premium_credits<'a, T, const N: usize>(bencher: Bencher) +where + T: Datastore + GetNewUserKey, +{ + bencher + .with_inputs(random_table::<T, N>) + .bench_local_refs(|(_, ds)| { + let db = ds.db(); + black_box_drop(db.total_premium_credits()) + }) +} + +/// Time taken to reward premium users +#[divan::bench( + name = "reward_premium_users", + types = [EmDBIter, SQLite, DuckDB], + consts = TABLE_SIZES, + max_time = 1 +)] +fn reward_premium<T, const N: usize>(bencher: Bencher) +where + T: Datastore + GetNewUserKey, +{ + bencher + .with_inputs(random_table::<T, N>) + .bench_local_refs(|(_, ds)| { + let mut db = ds.db(); + black_box_drop(db.reward_premium(2f32)) + }) +} + +/// Random workload of N actions +#[divan::bench( + name = "random_workloads", + types = [EmDBIter, SQLite, DuckDB], + consts = [1024, 2048, 4096], + max_time = 100 +)] +fn mixed_workload<DS, const N: usize>(bencher: Bencher) +where + DS: Datastore + GetNewUserKey, +{ + bencher.bench_local(|| { + let mut ds = DS::new(); + let mut db = ds.db(); + let mut rng = rand::thread_rng(); + + // avoid reallocations + let mut ids = Vec::with_capacity(N); + ids.push(DS::new_user_wrap(&mut db, String::from("bob"), true, Some(3))); + + for _ in 0..N { + choose! { rng + 10 => { ids.push(DS::new_user_wrap(&mut db, String::from("bob"), true, Some(3))); }, + 20 => { black_box(db.get_info(ids[rng.gen_range(0..ids.len())])); }, + 1 => { black_box(db.get_snapshot()); }, + 2 => { black_box(db.total_premium_credits()); }, + 1 => { let _ = black_box(db.reward_premium(1.2f32)); }, + 20 => { let _ = black_box(db.add_credits(ids[rng.gen_range(0..ids.len())], rng.gen_range(2..100))); }, + } + } + }) +} diff --git a/bench/embedded_db_comparisons/src/data_logs/duckdb_impl.rs b/bench/embedded_db_comparisons/src/data_logs/duckdb_impl.rs new file mode 100644 index 0000000..e3fe089 --- /dev/null +++ b/bench/embedded_db_comparisons/src/data_logs/duckdb_impl.rs @@ -0,0 +1,143 @@ +use duckdb::{params, Connection}; + +use super::data_logs::{Database, Datastore}; + +pub struct DuckDB { + conn: Connection, +} + +pub struct DuckDBDatabase<'imm> { + conn: &'imm Connection, +} + +impl Datastore for DuckDB { + type DB<'imm> = DuckDBDatabase<'imm>; + + fn new() -> Self { + let conn = Connection::open_in_memory().unwrap(); + conn.execute_batch( + " + CREATE TABLE logs ( + timestamp INTEGER, + comment TEXT, + level UTINYINT -- 0 error, 1 warn, 2 info + ); + ", + ) + .unwrap(); + DuckDB { conn } + } + + fn db(&mut self) -> Self::DB<'_> { + DuckDBDatabase { conn: &self.conn } + } +} + +impl<'imm> Database<'imm> for DuckDBDatabase<'imm> { + type Datastore = DuckDB; + + fn add_event( + &mut self, + timestamp: usize, + comment: Option<String>, + log_level: crate::data_logs::LogLevel, + ) { + self.conn + .prepare_cached("INSERT INTO logs (timestamp, comment, level) VALUES (?, ?, ?);") + .unwrap() + .execute(params![ + timestamp, + comment, + match log_level { + crate::data_logs::LogLevel::Error => 0, + crate::data_logs::LogLevel::Warning => 1, + crate::data_logs::LogLevel::Info => 2, + } + ]) + .unwrap(); + } + + fn 
get_errors_per_minute(&self) -> Vec<(usize, usize)> { + self.conn + .prepare_cached( + " + WITH error_logs AS ( + SELECT + timestamp, + comment, + level + FROM + logs + WHERE + level = 0 -- Assuming 0 corresponds to 'Error' log level + ), + minute_logs AS ( + SELECT + timestamp % 60 AS min + FROM + error_logs + ), + errors_per_minute AS ( + SELECT + min, + COUNT(*) AS errors + FROM + minute_logs + GROUP BY + min + ) + SELECT + min, + errors + FROM + errors_per_minute; + ", + ) + .unwrap() + .query_map(params![], |row| Ok((row.get(0)?, row.get(1)?))) + .unwrap() + .collect::<Result<Vec<_>, _>>() + .unwrap() + } + + fn get_comment_summaries( + &self, + time_start: usize, + time_end: usize, + ) -> Vec<(String, usize)> { + self.conn + .prepare_cached( + " + SELECT + SUBSTRING(comment, 1, 30) AS comment_summary, + LENGTH(comment) AS comment_length + FROM + logs + WHERE + timestamp BETWEEN ? AND ? + AND comment IS NOT NULL; + ", + ) + .unwrap() + .query_map(params![time_start, time_end], |row| { + Ok((row.get(0)?, row.get(1)?)) + }) + .unwrap() + .collect::<Result<Vec<_>, _>>() + .unwrap() + } + + fn demote_error_logs(&mut self) { + self.conn + .prepare_cached( + " + UPDATE logs + SET level = 1 -- Assuming 1 corresponds to 'Warning' log level + WHERE level = 0; -- Assuming 0 corresponds to 'Error' log level + ", + ) + .unwrap() + .execute(params![]) + .unwrap(); + } +} diff --git a/bench/embedded_db_comparisons/src/data_logs/mod.rs b/bench/embedded_db_comparisons/src/data_logs/mod.rs new file mode 100644 index 0000000..4550fe3 --- /dev/null +++ b/bench/embedded_db_comparisons/src/data_logs/mod.rs @@ -0,0 +1,138 @@ +use crate::utils::{choose, choose_internal, total}; +use data_logs::Database; +use emdb::macros::emql; +use rand::{rngs::ThreadRng, Rng}; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum LogLevel { + Error, + Warning, + Info, +} + +emql! { + impl data_logs as Interface{ + pub = on, + }; + impl emdb_parallel_impl as Serialized{ + interface = data_logs, + pub = on, + ds_name = EmDBParallel, + op_impl = Parallel, + }; + impl emdb_basic_impl as Serialized{ + interface = data_logs, + pub = on, + ds_name = EmDBBasic, + op_impl = Basic, + }; + impl emdb_iter_impl as Serialized{ + interface = data_logs, + pub = on, + ds_name = EmDBIter, + op_impl = Iter, + }; + impl emdb_chunk_impl as Serialized{ + interface = data_logs, + pub = on, + ds_name = EmDBChunk, + op_impl = Chunk, + }; + + table logs { + timestamp: usize, + comment: Option<String>, + level: crate::data_logs::LogLevel, + } + + query add_event( + timestamp: usize, + comment: Option<String>, + log_level: crate::data_logs::LogLevel, + ) { + row( + timestamp: usize = timestamp, + comment: Option<String> = comment, + level: crate::data_logs::LogLevel = log_level, + ) ~> insert(logs as ref log_id); + } + + // Description: + // Get the number of errors per minute. + // Reasoning: + // Requires a large mapping (accelerated by parallelism), and a groupby + // aggregation. For demonstrating OLAP performance. + query get_errors_per_minute() { + use logs + |> filter(*level == crate::data_logs::LogLevel::Error) + |> map(min: usize = timestamp % 60) + |> groupby(min for let errors in { + use errors + |> count(num_logs) + ~> map(min: usize = min, errors: usize = num_logs) + ~> return; + }) + |> collect(errors) + ~> return; + } + + // Description: + // Get the first 30 characters of each comment, and the length of the + // comment. + // Reasoning: + // Requires a fast map over a large stream of values, a common OLAP workload. 
+ query get_comment_summaries(time_start: usize, time_end: usize) { + use logs + |> filter(**timestamp >= time_start && **timestamp <= time_end && comment.is_some()) + |> map(slice: &'db str = comment.as_ref().unwrap()) + |> map( + comment: &'db str = &slice[..(std::cmp::min(30, slice.len()))], + length: usize = slice.len() + ) + |> collect(comments) + ~> return; + } + + // Description: + // Demote all errors to warnings. + // Reasoning: + // A data cleaning workload. + query demote_error_logs() { + ref logs as log_ref + |> deref(log_ref as log_data) + |> update(log_ref use level = ( + if crate::data_logs::LogLevel::Error == log_data.level { + crate::data_logs::LogLevel::Warning + } else { + log_data.level + } + )); + } +} + +pub fn populate_table(rng: &mut ThreadRng, size: usize) -> DS { + let mut ds = DS::new(); + { + let mut db = ds.db(); + for t in 0..size { + db.add_event( + t, + choose! { rng + 3 => None, + 2 => Some(format!("This is a short {t} string")), + 1 => Some(format!("This is a {t} very very very {t} very very {t} very very very {t} long string")), + }, + choose! { rng + 1 => LogLevel::Error, + 2 => LogLevel::Warning, + 3 => LogLevel::Info, + }, + ); + } + } + ds +} + +pub mod duckdb_impl; +pub mod sqlite_impl; +mod thunderdome_emdb_impl; pub use thunderdome_emdb_impl::*; \ No newline at end of file diff --git a/bench/embedded_db_comparisons/src/data_logs/sqlite_impl.rs b/bench/embedded_db_comparisons/src/data_logs/sqlite_impl.rs new file mode 100644 index 0000000..c55ca3c --- /dev/null +++ b/bench/embedded_db_comparisons/src/data_logs/sqlite_impl.rs @@ -0,0 +1,143 @@ +use rusqlite::{params, Connection}; + +use super::data_logs::{Database, Datastore}; + +pub struct SQLite { + conn: Connection, +} + +pub struct SQLiteDatabase<'imm> { + conn: &'imm Connection, +} + +impl Datastore for SQLite { + type DB<'imm> = SQLiteDatabase<'imm>; + + fn new() -> Self { + let conn = Connection::open_in_memory().unwrap(); + conn.execute_batch( + " + CREATE TABLE logs ( + timestamp INTEGER, + comment VARCHAR, + level INT8 -- 0 error, 1 warn, 2 info + ); + ", + ) + .unwrap(); + SQLite { conn } + } + + fn db(&mut self) -> Self::DB<'_> { + SQLiteDatabase { conn: &self.conn } + } +} + +impl<'imm> Database<'imm> for SQLiteDatabase<'imm> { + type Datastore = SQLite; + + fn add_event( + &mut self, + timestamp: usize, + comment: Option, + log_level: crate::data_logs::LogLevel, + ) { + self.conn + .prepare_cached("INSERT INTO logs (timestamp, comment, level) VALUES (?, ?, ?);") + .unwrap() + .execute(params![ + timestamp, + comment, + match log_level { + crate::data_logs::LogLevel::Error => 0, + crate::data_logs::LogLevel::Warning => 1, + crate::data_logs::LogLevel::Info => 2, + } + ]) + .unwrap(); + } + + fn get_errors_per_minute(&self) -> Vec<(usize, usize)> { + self.conn + .prepare_cached( + " + WITH error_logs AS ( + SELECT + timestamp, + comment, + level + FROM + logs + WHERE + level = 0 -- Assuming 0 corresponds to 'Error' log level + ), + minute_logs AS ( + SELECT + timestamp % 60 AS min + FROM + error_logs + ), + errors_per_minute AS ( + SELECT + min, + COUNT(*) AS errors + FROM + minute_logs + GROUP BY + min + ) + SELECT + min, + errors + FROM + errors_per_minute; + ", + ) + .unwrap() + .query_map(params![], |row| Ok((row.get(0)?, row.get(1)?))) + .unwrap() + .collect::, _>>() + .unwrap() + } + + fn get_comment_summaries( + &self, + time_start: usize, + time_end: usize, + ) -> Vec<(String, usize)> { + self.conn + .prepare_cached( + " + SELECT + SUBSTRING(comment, 1, 30) AS comment_summary, + 
LENGTH(comment) AS comment_length + FROM + logs + WHERE + timestamp BETWEEN ? AND ? + AND comment IS NOT NULL; + ", + ) + .unwrap() + .query_map(params![time_start, time_end], |row| { + Ok((row.get(0)?, row.get(1)?)) + }) + .unwrap() + .collect::, _>>() + .unwrap() + } + + fn demote_error_logs(&mut self) { + self.conn + .prepare_cached( + " + UPDATE logs + SET level = 1 -- Assuming 1 corresponds to 'Warning' log level + WHERE level = 0; -- Assuming 0 corresponds to 'Error' log level + ", + ) + .unwrap() + .execute(params![]) + .unwrap(); + } +} diff --git a/bench/embedded_db_comparisons/src/data_logs/thunderdome_emdb_impl.rs b/bench/embedded_db_comparisons/src/data_logs/thunderdome_emdb_impl.rs new file mode 100644 index 0000000..62a4160 --- /dev/null +++ b/bench/embedded_db_comparisons/src/data_logs/thunderdome_emdb_impl.rs @@ -0,0 +1,66 @@ +use super::*; + +emql! { + impl emdb_table_thunderdome_impl as Serialized{ + interface = data_logs, + pub = on, + ds_name = EmDBThunderdome, + op_impl = Iter, + table_select = Thunderdome, + }; + + table logs { + timestamp: usize, + comment: Option, + level: crate::data_logs::LogLevel, + } + + query add_event( + timestamp: usize, + comment: Option, + log_level: crate::data_logs::LogLevel, + ) { + row( + timestamp: usize = timestamp, + comment: Option = comment, + level: crate::data_logs::LogLevel = log_level, + ) ~> insert(logs as ref log_id); + } + + query get_errors_per_minute() { + use logs + |> filter(*level == crate::data_logs::LogLevel::Error) + |> map(min: usize = timestamp % 60) + |> groupby(min for let errors in { + use errors + |> count(num_logs) + ~> map(min: usize = min, errors: usize = num_logs) + ~> return; + }) + |> collect(errors) + ~> return; + } + + query get_comment_summaries(time_start: usize, time_end: usize) { + use logs + |> filter(*timestamp >= time_start && *timestamp <= time_end && comment.is_some()) + |> map( + length: usize = comment.as_ref().unwrap().len(), + slice: String = comment.unwrap().chars().take(30).collect::() + ) + |> collect(comments) + ~> return; + } + + query demote_error_logs() { + ref logs as log_ref + |> deref(log_ref as log_data) + |> update(log_ref use level = ( + if crate::data_logs::LogLevel::Error == log_data.level { + crate::data_logs::LogLevel::Warning + } else { + log_data.level + } + )); + } +} \ No newline at end of file diff --git a/bench/embedded_db_comparisons/src/lib.rs b/bench/embedded_db_comparisons/src/lib.rs new file mode 100644 index 0000000..9f0fd5b --- /dev/null +++ b/bench/embedded_db_comparisons/src/lib.rs @@ -0,0 +1,19 @@ +#![allow(refining_impl_trait)] // for refining the return `impl Any` types from emql generated traits. +//! # Benchmarks for EmDB against DuckDB and SQLite +//! For each benchmark we use [`emdb`] to generate the implementation, as well as a trait we can +//! used to define the other implementations (allows us to write a single generic benchmark based +//! on that trait). +//! - Additional trait bounds can be specified using [emdb]'s `Interface` backend. +//! - All return types are unconstrained, so other implementations can return their +//! own types +//! +//! ## Constraints +//! These are particularly slow in duckdb, and are not used to speed up joins (see +//! [duckdb constraints](https://duckdb.org/docs/guides/performance/schema#constraints)). 
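+//!
+//! For example (a sketch only, not code in this crate), a benchmark generic over the
+//! generated `user_details` interface traits can be written once and reused for every
+//! implementation:
+//! ```ignore
+//! fn snapshot<DS: user_details::Datastore>() {
+//!     let mut ds = DS::new();
+//!     let db = ds.db();
+//!     divan::black_box(db.get_snapshot());
+//! }
+//! ```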
+ +// The schema implementations +pub mod data_logs; +pub mod sales_analytics; +pub mod user_details; + +pub mod utils; diff --git a/bench/embedded_db_comparisons/src/sales_analytics/duckdb_impl.rs b/bench/embedded_db_comparisons/src/sales_analytics/duckdb_impl.rs new file mode 100644 index 0000000..a8d6986 --- /dev/null +++ b/bench/embedded_db_comparisons/src/sales_analytics/duckdb_impl.rs @@ -0,0 +1,336 @@ +use super::sales_analytics::{Database, Datastore}; +use duckdb::{params, Connection}; + +pub struct DuckDB { + conn: Connection, +} + +pub struct DuckDBDatabase<'imm> { + conn: &'imm mut Connection, +} + +impl Datastore for DuckDB { + type DB<'imm> = DuckDBDatabase<'imm>; + + fn new() -> Self { + let conn = Connection::open_in_memory().unwrap(); + conn.execute_batch( + " + -- Product Categories are 'Electronics' (0), 'Clothing' (1) or 'Food' (2) + -- Currencies are 'GBP' (0), 'USD' (1) or 'BTC' (2) + -- Cannot use enums due to the sensible prices constraint, and this bug https://github.com/duckdb/duckdb-rs/issues/334 + -- Additionally difficult to convert back to rust type, so use u8s instead + + CREATE TABLE products ( + serial UBIGINT NOT NULL, + name VARCHAR NOT NULL, + category UTINYINT NOT NULL, + CONSTRAINT unique_serial_number UNIQUE(serial) + ); + + CREATE TABLE purchases ( + customer_reference UBIGINT, + product_serial UBIGINT, + quantity UTINYINT, + price UBIGINT, + currency UTINYINT NOT NULL, + CONSTRAINT sensible_prices CHECK ( + (currency = 1 AND price <= 10000 * 100) OR + (currency = 2 AND price < 20) OR + (currency = 0) -- No constraint for GBP + ) + ); + + CREATE TABLE customers ( + reference UBIGINT NOT NULL, + name VARCHAR NOT NULL, + address VARCHAR NOT NULL, + CONSTRAINT unique_customer_reference UNIQUE(reference), + CONSTRAINT unique_customer_address UNIQUE(address), + CONSTRAINT sensible_name CHECK (LENGTH(name) > 2), + CONSTRAINT non_empty_address CHECK (LENGTH(address) > 0) + ); + + CREATE TABLE old_customers ( + reference UBIGINT NOT NULL, + ); + ", + ) + .unwrap(); + Self { conn } + } + + fn db(&mut self) -> Self::DB<'_> { + DuckDBDatabase { + conn: &mut self.conn, + } + } +} + +impl<'imm> Database<'imm> for DuckDBDatabase<'imm> { + type Datastore = DuckDB; + + fn new_customer(&mut self, reference: usize, name: String, address: String) { + self.conn + .prepare_cached(" INSERT INTO customers (reference, name, address) VALUES (?, ?, ?) 
") + .unwrap() + .query_row(params![reference, name, address], |_| Ok(())) + .unwrap() + } + + fn new_sale( + &mut self, + customer_reference: usize, + product_serial: usize, + quantity: u8, + price: u64, + currency: crate::sales_analytics::Currency, + ) { + self.conn + .prepare_cached(" INSERT INTO purchases (customer_reference, product_serial, quantity, price, currency) VALUES (?, ?, ?, ?, ?)") + .unwrap() + .query_row(params![ + customer_reference, + product_serial, + quantity, + price, + match currency { + super::Currency::GBP => 0, + super::Currency::USD => 1, + super::Currency::BTC => 2, + } + ], |_| Ok(())).unwrap() + } + + fn customer_leaving(&mut self, reference: usize) { + let trans = self.conn.transaction().unwrap(); + trans + .prepare_cached("DELETE FROM customers WHERE reference = ?") + .unwrap() + .query_row(params![reference], |_| Ok(())) + .unwrap(); + trans + .prepare_cached("INSERT INTO old_customers (reference) VALUES (?)") + .unwrap() + .query_row(params![reference], |_| Ok(())) + .unwrap(); + trans.commit().unwrap(); + } + + fn new_product( + &mut self, + serial: usize, + name: String, + category: crate::sales_analytics::ProductCategory, + ) { + self.conn + .prepare_cached(" INSERT INTO products (serial, name, category) VALUES (?, ?, ?)") + .unwrap() + .query_row( + params![ + serial, + name, + match category { + super::ProductCategory::Electronics => 0, + super::ProductCategory::Clothing => 1, + super::ProductCategory::Food => 2, + } + ], + |_| Ok(()), + ) + .unwrap() + } + + fn customer_value( + &self, + btc_rate: f64, + usd_rate: f64, + cust_ref_outer: usize, + ) -> (usize, u64, usize, usize, usize) { + let res = self + .conn + .prepare_cached( + " + WITH customer_purchases AS ( + SELECT + p.customer_reference, + p.product_serial, + p.quantity, + p.price, + p.currency, + pr.category + FROM + purchases p + JOIN + products pr ON p.product_serial = pr.serial + WHERE + p.customer_reference = ? -- cust_ref_outer + ), + purchase_totals AS ( + SELECT + customer_reference, + SUM( + CASE + WHEN currency = 1 THEN price * ? + WHEN currency = 2 THEN price * ? + ELSE price + END * quantity + ) AS money_spent, + SUM( + CASE + WHEN category = 0 THEN quantity + ELSE 0 + END + ) AS electronics, + SUM( + CASE + WHEN category = 1 THEN quantity + ELSE 0 + END + ) AS clothes, + SUM( + CASE + WHEN category = 2 THEN quantity + ELSE 0 + END + ) AS food + FROM + customer_purchases + GROUP BY + customer_reference + ) + SELECT + ct.customer_reference, + ct.money_spent, + ct.electronics, + ct.clothes, + ct.food + FROM + purchase_totals ct + JOIN + customers cc ON ct.customer_reference = cc.reference; + ", + ) + .unwrap() + .query_map(params![cust_ref_outer, usd_rate, btc_rate], |row| { + Ok(( + row.get(0)?, + row.get(1)?, + row.get(2)?, + row.get(3)?, + row.get(4)?, + )) + }) + .unwrap() + .collect::, _>>() + .unwrap(); + if res.is_empty() { + (0, 0, 0, 0, 0) + } else { + res[0] + } + } + + fn product_customers( + &self, + serial: usize, + btc_rate: f64, + usd_rate: f64, + ) -> Vec<(usize, u64, u64)> { + self.conn + .prepare_cached( + " + WITH filtered_purchases AS ( + SELECT + customer_reference, + quantity, + price, + currency + FROM + purchases + WHERE + product_serial = ? 
+                ),
+                exchange_rates AS (
+                    SELECT
+                        CASE
+                            WHEN currency = 1 THEN ROUND(price * ?, 0)
+                            WHEN currency = 2 THEN ROUND(price * ?, 0)
+                            ELSE price
+                        END * quantity AS total_spent,
+                        customer_reference
+                    FROM
+                        filtered_purchases
+                ),
+                total_spent_by_customer AS (
+                    SELECT
+                        customer_reference,
+                        SUM(total_spent) AS total_spent
+                    FROM
+                        exchange_rates
+                    GROUP BY
+                        customer_reference
+                )
+                SELECT
+                    ? AS product_serial,
+                    customer_reference,
+                    total_spent
+                FROM
+                    total_spent_by_customer;
+                ",
+            )
+            .unwrap()
+            .query_map(params![serial, usd_rate, btc_rate, serial], |row| {
+                Ok((row.get(0)?, row.get(1)?, row.get(2)?))
+            })
+            .unwrap()
+            .collect::<Result<Vec<_>, _>>()
+            .unwrap()
+    }
+
+    fn category_sales(&self, btc_rate: f64, usd_rate: f64) -> Vec<(u8, u64)> {
+        self.conn
+            .prepare_cached(
+                "
+                WITH joined_data AS (
+                    SELECT
+                        pr.category,
+                        pu.quantity,
+                        pu.price,
+                        pu.currency,
+                        CASE
+                            WHEN pu.currency = 1 THEN ROUND(pu.price * ?, 0)
+                            WHEN pu.currency = 2 THEN ROUND(pu.price * ?, 0)
+                            ELSE pu.price
+                        END * pu.quantity AS money
+                    FROM
+                        purchases pu
+                    INNER JOIN
+                        products pr ON pu.product_serial = pr.serial
+                ),
+                aggregated_data AS (
+                    SELECT
+                        category,
+                        SUM(money) AS total
+                    FROM
+                        joined_data
+                    GROUP BY
+                        category
+                )
+                SELECT
+                    category,
+                    total
+                FROM
+                    aggregated_data;
+                ",
+            )
+            .unwrap()
+            .query_map(params![usd_rate, btc_rate], |row| {
+                Ok((row.get(0)?, row.get(1)?))
+            })
+            .unwrap()
+            .collect::<Result<Vec<_>, _>>()
+            .unwrap()
+    }
+}
diff --git a/bench/embedded_db_comparisons/src/sales_analytics/mod.rs b/bench/embedded_db_comparisons/src/sales_analytics/mod.rs
new file mode 100644
index 0000000..df947f3
--- /dev/null
+++ b/bench/embedded_db_comparisons/src/sales_analytics/mod.rs
@@ -0,0 +1,340 @@
+//! ## A complex analytical workload
+//! To test [`emdb`]'s OLAP performance, particularly against [`duckdb`].
+//! - Embeds business logic in the database (advantageous for [`emdb`])
+//! - Complex aggregations
+
+use crate::utils::{choose, choose_internal, total};
+use emdb::macros::emql;
+use rand::{rngs::ThreadRng, Rng};
+use sales_analytics::{Database, Datastore};
+
+#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
+pub enum ProductCategory {
+    Electronics,
+    Clothing,
+    Food,
+}
+
+#[derive(Clone, Copy, Debug)]
+pub enum Currency {
+    GBP,
+    USD,
+    BTC,
+}
+
+/// Validate a price by the rules:
+/// - No more than $10k in dollars
+/// - Fewer than 20 in BTC
+fn validate_price(price: &u64, currency: &Currency) -> bool {
+    const DECIMAL: u64 = 100;
+    match currency {
+        Currency::GBP => true,
+        Currency::USD => *price <= 10_000 * DECIMAL,
+        Currency::BTC => *price < 20,
+    }
+}
+
+fn exchange(btc_rate: f64, usd_rate: f64, price: u64, currency: Currency) -> u64 {
+    match currency {
+        Currency::GBP => price,
+        Currency::USD => (price as f64 * usd_rate) as u64,
+        Currency::BTC => (price as f64 * btc_rate) as u64,
+    }
+}
+
+#[derive(Clone, PartialEq, Eq, Debug)]
+#[derive(Default)]
+struct Aggregate {
+    clothes: usize,
+    electronics: usize,
+    food: usize,
+    money_spent: u64,
+}
+
+emql!
{ + impl sales_analytics as Interface{ + pub = on, + }; + impl emdb_parallel_impl as Serialized{ + interface = sales_analytics, + pub = on, + ds_name = EmDBParallel, + op_impl = Parallel, + }; + impl emdb_basic_impl as Serialized{ + interface = sales_analytics, + pub = on, + ds_name = EmDBBasic, + op_impl = Basic, + }; + impl emdb_iter_impl as Serialized{ + interface = sales_analytics, + pub = on, + ds_name = EmDBIter, + op_impl = Iter, + }; + impl emdb_chunk_impl as Serialized{ + interface = sales_analytics, + pub = on, + ds_name = EmDBChunk, + op_impl = Chunk, + }; + + table products { + serial: usize, + name: String, + category: crate::sales_analytics::ProductCategory, + } @ [unique(serial) as unique_serial_number] + + table purchases { + customer_reference: usize, + product_serial: usize, + quantity: u8, + price: u64, + currency: crate::sales_analytics::Currency, + } @ [pred(crate::sales_analytics::validate_price(price, currency)) as sensible_prices] + + // We delete old customers, but keep their references + table current_customers { + reference: usize, + name: String, + address: String, + } @ [ + unique(reference) as unique_customer_reference, + unique(address) as unique_customer_address, + pred(name.len() > 2) as sensible_name, + pred(!address.is_empty()) as non_empty_address, + ] + + // Old customers, deleted but references kept for purchases + table old_customers { + reference: usize, + } + + // Basic queries for data population ======================================= + query new_customer( + reference: usize, + name: String, + address: String, + ) { + row( + reference: usize = reference, + name: String = name, + address: String = address, + ) ~> insert(current_customers as ref customer_ref); + } + query new_sale( + customer_reference: usize, + product_serial: usize, + quantity: u8, + price: u64, + currency: crate::sales_analytics::Currency, + ) { + row( + customer_reference: usize = customer_reference, + product_serial: usize = product_serial, + quantity: u8 = quantity, + price: u64 = price, + currency: crate::sales_analytics::Currency = currency, + ) ~> insert(purchases as ref sale_ref); + } + query customer_leaving( + reference: usize, + ) { + row( + reference: usize = reference, + ) + ~> unique(reference for current_customers.reference as ref customer_ref) + ~> delete(customer_ref) + ~> map(reference: usize = reference) + ~> insert(old_customers as ref customer_ref); + } + + query new_product( + serial: usize, + name: String, + category: crate::sales_analytics::ProductCategory, + ) { + row( + serial: usize = serial, + name: String = name, + category: crate::sales_analytics::ProductCategory = category, + ) ~> insert(products as ref product_ref); + } + + // Anaysis queries ========================================================= + + // Description: + // Get the total value of a customer's purchases, using the current + // exchange rates, but only if they are a current customer. + // + // Additionally get the sum of all products they have purchased in each product + // category. + // Reasoning: + // Allows us to demonstrate embedding of business logic into the database. 
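+    // Note (added): the aggregation below is expressed as a `combine` fold over the
+    // `Aggregate` struct, using `Aggregate::default()` as the identity; this mirrors
+    // the CASE/SUM CTEs in the SQLite and DuckDB implementations.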
+ query customer_value(btc_rate: f64, usd_rate: f64, cust_ref_outer: usize) { + row(cust_ref: usize = cust_ref_outer) + ~> unique(cust_ref for current_customers.reference as ref customer_ref) + ~> deref(customer_ref as customer) + ~> lift( + use purchases + |> filter(**customer_reference == cust_ref_outer) + |> let customer_purchases; + + use products |> let all_prods; + + join(use all_prods [inner equi(serial = product_serial)] use customer_purchases) + |> map(result: crate::sales_analytics::Aggregate = { + use crate::sales_analytics::ProductCategory::*; + let q = *customer_purchases.quantity as usize; + let (electronics, clothes, food) = match all_prods.category { + Electronics => (q, 0, 0), + Clothing => (0, q, 0), + Food => (0, 0, q), + }; + crate::sales_analytics::Aggregate { + clothes, + electronics, + food, + money_spent: (*customer_purchases.quantity as u64) * crate::sales_analytics::exchange(btc_rate, usd_rate, *customer_purchases.price, *customer_purchases.currency), + } + }) + |> combine(use left + right in result[crate::sales_analytics::Aggregate::default()] = [crate::sales_analytics::Aggregate { + clothes: left.result.clothes + right.result.clothes, + electronics: left.result.electronics + right.result.electronics, + food: left.result.food + right.result.food, + money_spent: left.result.money_spent + right.result.money_spent, + }]) + ~> return; + ) ~> return; + } + + // Description: + // For a given product get for each purchasing customer: + // - customer reference + // - total spent by the customer on the product + // Reasoning: + // To demonstrate complex aggregations, and returning data structures + query product_customers(serial: usize, btc_rate: f64, usd_rate: f64) { + row(serial: usize = serial) + ~> unique(serial for products.serial as ref product_ref) + ~> deref(product_ref as product) + ~> lift( + use purchases + |> filter(**product_serial == serial) + |> groupby(customer_reference for let filtered_purchases in { + use filtered_purchases + |> map(sum: u64 = (*quantity as u64) * crate::sales_analytics::exchange(btc_rate, usd_rate, *price, *currency)) + |> combine(use left + right in sum[0] = [left.sum + right.sum]) + ~> map(customer: &'db usize = customer_reference, total: u64 = sum) + ~> return; + }) + |> collect(customers as type customers_for_prod) + ~> map(product_serial: usize = serial, customers: type customers_for_prod = customers) + ~> return ; + ) + ~> return; + } + + // Description: + // Get the total sales per category, in the different currencies + // Reasoning: + // Demonstrating aggregation over a large table + query category_sales(btc_rate: f64, usd_rate: f64) { + use purchases |> let purchase_data; + use products |> let product_data; + + join(use purchase_data [inner equi(product_serial = serial)] use product_data) + |> map( + category: crate::sales_analytics::ProductCategory = *product_data.category, + money: u64 = (*purchase_data.quantity as u64) * crate::sales_analytics::exchange( + btc_rate, usd_rate, *purchase_data.price, *purchase_data.currency + ) + ) + |> groupby(category for let category_purchase_data in { + use category_purchase_data + |> combine(use left + right in money[0] = [left.money + right.money]) + ~> map(category: crate::sales_analytics::ProductCategory = category, total: u64 = money) + ~> return; + }) + |> collect(category_totals) + ~> return; + } +} + +pub mod duckdb_impl; +pub mod sqlite_impl; + +pub struct TableConfig { + pub customers: usize, + pub sales: usize, + pub products: usize, +} + +impl TableConfig { + pub fn 
from_size(size: usize) -> Self {
+        TableConfig {
+            customers: size / 2,
+            sales: size,
+            products: size / 4,
+        }
+    }
+
+    pub fn populate_database<DS: Datastore>(
+        Self {
+            customers,
+            sales,
+            products,
+        }: &Self,
+        rng: &mut ThreadRng,
+    ) -> DS {
+        let mut ds = DS::new();
+
+        {
+            let mut db = ds.db();
+
+            for i in 0..*customers {
+                db.new_customer(
+                    i,
+                    format!("Test Subject {i}"),
+                    format!("Address for person {i}"),
+                );
+            }
+
+            for i in 0..*products {
+                db.new_product(
+                    i,
+                    format!("Product {i}"),
+                    choose! { rng
+                        1 => ProductCategory::Electronics,
+                        1 => ProductCategory::Clothing,
+                        1 => ProductCategory::Food,
+                    },
+                );
+            }
+            for _ in 0..*sales {
+                let currency = choose! { rng
+                    1 => Currency::GBP,
+                    1 => Currency::USD,
+                    1 => Currency::BTC,
+                };
+
+                let price = match currency {
+                    Currency::GBP => rng.gen_range(0..100000),
+                    Currency::USD => rng.gen_range(0..=10000),
+                    Currency::BTC => rng.gen_range(0..20),
+                };
+
+                db.new_sale(
+                    rng.gen_range(0..*customers),
+                    rng.gen_range(0..*products),
+                    rng.gen_range(0..10),
+                    price,
+                    currency,
+                );
+            }
+        }
+        ds
+    }
+}
diff --git a/bench/embedded_db_comparisons/src/sales_analytics/sqlite_impl.rs b/bench/embedded_db_comparisons/src/sales_analytics/sqlite_impl.rs
new file mode 100644
index 0000000..1d2c51a
--- /dev/null
+++ b/bench/embedded_db_comparisons/src/sales_analytics/sqlite_impl.rs
@@ -0,0 +1,350 @@
+use super::sales_analytics::{Database, Datastore};
+use rusqlite::{params, Connection};
+
+pub struct SQLite {
+    conn: Connection,
+}
+
+pub struct SQLiteDatabase<'imm> {
+    conn: &'imm mut Connection,
+}
+
+impl Datastore for SQLite {
+    type DB<'imm> = SQLiteDatabase<'imm>;
+
+    /// IMPORTANT NOTE: This implementation is less constrained than the emdb one,
+    /// as it omits the `sensible_prices` predicate on the `purchases`
+    /// table.
+    ///
+    /// The schema should include:
+    /// ```sql
+    /// CREATE TABLE purchases (
+    ///     customer_reference UNSIGNED BIG INT,
+    ///     product_serial UNSIGNED BIG INT,
+    ///     quantity UTINYINT,
+    ///     price UNSIGNED BIG INT,
+    ///     currency Currency NOT NULL,
+    ///     CONSTRAINT sensible_prices CHECK (
+    ///         (currency = 'USD' AND price <= 10000 * 100) OR
+    ///         (currency = 'BTC' AND price < 20) OR
+    ///         (currency = 'GBP') -- No constraint for GBP
+    ///     )
+    /// );
+    /// ```
+    /// However, due to a bug in duckdb, on insert the database will crash.
+    /// - This bug has an issue [here](https://github.com/duckdb/duckdb-rs/issues/334)
+    ///
+    /// Hence we are forgiving, and do not enforce this constraint (to duckdb's
+    /// performance advantage).
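+    ///
+    /// For reference, the emdb schema enforces the same rule via the table constraint
+    /// `pred(crate::sales_analytics::validate_price(price, currency)) as sensible_prices`
+    /// (see `sales_analytics/mod.rs`).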
+ fn new() -> Self { + let conn = Connection::open_in_memory().unwrap(); + conn.execute_batch( + " + -- Currency is 'GBP', 'USD' or 'BTC' + -- Product Category is 'Electronics' (0), 'Clothing' (1) or 'Food' (2) + + CREATE TABLE products ( + serial UNSIGNED BIG INT NOT NULL, + name VARCHAR NOT NULL, + category INT8 NOT NULL, + CONSTRAINT unique_serial_number UNIQUE(serial) + ); + + CREATE TABLE purchases ( + customer_reference UNSIGNED BIG INT, + product_serial UNSIGNED BIG INT, + quantity UTINYINT, + price UNSIGNED BIG INT, + currency INT8 NOT NULL + ); + + CREATE TABLE customers ( + reference UNSIGNED BIG INT NOT NULL, + name VARCHAR NOT NULL, + address VARCHAR NOT NULL, + CONSTRAINT unique_customer_reference UNIQUE(reference), + CONSTRAINT unique_customer_address UNIQUE(address), + CONSTRAINT sensible_name CHECK (LENGTH(name) > 2), + CONSTRAINT non_empty_address CHECK (LENGTH(address) > 0) + ); + + CREATE TABLE old_customers ( + reference UNSIGNED BIG INT NOT NULL + ); + ", + ) + .unwrap(); + Self { conn } + } + + fn db(&mut self) -> Self::DB<'_> { + SQLiteDatabase { + conn: &mut self.conn, + } + } +} + +impl<'imm> Database<'imm> for SQLiteDatabase<'imm> { + type Datastore = SQLite; + + fn new_customer(&mut self, reference: usize, name: String, address: String) { + self.conn + .prepare_cached("INSERT INTO customers (reference, name, address) VALUES (?, ?, ?)") + .unwrap() + .execute(params![reference, name, address]) + .unwrap(); + } + + fn new_sale( + &mut self, + customer_reference: usize, + product_serial: usize, + quantity: u8, + price: u64, + currency: crate::sales_analytics::Currency, + ) { + self.conn + .prepare_cached(" INSERT INTO purchases (customer_reference, product_serial, quantity, price, currency) VALUES (?, ?, ?, ?, ?)") + .unwrap() + .execute(params![ + customer_reference, + product_serial, + quantity, + price, + match currency { + super::Currency::GBP => 0, + super::Currency::USD => 1, + super::Currency::BTC => 2, + } + ]).unwrap(); + } + + fn customer_leaving(&mut self, reference: usize) { + let trans = self.conn.transaction().unwrap(); + trans + .prepare_cached("DELETE FROM customers WHERE reference = ?") + .unwrap() + .execute(params![reference]) + .unwrap(); + trans + .prepare_cached("INSERT INTO old_customers (reference) VALUES (?)") + .unwrap() + .execute(params![reference]) + .unwrap(); + trans.commit().unwrap(); + } + + fn new_product( + &mut self, + serial: usize, + name: String, + category: crate::sales_analytics::ProductCategory, + ) { + self.conn + .prepare_cached(" INSERT INTO products (serial, name, category) VALUES (?, ?, ?)") + .unwrap() + .execute(params![ + serial, + name, + match category { + super::ProductCategory::Electronics => 0, + super::ProductCategory::Clothing => 1, + super::ProductCategory::Food => 2, + } + ]) + .unwrap(); + } + + fn customer_value( + &self, + btc_rate: f64, + usd_rate: f64, + cust_ref_outer: usize, + ) -> (usize, f64, usize, usize, usize) { + let res = self + .conn + .prepare_cached( + " + WITH customer_purchases AS ( + SELECT + p.customer_reference, + p.product_serial, + p.quantity, + p.price, + p.currency, + pr.category + FROM + purchases p + JOIN + products pr ON p.product_serial = pr.serial + WHERE + p.customer_reference = ? -- cust_ref_outer + ), + purchase_totals AS ( + SELECT + customer_reference, + SUM( + CASE + WHEN currency = 1 THEN price * ? + WHEN currency = 2 THEN price * ? 
+ ELSE price + END * quantity + ) AS money_spent, + SUM( + CASE + WHEN category = 0 THEN quantity + ELSE 0 + END + ) AS electronics, + SUM( + CASE + WHEN category = 1 THEN quantity + ELSE 0 + END + ) AS clothes, + SUM( + CASE + WHEN category = 2 THEN quantity + ELSE 0 + END + ) AS food + FROM + customer_purchases + GROUP BY + customer_reference + ) + SELECT + ct.customer_reference, + ct.money_spent, + ct.electronics, + ct.clothes, + ct.food + FROM + purchase_totals ct + JOIN + customers cc ON ct.customer_reference = cc.reference; + ", + ) + .unwrap() + .query_map(params![cust_ref_outer, usd_rate, btc_rate], |row| { + Ok(( + row.get(0)?, + row.get(1)?, + row.get(2)?, + row.get(3)?, + row.get(4)?, + )) + }) + .unwrap() + .collect::, _>>() + .unwrap(); + if res.is_empty() { + (0, 0.0, 0, 0, 0) + } else { + res[0] + } + } + + fn product_customers( + &self, + serial: usize, + btc_rate: f64, + usd_rate: f64, + ) -> Vec<(usize, u64, f64)> { + self.conn + .prepare_cached( + " + WITH filtered_purchases AS ( + SELECT + customer_reference, + quantity, + price, + currency + FROM + purchases + WHERE + product_serial = ? + ), + exchange_rates AS ( + SELECT + CASE + WHEN currency = 1 THEN ROUND(price * ?, 0) + WHEN currency = 2 THEN ROUND(price * ?, 0) + ELSE price + END * quantity AS total_spent, + customer_reference + FROM + filtered_purchases + ), + total_spent_by_customer AS ( + SELECT + customer_reference, + SUM(total_spent) AS total_spent + FROM + exchange_rates + GROUP BY + customer_reference + ) + SELECT + ? + AS product_serial, + customer_reference, + total_spent + FROM + total_spent_by_customer; + ", + ) + .unwrap() + .query_map(params![serial, usd_rate, btc_rate, serial], |row| { + Ok((row.get(0)?, row.get(1)?, row.get(2)?)) + }) + .unwrap() + .collect::, _>>() + .unwrap() + } + + fn category_sales(&self, btc_rate: f64, usd_rate: f64) -> Vec<(u8, f64)> { + self.conn + .prepare_cached( + " + WITH joined_data AS ( + SELECT + pr.category, + pu.quantity, + pu.price, + pu.currency, + CASE + WHEN pu.currency = 1 THEN ROUND(pu.price * ?, 0) + WHEN pu.currency = 2 THEN ROUND(pu.price * ?, 0) + ELSE pu.price + END * pu.quantity AS money + FROM + purchases pu + INNER JOIN + products pr ON pu.product_serial = pr.serial + ), + aggregated_data AS ( + SELECT + category, + SUM(money) AS total + FROM + joined_data + GROUP BY + category + ) + SELECT + category, + total + FROM + aggregated_data; + ", + ) + .unwrap() + .query_map(params![usd_rate, btc_rate], |row| { + Ok((row.get(0)?, row.get(1)?)) + }) + .unwrap() + .collect::, _>>() + .unwrap() + } +} diff --git a/bench/embedded_db_comparisons/src/user_details/duckdb_impl.rs b/bench/embedded_db_comparisons/src/user_details/duckdb_impl.rs new file mode 100644 index 0000000..0d26729 --- /dev/null +++ b/bench/embedded_db_comparisons/src/user_details/duckdb_impl.rs @@ -0,0 +1,156 @@ +use super::Database as _; +use duckdb::{params, Connection, OptionalExt}; + +pub struct DuckDB { + conn: Connection, +} + +pub struct Database<'imm> { + conn: &'imm mut Connection, +} + +impl super::user_details::Datastore for DuckDB { + type DB<'imm> = Database<'imm>; + type users_key = usize; + fn new() -> Self { + let conn = Connection::open_in_memory().unwrap(); + conn.execute_batch( + " + CREATE SEQUENCE user_ids START 1; + CREATE TABLE users ( + id BIGINT PRIMARY KEY DEFAULT NEXTVAL('user_ids'), + name VARCHAR NOT NULL, + premium BOOLEAN NOT NULL, + credits INTEGER NOT NULL, + + CONSTRAINT premcredits CHECK (premium OR credits >= 0) + ); + ", + ) + .unwrap(); + Self { conn } + } 
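+
+    // Note (added): `db` hands out a `Database<'imm>` handle borrowing the connection,
+    // mirroring the `Datastore`/`Database<'imm>` split of the emdb-generated interface,
+    // so one generic benchmark can drive every implementation.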
+ + fn db(&mut self) -> Self::DB<'_> { + Database { + conn: &mut self.conn, + } + } +} + +impl<'imm> super::user_details::Database<'imm> for Database<'imm> { + type Datastore = DuckDB; + fn new_user( + &mut self, + username: String, + prem: bool, + start_creds: Option, + ) -> usize { + self.conn + .prepare_cached( + "INSERT INTO users (name, premium, credits) VALUES (?, ?, ?) RETURNING id", + ) + .unwrap() + .query_row::<::users_key, _, _>( + params![username, prem, start_creds.unwrap_or(0)], + |row| row.get(0), + ) + .unwrap() + } + + fn get_info( + &self, + user_id: ::users_key, + ) -> Result<(usize, String, bool, i32), ()> { + self.conn + .prepare_cached("SELECT name, premium, credits FROM users WHERE id = ?") + .unwrap() + .query_row(params![user_id], |row| { + Ok((user_id, row.get(0)?, row.get(1)?, row.get(2)?)) + }) + .optional() + .unwrap().ok_or(()) + } + + fn get_snapshot(&self) -> Vec<(usize, String, bool, i32)> { + self.conn + .prepare_cached("SELECT id, name, premium, credits FROM users") + .unwrap() + .query_map(params![], |row| { + Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)) + }) + .unwrap() + .map(|row| row.unwrap()) + .collect() + } + + fn add_credits( + &mut self, + user: ::users_key, + creds: i32, + ) -> Result<(), ()> { + let rows = self + .conn + .prepare_cached("UPDATE users SET credits = credits + ? WHERE id = ?") + .unwrap() + .execute(params![creds, user]) + .unwrap(); + if rows == 0 { + Err(()) + } else { + Ok(()) + } + } + + fn reward_premium(&mut self, cred_bonus: f32) -> Result { + let trans = self.conn.transaction().unwrap(); + + let diff = { + let mut prem_creds_stat = trans + .prepare_cached("SELECT SUM(credits) FROM users WHERE premium = TRUE") + .unwrap(); + + let before: i64 = prem_creds_stat + .query_row([], |a| Ok(a.get(0))) + .unwrap() + .unwrap_or(0); + + trans + .prepare_cached("UPDATE users SET credits = credits * ? WHERE premium = TRUE") + .unwrap() + .execute(params![cred_bonus]) + .map_err(|_| ())?; + + let after: i64 = prem_creds_stat + .query_row([], |a| Ok(a.get(0))) + .unwrap() + .unwrap_or(0); + + after - before + }; + + trans.commit().unwrap(); + + Ok(diff) + } + + fn total_premium_credits(&self) -> i64 { + self.conn + .prepare_cached("SELECT SUM(credits) FROM users WHERE premium = TRUE") + .unwrap() + .query_row([], |a| Ok(a.get(0))) + .unwrap() + .unwrap_or(0) + } +} + +impl super::GetNewUserKey for DuckDB { + fn new_user_wrap( + db: &mut Self::DB<'_>, + username: String, + prem: bool, + start_creds: Option, + ) -> ::users_key { + db.new_user(username, prem, start_creds) + } +} diff --git a/bench/embedded_db_comparisons/src/user_details/mod.rs b/bench/embedded_db_comparisons/src/user_details/mod.rs new file mode 100644 index 0000000..f001a9e --- /dev/null +++ b/bench/embedded_db_comparisons/src/user_details/mod.rs @@ -0,0 +1,226 @@ +//! ## The motivating example for [`emdb`] +//! Supporting a complex OLTP workload. +//! - Just above the abstraction for a key-value store due to aggregations. + +use emdb::macros::emql; +use rand::{rngs::ThreadRng, seq::SliceRandom, Rng}; +use user_details::Database; + +emql! 
{ + impl user_details as Interface{ + pub = on, + }; + impl emdb_parallel_impl as Serialized{ + interface = user_details, + pub = on, + ds_name = EmDBParallel, + op_impl = Parallel, + }; + impl emdb_basic_impl as Serialized{ + interface = user_details, + pub = on, + ds_name = EmDBBasic, + op_impl = Basic, + }; + impl emdb_iter_impl as Serialized{ + interface = user_details, + pub = on, + ds_name = EmDBIter, + op_impl = Iter, + }; + impl emdb_chunk_impl as Serialized{ + interface = user_details, + pub = on, + ds_name = EmDBChunk, + op_impl = Chunk, + }; + + // Reasoning: + // - Constraint checking required, needs to fail immediately (hybrid IVM) + // - premium is immutable, and iterated over. So we can maintain a view of + // two tables for premium & non-premium users + // - Very simple table + table users { + name: String, + premium: bool, + credits: i32, + } @ [ + pred(*premium || *credits > 0) as prem_credits + ] + + // Description: + // Create a row, pipe to insert, insert returns gen_pk id + // Reasoning: + // - Needed for data insert, generation of id only occurs from here, + // hence we know the table alone determines id + // - Move semantics (taking ownership of data structure from outside the database) + query new_user(username: String, prem: bool, start_creds: Option) { + row(name: String = username, premium: bool = prem, credits: i32 = start_creds.unwrap_or(0) ) + ~> insert(users as ref user_id) + ~> return; + } + + // Description + // Get an individual user's data. + // Reasoning: + // - Performance reliant on access to users data structure + // hence need to make a good choice of mapping (user id -> data) here. + query get_info(user_id: ref users) { + row(it: ref users = user_id) + ~> deref(it as userdata) + ~> return; + } + + // Description: + // Get a snapshot of the entire users table state + // Reasoning: + // - We can collect the database to a single structure decided by the compiler. + // - This can be radically sped up by removing copying of the string (no row deletions, + // immutable attribute, return reference bound to lifetime of database). 
+ // - choosing a data structure for `users` table that is good for iteration + query get_snapshot() { + use users + |> collect(it as type user_t) + ~> return; + } + + // Description + // Update a given user's credits + // Reasoning: + // - Need to apply constraint immediately + // - Need to index data structure + // - Database can see only credits is updated + query add_credits(user: ref users, creds: i32) { + row(user_id: ref users = user) + ~> deref(user_id as user) + ~> update(user_id use credits = user.credits + creds); + } + + // Description: + // Apply multiplier bonus to premium users, and return the number of credits added + // Reasoning: + // - Applying function over a tight loop + // - Iteration advantage form splitting premium users & non-premium + // - can be inlined to very simple iterate over &mut and increment sum + query reward_premium(cred_bonus: f32) { + ref users as users_ref + |> deref(users_ref as it) + |> filter(*it.premium) + |> map(users_ref: ref users = users_ref, new_creds: i32 = ((it.credits as f32) * cred_bonus) as i32) + |> update(users_ref use credits = new_creds) + |> map(creds: i32 = new_creds) + |> fold(sum: i64 = 0 -> sum + creds as i64) + ~> return; + } + + // Description: + // Get the total number of credits in the premium table + // Reasoning: + // Easy IVM case, all updates & inserts just need to add difference to + // the view + query total_premium_credits() { + use users + |> filter(**premium) + |> map(credits: i64 = credits as i64) + |> fold(sum: i64 = 0 -> sum + credits) + ~> return; + } +} + +// Required to get new user keys for other queries +pub trait GetNewUserKey: user_details::Datastore { + fn new_user_wrap( + db: &mut Self::DB<'_>, + username: String, + prem: bool, + start_creds: Option, + ) -> ::users_key; +} + +impl GetNewUserKey for emdb_basic_impl::EmDBBasic { + fn new_user_wrap( + db: &mut Self::DB<'_>, + username: String, + prem: bool, + start_creds: Option, + ) -> ::users_key { + db.new_user(username, prem, start_creds).unwrap().user_id + } +} + +impl GetNewUserKey for emdb_parallel_impl::EmDBParallel { + fn new_user_wrap( + db: &mut Self::DB<'_>, + username: String, + prem: bool, + start_creds: Option, + ) -> ::users_key { + db.new_user(username, prem, start_creds).unwrap().user_id + } +} + +impl GetNewUserKey for emdb_iter_impl::EmDBIter { + fn new_user_wrap( + db: &mut Self::DB<'_>, + username: String, + prem: bool, + start_creds: Option, + ) -> ::users_key { + db.new_user(username, prem, start_creds).unwrap().user_id + } +} + +impl GetNewUserKey for emdb_chunk_impl::EmDBChunk { + fn new_user_wrap( + db: &mut Self::DB<'_>, + username: String, + prem: bool, + start_creds: Option, + ) -> ::users_key { + db.new_user(username, prem, start_creds).unwrap().user_id + } +} + +pub mod duckdb_impl; +pub mod sqlite_impl; + +pub fn random_user(rng: &mut ThreadRng, id: usize) -> (String, bool, Option) { + let prem = rng.gen_bool(0.5); + ( + format!("User{id}"), + prem, + if prem { + if rng.gen_bool(0.5) { + Some(rng.gen_range(2..100)) + } else { + None + } + } else { + Some(rng.gen_range(2..100)) + }, + ) +} + +pub fn random_table( +) -> (Vec, DS) { + let mut ds = DS::new(); + let mut ids; + { + let mut db = ds.db(); + let mut rng = rand::thread_rng(); + + ids = (0..SIZE) + .map(|i| { + let (user, prem, init) = random_user(&mut rng, i); + DS::new_user_wrap(&mut db, user, prem, init) + }) + .collect::>(); + ids.shuffle(&mut rng); + + for id in ids.iter() { + db.add_credits(*id, rng.gen_range(2..100)); + } + db.reward_premium(2f32); + } + (ids, 
ds) +} diff --git a/bench/embedded_db_comparisons/src/user_details/sqlite_impl.rs b/bench/embedded_db_comparisons/src/user_details/sqlite_impl.rs new file mode 100644 index 0000000..15cb8aa --- /dev/null +++ b/bench/embedded_db_comparisons/src/user_details/sqlite_impl.rs @@ -0,0 +1,171 @@ +use super::Database as _; +use rusqlite::{params, Connection, OptionalExtension}; + +pub struct SQLite { + conn: Connection, +} + +pub struct Database<'imm> { + conn: &'imm mut Connection, +} + +fn mod_sqlite_int(inp: i64) -> i32 { + inp as i32 +} + +impl super::user_details::Datastore for SQLite { + type DB<'imm> = Database<'imm>; + type users_key = usize; + fn new() -> Self { + let conn = Connection::open_in_memory().unwrap(); + conn.execute_batch( + " + CREATE TABLE users ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name VARCHAR NOT NULL, + premium BOOLEAN NOT NULL, + credits MEDIUMINT NOT NULL, + + CONSTRAINT premcredits CHECK (premium OR credits >= 0) + ); + ", + ) + .unwrap(); + Self { conn } + } + + fn db(&mut self) -> Self::DB<'_> { + Database { + conn: &mut self.conn, + } + } +} + +impl<'imm> super::user_details::Database<'imm> for Database<'imm> { + type Datastore = SQLite; + fn new_user( + &mut self, + username: String, + prem: bool, + start_creds: Option, + ) -> usize { + self.conn + .prepare_cached( + "INSERT INTO users (name, premium, credits) VALUES (?, ?, ?) RETURNING id", + ) + .unwrap() + .query_row::<::users_key, _, _>( + params![username, prem, start_creds.unwrap_or(0)], + |row| row.get(0), + ) + .unwrap() + } + + fn get_info( + &self, + user_id: ::users_key, + ) -> Result<(usize, String, bool, i32), ()> { + self.conn + .prepare_cached("SELECT name, premium, credits FROM users WHERE id = ?") + .unwrap() + .query_row(params![user_id], |row| { + Ok(( + user_id, + row.get(0)?, + row.get(1)?, + mod_sqlite_int(row.get(2)?), + )) + }) + .optional() + .unwrap().ok_or(()) + } + + fn get_snapshot(&self) -> Vec<(usize, String, bool, i32)> { + self.conn + .prepare_cached("SELECT id, name, premium, credits FROM users") + .unwrap() + .query_map(params![], |row| { + Ok(( + row.get(0)?, + row.get(1)?, + row.get(2)?, + mod_sqlite_int(row.get(3)?), + )) + }) + .unwrap() + .map(|row| row.unwrap()) + .collect() + } + + fn add_credits( + &mut self, + user: ::users_key, + creds: i32, + ) -> Result<(), ()> { + let rows = self + .conn + .prepare_cached("UPDATE users SET credits = credits + ? 
WHERE id = ?") + .unwrap() + .execute(params![creds, user]) + .unwrap(); + if rows == 0 { + Err(()) + } else { + Ok(()) + } + } + + fn reward_premium(&mut self, cred_bonus: f32) -> Result { + let trans = self.conn.transaction().unwrap(); + + let diff = { + let mut prem_creds_stat = trans + .prepare_cached("SELECT SUM(credits) FROM users WHERE premium = TRUE") + .unwrap(); + + let before: i64 = prem_creds_stat + .query_row([], |a| Ok(a.get(0))) + .unwrap() + .unwrap_or(0); + + trans + .prepare_cached( + "UPDATE users SET credits = ROUND(credits * ?, 0) WHERE premium = TRUE", + ) + .unwrap() + .execute(params![cred_bonus]) + .map_err(|_| ())?; + + let after: i64 = prem_creds_stat + .query_row([], |a| Ok(a.get(0))) + .unwrap() + .unwrap_or(0); + + after - before + }; + + trans.commit().unwrap(); + + Ok(diff) + } + + fn total_premium_credits(&self) -> i64 { + self.conn + .prepare_cached("SELECT SUM(credits) FROM users WHERE premium = TRUE") + .unwrap() + .query_row([], |a| Ok(a.get(0))) + .unwrap() + .unwrap_or(0) + } +} + +impl super::GetNewUserKey for SQLite { + fn new_user_wrap( + db: &mut Self::DB<'_>, + username: String, + prem: bool, + start_creds: Option, + ) -> ::users_key { + db.new_user(username, prem, start_creds) + } +} diff --git a/bench/embedded_db_comparisons/src/utils.rs b/bench/embedded_db_comparisons/src/utils.rs new file mode 100644 index 0000000..cdbd5f1 --- /dev/null +++ b/bench/embedded_db_comparisons/src/utils.rs @@ -0,0 +1,31 @@ +#[macro_export] +macro_rules! total { + ($($e:literal => $r:expr,)*) => { + 0 $( + $e)* + } +} +pub use total; + +#[macro_export] +macro_rules! choose_internal { + ($rng:ident $total:expr => $e:literal => $r:expr,) => { + $r + }; + ($rng:ident $total:expr => $e:literal => $r:expr, $($rest:tt)+) => { + if $rng.gen_ratio($e, $total) { + $r + } else { + choose_internal!($rng ($total - $e) => $($rest)+ ) + } + }; +} +pub use choose_internal; + +#[macro_export] +macro_rules! 
choose { + ($rng:ident $($inp:tt)*) => { + {choose_internal!{$rng (total!{$($inp)*}) => $($inp)*}} + } + +} +pub use choose; diff --git a/crates/Cargo.lock b/crates/Cargo.lock index 0d101b5..14cd35d 100644 --- a/crates/Cargo.lock +++ b/crates/Cargo.lock @@ -22,9 +22,15 @@ checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" [[package]] name = "anstyle" -version = "1.0.6" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" +checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" + +[[package]] +name = "assume" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d6f9ca11400f14ef046700eb6401c706c587871303453a5e7586efb82340c3d" [[package]] name = "autocfg" @@ -32,6 +38,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" +[[package]] +name = "bimap" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "230c5f1ca6a325a32553f8640d31ac9b49f2411e901e427570154868b46da4f7" + [[package]] name = "bitflags" version = "2.5.0" @@ -40,9 +52,9 @@ checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" [[package]] name = "cc" -version = "1.0.95" +version = "1.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d32a725bc159af97c3e629873bb9f88fb8cf8a4867175f76dc987815ea07c83b" +checksum = "065a29261d53ba54260972629f9ca6bffa69bac13cd1fed61420f7fa68b9f8bd" [[package]] name = "cfg-if" @@ -72,7 +84,7 @@ version = "0.1.0" source = "git+https://github.com/OliverKillane/chumsky-proc.git?branch=mitigation/avoiding-cargo-examples-bug#e27180035aa0da9f57cc3203734b4dcc1a7ea2bd" dependencies = [ "chumsky", - "proc-macro2 1.0.81", + "proc-macro2 1.0.83", ] [[package]] @@ -110,10 +122,10 @@ dependencies = [ "derive-where", "divan", "proc-macro-error", - "proc-macro2 1.0.81", + "proc-macro2 1.0.83", "quote 1.0.36", "rustc_version", - "syn 2.0.60", + "syn 2.0.65", ] [[package]] @@ -122,6 +134,31 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf0a07a401f374238ab8e2f11a104d2851bf9ce711ec69804834de8af45c7af" +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + [[package]] name = "derivative" version = "1.0.4" @@ -139,9 +176,9 @@ version = "1.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62d671cc41a825ebabc75757b62d3d168c577f9149b2d49ece1dad1f72119d25" dependencies = [ - "proc-macro2 1.0.81", + "proc-macro2 1.0.83", "quote 1.0.36", - "syn 2.0.60", + "syn 2.0.65", ] [[package]] @@ -163,9 +200,9 @@ name = "divan-macros" version = "0.1.14" source = 
"git+https://github.com/OliverKillane/divan.git?branch=enh/file-output#c54ac74a8b85e3862a4bcbaea9e08d9e5095caa5" dependencies = [ - "proc-macro2 1.0.81", + "proc-macro2 1.0.83", "quote 1.0.36", - "syn 2.0.60", + "syn 2.0.65", ] [[package]] @@ -183,19 +220,30 @@ checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2" [[package]] name = "emdb" version = "0.1.0" +dependencies = [ + "emdb_core", + "glob", + "minister", + "pulpit", + "trybuild", +] + +[[package]] +name = "emdb_core" +version = "0.1.0" dependencies = [ "combi", "dot", "enumtrait", - "glob", "itertools", "prettyplease", "proc-macro-error", - "proc-macro2 1.0.81", + "proc-macro2 1.0.83", + "pulpit", "quote 1.0.36", + "quote_debug", "rand", - "syn 2.0.60", - "trybuild", + "syn 2.0.65", "typed-arena", "typed-generational-arena", ] @@ -208,9 +256,9 @@ dependencies = [ "divan", "glob", "proc-macro-error", - "proc-macro2 1.0.81", + "proc-macro2 1.0.83", "quote 1.0.36", - "syn 2.0.60", + "syn 2.0.65", "trybuild", ] @@ -249,9 +297,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "hashbrown" -version = "0.14.3" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ "ahash", "allocator-api2", @@ -269,9 +317,9 @@ dependencies = [ [[package]] name = "itertools" -version = "0.12.1" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" dependencies = [ "either", ] @@ -284,9 +332,9 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "libc" -version = "0.2.153" +version = "0.2.154" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +checksum = "ae743338b92ff9146ce83992f766a31066a91a8c84a45e0e9f21e7cf6de6d346" [[package]] name = "linux-raw-sys" @@ -300,6 +348,14 @@ version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" +[[package]] +name = "minister" +version = "0.1.0" +dependencies = [ + "divan", + "rayon", +] + [[package]] name = "nonzero_ext" version = "0.1.5" @@ -333,8 +389,8 @@ version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ac2cf0f2e4f42b49f5ffd07dae8d746508ef7526c13940e5f524012ae6c6550" dependencies = [ - "proc-macro2 1.0.81", - "syn 2.0.60", + "proc-macro2 1.0.83", + "syn 2.0.65", ] [[package]] @@ -344,7 +400,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" dependencies = [ "proc-macro-error-attr", - "proc-macro2 1.0.81", + "proc-macro2 1.0.83", "quote 1.0.36", "syn 1.0.109", "version_check", @@ -356,7 +412,7 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" dependencies = [ - "proc-macro2 1.0.81", + "proc-macro2 1.0.83", "quote 1.0.36", "version_check", ] @@ -372,9 +428,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.81" +version = "1.0.83" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba" +checksum = "0b33eb56c327dec362a9e55b3ad14f9d2f0904fb5a5b03b513ab5465399e9f43" dependencies = [ "unicode-ident", ] @@ -388,6 +444,58 @@ dependencies = [ "cc", ] +[[package]] +name = "pulpit" +version = "0.1.0" +dependencies = [ + "assume", + "combi", + "divan", + "enumtrait", + "glob", + "proc-macro-error", + "proc-macro2 1.0.83", + "pulpit_gen", + "pulpit_macro", + "quote 1.0.36", + "syn 2.0.65", + "thunderdome", + "trybuild", + "typed-generational-arena", +] + +[[package]] +name = "pulpit_gen" +version = "0.1.0" +dependencies = [ + "bimap", + "combi", + "divan", + "enumtrait", + "glob", + "prettyplease", + "proc-macro-error", + "proc-macro2 1.0.83", + "quote 1.0.36", + "quote_debug", + "syn 2.0.65", + "trybuild", +] + +[[package]] +name = "pulpit_macro" +version = "0.1.0" +dependencies = [ + "divan", + "glob", + "proc-macro-error", + "proc-macro2 1.0.83", + "pulpit_gen", + "quote 1.0.36", + "syn 2.0.65", + "trybuild", +] + [[package]] name = "quote" version = "0.6.13" @@ -403,7 +511,16 @@ version = "1.0.36" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" dependencies = [ - "proc-macro2 1.0.81", + "proc-macro2 1.0.83", +] + +[[package]] +name = "quote_debug" +version = "0.1.0" +dependencies = [ + "proc-macro2 1.0.83", + "quote 1.0.36", + "syn 2.0.65", ] [[package]] @@ -436,6 +553,26 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "regex-lite" version = "0.1.5" @@ -478,22 +615,22 @@ checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca" [[package]] name = "serde" -version = "1.0.199" +version = "1.0.200" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c9f6e76df036c77cd94996771fb40db98187f096dd0b9af39c6c6e452ba966a" +checksum = "ddc6f9cc94d67c0e21aaf7eda3a010fd3af78ebf6e096aa6e2e13c79749cce4f" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.199" +version = "1.0.200" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11bd257a6541e141e42ca6d24ae26f7714887b47e89aa739099104c7e4d3b7fc" +checksum = "856f046b9400cee3c8c94ed572ecdb752444c24528c035cd35882aad6f492bcb" dependencies = [ - "proc-macro2 1.0.81", + "proc-macro2 1.0.83", "quote 1.0.36", - "syn 2.0.60", + "syn 2.0.65", ] [[package]] @@ -546,17 +683,17 @@ version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" dependencies = [ - "proc-macro2 1.0.81", + "proc-macro2 1.0.83", "unicode-ident", ] [[package]] name = "syn" -version = "2.0.60" +version = "2.0.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "909518bc7b1c9b779f1bbf07f2929d35af9f0f37e47c6e9ef7f9dddc1e1821f3" +checksum = "d2863d96a84c6439701d7a38f9de935ec562c8832cc55d1dde0f513b52fad106" dependencies = 
[
- "proc-macro2 1.0.81",
+ "proc-macro2 1.0.83",
  "quote 1.0.36",
  "unicode-ident",
 ]
@@ -580,6 +717,12 @@ dependencies = [
  "windows-sys 0.48.0",
 ]

+[[package]]
+name = "thunderdome"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "92e170f93360bf9ae6fe3c31116bbf27adb1d054cedd6bc3d7857e34f2d98d0b"
+
 [[package]]
 name = "toml"
 version = "0.8.12"
@@ -865,7 +1008,7 @@
 version = "0.7.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
 dependencies = [
- "proc-macro2 1.0.81",
+ "proc-macro2 1.0.83",
  "quote 1.0.36",
- "syn 2.0.60",
+ "syn 2.0.65",
 ]
diff --git a/crates/Cargo.toml b/crates/Cargo.toml
index 2e75602..117d3bc 100644
--- a/crates/Cargo.toml
+++ b/crates/Cargo.toml
@@ -1,8 +1,9 @@
 [workspace]
 resolver = "2"
-members = ["combi", "emdb", "enumtrait"]
+members = ["combi", "emdb", "emdb_core", "enumtrait", "pulpit", "pulpit_gen", "pulpit_macro", "quote_debug", "minister"]

 [workspace.package]
 homepage = "https://github.com/OliverKillane/emDB"
 repository = "https://github.com/OliverKillane/emDB"
 license-file = "LICENSE"
+
diff --git a/crates/README.md b/crates/README.md
index cd89103..86c4425 100644
--- a/crates/README.md
+++ b/crates/README.md
@@ -2,13 +2,72 @@ This workspace contains the usable libraries from emDB.

 ## Setup
+### Basic Development
 1. [Get rust](https://www.rust-lang.org/tools/install)
 2. Use [cargo](https://doc.rust-lang.org/cargo/) in this directory to build, test, benchmark and create docs
+3. Get an IDE supporting rust analyzer (vscode, [rustrover](https://www.jetbrains.com/rust/), clion, zed, nvim, etc.)

+### Additional Tools
+A basic toolchain is set up automatically when running `cargo` by [`./rust-toolchain.toml`](./rust-toolchain.toml); however, some useful tools to install in addition are...

+#### [`cargo expand`](https://github.com/dtolnay/cargo-expand)
+Expands procedural macros and outputs highlighted expansion to the terminal.
+```bash
+cd emdb
+cargo expand --test scratch
+```
+*Note: for single file expansion (e.g. of a macro_rules) we can also use `rustc -Zunpretty=expanded <file>.rs`*
+
+#### [`cargo asm`](https://github.com/pacak/cargo-show-asm)
+View intermediate results (mir, llvm, asm) generated by rustc. Allows easy scoping down to the level of individual functions.
+```bash
+# view the available objects in the dereferencing example
+cargo asm -p emdb --example dereferencing
+
+# for the 2nd object option
+cargo asm -p emdb --example dereferencing --mir 1 # view the MIR code
+cargo asm -p emdb --example dereferencing --mca 1 # view the llvm mca analysis
+```
+
+#### [`cargo flamegraph`](https://github.com/flamegraph-rs/flamegraph)
+Generates flame graphs using `perf`, from tests and benchmarks.
+```bash
+CARGO_PROFILE_BENCH_DEBUG=true cargo flamegraph -p combi --bench tokens
+```
+
+#### [`cargo kani`](https://github.com/model-checking/kani)
+`kani` is a bit-precise model checker using [CBMC](https://github.com/diffblue/cbmc). It can be used to verify memory
+safety, panic safety (e.g. on asserts), and other behaviour (e.g. arithmetic
+overflows).
+
+```bash
+cargo kani
+```
+
+`kani` is used to verify the correctness of unsafe code in this project. While kani
+is sound (no false negatives - `VERIFIED` means proved no errors), verification
+can take a long time, and it is not complete (it has false positives - it can fail to
+verify correct code).
+
+Furthermore, the coverage of proofs is important: `kani` only analyses the code that proofs actually exercise.
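+
+As a sketch of what a harness looks like (a hypothetical example, not a proof from this
+repository), a proof is an ordinary function marked for kani, with symbolic inputs:
+```rust
+#[cfg(kani)]
+#[kani::proof]
+fn add_in_range_never_overflows() {
+    let x: u8 = kani::any(); // symbolic value: all 256 cases are checked
+    let y: u8 = kani::any();
+    kani::assume(x < 100 && y < 100); // restrict to the domain of interest
+    assert!(x.checked_add(y).is_some()); // 99 + 99 = 198 <= u8::MAX
+}
+```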
+
+#### [`cargo pgo`](https://github.com/Kobzol/cargo-pgo)
+Profile-guided optimisation addon for cargo.
+
+## Develop
 ### Workspace
 All crates part of this project are contained in a single [cargo workspace](https://doc.rust-lang.org/book/ch14-03-cargo-workspaces.html).
- *Beware: The project relies on a [fork of divan](https://github.com/OliverKillane/divan) for benchmarks from outside this repo*
+```bash
+cargo test
+cargo bench
+```
+
+For test output from prints, and for getting panic backtraces, a handy command is
+```bash
+RUST_BACKTRACE=1 cargo test -- --nocapture
+```
+
 ### Lockfile
 [`Cargo.lock`](./Cargo.lock) is tracked by version control for reproducibility ([see this justification](https://doc.rust-lang.org/cargo/faq.html#why-have-cargolock-in-version-control)).
@@ -20,3 +79,7 @@ cargo doc --document-private-items # include private documentation

 If using vscode, the [live preview](vscode:extension/ms-vscode.live-server) can be used to view documentation built in the [target directory](../target/doc/emdb/).
+
+## Other Resources
+### [Rustonomicon](https://doc.rust-lang.org/nomicon/)
+### [Rust Performance Book](https://nnethercote.github.io/perf-book/introduction.html)
\ No newline at end of file
diff --git a/crates/combi/src/tokens/basic.rs b/crates/combi/src/tokens/basic.rs
index f11eb78..7099461 100644
--- a/crates/combi/src/tokens/basic.rs
+++ b/crates/combi/src/tokens/basic.rs
@@ -748,7 +748,7 @@ impl Combi for terminal {
     ) -> (Self::Out, CombiResult) {
         if let Some(tt) = input.next() {
             // NOTE: `a.join` returns None on Stable, and always Some on nightly.
-            let big_span = if cfg!(nightly) {
+            let big_span = if cfg!(feature = "nightly") {
                 // INV: On nightly the result of the join is always Some(..)
                 #[allow(clippy::unwrap_used)]
                 input
diff --git a/crates/combi/src/tokens/mod.rs b/crates/combi/src/tokens/mod.rs
index 75b0621..23c7cf6 100644
--- a/crates/combi/src/tokens/mod.rs
+++ b/crates/combi/src/tokens/mod.rs
@@ -12,6 +12,7 @@ pub mod basic;
 pub mod derived;
 pub mod error;
 pub mod matcher;
+pub mod options;
 pub mod recovery;

 /// A wrapper for [TokenStream] that allows for 1-token lookahead, and records the current and last [Span]s.
@@ -97,6 +98,10 @@ impl TokenDiagnostic {
         list.push_back(self.main);
         list
     }
+
+    pub fn from_list(mut list: LinkedList<Diagnostic>) -> Option<Self> {
+        list.pop_front().map(|main| Self { main, prev: list })
+    }
 }

 impl CombiErr for TokenDiagnostic {
diff --git a/crates/combi/src/tokens/options.rs b/crates/combi/src/tokens/options.rs
new file mode 100644
index 0000000..300ee1a
--- /dev/null
+++ b/crates/combi/src/tokens/options.rs
@@ -0,0 +1,233 @@
+//! A combi parser for parsing a structure out of order.
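+//!
+//! As a sketch of the intended usage (mirroring the test at the bottom of this
+//! file; the option names `foo`/`bar` are illustrative only): options are
+//! declared by chaining [`OptField`]s terminated with [`OptEnd`], and may then
+//! appear in the input in any order.
+//! ```text
+//! let parser = (
+//!     OptField::new("foo", || mapsuc(getident(), |_| true)),
+//!     (OptField::new("bar", || getident()), OptEnd),
+//! )
+//!     .gen(':');
+//! // both `foo: x, bar: y` and `bar: y, foo: x` parse successfully
+//! ```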
+
+use std::{
+    collections::{HashMap, LinkedList},
+    marker::PhantomData,
+};
+
+use crate::{
+    core::{choice, mapall, mapsuc, seq, seqdiff, DiffRes},
+    tokens::{
+        basic::{collectuntil, getident, gettoken, matchpunct, peekident, peekpunct, terminal},
+        derived::listseptrailing,
+        error::error,
+        TokenDiagnostic, TokenIter, TokenParser,
+    },
+    Combi, CombiResult,
+};
+use proc_macro2::{Span, TokenStream};
+use proc_macro_error::{Diagnostic, Level};
+use syn::Ident;
+
+pub trait OptParse: Sized {
+    type Curr;
+    type Rest;
+    type All;
+
+    fn construct(
+        self,
+        sep_tk: char,
+        prev: impl TokenParser<(Ident, TokenStream)>,
+    ) -> impl TokenParser<(Self::All, HashMap<Ident, TokenStream>)>;
+
+    fn error_key(&self, options: &mut Vec<&'static str>);
+
+    fn gen(self, sep_tk: char) -> impl TokenParser<Self::All> {
+        let mut options = Vec::new();
+        self.error_key(&mut options);
+        let options_available = options.join(", ");
+        let options_available2 = options_available.clone();
+        mapall(
+            self.construct(
+                sep_tk,
+                error(gettoken, move |t| {
+                    Diagnostic::spanned(
+                        t.span(),
+                        Level::Error,
+                        format!("Expected {options_available}"),
+                    )
+                }),
+            ),
+            move |(value, others)| {
+                let errors = others
+                    .into_keys()
+                    .map(|k| {
+                        Diagnostic::spanned(
+                            k.span(),
+                            Level::Error,
+                            format!("{k} is not available, must be one of: {options_available2}"),
+                        )
+                    })
+                    .collect::<LinkedList<_>>();
+                if errors.is_empty() {
+                    CombiResult::Suc(value)
+                } else {
+                    CombiResult::Err(
+                        TokenDiagnostic::from_list(errors).expect(
+                            "Non-empty, so at least one element, so we must have a diagnostic",
+                        ),
+                    )
+                }
+            },
+        )
+    }
+}
+
+pub struct OptEnd;
+
+impl OptParse for OptEnd {
+    type Curr = ();
+    type Rest = ();
+    type All = ();
+
+    fn construct(
+        self,
+        _sep_tk: char,
+        prev: impl TokenParser<(Ident, TokenStream)>,
+    ) -> impl TokenParser<(Self::All, HashMap<Ident, TokenStream>)> {
+        mapall(listseptrailing(',', prev), |values| {
+            let mut uniques: HashMap<Ident, TokenStream> = HashMap::new();
+            let mut errors = LinkedList::new();
+            for (key, value) in values {
+                if let Some((k2, _)) = uniques.get_key_value(&key) {
+                    errors.push_back(
+                        Diagnostic::spanned(
+                            key.span(),
+                            Level::Error,
+                            format!("Duplicate option `{key}`"),
+                        )
+                        .span_error(k2.span(), String::from("originally defined here")),
+                    )
+                } else {
+                    uniques.insert(key, value);
+                }
+            }
+            if errors.is_empty() {
+                CombiResult::Suc(((), uniques))
+            } else {
+                CombiResult::Err(
+                    TokenDiagnostic::from_list(errors)
+                        .expect("Non-empty, so at least one element, so we must have a diagnostic"),
+                )
+            }
+        })
+    }
+
+    fn error_key(&self, _options: &mut Vec<&'static str>) {}
+}
+
+pub struct OptField<O, P: TokenParser<O>, F: Fn() -> P> {
+    name: &'static str,
+    parser: F,
+    phantom: PhantomData<O>,
+}
+
+impl<O, P: TokenParser<O>, F: Fn() -> P> OptField<O, P, F> {
+    pub fn new(name: &'static str, parser: F) -> Self {
+        Self {
+            name,
+            parser,
+            phantom: PhantomData,
+        }
+    }
+}
+
+impl<O, P: TokenParser<O>, R: OptParse, F: Fn() -> P> OptParse for (OptField<O, P, F>, R) {
+    type Curr = O;
+    type Rest = R::All;
+    type All = (Option<O>, R::All);
+
+    fn construct(
+        self,
+        sep_tk: char,
+        prev: impl TokenParser<(Ident, TokenStream)>,
+    ) -> impl TokenParser<(Self::All, HashMap<Ident, TokenStream>)> {
+        let (
+            OptField {
+                name,
+                parser,
+                phantom: _,
+            },
+            rest,
+        ) = self;
+
+        mapall(
+            rest.construct(
+                sep_tk,
+                choice(
+                    peekident(name),
+                    seq(
+                        mapsuc(seq(getident(), matchpunct(sep_tk)), |(k, _)| k),
+                        collectuntil(peekpunct(',')),
+                    ),
+                    prev,
+                ),
+            ),
+            move |(rest, mut uniques)| {
+                if let Some((key, _)) = uniques.get_key_value(&Ident::new(name, Span::call_site()))
+                {
+                    let key = key.clone();
+                    let val = uniques
+                        .remove(&key)
+                        .expect("Key used for access was already taken from the map");

+                    match (seqdiff(parser(), terminal)).comp(TokenIter::from(val, key.span())) {
+                        (DiffRes::First(_), CombiResult::Suc(_)) => {
+                            unreachable!("Would pass to second")
+                        } // TODO: find nicer way around this from combi
+                        (DiffRes::Second(()), CombiResult::Suc((val, ()))) => {
+                            CombiResult::Suc(((Some(val), rest), uniques))
+                        }
+                        (DiffRes::First(_), CombiResult::Con(c)) => CombiResult::Con(c),
+                        (DiffRes::First(_), CombiResult::Err(e)) => CombiResult::Err(e),
+                        (DiffRes::Second(()), CombiResult::Con(c)) => CombiResult::Con(c),
+                        (DiffRes::Second(()), CombiResult::Err(e)) => CombiResult::Err(e),
+                    }
+                } else {
+                    CombiResult::Suc(((None, rest), uniques))
+                }
+            },
+        )
+    }
+
+    fn error_key(&self, options: &mut Vec<&'static str>) {
+        options.push(self.0.name);
+        self.1.error_key(options);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use quote::quote;
+
+    #[test]
+    fn basic_parse() {
+        let config_opts = (
+            OptField::new("foo", || mapsuc(getident(), |_| true)),
+            (OptField::new("bar", || getident()), OptEnd),
+        )
+            .gen(':');

+        let input1 = quote! {
+            foo: foo,
+            bar: bar,
+        };

+        let input2 = quote! {
+            bar: bar,
+            foo: foo,
+        };

+        let (_, (_, ())) = config_opts
+            .comp(TokenIter::from(input1, Span::call_site()))
+            .1
+            .to_result()
+            .unwrap();
+        let (_, (_, ())) = config_opts
+            .comp(TokenIter::from(input2, Span::call_site()))
+            .1
+            .to_result()
+            .unwrap();
+    }
+}
diff --git a/crates/emdb/Cargo.toml b/crates/emdb/Cargo.toml
index bb08c1d..20f57a4 100644
--- a/crates/emdb/Cargo.toml
+++ b/crates/emdb/Cargo.toml
@@ -12,23 +12,11 @@ repository.workspace = true
 homepage.workspace = true
 license-file.workspace = true

-[lib]
-proc-macro = true
-
 [dependencies]
-prettyplease = "0.2"
-syn = { version = "2.0.45", features = ["full", "extra-traits"] }
-typed-arena = "2.0.2"
-typed-generational-arena = "0.2"
-proc-macro2 = { version = "1.0" }
-proc-macro-error = "1.0.4"
-quote = "1.0.33"
-rand = "0.8"
-combi = { path = "../combi" }
-enumtrait = { path = "../enumtrait" }
-dot = "0.1.4"
-itertools = "0.12.1"
+emdb_core = { path = "../emdb_core" }
+pulpit = { path = "../pulpit" }
+minister = { path = "../minister" }

 [dev-dependencies]
 trybuild = "1.0.91"
-glob = "0.3.1"
\ No newline at end of file
+glob = "0.3.1"
diff --git a/crates/emdb/README.md b/crates/emdb/README.md
index 295b8e2..5b33cfe 100644
--- a/crates/emdb/README.md
+++ b/crates/emdb/README.md
@@ -8,30 +8,37 @@ The main `emdb` library for using the emdb project.
    [dependencies]
    emdb = ...
    ```
-2. Use the `emQL` macro to describe your schema and queries. Normal rust expressions and types can be embedded, and errors are propagated to `rustc` and your IDE.
+2. Use the `emQL` macro to describe your schema and queries. Normal rust expressions and types can be embedded, and errors are propagated to `rustc` and your IDE. Then add your own code to interact!
    ```rust
-   use emdb::emql;
+   #![allow(unused_variables)]
+   use emdb::macros::emql;
-   enum RGB { Red, Blue, Green }
+   #[allow(dead_code)]
+   #[derive(Debug, Clone, Copy)]
+   enum RGB {
+       Red,
+       Blue,
+       Green,
+   }
    emql! {
-       impl People as Simple;
+       impl my_db as Serialized;

        table people {
            name: String,
            age: u8,
-           fav: super::RGB,
+           fav: crate::RGB,
            score: i32,
        } @ [
-           unique(name) as unique_names,
-           pred(age < 100 && age > 10) as reasonable_ages
+           unique(name) as unique_names,
+           pred(*age < 100 && *age > 10) as reasonable_ages
        ]

-       query add_new_person(name: String, age: u8, fav: super::RGB) {
+       query add_new_person(name: String, age: u8, fav: crate::RGB) {
            row(
-               name: String = name,
-               age: u8 = age,
-               fav: super::RGB = fav,
+               name: String = name,
+               age: u8 = age,
+               fav: crate::RGB = fav,
                score: i32 = 0
            )
            ~> insert(people as ref name)
@@ -55,25 +62,45 @@ The main `emdb` library for using the emdb project.
        query update_scores(person: ref people, diff: i32) {
            row(p: ref people = person)
                ~> deref(p as person)
-               ~> update(p use score = score + diff)
+               ~> update(p use score = person.score + diff)
                ~> map(score: i32 = person.score)
                ~> return;
        }
+
+       query remove_the_elderly(age_cuttoff: u8) {
+           ref people as person
+               |> deref(person as p)
+               |> filter(*p.age > age_cuttoff)
+               |> delete(person);
+       }
    }
-   ```
-3. Use your database in normal rust.
-   ```rust
+
+   #[test]
    fn foo() {
-       let mut db = People::DB::new();
+       let mut ds = my_db::Datastore::new();
+       let mut db = ds.db();
+
+       let bob = db.add_new_person(String::from("Bob"), 23, RGB::Red).expect("empty database").name;
+       assert!(db.year_passes().is_ok(), "correct age updating code");
+       let jim = db.add_new_person(String::from("Jim"), 99, RGB::Blue).expect("name different from Bob").name;
+       db.update_scores(bob, 300).expect("Bob is still in the db");
+
+       assert!(db.remove_the_elderly(50).is_ok(), "correct dereferencing emql code");
+       assert!(db.update_scores(jim, 3).is_err(), "Jim was removed by the age cutoff");
-       let bob_ref = db.add_new_person(
-           String::from("bob"), 24, RGB::Red
-       ).unwrap();
-
-       db.year_passes().unwrap();
+       // add a bunch more users
+       assert!(db.add_new_person(String::from("Mike"), 34, RGB::Blue).is_ok());
+       assert!(db.add_new_person(String::from("Mike"), 47, RGB::Red).is_err(), "added Mike twice");
+       assert!(db.add_new_person(String::from("Steven"), 200, RGB::Red).is_err(), "Steven is clearly lying");
+       assert!(db.add_new_person(String::from("Alex"), 50, RGB::Green).is_ok());
-       let bob_old_score = db.update_scores(bob_ref, 23).unwrap();
+       for user in db.get_top_scorers(3).expect("Note: the 'use' keyword is a 'ref -> deref' so can error").p.into_iter() {
+           println!("{}: {}, {}, {:?}", user.name, user.score, user.age, user.fav);
+       }
    }
+
+   fn main() {}
    ```
+3. Enjoy the therapeutic benefits of type-safe, performant code.

 *See more in [examples](./examples/)*
diff --git a/crates/emdb/examples/basic/nothing.rs b/crates/emdb/examples/basic/nothing.rs
deleted file mode 100644
index ec4f5d7..0000000
--- a/crates/emdb/examples/basic/nothing.rs
+++ /dev/null
@@ -1,8 +0,0 @@
-#![allow(unused_variables)]
-use emdb::emql;
-
-emql! {
-    impl my_db as SemCheck;
-}
-
-fn main() {}
\ No newline at end of file
diff --git a/crates/emdb/examples/basic/simple_all.rs b/crates/emdb/examples/basic/simple_all.rs
deleted file mode 100644
index 36a85fb..0000000
--- a/crates/emdb/examples/basic/simple_all.rs
+++ /dev/null
@@ -1,40 +0,0 @@
-#![allow(unused_variables)]
-use emdb::emql;
-
-emql!
{ - impl my_db as SemCheck; - - table simple { - a: i32, - b: String, - c: (u32, i32), - } @ [unique(a), pred(c.0 > c.1) as c_predicate, pred(b.len() < 10) as b_length] - - // cool comment here - query insert(a_initial: i32) { - row(a: i32 = a_initial, b: String = "hello".to_string(), c: (u32, i32) = (0, 0)) - ~> insert(simple as ref it) - ~> return; - } - - table other {} @ [pred(1 + 1 == 2)] - - query update_b(new_b: String) { - ref simple as simple_ref - |> update(simple_ref use b = new_b) - |> collect(it as type foo) - ~> return; - } - - query single_maths() { - row(a: i32 = 0, b: i32 = 2) - ~> map(c: i32 = a + b) - ~> let x; - - use x - ~> map(z: i32 = c*c) - ~> return; - } -} - -fn main() {} \ No newline at end of file diff --git a/crates/emdb/examples/basic/small_table.rs b/crates/emdb/examples/basic/small_table.rs deleted file mode 100644 index 00b062e..0000000 --- a/crates/emdb/examples/basic/small_table.rs +++ /dev/null @@ -1,14 +0,0 @@ -#![allow(unused_variables)] -use emdb::emql; - -emql! { - impl my_db as SemCheck; - - table simple { - a: i32, - b: String, - c: (u32, i32), - } -} - -fn main() {} \ No newline at end of file diff --git a/crates/emdb/examples/complex/foreach.rs b/crates/emdb/examples/complex/foreach.rs deleted file mode 100644 index 2007596..0000000 --- a/crates/emdb/examples/complex/foreach.rs +++ /dev/null @@ -1,35 +0,0 @@ -#![allow(unused_variables)] -use emdb::emql; - -emql! { - impl my_db as SemCheck; - - table customers { - forename: String, - surname: String, - age: u8, - bonus_points: i32, - } @ [ pred(age < 256) as sensible_ages ] - - table family_bonus { - surname: String, - bonus: i32 - } @ [ unique(surname) as unique_surnames_cons ] - - query customer_age_brackets() { - ref customers as ref_cust - |> deref(ref_cust as person) - |> update(ref_cust use bonus_points = person.bonus_points + 1) - |> foreach(let customer in { - use customer - ~> map(surname: String = person.surname) - ~> unique(surname for family_bonus.surname as ref family_ref) - ~> deref(family_ref as family) - ~> update(family_ref use bonus = family.bonus + 1); - - row() ~> return; // void return - }); - } -} - -fn main() {} diff --git a/crates/emdb/examples/complex/groupby.rs b/crates/emdb/examples/complex/groupby.rs deleted file mode 100644 index 2b058db..0000000 --- a/crates/emdb/examples/complex/groupby.rs +++ /dev/null @@ -1,27 +0,0 @@ -#![allow(unused_variables)] -use emdb::emql; - -emql! { - impl my_db as SemCheck; - - table customers { - forename: String, - surname: String, - age: u8, - } @ [pred(age < 256) as sensible_ages] - - query customer_age_brackets() { - use customers - |> groupby(age for let people in { - use people - |> collect(people as type age_group) - ~> map(age_bracket: u8 = age, group: type age_group = people) - ~> return; - }) - |> filter(age_bracket > 16) - |> collect(brackets) - ~> return; - } -} - -fn main() {} \ No newline at end of file diff --git a/crates/emdb/examples/complex/joins.rs b/crates/emdb/examples/complex/joins.rs deleted file mode 100644 index 2c65c94..0000000 --- a/crates/emdb/examples/complex/joins.rs +++ /dev/null @@ -1,30 +0,0 @@ -#![allow(unused_variables)] -use emdb::emql; - -emql! 
{ - impl my_db as SemCheck; - - table people { - name: String, - friend: Option, - } - - query get_friendships() { - use people |> fork(let person, friend); - - join(use person [ - inner pred { - if let Some(friend_name) = &left.friend { - friend_name == &right.name - } else { - false - } - } - ] use friend) - |> map(peep: String = person.name, buddy: String = friend.name) - |> collect(friends as type friendship) - ~> return; - } -} - -fn main() {} diff --git a/crates/emdb/examples/simple/people.rs b/crates/emdb/examples/simple/people.rs deleted file mode 100644 index fe4d488..0000000 --- a/crates/emdb/examples/simple/people.rs +++ /dev/null @@ -1,58 +0,0 @@ -#![allow(unused_variables)] -use emdb::emql; - -#[allow(dead_code)] -enum RGB { - Red, - Blue, - Green, -} - -emql! { - impl my_db as SemCheck; - - table people { - name: String, - age: u8, - fav: super::RGB, - score: i32, - } @ [ - unique(name) as unique_names, - pred(age < 100 && age > 10) as reasonable_ages - ] - - query add_new_person(name: String, age: u8, fav: super::RGB) { - row( - name: String = name, - age: u8 = age, - fav: super::RGB = fav, - score: i32 = 0 - ) - ~> insert(people as ref name) - ~> return; - } - - query year_passes() { - ref people as p - |> deref(p as person) - |> update(p use score = person.score + 1); - } - - query get_top_scorers(top_n: usize) { - use people - |> sort(score asc) - |> take(top_n) - |> collect(p as type person) - ~> return; - } - - query update_scores(person: ref people, diff: i32) { - row(p: ref people = person) - ~> deref(p as person) - ~> update(p use score = person.score + diff) - ~> map(score: i32 = person.score) - ~> return; - } -} - -fn main() {} \ No newline at end of file diff --git a/crates/emdb/src/analysis/interface/asserts.rs b/crates/emdb/src/analysis/interface/asserts.rs deleted file mode 100644 index 76f4def..0000000 --- a/crates/emdb/src/analysis/interface/asserts.rs +++ /dev/null @@ -1,6 +0,0 @@ -//! ## Checking Types -//! Generates required static assertions (e.g. for types) -//! - [crate::plan::GroupBy], [crate::plan::Unique] require inputs that are hashable and comparable. -//! - [crate::plan::Sort] requires comparable fields. - -// use crate::plan; diff --git a/crates/emdb/src/analysis/interface/contexts.rs b/crates/emdb/src/analysis/interface/contexts.rs deleted file mode 100644 index fb5cd6e..0000000 --- a/crates/emdb/src/analysis/interface/contexts.rs +++ /dev/null @@ -1,379 +0,0 @@ -//! ## Context Closure Generation -//! Generate the closures required for a context, for use by operators. -//! ``` -//! fn my_query(param1: i32) { -//! // allow user's expressions to capture from query parameters -//! let (op1_closure, op2_closure) = ( -//! | some_input: i32 | { some_input + param1 }, -//! | some_input: i32 | { some_input % 2 == 0 } -//! ); -//! -//! // use generated closures in the operators -//! // ... -//! } -//! ``` - -use super::names::ItemNamer; -use crate::plan; -use proc_macro2::{Ident, TokenStream}; -use quote::{quote, ToTokens}; -use syn::Expr; - -pub struct ClosureArgs<'a> { - pub params: Vec<(&'a Ident, Ident)>, - pub value: ClosureValue, -} - -pub struct ClosureValue { - pub expression: TokenStream, - pub datatype: TokenStream, -} - -impl ClosureValue { - fn empty() -> Self { - Self { - expression: quote! {()}, - datatype: quote! 
{()}, - } - } -} - -#[enumtrait::store(trans_operator_trait)] -trait OperatorClosures { - fn gen_closure( - &self, - self_key: plan::Key, - lp: &plan::Plan, - ) -> Option { - None - } -} - -pub fn trans_context( - lp: &plan::Plan, - ctx_key: plan::Key, -) -> ClosureArgs<'_> { - let ctx = lp.get_context(ctx_key); - - let mut expressions = Vec::new(); - let mut data_types = Vec::new(); - for ClosureValue { - expression, - datatype, - } in ctx - .ordering - .iter() - .filter_map(|op_key| lp.get_operator(*op_key).gen_closure::(*op_key, lp)) - { - expressions.push(expression); - data_types.push(datatype); - } - - ClosureArgs { - params: ctx - .params - .iter() - .map(|(id, ty_idx)| (id, Namer::scalar_type(*ty_idx))) - .collect(), - value: ClosureValue { - expression: quote! { ( #(#expressions ,)* ) }, - datatype: quote! { ( #(#data_types ,)* ) }, - }, - } -} - -#[enumtrait::impl_trait(trans_operator_trait for plan::operator_enum)] -impl OperatorClosures for plan::Operator {} - -impl OperatorClosures for plan::UniqueRef {} -impl OperatorClosures for plan::ScanRefs {} -impl OperatorClosures for plan::DeRef {} -impl OperatorClosures for plan::Insert {} -impl OperatorClosures for plan::Expand {} -impl OperatorClosures for plan::Delete {} -impl OperatorClosures for plan::Sort {} -impl OperatorClosures for plan::Collect {} -impl OperatorClosures for plan::Fork {} -impl OperatorClosures for plan::Union {} -impl OperatorClosures for plan::Return {} -impl OperatorClosures for plan::Discard {} - -impl OperatorClosures for plan::Update { - fn gen_closure( - &self, - self_key: plan::Key, - lp: &plan::Plan, - ) -> Option { - let (closure_expression, rec_out_ident) = - mapping_expr::(lp, self.update_type, self.mapping.iter()); - Some(single_expr::( - lp, - self_key, - self.input, - closure_expression, - rec_out_ident.into_token_stream(), - )) - } -} -impl OperatorClosures for plan::Fold { - fn gen_closure( - &self, - self_key: plan::Key, - lp: &plan::Plan, - ) -> Option { - let (initial_values, rec_return) = mapto_dataflow::( - lp, - self.output, - self.fold_fields.iter().map(|(rf, ff)| (rf, &ff.initial)), - ); - - let (update_field, rec_return2) = mapto_dataflow::( - lp, - self.output, - self.fold_fields.iter().map(|(rf, ff)| (rf, &ff.update)), - ); - let (update_using_previous, rec_return3) = - dataflow_closure::(lp, self.output, update_field); - let (update_using_input, input_type) = - dataflow_closure::(lp, self.input, update_using_previous); - - assert_eq!( - rec_return, rec_return2, - "Return type of initial and update fields must be the same" - ); - assert_eq!( - rec_return, rec_return3, - "Return type of initial and update fields must be the same" - ); - - Some(ClosureValue { - expression: quote! { - (#initial_values, #update_using_input) - }, - datatype: quote! 
{ - (#rec_return, impl Fn(#input_type) -> (impl Fn(#rec_return) -> #rec_return)) - }, - }) - } -} -impl OperatorClosures for plan::Map { - fn gen_closure( - &self, - self_key: plan::Key, - lp: &plan::Plan, - ) -> Option { - let (closure_expression, rec_out_ident) = - mapto_dataflow::(lp, self.output, self.mapping.iter().map(|(f, e)| (f, e))); - Some(single_expr::( - lp, - self_key, - self.input, - closure_expression, - rec_out_ident.into_token_stream(), - )) - } -} -impl OperatorClosures for plan::Filter { - fn gen_closure( - &self, - self_key: plan::Key, - lp: &plan::Plan, - ) -> Option { - Some(single_expr::( - lp, - self_key, - self.input, - self.predicate.to_token_stream(), - quote!(bool), - )) - } -} -impl OperatorClosures for plan::Assert { - fn gen_closure( - &self, - self_key: plan::Key, - lp: &plan::Plan, - ) -> Option { - Some(single_expr::( - lp, - self_key, - self.input, - self.assert.to_token_stream(), - quote!(bool), - )) - } -} -impl OperatorClosures for plan::Take { - fn gen_closure( - &self, - self_key: plan::Key, - lp: &plan::Plan, - ) -> Option { - Some(single_expr::( - lp, - self_key, - self.input, - self.top_n.to_token_stream(), - quote!(usize), - )) - } -} -impl OperatorClosures for plan::GroupBy { - fn gen_closure( - &self, - self_key: plan::Key, - lp: &plan::Plan, - ) -> Option { - Some(context_namer::(lp, self_key, self.inner_ctx)) - } -} -impl OperatorClosures for plan::ForEach { - fn gen_closure( - &self, - self_key: plan::Key, - lp: &plan::Plan, - ) -> Option { - Some(context_namer::(lp, self_key, self.inner_ctx)) - } -} -impl OperatorClosures for plan::Join { - fn gen_closure( - &self, - self_key: plan::Key, - lp: &plan::Plan, - ) -> Option { - if let plan::MatchKind::Pred(pred) = &self.match_kind { - let left_t = Namer::record_type(lp.get_dataflow(self.left).get_conn().with.fields); - let right_t = Namer::record_type(lp.get_dataflow(self.right).get_conn().with.fields); - Some(ClosureValue { - expression: quote! { - move | left: &#left_t , right: &#right_t | { - let result: bool = #pred; - result - } - }, - datatype: quote! {impl Fn(&#left_t, &#right_t) -> bool}, - }) - } else { - None - } - } -} -impl OperatorClosures for plan::Row { - fn gen_closure( - &self, - self_key: plan::Key, - lp: &plan::Plan, - ) -> Option { - let (closure_expression, rec_out_ident) = - mapto_dataflow::(lp, self.output, self.fields.iter().map(|(f, e)| (f, e))); - Some(ClosureValue { - expression: closure_expression, - datatype: rec_out_ident.into_token_stream(), - }) - } -} - -/// Given a dataflow as arguments, produce a closure that returns a type -fn single_expr( - lp: &plan::Plan, - op: plan::Key, - df: plan::Key, - expr: TokenStream, - out_type: TokenStream, -) -> ClosureValue { - let (closure, in_type) = - dataflow_closure::(lp, df, quote! { let result: #out_type = {#expr}; result }); - ClosureValue { - expression: closure, - datatype: quote! { Fn(&#in_type) -> #out_type }, - } -} - -fn mapping_expr<'a, Namer: ItemNamer>( - lp: &'a plan::Plan, - output: plan::Key, - mapping: impl Iterator, -) -> (TokenStream, Ident) { - let mut expressions = Vec::new(); - let mut fields = Vec::new(); - - let data_types = &lp.get_record_type_conc(output).fields; - let rec_out_ident = Namer::record_type(output); - - for (field, expr) in mapping { - let expr_typename = Namer::scalar_type(data_types[field]); - let field_name = Namer::record_field(field); - expressions.push(quote! { let #field_name: #expr_typename = #expr ; }); - fields.push(field_name); - } - - ( - quote! 
{ - { #(#expressions )* - #rec_out_ident { #(#fields),* } } - }, - rec_out_ident, - ) -} - -/// Create a mapping to an output field from the data output required. -/// - `output` is the dataflow to output to, and thus the fields for the expressions to assign to. -fn mapto_dataflow<'a, Namer: ItemNamer>( - lp: &'a plan::Plan, - output: plan::Key, - mapping: impl Iterator, -) -> (TokenStream, Ident) { - let rec_out = lp.get_dataflow(output).get_conn().with.fields; - mapping_expr::(lp, rec_out, mapping) -} - -/// Convert a context from an operator (e.g. [`plan::GroupBy`] or [`plan::ForEach`]) into a closure. -fn context_namer( - lp: &plan::Plan, - op: plan::Key, - ctx: plan::Key, -) -> ClosureValue { - let ClosureArgs { - params, - value: ClosureValue { - expression, - datatype, - }, - } = trans_context::(lp, ctx); - let params_tokens: Vec<_> = params.iter().map(|(id, ty)| quote! { #id : #ty }).collect(); - let inp_types: Vec<_> = params.iter().map(|(_, ty)| quote! { #ty }).collect(); - - ClosureValue { - expression: quote! { move | #(#params_tokens , )* | { #expression } }, - datatype: quote! { Fn( #(#inp_types ,)* ) -> ( #datatype ) }, - } -} - -/// Generate a closure using a provided dataflow as input. Returns the identifier for the input type. -fn dataflow_closure( - lp: &plan::Plan, - df_in: plan::Key, - inner: TokenStream, -) -> (TokenStream, Ident) { - let record_key = lp.get_dataflow(df_in).get_conn().with.fields; - let record_type = Namer::record_type(record_key); - let params: Vec<_> = lp - .get_record_type_conc(lp.get_dataflow(df_in).get_conn().with.fields) - .fields - .iter() - .map(|(field_id, ty_idx)| { - let id = Namer::record_field(field_id); - quote! { #id } - }) - .collect(); - - ( - quote! { - move | #record_type { #(#params ),* } | { - #inner - } - }, - record_type, - ) -} diff --git a/crates/emdb/src/analysis/interface/mod.rs b/crates/emdb/src/analysis/interface/mod.rs deleted file mode 100644 index 8345cbe..0000000 --- a/crates/emdb/src/analysis/interface/mod.rs +++ /dev/null @@ -1,30 +0,0 @@ -//! Generates the types and expressions for the user to interact through. -//! - generating closures for user provided expressions that capture -//! [`crate::plan::Query`] parameters -//! - generating the type definitions to use in queries. -//! -//! ```ignore -//! -//! mod my_impl { -//! type Scalar0 = ...; -//! struct Record0 = ...; -//! -//! struct Table = ...; -//! -//! pub struct DB { -//! table1: Table, -//! table2: Table, -//! } -//! -//! impl DB { -//! pub fn query_name(&self, params) -> () { -//! -//! } -//! } -//! } -//! ``` - -pub mod asserts; -pub mod contexts; -pub mod names; -pub mod types; diff --git a/crates/emdb/src/analysis/interface/names.rs b/crates/emdb/src/analysis/interface/names.rs deleted file mode 100644 index e1a6288..0000000 --- a/crates/emdb/src/analysis/interface/names.rs +++ /dev/null @@ -1,69 +0,0 @@ -use crate::plan; -use proc_macro2::{Ident, Span}; - -/// A stateless simple name translator. -/// - Does not require a plan reference to work -/// - Passed as a generic parameter -pub trait ItemNamer { - fn record_type(key: plan::Key) -> Ident; - fn record_field(rf: &plan::RecordField) -> Ident; - - fn scalar_type(key: plan::Key) -> Ident; - - fn table(key: plan::Key) -> Ident; - fn table_ref(key: plan::Key) -> Ident; - - fn context(key: plan::Key) -> Ident; - fn context_pattern(key: plan::Key) -> Ident; - - fn operator(key: plan::Key) -> Ident; - fn operator_pattern(key: plan::Key) -> Ident; -} - -/// A simple [`ItemNamer`] implementation. 
-pub struct SimpleNamer; - -fn name(id: plan::Key, prefix: &str) -> Ident { - Ident::new(&format!("{}{}", prefix, id.to_idx()), Span::call_site()) -} - -impl ItemNamer for SimpleNamer { - fn record_type(key: plan::Key) -> Ident { - name(key, "RecordType") - } - - fn scalar_type(key: plan::Key) -> Ident { - name(key, "ScalarType") - } - - fn table(key: plan::Key) -> Ident { - name(key, "Table") - } - - fn table_ref(key: plan::Key) -> Ident { - name(key, "TableRef") - } - - fn record_field(rf: &plan::RecordField) -> Ident { - match rf { - plan::RecordField::User(i) => i.clone(), - plan::RecordField::Internal(i) => { - Ident::new(&format!("recordfield_internal_id_{i}"), Span::call_site()) - } - } - } - - fn context(key: plan::Key) -> Ident { - name(key, "Context") - } - fn context_pattern(key: plan::Key) -> Ident { - name(key, "ContextPattern") - } - - fn operator(key: plan::Key) -> Ident { - name(key, "Operator") - } - fn operator_pattern(key: plan::Key) -> Ident { - name(key, "OperatorPattern") - } -} diff --git a/crates/emdb/src/analysis/interface/types.rs b/crates/emdb/src/analysis/interface/types.rs deleted file mode 100644 index 1224b4e..0000000 --- a/crates/emdb/src/analysis/interface/types.rs +++ /dev/null @@ -1,218 +0,0 @@ -//! ## Type Definition Generation -//! Each type is given an alias based on its name using the [`ItemNamer`] trait. -//! The definitions of these aliases are produced by the [`TypeImplementor`] trait. -//! -//! [`SimpleTypeImplementor`] is a basic implementation. -use std::{collections::HashSet, marker::PhantomData}; - -use super::names::ItemNamer; -use crate::plan; - -use proc_macro2::{Ident, TokenStream}; -use quote::{quote, ToTokens}; - -/// A conveinent abstraction to apply transformations to types, and uses `type` -/// aliases to define references to types. -pub trait TypeImplementor { - type Namer: ItemNamer; - - fn translate_scalar( - &self, - name: Ident, - key: plan::Key, - scalar: &plan::ScalarTypeConc, - ) -> TokenStream; - - fn translate_record( - &self, - name: Ident, - key: plan::Key, - record: &plan::RecordConc, - ) -> TokenStream; - - fn translate_table_ref(&self, table: plan::Key) -> TokenStream; - - fn trans_scalar_type( - &self, - ty_key: plan::Key, - ty: &plan::ScalarType, - ) -> TokenStream { - let self_ty = Self::Namer::scalar_type(ty_key); - match ty { - plan::ConcRef::Conc(scalar) => self.translate_scalar(self_ty, ty_key, scalar), - plan::ConcRef::Ref(ref_ty) => { - let ref_name = Self::Namer::scalar_type(*ref_ty); - quote! { - type #self_ty = #ref_name; - } - } - } - } - - fn trans_record_type( - &self, - ty_key: plan::Key, - ty: &plan::RecordType, - ) -> TokenStream { - let self_ty = Self::Namer::record_type(ty_key); - match ty { - plan::ConcRef::Conc(record) => self.translate_record(self_ty, ty_key, record), - plan::ConcRef::Ref(ref_ty) => { - let ref_name = Self::Namer::record_type(*ref_ty); - quote! 
{ - type #self_ty = #ref_name; - } - } - } - } - - fn translate_all_types(&self, lp: &plan::Plan) -> TokenStream { - let mut tks = TokenStream::new(); - tks.extend( - lp.record_types - .iter() - .map(|(key, ty)| self.trans_record_type(key, ty)), - ); - tks.extend( - lp.scalar_types - .iter() - .map(|(key, ty)| self.trans_scalar_type(key, ty)), - ); - tks.extend( - lp.tables - .iter() - .map(|(key, _)| self.translate_table_ref(key)), - ); - tks - } -} - -/// We discard the generation number when tracking indexes from an immutable plan -type KeyIdx = usize; - -/// A basic [`TypeImplementor`], generates public types for parameters and -/// returns. -pub struct SimpleTypeImplementor { - namer: PhantomData, - public_records: HashSet, -} - -fn add_public_record( - lp: &plan::Plan, - set: &mut HashSet, - location: plan::Key, -) { - match lp.get_record_type(location) { - plan::ConcRef::Conc(rec) => { - set.insert(location.to_idx()); - for scalar in rec.fields.values() { - add_public_scalar(lp, set, *scalar) - } - } - plan::ConcRef::Ref(inner) => add_public_record(lp, set, *inner), - } -} - -fn add_public_scalar( - lp: &plan::Plan, - set: &mut HashSet, - location: plan::Key, -) { - match lp.get_scalar_type(location) { - plan::ConcRef::Conc(scalar) => { - if let plan::ScalarTypeConc::Record(r) = scalar { - add_public_record(lp, set, *r) - } - } - plan::ConcRef::Ref(inner) => add_public_scalar(lp, set, *inner), - } -} - -impl SimpleTypeImplementor { - pub fn with_public_types(lp: &plan::Plan) -> Self { - let mut public_records = HashSet::new(); - - for (_, query) in &lp.queries { - let context = lp.get_context(query.ctx); - if let Some(ret_op) = context.returnflow { - let rec_idx = lp - .get_dataflow(lp.get_operator(ret_op).get_return().input) - .get_conn() - .with - .fields; - add_public_record(lp, &mut public_records, rec_idx); - } - for (_, param) in &context.params { - add_public_scalar(lp, &mut public_records, *param); - } - } - - Self { - namer: PhantomData, - public_records, - } - } -} - -impl TypeImplementor for SimpleTypeImplementor { - type Namer = Namer; - - fn translate_scalar( - &self, - name: Ident, - key: plan::Key, - scalar: &plan::ScalarTypeConc, - ) -> TokenStream { - let set_to = match scalar { - plan::ScalarTypeConc::TableRef(t) => Self::Namer::table_ref(*t).to_token_stream(), - plan::ScalarTypeConc::Bag(b) => quote! {()}, - plan::ScalarTypeConc::Record(r) => Self::Namer::record_type(*r).to_token_stream(), - plan::ScalarTypeConc::Rust(ty) => ty.to_token_stream(), - }; - quote! { - type #name = #set_to; - } - } - - fn translate_record( - &self, - name: Ident, - key: plan::Key, - record: &plan::RecordConc, - ) -> TokenStream { - let vis = if self.public_records.contains(&key.to_idx()) { - quote!(pub) - } else { - quote!() - }; - - let fields: Vec<_> = record - .fields - .iter() - .map(|(rf, ty)| { - let rf_id = Self::Namer::record_field(rf); - let ty_id = Self::Namer::scalar_type(*ty); - let rf_pre = if let plan::RecordField::User(_) = rf { - quote!(#vis #rf_id) - } else { - rf_id.into_token_stream() - }; - quote! { #rf_pre: #ty_id } - }) - .collect(); - - quote! { - #vis struct #name { - #(#fields , )* - } - } - } - - fn translate_table_ref(&self, table: plan::Key) -> TokenStream { - let table_name = Self::Namer::table_ref(table); - quote! 
{ - /// Reference to the table - pub struct #table_name {} - } - } -} diff --git a/crates/emdb/src/analysis/mutability/mod.rs b/crates/emdb/src/analysis/mutability/mod.rs deleted file mode 100644 index 7c45d5c..0000000 --- a/crates/emdb/src/analysis/mutability/mod.rs +++ /dev/null @@ -1,87 +0,0 @@ -//! # Mutability Analysis -//! Determining how tables are updated in order to alter the access. - -use crate::plan; -use std::collections::HashMap; - -#[derive(Default)] -struct ColMut { - read: bool, - write: bool, -} - -#[derive(Default)] -struct TableMut<'a> { - insert: bool, - delete: bool, - reference: bool, - per_col: HashMap<&'a plan::RecordField, ColMut>, -} - -type TableAssocKey = usize; - -struct Mutability<'a> { - tables: HashMap>, -} - -impl<'a> Mutability<'a> { - fn from_plan<'b>(lp: &'b plan::Plan) -> Mutability<'b> { - let mut tables: HashMap> = lp - .tables - .iter() - .map(|(key, table)| { - ( - key.to_idx(), - TableMut { - insert: false, - delete: false, - reference: false, - per_col: table - .columns - .keys() - .map(|id| { - ( - id, - ColMut { - read: false, - write: false, - }, - ) - }) - .collect(), - }, - ) - }) - .collect(); - - for (_, op) in lp.operators.iter() { - match op { - plan::Operator::Insert(plan::Insert { table, .. }) => { - tables.get_mut(&table.to_idx()).unwrap().insert = true; - } - plan::Operator::Delete(plan::Delete { table, .. }) => { - tables.get_mut(&table.to_idx()).unwrap().delete = true; - } - plan::Operator::UniqueRef(plan::UniqueRef { table, .. }) - | plan::Operator::ScanRefs(plan::ScanRefs { table, .. }) => { - tables.get_mut(&table.to_idx()).unwrap().reference = true; - } - plan::Operator::Update(plan::Update { table, mapping, .. }) => { - let tablemut = tables.get_mut(&table.to_idx()).unwrap(); - for col in mapping.keys() { - tablemut.per_col.get_mut(col).unwrap().write = true; - } - } - plan::Operator::DeRef(plan::DeRef { table, .. }) => { - let tablemut = tables.get_mut(&table.to_idx()).unwrap(); - for (_, m) in tablemut.per_col.iter_mut() { - m.read = true; - } - } - _ => (), - } - } - - Mutability { tables } - } -} diff --git a/crates/emdb/src/backend/semcheck/mod.rs b/crates/emdb/src/backend/semcheck/mod.rs deleted file mode 100644 index 71909e1..0000000 --- a/crates/emdb/src/backend/semcheck/mod.rs +++ /dev/null @@ -1,119 +0,0 @@ -//! Forwards rust expressions from the plan in order to check their code, when no backend impl is needed. -//! - Can be used for debugging. -//! - less costly, can run with no optimisers. -//! 
- useful for tests with no artifacts - -use std::{collections::LinkedList, fs::File, io::Write, path::Path}; - -use crate::{ - analysis::interface::{names::SimpleNamer, types::{SimpleTypeImplementor, TypeImplementor}}, utils::misc::singlelist -}; - -use proc_macro2::TokenStream; -use crate::{analysis::interface::{contexts::{trans_context, ClosureArgs}, names::{ItemNamer}}, plan}; - -use super::EMDBBackend; -use combi::{core::{mapsuc, seq, setrepr}, seqs, tokens::{basic::{collectuntil, isempty, matchident, matchpunct, syn}, error::expectederr, TokenDiagnostic, TokenIter}, Combi}; -use proc_macro_error::{Diagnostic, Level}; -use syn::{parse2, File as SynFile, LitStr}; -use quote::quote; -use prettyplease::unparse; - -pub struct SemCheck { - debug: Option -} - -impl EMDBBackend for SemCheck { - const NAME: &'static str = "SemCheck"; - - fn parse_options( - backend_name: &syn::Ident, - options: Option, - ) -> Result> { - if let Some(opts) = options { - let parser = expectederr(mapsuc(seqs!( - matchident("debug_file"), - matchpunct('='), - setrepr(syn(collectuntil(isempty())), "") - ), |(_, (_, file))| Self { debug: Some(file)})); - let (_, res) = parser.comp(TokenIter::from(opts, backend_name.span())); - res.to_result().map_err(TokenDiagnostic::into_list) - } else { - Ok(Self { debug: None}) - } - } - - fn generate_code( - self, - impl_name: syn::Ident, - plan: &crate::plan::Plan, - ) -> Result> { - let ty_impl = SimpleTypeImplementor::::with_public_types(plan); - let types_preamble = ty_impl.translate_all_types(plan); - let queries = translate_all_queries(plan); - let tks = quote! { - mod #impl_name { - #![allow(unused_variables)] - #![allow(dead_code)] - - #types_preamble - #queries - } - }; - - if let Some(debug_path) = self.debug { - debug_output(&debug_path, tks.clone())? - } - - Ok(tks) - } -} - -fn debug_output(debug_path: &LitStr, tks: TokenStream) -> Result<(), LinkedList> { - match parse2::(tks) { - Ok(m) => { - match File::create(Path::new(&debug_path.value())) { - Ok(mut f) => { - match f.write_all(unparse(&m).as_bytes()) { - Ok(()) => Ok(()), - Err(e) => Err(singlelist(Diagnostic::spanned(debug_path.span(), Level::Error, format!("Could not write to file: {e}")))), - } - }, - Err(e) => Err(singlelist(Diagnostic::spanned(debug_path.span(), Level::Error, format!("Could not create file: {e}")))) - } - }, - Err(e) => Err(singlelist(Diagnostic::spanned(debug_path.span(), Level::Error, format!("Could not parse code as file: {e}")))), - } -} - -fn translate_all_queries(lp: &plan::Plan) -> TokenStream { - lp.queries.iter().map(|(key, query)| translate_query(lp, key, query)).collect() -} - -fn translate_query(lp: &plan::Plan, qk: plan::Key, query: &plan::Query) -> TokenStream { - let ClosureArgs { params, value } = trans_context::(lp, query.ctx); - - let query_params = params.iter().map(|(id, ty)| { - quote! { #id: #ty } - }); - let query_name = &query.name; - let query_closure_gen = value.expression; - let query_closure_type = value.datatype; - - let return_type = if let Some(ret_op) = lp.get_context(query.ctx).returnflow { - let ret = lp.get_operator(ret_op).get_return(); - let ret_type = SimpleNamer::record_type(lp.get_dataflow(ret.input).get_conn().with.fields); - quote! 
{ -> #ret_type } - } else { - quote!() - }; - - quote!{ - /// this is a function - pub fn #query_name(#(#query_params ,)*) #return_type { - let closures = #query_closure_gen ; - - todo!() - } - } -} \ No newline at end of file diff --git a/crates/emdb/src/backend/simple/mod.rs b/crates/emdb/src/backend/simple/mod.rs deleted file mode 100644 index c36f8b9..0000000 --- a/crates/emdb/src/backend/simple/mod.rs +++ /dev/null @@ -1,24 +0,0 @@ -//! A simple reference backend using basic volcano operators - -use super::EMDBBackend; -mod ops; -pub struct Simple{} - -impl EMDBBackend for Simple { - const NAME: &'static str = "Simple"; - - fn parse_options( - backend_name: &syn::Ident, - options: Option, - ) -> Result> { - todo!() - } - - fn generate_code( - self, - impl_name: syn::Ident, - plan: &crate::plan::Plan, - ) -> Result> { - todo!() - } -} diff --git a/crates/emdb/src/backend/simple/ops.rs b/crates/emdb/src/backend/simple/ops.rs deleted file mode 100644 index 0bb18ec..0000000 --- a/crates/emdb/src/backend/simple/ops.rs +++ /dev/null @@ -1,29 +0,0 @@ -// //! -// //! - -// trait Buffer { -// type Elems; -// } - - -// trait SingleOperator { -// type Output; -// fn pull_row(&mut self) -> Self::Output; -// } - -// trait BlockingOperator { -// type Output; -// fn pull_buffer(&mut self) -> Buffer; -// } - -// struct MyThing { -// inner: () -// } - -// impl MyThing { -// fn foo(&mut self, arg1: i32) -> () { - -// } -// } - - diff --git a/crates/emdb/src/frontend/emql/operators/op_fork.rs b/crates/emdb/src/frontend/emql/operators/op_fork.rs deleted file mode 100644 index a773228..0000000 --- a/crates/emdb/src/frontend/emql/operators/op_fork.rs +++ /dev/null @@ -1,87 +0,0 @@ -use combi::tokens::derived::listsep; - -use super::*; - -#[derive(Debug)] -pub struct Fork { - call: Ident, - vars: Vec, -} - -impl EMQLOperator for Fork { - const NAME: &'static str = "fork"; - - fn build_parser(ctx_recur: ContextRecurHandle) -> impl TokenParser { - mapsuc( - functional_style(Self::NAME, - seq( - matchident("let"), - listsep(',', setrepr(getident(), "")) - ) - ), - |(call, (_, vars))| Fork {call, vars} - ) - } - - fn build_logical( - self, - lp: &mut plan::Plan, - tn: &HashMap>, - vs: &mut HashMap, - ts: &mut HashMap>, - op_ctx: plan::Key, - cont: Option, - ) -> Result> { - let Self { call, vars } = self; - if let Some(cont) = cont { - let mut errors = LinkedList::new(); - for var in vars.iter() { - if let Some(varstate) = vs.get(var) { - errors.push_back(match varstate { - VarState::Used { created, used } => { - errors::query_let_variable_already_assigned( - var, - *created, - Some(*used), - ) - } - VarState::Available { created, state } => { - errors::query_let_variable_already_assigned(var, *created, None) - } - }) - } - } - - if errors.is_empty() { - let var_edges: Vec> = vars.into_iter().map( - |var| { - let out_edge = lp.dataflow.insert(plan::DataFlow::Null); - vs.insert(var.clone(), VarState::Available { - created: var.span(), - state: Continue { data_type: cont.data_type.clone(), prev_edge: out_edge, last_span: call.span() } - }); - out_edge - } - ).collect(); - - let fork_op = lp.operators.insert( - plan::Fork { input: cont.prev_edge, outputs: var_edges.clone() }.into(), - ); - - for edge in var_edges { - *lp.get_mut_dataflow(edge) = plan::DataFlow::Incomplete { from: fork_op, with: cont.data_type.clone() } - } - - update_incomplete(lp.get_mut_dataflow(cont.prev_edge), fork_op); - lp.get_mut_context(op_ctx).add_operator(fork_op); - - Ok(StreamContext::Nothing { last_span: call.span() }) - - } else { - 
Err(errors) - } - } else { - Err(singlelist(errors::query_cannot_start_with_operator(&call))) - } - } -} \ No newline at end of file diff --git a/crates/emdb/src/lib.rs b/crates/emdb/src/lib.rs index 246c40f..ea25fbc 100644 --- a/crates/emdb/src/lib.rs +++ b/crates/emdb/src/lib.rs @@ -1,72 +1,13 @@ -// #![warn(clippy::pedantic)] -// #![allow(clippy::linkedlist)] -// linked lists used for quick merging of errors lists, and are only iterated over for fast-escape failure case -#![allow(dead_code)] -#![allow(unused_variables)] -use std::collections::LinkedList; - -// TODO: check readme by including as documentation (needs readme to compile first) - -use proc_macro::TokenStream; -use proc_macro2::TokenStream as TokenStream2; -use proc_macro_error::proc_macro_error; -use quote::quote; - -extern crate proc_macro; - -mod analysis; -mod backend; -mod frontend; -mod optimise; -mod plan; -mod utils; - -fn make_impl(tk: TokenStream) -> TokenStream { - match F::from_tokens(TokenStream2::from(tk)) { - Err(ds) => { - for d in ds { - d.emit(); - } - TokenStream::new() - } - Ok((lp, bks)) => { - let mut errors = LinkedList::new(); - let impls = bks - .impls - .into_iter() - .filter_map( - |(id, backend)| match backend::generate_code(backend, id, &lp) { - Ok(code) => Some(code), - Err(mut e) => { - errors.append(&mut e); - None - } - }, - ) - .collect::>(); - - for e in errors { - e.emit(); - } - - proc_macro::TokenStream::from(quote! { - #(#impls)* - }) - } - } +#![doc = include_str!("../README.md")] + +pub use emdb_core as macros; + +/// The dependencies used by code generated by the emdb macros +/// - Needs to be exported from emdb so the library can support the code it +/// generates +/// - Cannot export both proc-macros and normal items from a proc-macro crate, +/// hence this separation between [`emdb`](crate) and [`emdb_core`](macros) +pub mod dependencies { + pub use minister; + pub use pulpit; } - -macro_rules! create_frontend { - ($frontend:ident as $implement:path => $($t:tt)*) => { - $($t)* - #[proc_macro_error] - #[proc_macro] - pub fn $frontend(tk: TokenStream) -> TokenStream { - make_impl::<$implement>(tk) - } - }; -} - -create_frontend!(emql as frontend::Emql => - /// The `emql` language frontend for [emdb](crate). 
-); diff --git a/crates/emdb/src/utils/misc.rs b/crates/emdb/src/utils/misc.rs deleted file mode 100644 index 54db3fd..0000000 --- a/crates/emdb/src/utils/misc.rs +++ /dev/null @@ -1,17 +0,0 @@ -use std::collections::LinkedList; - -pub(crate) fn singlelist(item: T) -> LinkedList { - let mut list = LinkedList::new(); - list.push_back(item); - list -} - -pub(crate) fn result_to_opt(res: Result, errs: &mut LinkedList) -> Option { - match res { - Ok(o) => Some(o), - Err(e) => { - errs.push_back(e); - None - } - } -} diff --git a/crates/emdb/src/utils/mod.rs b/crates/emdb/src/utils/mod.rs deleted file mode 100644 index 40536e8..0000000 --- a/crates/emdb/src/utils/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -pub(crate) mod choose; -pub(crate) mod conster; -pub(crate) mod misc; diff --git a/crates/emdb/tests/compile_tests.rs b/crates/emdb/tests/compile_tests.rs deleted file mode 100644 index bf44353..0000000 --- a/crates/emdb/tests/compile_tests.rs +++ /dev/null @@ -1,19 +0,0 @@ -use glob::glob; -use trybuild::TestCases; - -#[test] -fn should_compile() { - let t = TestCases::new(); - for entry in glob("examples/**/*.rs").unwrap() { - t.pass(entry.unwrap()); - } -} - -#[test] -fn should_fail() { - let t = TestCases::new(); - - for entry in glob("tests/invalid/**/*.rs").unwrap() { - t.compile_fail(entry.unwrap()); - } -} diff --git a/crates/emdb/tests/emql.rs b/crates/emdb/tests/emql.rs new file mode 100644 index 0000000..9d0a21d --- /dev/null +++ b/crates/emdb/tests/emql.rs @@ -0,0 +1,55 @@ +use glob::glob; +use trybuild::TestCases; + +/// Test compilation failure, and the error messages produced match expected +#[test] +fn should_fail() { + let t = TestCases::new(); + for entry in glob("tests/invalid/**/*.rs").unwrap() { + t.compile_fail(entry.unwrap()); + } +} + +/// Overcomplicated macro because importing each separately is boring + IDE picks +/// it up nicely +macro_rules! valid_tests { + ( $($section:ident { $($test:ident),+ } ),+ ) => { + mod valid; + $( + mod $section { + $( + #[test] + fn $test() { + super::valid::$section::$test::test(); + } + )+ + } + )+ + }; +} + +valid_tests!( + // complex { + // favourite_colours, + // dereferencing, + // userdetails, + // sales_analytics + // }, + context { + lift_stream, + lift_single, + groupby + }, + extreme { + empty_emql, + empty_items, + just_maths + }, + simple { + no_errors, + basic_join, + limited_table, + sums, + counts + } +); diff --git a/crates/emdb/tests/invalid/shadowing_groupby.rs b/crates/emdb/tests/invalid/shadowing_groupby.rs new file mode 100644 index 0000000..6873eee --- /dev/null +++ b/crates/emdb/tests/invalid/shadowing_groupby.rs @@ -0,0 +1,18 @@ +use emdb::macros::emql; + +emql!{ + table foos { + key: i32, + val: i32, + } + + query shadow_time() { + use foos + |> groupby(key for let foos /* cannot shadow table here! */ in { + row() + ~> return; + }); + } +} + +fn main() {} \ No newline at end of file diff --git a/crates/emdb/tests/invalid/shadowing_groupby.stderr b/crates/emdb/tests/invalid/shadowing_groupby.stderr new file mode 100644 index 0000000..6f2160d --- /dev/null +++ b/crates/emdb/tests/invalid/shadowing_groupby.stderr @@ -0,0 +1,11 @@ +error: [EMQL-55] variables created by let cannot shadow tables, but `foos` does + --> tests/invalid/shadowing_groupby.rs:11:36 + | +11 | |> groupby(key for let foos /* cannot shadow table here! 
*/ in { + | ^^^^ + | +note: Table defined here + --> tests/invalid/shadowing_groupby.rs:4:11 + | +4 | table foos { + | ^^^^ diff --git a/crates/emdb/tests/invalid/simple_bad_type.rs b/crates/emdb/tests/invalid/simple_bad_type.rs index 4a44d8e..35aafb8 100644 --- a/crates/emdb/tests/invalid/simple_bad_type.rs +++ b/crates/emdb/tests/invalid/simple_bad_type.rs @@ -1,4 +1,4 @@ -use emdb::emql; +use emdb::macros::emql; emql! { diff --git a/crates/emdb/tests/invalid/simple_wrong_backend.rs b/crates/emdb/tests/invalid/simple_wrong_backend.rs index 5137c36..233ddd1 100644 --- a/crates/emdb/tests/invalid/simple_wrong_backend.rs +++ b/crates/emdb/tests/invalid/simple_wrong_backend.rs @@ -1,4 +1,4 @@ -use emdb::emql; +use emdb::macros::emql; emql! { impl nonexistent_backend as repeated_impl; diff --git a/crates/emdb/tests/invalid/simple_wrong_backend.stderr b/crates/emdb/tests/invalid/simple_wrong_backend.stderr index 2deb163..30fcee1 100644 --- a/crates/emdb/tests/invalid/simple_wrong_backend.stderr +++ b/crates/emdb/tests/invalid/simple_wrong_backend.stderr @@ -4,7 +4,7 @@ error: No such backend `repeated_impl` 4 | impl nonexistent_backend as repeated_impl; | ^^^^^^^^^^^^^ | - = help: Available backends are: PlanViz, SemCheck, Simple + = help: Available backends are: PlanViz, Serialized, Interface error: No such backend `repeated_impl` --> tests/invalid/simple_wrong_backend.rs:5:35 @@ -12,4 +12,4 @@ error: No such backend `repeated_impl` 5 | impl other_missing_backend as repeated_impl; | ^^^^^^^^^^^^^ | - = help: Available backends are: PlanViz, SemCheck, Simple + = help: Available backends are: PlanViz, Serialized, Interface diff --git a/crates/emdb/tests/scratch.rs b/crates/emdb/tests/scratch.rs index 5bcc079..90faa3e 100644 --- a/crates/emdb/tests/scratch.rs +++ b/crates/emdb/tests/scratch.rs @@ -1,38 +1,81 @@ +#![allow(dead_code, unused_variables)] //! For manually debugging generated code. //! //! - Ensure that the proc macro is built. In vscode on the bottom bar you can //! hover over `rust-analyzer` and click `Rebuild Proc Macros` //! - Saving this file should re-run the emql macro, to generate outputs. -#![allow(unreachable_code)] -use emdb::emql; +use emdb::macros::emql; + +#[derive(Debug, Clone, Copy)] +enum RGB { + Red, + Blue, + Green, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum LogLevel { + Error, + Warning, + Info, +} emql! { - impl debug_code as SemCheck{debug_file = "emdb/tests/debug/code.rs"}; + // impl my_interface as Interface{ + // traits_with_db = { }, + // }; + // impl my_db as Serialized{ + // // debug_file = "emdb/tests/code.rs", + // // interface = my_interface, + // // pub = on, + // ds_name = EmDBDebug, + // // aggressive_inlining = on, + // }; + // impl code_display as PlanViz{ + // path = "emdb/tests/debug/code.dot", + // types = off, + // ctx = on, + // control = on, + // }; - // Use the vscode dots view to see preview update live on save - impl debug_graph as PlanViz{path = "emdb/tests/debug/graph.dot", display_types = on, display_ctx_ops = on, display_control = on}; + impl my_db as Serialized { + // debug_file = "emdb/tests/code.rs", + // op_impl = Parallel, + // table_select = Thunderdome, + }; - // write query to check here! 
table customers { forename: String, surname: String, age: u8, - } @ [ pred(age < 256) as sensible_ages ] + } @ [pred(*age < 255) as sensible_ages] query customer_age_brackets() { use customers |> groupby(age for let people in { use people |> collect(people as type age_group) - ~> map(age_bracket: u8 = age, group: type age_group = people) + ~> map(age_bracket: u8 = *age, group: type age_group = people) ~> return; }) - |> filter(age_bracket > 16) - |> collect(brackets as type brackets) + |> filter(*age_bracket > 16) + |> collect(brackets) + ~> return; + } + + query new_customer(forename: &str, surname: &str, age: u8) { + row( + forename: String = String::from(forename), + surname: String = String::from(surname), + age: u8 = age + ) + ~> insert(customers as ref name) ~> return; } } fn main() { - debug_code::customer_age_brackets(); + // use my_interface::Datastore; + let mut ds = my_db::Datastore::new(); + let db = ds.db(); } diff --git a/crates/emdb/tests/valid/complex/data_logs.rs b/crates/emdb/tests/valid/complex/data_logs.rs new file mode 100644 index 0000000..e69de29 diff --git a/crates/emdb/examples/complex/dereferencing.rs b/crates/emdb/tests/valid/complex/dereferencing.rs similarity index 65% rename from crates/emdb/examples/complex/dereferencing.rs rename to crates/emdb/tests/valid/complex/dereferencing.rs index 7822e3d..7e52724 100644 --- a/crates/emdb/examples/complex/dereferencing.rs +++ b/crates/emdb/tests/valid/complex/dereferencing.rs @@ -1,9 +1,9 @@ #![allow(unused_variables)] -use emdb::emql; +use emdb::macros::emql; emql! { - impl my_db as SemCheck; - + impl my_db as Serialized; + table cool { name: String, something: i32, @@ -44,20 +44,33 @@ emql! { query complex() { use cool |> map(x: usize = name.len()) - |> filter(x > 10) + |> filter(*x > 10) |> let larger_than_10; use larger_than_10 |> fork(let x1, x2); - use x1 + use x1 |> take(1) - |> collect(it) + |> collect(it) ~> return; - use x2 + use x2 |> sort(x desc); } } -fn main() {} \ No newline at end of file +pub fn test() { + let mut ds = my_db::Datastore::new(); + let mut db = ds.db(); + + for name in &["a", "b", "c"] { + let _: my_db::tables::cool::Key = db.new_cool(name.to_string()).expect("unique names").it; + } + + let top_10_value = db.collect_most_cool().expect("Correct dereferencing"); + let (c_val, top_cools): (i32, Vec) = (top_10_value.c, top_10_value.blah.into_iter().map(|v| v.id).collect()); + + let _ = db.get_cool(top_cools[0]).expect("Correct dereferencing").score; + db.update_cool(top_cools[0]).expect("Correct key"); +} diff --git a/crates/emdb/tests/valid/complex/favourite_colours.rs b/crates/emdb/tests/valid/complex/favourite_colours.rs new file mode 100644 index 0000000..f620ac4 --- /dev/null +++ b/crates/emdb/tests/valid/complex/favourite_colours.rs @@ -0,0 +1,123 @@ +#![allow(unused_variables)] +use emdb::macros::emql; + +#[allow(dead_code)] +#[derive(Debug, Clone, Copy)] +enum RGB { + Red, + Blue, + Green, +} + +emql! 
{
+    impl my_db as Serialized;
+
+    table people {
+        name: String,
+        age: u8,
+        fav: crate::valid::complex::favourite_colours::RGB,
+        score: i32,
+    } @ [
+        unique(name) as unique_names,
+        pred(*age < 100 && *age > 10) as reasonable_ages
+    ]
+
+    query add_new_person(name: String, age: u8, fav: super::RGB) {
+        row(
+            name: String = name,
+            age: u8 = age,
+            fav: super::RGB = fav,
+            score: i32 = 0
+        )
+            ~> insert(people as ref name)
+            ~> return;
+    }
+
+    query year_passes() {
+        ref people as p
+            |> deref(p as person)
+            |> update(p use score = person.score + 1);
+    }
+
+    query get_top_scorers(top_n: usize) {
+        use people
+            |> sort(score asc)
+            |> take(top_n)
+            |> collect(p as type person)
+            ~> return;
+    }
+
+    query update_scores(person: ref people, diff: i32) {
+        row(p: ref people = person)
+            ~> deref(p as person)
+            ~> update(p use score = person.score + diff)
+            ~> map(score: i32 = person.score)
+            ~> return;
+    }
+
+    query remove_the_elderly(age_cuttoff: u8) {
+        ref people as person
+            |> deref(person as p)
+            |> filter(*p.age > age_cuttoff)
+            |> delete(person);
+    }
+}
+
+pub fn test() {
+    let mut ds = my_db::Datastore::new();
+    let mut db = ds.db();
+
+    {
+        let bob = db
+            .add_new_person(String::from("Bob"), 23, RGB::Red)
+            .expect("empty database")
+            .name;
+
+        assert!(db.year_passes().is_ok(), "correct age updating code");
+
+        let jim = db
+            .add_new_person(String::from("Jim"), 99, RGB::Blue)
+            .expect("name different from Bob")
+            .name;
+
+        db.update_scores(bob, 300).expect("Bob is still in the db");
+
+        assert!(
+            db.remove_the_elderly(50).is_ok(),
+            "correct dereferencing emql code"
+        );
+        assert!(
+            db.update_scores(jim, 3).is_err(),
+            "Jim was removed by the age cutoff"
+        );
+
+        // add a bunch more users
+        assert!(db
+            .add_new_person(String::from("Mike"), 34, RGB::Blue)
+            .is_ok());
+        assert!(
+            db.add_new_person(String::from("Mike"), 47, RGB::Red)
+                .is_err(),
+            "added Mike twice"
+        );
+        assert!(
+            db.add_new_person(String::from("Steven"), 200, RGB::Red)
+                .is_err(),
+            "Steven is clearly lying"
+        );
+        assert!(db
+            .add_new_person(String::from("Alex"), 50, RGB::Green)
+            .is_ok());
+
+        for user in db
+            .get_top_scorers(3)
+            .p
+            .into_iter()
+        {
+            println!(
+                "{}: {}, {}, {:?}",
+                user.name, user.score, user.age, user.fav
+            );
+        }
+    }
+}
diff --git a/crates/emdb/tests/valid/complex/mod.rs b/crates/emdb/tests/valid/complex/mod.rs
new file mode 100644
index 0000000..be458d5
--- /dev/null
+++ b/crates/emdb/tests/valid/complex/mod.rs
@@ -0,0 +1,8 @@
+//! ## Large Tests for use as examples
+//! - Need to use most operators, mutability, etc.
+
+pub mod favourite_colours;
+pub mod dereferencing;
+pub mod userdetails;
+pub mod sales_analytics;
+pub mod data_logs;
diff --git a/crates/emdb/tests/valid/complex/sales_analytics.rs b/crates/emdb/tests/valid/complex/sales_analytics.rs
new file mode 100644
index 0000000..f5a1248
--- /dev/null
+++ b/crates/emdb/tests/valid/complex/sales_analytics.rs
@@ -0,0 +1,330 @@
+//! ## A complex analytical workload
+//! To test [`emdb`]'s OLAP performance.
+ +use std::{collections::HashMap, fmt::Display}; + +use emdb::macros::emql; + +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] +pub enum ProductCategory { + Electronics, + Clothing, + Food, +} + +impl Display for ProductCategory { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ProductCategory::Electronics => write!(f, "📱"), + ProductCategory::Clothing => write!(f, "👕"), + ProductCategory::Food => write!(f, "🍌"), + } + } +} + +#[derive(Clone, Copy, Debug)] +pub enum Currency { + GBP, + USD, + BTC, +} + +/// Validate a price by the rules: +/// - No more than $10k in dollars +/// - Fewer than 20 in BTC +fn validate_price(price: &u64, currency: &Currency) -> bool { + const DECIMAL: u64 = 100; + match currency { + Currency::GBP => true, + Currency::USD => *price <= 10_000 * DECIMAL, + Currency::BTC => *price < 20, + } +} + +fn exchange(btc_rate: f64, usd_rate: f64, price: u64, currency: Currency) -> u64 { + match currency { + Currency::GBP => price, + Currency::USD => (price as f64 * usd_rate) as u64, + Currency::BTC => (price as f64 * btc_rate) as u64, + } +} + +#[derive(Clone, PartialEq, Eq, Debug)] +struct Aggregate { + clothes: usize, + electronics: usize, + food: usize, + money_spent: u64, +} + +impl Default for Aggregate { + fn default() -> Self { + Aggregate { + clothes: 0, + electronics: 0, + food: 0, + money_spent: 0, + } + } +} + +emql!{ + impl my_db as Serialized { + op_impl = Parallel, + }; + + table products { + serial: usize, + name: String, + category: crate::valid::complex::sales_analytics::ProductCategory, + } @ [unique(serial) as unique_serial_number] + + table purchases { + customer_reference: [u8; 4], + product_serial: usize, + quantity: u8, + price: u64, + currency: crate::valid::complex::sales_analytics::Currency, + } @ [pred(crate::valid::complex::sales_analytics::validate_price(price, currency)) as sensible_prices] + + // We delete old customers, but keep their references + table current_customers { + reference: [u8; 4], + name: String, + address: String, + } @ [ + unique(reference) as unique_customer_reference, + unique(address) as unique_customer_address, + pred(name.len() > 2) as sensible_name, + pred(address.len() > 0) as non_empty_address, + ] + + // Old customers, deleted but references kept for purchases + table old_customers { + reference: [u8; 4], + } + + // Basic queries for data population ======================================= + query new_customer( + reference: [u8; 4], + name: String, + address: String, + ) { + row( + reference: [u8; 4] = reference, + name: String = name, + address: String = address, + ) ~> insert(current_customers as ref customer_ref) ~> return; + } + query new_sale( + customer_reference: [u8; 4], + product_serial: usize, + quantity: u8, + price: u64, + currency: crate::valid::complex::sales_analytics::Currency, + ) { + row( + customer_reference: [u8; 4] = customer_reference, + product_serial: usize = product_serial, + quantity: u8 = quantity, + price: u64 = price, + currency: crate::valid::complex::sales_analytics::Currency = currency, + ) ~> insert(purchases as ref sale_ref) ~> return; + } + query customer_leaving( + reference: [u8; 4], + ) { + row( + reference: [u8; 4] = reference, + ) + ~> unique(reference for current_customers.reference as ref customer_ref) + ~> delete(customer_ref) + ~> map(reference: [u8; 4] = reference) + ~> insert(old_customers as ref customer_ref); + } + + query new_product( + serial: usize, + name: String, + category:
crate::valid::complex::sales_analytics::ProductCategory, + ) { + row( + serial: usize = serial, + name: String = name, + category: crate::valid::complex::sales_analytics::ProductCategory = category, + ) ~> insert(products as ref product_ref) ~> return; + } + + // Analysis queries ========================================================= + + // Description: + // Get the total value of a customer's purchases, using the current + // exchange rates, but only if they are a current customer. + // + // Additionally get the sum of all products they have purchased in each product + // category. + // Reasoning: + // Allows us to demonstrate embedding of business logic into the database. + query customer_value(btc_rate: f64, usd_rate: f64, cust_ref_outer: [u8; 4]) { + row(cust_ref: [u8;4] = cust_ref_outer) + ~> unique(cust_ref for current_customers.reference as ref customer_ref) + ~> deref(customer_ref as customer) + ~> lift( + use purchases + |> filter(**customer_reference == cust_ref_outer) + |> let customer_purchases; + + use products |> let all_prods; + + join(use all_prods [inner equi(serial = product_serial)] use customer_purchases) + |> map(result: crate::valid::complex::sales_analytics::Aggregate = { + use crate::valid::complex::sales_analytics::ProductCategory::*; + let q = *customer_purchases.quantity as usize; + let (electronics, clothes, food) = match all_prods.category { + Electronics => (q, 0, 0), + Clothing => (0, q, 0), + Food => (0, 0, q), + }; + crate::valid::complex::sales_analytics::Aggregate { + clothes, + electronics, + food, + money_spent: (*customer_purchases.quantity as u64) * crate::valid::complex::sales_analytics::exchange(btc_rate, usd_rate, *customer_purchases.price, *customer_purchases.currency), + } + }) + |> combine(use left + right in result[crate::valid::complex::sales_analytics::Aggregate::default()] = [crate::valid::complex::sales_analytics::Aggregate { + clothes: left.result.clothes + right.result.clothes, + electronics: left.result.electronics + right.result.electronics, + food: left.result.food + right.result.food, + money_spent: left.result.money_spent + right.result.money_spent, + }]) + ~> return; + ) ~> return; + } + + // Description: + // For a given product get for each purchasing customer: + // - customer reference + // - total spent by the customer on the product + // Reasoning: + // To demonstrate complex aggregations, and returning data structures + query product_customers(serial: usize, btc_rate: f64, usd_rate: f64) { + row(serial: usize = serial) + ~> unique(serial for products.serial as ref product_ref) + ~> deref(product_ref as product) + ~> lift( + use purchases + |> filter(**product_serial == serial) + |> groupby(customer_reference for let filtered_purchases in { + use filtered_purchases + |> map(sum: u64 = (*quantity as u64) * crate::valid::complex::sales_analytics::exchange(btc_rate, usd_rate, *price, *currency)) + |> combine(use left + right in sum[0] = [left.sum + right.sum]) + ~> map(customer: &'db [u8; 4] = customer_reference, total: u64 = sum) + ~> return; + }) + |> collect(customers as type customers_for_prod) + ~> map(product_serial: usize = serial, customers: type customers_for_prod = customers) + ~> return; + ) + ~> return; + } + + // Description: + // Get the total sales per category, in the different currencies + // Reasoning: + // Demonstrating aggregation over a large table + query category_sales(btc_rate: f64, usd_rate: f64) { + + use purchases |> let purchase_data; + use products |> let product_data; + + join(use purchase_data
[inner equi(product_serial = serial)] use product_data) + |> map( + category: crate::valid::complex::sales_analytics::ProductCategory = *product_data.category, + money: u64 = (*purchase_data.quantity as u64) * crate::valid::complex::sales_analytics::exchange( + btc_rate, usd_rate, *purchase_data.price, *purchase_data.currency + ) + ) + |> groupby(category for let category_purchase_data in { + use category_purchase_data + |> combine(use left + right in money[0] = [left.money + right.money]) + ~> map(category: crate::valid::complex::sales_analytics::ProductCategory = category, total: u64 = money) + ~> return; + }) + |> collect(category_totals) + ~> return; + } +} + + +pub fn test() { + let mut ds = my_db::Datastore::new(); + let mut db = ds.db(); + + let btc_rate = 10000.7; + let usd_rate = 0.8; + + let tshirt = 1; + let jeans = 2; + let tv = 3; + let phone = 4; + let apple = 5; + + db.new_product(tshirt, "T-shirt".to_string(), ProductCategory::Clothing).unwrap(); + db.new_product(jeans, "Jeans".to_string(), ProductCategory::Clothing).unwrap(); + db.new_product(tv, "TV".to_string(), ProductCategory::Electronics).unwrap(); + db.new_product(phone, "Phone".to_string(), ProductCategory::Electronics).unwrap(); + db.new_product(apple, "Apple".to_string(), ProductCategory::Food).unwrap(); + + let alice = [1, 2, 3, 4]; + let bob = [2, 3, 4, 5]; + let charlie = [3, 4, 5, 6]; + + db.new_customer(alice, "Alice".to_string(), "1 Road".to_string()).unwrap(); + db.new_customer(bob, "Bob".to_string(), "2 Road".to_string()).unwrap(); + db.new_customer(charlie, "Charlie".to_string(), "3 Road".to_string()).unwrap(); + + + db.new_sale(alice, tshirt, 2, 100, Currency::GBP).unwrap(); + db.new_sale(alice, jeans, 1, 50, Currency::USD).unwrap(); + + db.new_sale(bob, tv, 1, 200, Currency::USD).unwrap(); + db.new_sale(bob, phone, 1, 500, Currency::USD).unwrap(); + + db.new_sale(charlie, apple, 3, 10, Currency::BTC).unwrap(); + db.new_sale(charlie, phone, 2, 100, Currency::GBP).unwrap(); + + let alice_agg = db.customer_value(btc_rate, usd_rate, alice).unwrap().result; + assert_eq!(alice_agg, Aggregate{ + clothes: 3, + electronics: 0, + food: 0, + money_spent: 2 * exchange(btc_rate, usd_rate, 100, Currency::GBP) + exchange(btc_rate, usd_rate, 50, Currency::USD) + }); + + + let bob_agg = db.customer_value(btc_rate, usd_rate, bob).unwrap().result; + assert_eq!(bob_agg, Aggregate{ + clothes: 0, + electronics: 2, + food: 0, + money_spent: exchange(btc_rate, usd_rate, 200, Currency::USD) + exchange(btc_rate, usd_rate, 500, Currency::USD) + }); + + let charlie_agg = db.customer_value(btc_rate, usd_rate, charlie).unwrap().result; + assert_eq!(charlie_agg, Aggregate{ + clothes: 0, + electronics: 2, + food: 3, + money_spent: 3 * exchange(btc_rate, usd_rate, 10, Currency::BTC) + 2 * exchange(btc_rate, usd_rate, 100, Currency::GBP) + }); + + let phone_customers = db.product_customers(phone, btc_rate, usd_rate).unwrap().customers.into_iter().map(|val| (val.customer, val.total)).collect::<HashMap<_, _>>(); + assert_eq!(phone_customers.len(), 2); + assert_eq!(phone_customers[&bob], exchange(btc_rate, usd_rate, 500, Currency::USD)); + assert_eq!(phone_customers[&charlie], 2 * exchange(btc_rate, usd_rate, 100, Currency::GBP)); + + for tot in db.category_sales(btc_rate, usd_rate).category_totals { + println!("{} £{:>6}", tot.category, tot.total) + } +} \ No newline at end of file diff --git a/crates/emdb/examples/complex/user_details.rs b/crates/emdb/tests/valid/complex/userdetails.rs similarity index 64% rename from
crates/emdb/examples/complex/user_details.rs rename to crates/emdb/tests/valid/complex/userdetails.rs index 7397b02..adb95dd 100644 --- a/crates/emdb/examples/complex/user_details.rs +++ b/crates/emdb/tests/valid/complex/userdetails.rs @@ -1,8 +1,7 @@ -#![allow(unused_variables)] -use emdb::emql; +use emdb::macros::emql; emql! { - impl my_db as SemCheck; + impl my_db as Serialized; // Reasoning: // - Constraint checking required, needs to fail immediately (hybrid IVM) @@ -14,7 +13,7 @@ emql! { premium: bool, credits: i32, } @ [ - pred(premium || credits > 0) as prem_credits + pred(*premium || *credits > 0) as prem_credits ] // Description: @@ -23,8 +22,8 @@ emql! { // - Needed for data insert, generation of id only occurs from here, // hence we know the table alone determines id // - Move semantics (taking ownership of data structure from outside the database) - query new_user(username: String, prem: bool) { - row(name: String = username, premium: bool = prem, credits: i32 = 0 ) + query new_user(username: String, prem: bool, start_creds: Option<i32>) { + row(name: String = username, premium: bool = prem, credits: i32 = start_creds.unwrap_or(0) ) ~> insert(users as ref user_id) ~> return; } @@ -74,7 +73,7 @@ emql! { query reward_premium(cred_bonus: f32) { ref users as users_ref |> deref(users_ref as it) - |> filter(it.premium) + |> filter(*it.premium) |> map(users_ref: ref users = users_ref, new_creds: i32 = ((it.credits as f32) * cred_bonus) as i32) |> update(users_ref use credits = new_creds) |> map(creds: i32 = new_creds) @@ -89,11 +88,56 @@ emql! { // the view query total_premium_credits() { use users - |> filter(premium) + |> filter(**premium) |> map(credits: i64 = credits as i64) |> fold(sum: i64 = 0 -> sum + credits) ~> return; } } -fn main() {} +pub fn test() { + // let mut ds = my_db::Datastore::new(); + // let mut db = ds.db(); + + // let bob = db + // .new_user(String::from("Bob"), false, Some(3)) + // .expect("empty database") + // .user_id; + + // let alice = db + // .new_user(String::from("Alice"), true, None) + // .expect("empty database") + // .user_id; + + // let bob_info = db.get_info(bob).unwrap(); + // let alice_info = db.get_info(alice).unwrap(); + + // assert_eq!(bob_info.userdata.name, "Bob"); + // assert_eq!(bob_info.userdata.premium, &false); + // assert_eq!(bob_info.userdata.credits, 3); + + // assert_eq!(alice_info.userdata.name, "Alice"); + // assert_eq!(alice_info.userdata.premium, &true); + // assert_eq!(alice_info.userdata.credits, 0); + + // db.add_credits(bob, 10).unwrap(); + // db.add_credits(bob, 20).unwrap(); + // db.add_credits(bob, 30).unwrap(); + // db.add_credits(bob, 40).unwrap(); + // db.add_credits(bob, 50).unwrap(); + + // let bob_info = db.get_info(bob).unwrap(); + // assert_eq!(bob_info.userdata.credits, 153); + + // assert_eq!(db.total_premium_credits().sum, 0); + + // db.add_credits(alice, 10).unwrap(); + // assert_eq!(db.total_premium_credits().sum, 10); + + // db.reward_premium(1.5).unwrap(); + // assert_eq!(db.total_premium_credits().sum, 15); + + // for entry in db.get_snapshot().it { + // println!("{:5} {:9} has {:04} credits", entry.name, if *entry.premium { "(premium)" } else { ""}, entry.credits); + // } +} diff --git a/crates/emdb/tests/valid/context/groupby.rs b/crates/emdb/tests/valid/context/groupby.rs new file mode 100644 index 0000000..69f484d --- /dev/null +++ b/crates/emdb/tests/valid/context/groupby.rs @@ -0,0 +1,57 @@ +use std::collections::{HashMap, HashSet}; + +use emdb::macros::emql; + +emql!
{ + impl my_db as Serialized; + + table customers { + forename: String, + surname: String, + age: u8, + } @ [pred(*age < 255) as sensible_ages] + + query customer_age_brackets() { + use customers + |> groupby(age for let people in { + use people + |> collect(people as type age_group) + ~> map(age_bracket: u8 = *age, group: type age_group = people) + ~> return; + }) + |> filter(*age_bracket > 16) + |> collect(brackets) + ~> return; + } + + query new_customer(forename: &str, surname: &str, age: u8) { + row( + forename: String = String::from(forename), + surname: String = String::from(surname), + age: u8 = age + ) + ~> insert(customers as ref name) + ~> return; + } +} + +pub fn test() { + let mut ds = my_db::Datastore::new(); + let mut db = ds.db(); + + db.new_customer("Alice", "Smith", 25).unwrap(); + db.new_customer("Bob", "Jones", 25).unwrap(); + db.new_customer("Charlie", "Brown", 40).unwrap(); + db.new_customer("David", "White", 50).unwrap(); + db.new_customer("Eve", "Black", 50).unwrap(); + + let brackets: HashMap<u8, HashSet<(&String, &String)>> = db.customer_age_brackets().brackets.into_iter().map( + |data| (data.age_bracket, data.group.into_iter().map(|v| (v.forename, v.surname)).collect()) + ).collect(); + + assert!(brackets[&25].contains(&(&String::from("Alice"), &String::from("Smith")))); + assert!(brackets[&25].contains(&(&String::from("Bob"), &String::from("Jones")))); + assert!(brackets[&40].contains(&(&String::from("Charlie"), &String::from("Brown")))); + assert!(brackets[&50].contains(&(&String::from("David"), &String::from("White")))); + assert!(brackets[&50].contains(&(&String::from("Eve"), &String::from("Black")))); +} diff --git a/crates/emdb/tests/valid/context/lift_single.rs b/crates/emdb/tests/valid/context/lift_single.rs new file mode 100644 index 0000000..917d17b --- /dev/null +++ b/crates/emdb/tests/valid/context/lift_single.rs @@ -0,0 +1,32 @@ +use emdb::macros::emql; + +emql! { + impl my_db as Serialized { }; + + table staff { + id_serial: usize, + } @ [ unique(id_serial) as unique_id_card_serial ] + + query add_staff( + id: usize, + ) { + row( + id_serial: usize = id, + ) + ~> insert(staff as ref staff_ref) + ~> lift( + // wowee! we can use staff_ref in this context + row(member: ref staff = staff_ref) + ~> deref(member as staff_data) + ~> return; + ) + ~> return; + } +} + +pub fn test() { + let mut ds = my_db::Datastore::new(); + let mut db = ds.db(); + + db.add_staff(1).unwrap(); +} \ No newline at end of file diff --git a/crates/emdb/tests/valid/context/lift_stream.rs b/crates/emdb/tests/valid/context/lift_stream.rs new file mode 100644 index 0000000..e6cac2f --- /dev/null +++ b/crates/emdb/tests/valid/context/lift_stream.rs @@ -0,0 +1,73 @@ +use emdb::macros::emql; + +emql!
{ + impl my_db as Serialized; + + table customers { + forename: String, + surname: String, + age: u8, + bonus_points: i32, + } @ [ pred(*age < 255) as sensible_ages ] + + table family_bonus { + surname: String, + bonus: i32 + } @ [ unique(surname) as unique_surnames_cons ] + + query customer_age_brackets() { + ref customers as ref_cust + |> deref(ref_cust as person) + |> update(ref_cust use bonus_points = person.bonus_points + 1) + |> lift( + row(surname: String = person.surname.clone()) + ~> unique(surname for family_bonus.surname as ref family_ref) + ~> deref(family_ref as family) + ~> update(family_ref use bonus = family.bonus + 1); + + row() ~> return; // void return + ); + } + + query add_customer(forename: String, surname: String, age: u8) { + row( + forename: String = forename, + surname: String = surname, + age: u8 = age, + bonus_points: i32 = 0 + ) + ~> insert(customers as ref name) + ~> return; + } + + query add_family(surname: String) { + row(surname: String = surname, bonus: i32 = 0) + ~> insert(family_bonus as ref name) + ~> return; + } + + query get_family(family: ref family_bonus) { + row(family: ref family_bonus = family) + ~> deref(family as family_val) + ~> return; + } +} + +pub fn test() { + let mut ds = my_db::Datastore::new(); + let mut db = ds.db(); + + db.add_customer("Alice".to_string(), "Smith".to_string(), 25).unwrap(); + db.add_customer("Bob".to_string(), "Smith".to_string(), 30).unwrap(); + db.add_customer("Charlie".to_string(), "Smith".to_string(), 35).unwrap(); + let smiths = db.add_family("Smith".to_string()).unwrap().name; + + db.add_customer("David".to_string(), "Jones".to_string(), 40).unwrap(); + let joneses = db.add_family("Jones".to_string()).unwrap().name; + + db.customer_age_brackets().unwrap(); + + assert_eq!(db.get_family(smiths).unwrap().family_val.bonus, 3); + assert_eq!(db.get_family(joneses).unwrap().family_val.bonus, 1); + // Evidently Keeping Up with the Joneses! +} diff --git a/crates/emdb/tests/valid/context/mod.rs b/crates/emdb/tests/valid/context/mod.rs new file mode 100644 index 0000000..2a6b383 --- /dev/null +++ b/crates/emdb/tests/valid/context/mod.rs @@ -0,0 +1,6 @@ +//! ## Tests involving context operators +//! These are the lift and groupby operators.
+ +pub mod lift_stream; +pub mod lift_single; +pub mod groupby; diff --git a/crates/emdb/tests/valid/extreme/empty_emql.rs b/crates/emdb/tests/valid/extreme/empty_emql.rs new file mode 100644 index 0000000..43eb3b4 --- /dev/null +++ b/crates/emdb/tests/valid/extreme/empty_emql.rs @@ -0,0 +1,5 @@ +use emdb::macros::emql; + +emql!{} + +pub fn test() {} \ No newline at end of file diff --git a/crates/emdb/tests/valid/extreme/empty_items.rs b/crates/emdb/tests/valid/extreme/empty_items.rs new file mode 100644 index 0000000..8c604f5 --- /dev/null +++ b/crates/emdb/tests/valid/extreme/empty_items.rs @@ -0,0 +1,23 @@ +use emdb::macros::emql; + +emql!{ + impl my_db as Serialized; + + table empty {} @ [ pred(1 + 1 == 2) as always_true_never_checked] + + query empty() { + // such wow, much empty + } + + query redundant() { + row() ~> return; + } +} + +pub fn test() { + let mut ds = my_db::Datastore::new(); + let db = ds.db(); + + let () = db.empty(); + let _ = db.redundant(); // is a record with no members +} \ No newline at end of file diff --git a/crates/emdb/tests/valid/extreme/just_maths.rs b/crates/emdb/tests/valid/extreme/just_maths.rs new file mode 100644 index 0000000..381b45b --- /dev/null +++ b/crates/emdb/tests/valid/extreme/just_maths.rs @@ -0,0 +1,20 @@ +use emdb::macros::emql; + +emql! { + impl my_db as Serialized; + + query just_maths(x: i32) { + row(x: i32 = x + 1) + ~> map(y: u64 = (x * x) as u64) + ~> map(z: bool = y > 1600) + ~> return; + } +} + + +pub fn test() { + let mut ds = my_db::Datastore::new(); + let db = ds.db(); + + assert!(!db.just_maths(39).z); +} diff --git a/crates/emdb/tests/valid/extreme/mod.rs b/crates/emdb/tests/valid/extreme/mod.rs new file mode 100644 index 0000000..8fdedd9 --- /dev/null +++ b/crates/emdb/tests/valid/extreme/mod.rs @@ -0,0 +1,6 @@ +//! ## Extreme edge cases +//! Not representative programs. + +pub mod empty_emql; +pub mod empty_items; +pub mod just_maths; \ No newline at end of file diff --git a/crates/emdb/tests/valid/mod.rs b/crates/emdb/tests/valid/mod.rs new file mode 100644 index 0000000..11d1b78 --- /dev/null +++ b/crates/emdb/tests/valid/mod.rs @@ -0,0 +1,13 @@ +#![allow(dead_code, unused_variables)] +//! ## Valid Tests for emql interface +//! - Code is added in submodules from here, to be executed by the +//! [emql.rs](./../emql.rs) integration test +//! +//! NOTE: Cargo compiles each `.rs` file in the top level of the `tests/` +//! directory as a separate crate. Subdirectories with modules are not +//! compiled as tests, but are available for the test crates to use. + +pub mod complex; +pub mod context; +pub mod extreme; +pub mod simple; diff --git a/crates/emdb/tests/valid/simple/basic_join.rs b/crates/emdb/tests/valid/simple/basic_join.rs new file mode 100644 index 0000000..de7347f --- /dev/null +++ b/crates/emdb/tests/valid/simple/basic_join.rs @@ -0,0 +1,53 @@ +use emdb::macros::emql; + +emql!
{ + impl my_db as Serialized; + + table people { + name: String, + friend: Option<String>, + } @ [ unique(name) as unique_names] + + query get_friendships() { + use people |> fork(let person, friend); + + join(use person [ + inner pred { + if let Some(friend_name) = &left.friend { + friend_name == right.name + } else { + false + } + } + ] use friend) + |> map(peep: &'db String = person.name, buddy: &'db String = friend.name) + |> collect(friends as type friendship) + ~> return; + } + + query new_friendship(name: &str, friend: Option<String>) { + row( + name: String = String::from(name), + friend: Option<String> = friend + ) + ~> insert(people as ref person) + ~> return; + } +} + +pub fn test() { + let mut ds = my_db::Datastore::new(); + let mut db = ds.db(); + + db.new_friendship("Alice", Some("Bob".to_string())).unwrap(); + db.new_friendship("Bob", Some("Charlie".to_string())).unwrap(); + db.new_friendship("Charlie", Some("David".to_string())).unwrap(); + db.new_friendship("David", Some("Eve".to_string())).unwrap(); + db.new_friendship("Eve", None).unwrap(); + + let friendships: Vec<(&String, &String)> = db.get_friendships().friends.into_iter().map(|v| (v.peep, v.buddy)).collect(); + + for (left, right) in friendships { + println!("{left:8} 💘 {right}") + } +} \ No newline at end of file diff --git a/crates/emdb/tests/valid/simple/counts.rs b/crates/emdb/tests/valid/simple/counts.rs new file mode 100644 index 0000000..a70dbae --- /dev/null +++ b/crates/emdb/tests/valid/simple/counts.rs @@ -0,0 +1,34 @@ +use emdb::macros::emql; + +emql! { + impl my_db as Serialized; + + table red { + value: usize, + } + + query add_red(data: usize) { + row(value: usize = data) + ~> insert(red as ref new_key); + } + + query data_counts() { + ref red as blagh + |> count(bob) + ~> return; + } +} + +pub fn test() { + let mut ds = my_db::Datastore::new(); + let mut db = ds.db(); + + const INSERTS: usize = 101; + + for i in 0..INSERTS { + let _: () = db.add_red(i); + } + + let count = db.data_counts().bob; + assert_eq!(count, INSERTS); +} \ No newline at end of file diff --git a/crates/emdb/tests/valid/simple/filter.rs b/crates/emdb/tests/valid/simple/filter.rs new file mode 100644 index 0000000..35bad5d --- /dev/null +++ b/crates/emdb/tests/valid/simple/filter.rs @@ -0,0 +1,20 @@ +use emdb::macros::emql; + + +emql! { + impl my_db as Serialized; + + table data { + value: i32, + } @ [unique(value) as unique_values] + + query filter_values(math: i32) { + row(other_math: i32 = 7) + ~> lift ( + use data + |> filter(**value > other_math) + |> collect(filtered) + ~> return; + ); + } +} \ No newline at end of file diff --git a/crates/emdb/tests/valid/simple/limited_table.rs b/crates/emdb/tests/valid/simple/limited_table.rs new file mode 100644 index 0000000..9d98f29 --- /dev/null +++ b/crates/emdb/tests/valid/simple/limited_table.rs @@ -0,0 +1,37 @@ +use emdb::macros::emql; + +const ADD_VALUE: usize = 3; + +const fn cool_const() -> usize { + 23 +} + +emql!
{ + impl my_db as Serialized; + + table coordinates { + x: i64, + y: i64, + z: i64, + } @ [ limit(crate::valid::simple::limited_table::cool_const() + crate::valid::simple::limited_table::ADD_VALUE) as max_inserts ] + + query new_datapoint(x: i64, + y: i64, + z: i64,) { + row(x: i64 = x, y: i64 = y, z: i64 = z) + ~> insert(coordinates as ref id) + ~> return; + } +} + +pub fn test() { + let mut ds = my_db::Datastore::new(); + let mut db = ds.db(); + + for _ in 0..(cool_const() + ADD_VALUE) { + let _: my_db::tables::coordinates::Key = db.new_datapoint(1, 2, 3).expect("Not at limit!").id; + } + + // final one over the limit + assert!(db.new_datapoint(1, 2, 3).is_err()); +} diff --git a/crates/emdb/tests/valid/simple/mod.rs b/crates/emdb/tests/valid/simple/mod.rs new file mode 100644 index 0000000..e3d0ae9 --- /dev/null +++ b/crates/emdb/tests/valid/simple/mod.rs @@ -0,0 +1,9 @@ +//! # Simple tests using a single feature +//! To be used for testing regressions of individual operators + +pub mod no_errors; +pub mod basic_join; +pub mod limited_table; +pub mod sums; +pub mod counts; +pub mod filter; \ No newline at end of file diff --git a/crates/emdb/tests/valid/simple/no_errors.rs b/crates/emdb/tests/valid/simple/no_errors.rs new file mode 100644 index 0000000..59556c8 --- /dev/null +++ b/crates/emdb/tests/valid/simple/no_errors.rs @@ -0,0 +1,39 @@ +use emdb::macros::emql; + +emql!{ + impl my_db as Serialized; + + table data { + foo: String, + bing: usize, + bar: (&'static str, bool), + } + + query new_data(foo: &str, bing: usize, bar_0: bool) { + row( + foo: String = String::from(foo), + bing: usize = bing, + bar: (&'static str, bool) = (if bar_0 { "bar" } else { "baz" }, bar_0) + ) + ~> insert(data as ref new_key) + ~> return; + } + + query all_bings() { + use data + |> map(bing_val: usize = *bing) + |> collect(values) + ~> return; + } +} + +pub fn test() { + let mut ds = my_db::Datastore::new(); + let mut db = ds.db(); + + for _ in 0..100 { + let _: my_db::tables::data::Key = db.new_data("hello", 50, true).new_key; + } + + let _ = db.all_bings().values.into_iter().map(|v| v.bing_val).collect::<Vec<_>>(); +} diff --git a/crates/emdb/tests/valid/simple/sums.rs b/crates/emdb/tests/valid/simple/sums.rs new file mode 100644 index 0000000..7985293 --- /dev/null +++ b/crates/emdb/tests/valid/simple/sums.rs @@ -0,0 +1,40 @@ +use emdb::macros::emql; + +emql!
{ + impl my_db as Serialized; + + table values { + value: i32, + } + + query add_data(data: i32) { + row(value: i32 = data) + ~> insert(values as ref new_key) + ~> return; + } + + query sum_data_combine() { + use values + |> map(sum: i32 = *value) + |> combine(use left + right in sum[0] = [left.sum + right.sum]) + ~> return; + } + + query sum_data_fold() { + use values + |> fold(sum: i32 = 0 -> sum + *value) + ~> return; + } +} + +pub fn test() { + let mut ds = my_db::Datastore::new(); + let mut db = ds.db(); + + for i in 0..100 { + let _: my_db::tables::values::Key = db.add_data(i).new_key; + } + + let sum = db.sum_data_fold().sum; + assert_eq!(sum, 4950); +} diff --git a/crates/emdb_core/Cargo.toml b/crates/emdb_core/Cargo.toml new file mode 100644 index 0000000..c70b02f --- /dev/null +++ b/crates/emdb_core/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "emdb_core" +version = "0.1.0" +edition = "2021" + +readme = "README.md" +description = "The macros used for generating emdb embedded databases" +keywords = ["embedded-database"] +categories = ["compilers", "database"] + +repository.workspace = true +homepage.workspace = true +license-file.workspace = true + +[lib] +proc-macro = true + +[dependencies] +prettyplease = "0.2" +syn = { version = "2.0.45", features = ["full", "extra-traits"] } +typed-arena = "2.0.2" +typed-generational-arena = "0.2" +proc-macro2 = { version = "1.0" } +proc-macro-error = "1.0.4" +quote = "1.0.33" +rand = "0.8" +combi = { path = "../combi" } +enumtrait = { path = "../enumtrait" } +dot = "0.1.4" +itertools = "0.13.0" +pulpit = { path = "../pulpit" } +quote_debug = { path = "../quote_debug" } diff --git a/crates/emdb_core/README.md b/crates/emdb_core/README.md new file mode 100644 index 0000000..1e0a082 --- /dev/null +++ b/crates/emdb_core/README.md @@ -0,0 +1,3 @@ +emDB + +This crate contains the proc macros used for emdb. diff --git a/crates/emdb/src/analysis/access/mod.rs b/crates/emdb_core/src/analysis/access/mod.rs similarity index 100% rename from crates/emdb/src/analysis/access/mod.rs rename to crates/emdb_core/src/analysis/access/mod.rs diff --git a/crates/emdb/src/analysis/cardinality/mod.rs b/crates/emdb_core/src/analysis/cardinality/mod.rs similarity index 100% rename from crates/emdb/src/analysis/cardinality/mod.rs rename to crates/emdb_core/src/analysis/cardinality/mod.rs diff --git a/crates/emdb/src/analysis/concurrency/mod.rs b/crates/emdb_core/src/analysis/concurrency/mod.rs similarity index 100% rename from crates/emdb/src/analysis/concurrency/mod.rs rename to crates/emdb_core/src/analysis/concurrency/mod.rs diff --git a/crates/emdb/src/analysis/mod.rs b/crates/emdb_core/src/analysis/mod.rs similarity index 88% rename from crates/emdb/src/analysis/mod.rs rename to crates/emdb_core/src/analysis/mod.rs index 69b5027..3ffd0db 100644 --- a/crates/emdb/src/analysis/mod.rs +++ b/crates/emdb_core/src/analysis/mod.rs @@ -1,6 +1,5 @@ //! Analyses to label info on immutable plans. -pub mod interface; pub mod validity; pub mod access; diff --git a/crates/emdb_core/src/analysis/mutability/mod.rs b/crates/emdb_core/src/analysis/mutability/mod.rs new file mode 100644 index 0000000..4549810 --- /dev/null +++ b/crates/emdb_core/src/analysis/mutability/mod.rs @@ -0,0 +1,76 @@ +//! ## Getting the tables mutated by queries +//!
This code is the same as for the Serialized backend. +use crate::plan; + +#[enumtrait::store(trait_get_muts)] +pub trait GetMuts { + /// Update the set with the mutated tables + fn mutates(&self, lp: &plan::Plan) -> bool { + false + } +} + +impl GetMuts for plan::Query { + fn mutates(&self, lp: &plan::Plan) -> bool { + lp.get_context(self.ctx).mutates(lp) + } +} + +impl GetMuts for plan::Context { + fn mutates(&self, lp: &plan::Plan) -> bool { + self.ordering + .iter() + .map(|k| lp.get_operator(*k).mutates(lp)) + .any(|x| x) + } +} + +#[enumtrait::impl_trait(trait_get_muts for plan::operator_enum)] +impl GetMuts for plan::Operator {} + +impl GetMuts for plan::Insert { + fn mutates(&self, lp: &plan::Plan) -> bool { + true + } +} +impl GetMuts for plan::Update { + fn mutates(&self, lp: &plan::Plan) -> bool { + true + } +} +impl GetMuts for plan::Delete { + fn mutates(&self, lp: &plan::Plan) -> bool { + true + } +} + +impl GetMuts for plan::GroupBy { + fn mutates(&self, lp: &plan::Plan) -> bool { + lp.get_context(self.inner_ctx).mutates(lp) + } +} +impl GetMuts for plan::Lift { + fn mutates(&self, lp: &plan::Plan) -> bool { + lp.get_context(self.inner_ctx).mutates(lp) + } +} + +impl GetMuts for plan::UniqueRef {} +impl GetMuts for plan::ScanRefs {} +impl GetMuts for plan::DeRef {} +impl GetMuts for plan::Map {} +impl GetMuts for plan::Expand {} +impl GetMuts for plan::Fold {} +impl GetMuts for plan::Filter {} +impl GetMuts for plan::Combine {} +impl GetMuts for plan::Sort {} +impl GetMuts for plan::Assert {} +impl GetMuts for plan::Take {} +impl GetMuts for plan::Collect {} +impl GetMuts for plan::Count {} +impl GetMuts for plan::Join {} +impl GetMuts for plan::Fork {} +impl GetMuts for plan::Union {} +impl GetMuts for plan::Row {} +impl GetMuts for plan::Return {} +impl GetMuts for plan::Discard {} diff --git a/crates/emdb/src/analysis/validity/mod.rs b/crates/emdb_core/src/analysis/validity/mod.rs similarity index 100% rename from crates/emdb/src/analysis/validity/mod.rs rename to crates/emdb_core/src/analysis/validity/mod.rs diff --git a/crates/emdb_core/src/backend/interface/mod.rs b/crates/emdb_core/src/backend/interface/mod.rs new file mode 100644 index 0000000..1ff4565 --- /dev/null +++ b/crates/emdb_core/src/backend/interface/mod.rs @@ -0,0 +1,190 @@ +//! # Interface Trait for Comparing Implementations +//! - Allows for any return types, any table key types. +//! - Assumes a window-like (`datastore`, `database` wraps `&mut datastore`) pattern, as sketched below.
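For orientation, a minimal runnable sketch of the window pattern assumed above (the names and empty bodies are illustrative only; the real `Datastore`/`Database` types are generated, see the Serialized backend later in this patch):

#[allow(dead_code)]
struct Datastore;
#[allow(dead_code)]
struct Database<'imm> {
    // The database is a short-lived window that mutably borrows the
    // datastore, so query results cannot outlive the owning store.
    store: &'imm mut Datastore,
}

impl Datastore {
    fn new() -> Self {
        Datastore
    }
    fn db(&mut self) -> Database<'_> {
        Database { store: self }
    }
}

fn main() {
    let mut ds = Datastore::new();
    let _db = ds.db(); // matches the `new()`/`db()` pair in the trait below
}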
+ +use crate::{analysis::mutability::GetMuts, plan, utils::on_off::on_off}; +use combi::{ + tokens::{ + basic::{collectuntil, peekpunct, recovgroup}, + derived::listseptrailing, + options::{OptEnd, OptField, OptParse}, + TokenDiagnostic, TokenIter, TokenParser, + }, + Combi, +}; +use namer::InterfaceNamer; +use proc_macro2::{Delimiter, TokenStream}; +use quote::{quote, ToTokens}; +use quote_debug::Tokens; +use syn::{Ident, Type}; + +pub struct Interface { + public: bool, + traits_for_db: Vec<TokenStream>, + traits_with_db: Vec<TokenStream>, +} + +impl super::EMDBBackend for Interface { + const NAME: &'static str = "Interface"; + + fn parse_options( + backend_name: &Ident, + options: Option<TokenStream>, + ) -> Result<Self, LinkedList<Diagnostic>> { + fn get_traits() -> impl TokenParser<Vec<TokenStream>> { + recovgroup( + Delimiter::Brace, + listseptrailing(',', collectuntil(peekpunct(','))), + ) + } + + if let Some(opts) = options { + let parser = ( + OptField::new("pub", on_off), + ( + OptField::new("traits_for_db", get_traits), + (OptField::new("traits_with_db", get_traits), OptEnd), + ), + ) + .gen('='); + + let (_, res) = parser.comp(TokenIter::from(opts, backend_name.span())); + res.to_result().map_err(TokenDiagnostic::into_list).map( + |(public, (traits_for_db, (traits_with_db, ())))| Interface { + public: public.unwrap_or(false), + traits_for_db: traits_for_db.unwrap_or(Vec::new()), + traits_with_db: traits_with_db.unwrap_or(Vec::new()), + }, + ) + } else { + Ok(Self { + public: false, + traits_for_db: Vec::new(), + traits_with_db: Vec::new(), + }) + } + } + + fn generate_code( + self, + impl_name: Ident, + plan: &plan::Plan, + ) -> Result<TokenStream, LinkedList<Diagnostic>> + { + let interface_namer = InterfaceNamer::new(); + let InterfaceNamer { + trait_database, + trait_database_type_datastore, + trait_datastore, + trait_datastore_type_database, + trait_datastore_method_new, + trait_datastore_method_db, + trait_any, + } = &interface_namer; + let db_lifetime = quote! {'db}; + let qy_lifetime = quote! {'qy}; + + let query_code = plan + .queries + .iter() + .map(|(_, plan::Query { name, ctx })| { + let mut_tk = if plan.get_context(*ctx).mutates(plan) { + quote!(mut) + } else { + quote!() + }; + + let params = plan.get_context(*ctx).params.iter().map(|(name, ty)| { + let ty = generate_parameter_type(plan, *ty, &interface_namer); + quote! { #name: #ty } + }); + + quote! { + fn #name<#qy_lifetime>(&#qy_lifetime #mut_tk self, #(#params),* ) -> impl #trait_any; + } + }); + + let exposed_table_keys = public::exposed_keys(plan); + let key_types = exposed_table_keys.into_iter().map(|tablekey| { + let key_name = interface_namer.key_name(&plan.get_table(*tablekey).name); + quote! { type #key_name: Clone + Copy + Eq } + }); + + let traits_for_db = if self.traits_for_db.is_empty() { + quote!() + } else { + let trait_vec = &self.traits_for_db; + quote!(: #(#trait_vec)+*) + }; + + let traits_with_db = if self.traits_with_db.is_empty() { + quote!() + } else { + let trait_vec = &self.traits_with_db; + quote!(#(+#trait_vec)*) + }; + + let public_tk = if self.public { quote!(pub) } else { quote!() }; + + Ok(quote! { + #public_tk mod #impl_name { + #![allow(non_camel_case_types)] + // NOTE: We want to allow methods to return any type, which would normally + // require the trait to have an associated type, and to use this in the + // return position of the method. + // + // Or we can use the newer `impl Trait`, and then implement + // a trait for everything to get as close to `auto my_method(..)` + // as possible.
+ pub trait #trait_any{} + impl<T> #trait_any for T {} + + pub trait #trait_database<#db_lifetime> #traits_for_db { + type #trait_database_type_datastore: #trait_datastore; + #(#query_code)* + } + + pub trait #trait_datastore { + // NOTE: the names of the datastore, and the database cannot conflict because the table names have `_key` appended. + type #trait_datastore_type_database<'imm>: #trait_database<'imm, #trait_database_type_datastore=Self> #traits_with_db where Self: 'imm; + #(#key_types;)* + fn #trait_datastore_method_new() -> Self; + fn #trait_datastore_method_db(&mut self) -> Self::#trait_datastore_type_database<'_>; + } + } + }) + } +} + +pub mod namer; +pub mod public; + +fn generate_parameter_type( + lp: &plan::Plan, + key: plan::Key<plan::ScalarType>, + namer: &InterfaceNamer, +) -> Tokens<Type> { + let InterfaceNamer { + trait_any, + trait_database_type_datastore, + trait_datastore, + .. + } = namer; + + match lp.get_scalar_type_conc(key) { + plan::ScalarTypeConc::TableRef(table_id) => { + let key_name = namer.key_name(&lp.get_table(*table_id).name); + quote! (<Self::#trait_database_type_datastore as #trait_datastore>::#key_name) + } + plan::ScalarTypeConc::Rust { + type_context: plan::TypeContext::Query, + ty, + } => ty.to_token_stream(), + _ => unreachable!("Only rust types and table references are allowed in query parameters"), + } + .into() +} + +pub struct InterfaceTrait { + pub name: Ident, +} diff --git a/crates/emdb_core/src/backend/interface/namer.rs b/crates/emdb_core/src/backend/interface/namer.rs new file mode 100644 index 0000000..067dee1 --- /dev/null +++ b/crates/emdb_core/src/backend/interface/namer.rs @@ -0,0 +1,32 @@ +use syn::Ident; + +use crate::utils::misc::new_id; + + +pub struct InterfaceNamer { + pub trait_database: Ident, + pub trait_database_type_datastore: Ident, + pub trait_datastore: Ident, + pub trait_datastore_type_database: Ident, + pub trait_datastore_method_new: Ident, + pub trait_datastore_method_db: Ident, + pub trait_any: Ident, +} + +impl InterfaceNamer { + pub fn new() -> Self { + Self { + trait_database: new_id("Database"), + trait_database_type_datastore: new_id("Datastore"), + trait_datastore: new_id("Datastore"), + trait_datastore_type_database: new_id("DB"), + trait_datastore_method_new: new_id("new"), + trait_datastore_method_db: new_id("db"), + trait_any: new_id("Any") + } + } + + pub fn key_name(&self, table_name: &Ident) -> Ident { + Ident::new(&format!("{table_name}_key"), table_name.span()) + } +} \ No newline at end of file diff --git a/crates/emdb_core/src/backend/interface/public.rs b/crates/emdb_core/src/backend/interface/public.rs new file mode 100644 index 0000000..8761197 --- /dev/null +++ b/crates/emdb_core/src/backend/interface/public.rs @@ -0,0 +1,30 @@ + +use std::collections::HashSet; + +use crate::plan; +fn get_exposed_keys_record<'imm>(lp: &'imm plan::Plan, key: plan::Key<plan::RecordType>, tableset: &mut HashSet<plan::ImmKey<'imm, plan::Table>>) { + for k in lp.get_record_type_conc(key).fields.values() { + get_exposed_keys_scalar(lp, *k, tableset) + } +} +fn get_exposed_keys_scalar<'imm>(lp: &'imm plan::Plan, key: plan::Key<plan::ScalarType>, tableset: &mut HashSet<plan::ImmKey<'imm, plan::Table>>) { + match lp.get_scalar_type_conc(key) { + plan::ScalarTypeConc::TableRef(t) => {tableset.insert(plan::ImmKey::new(*t, lp));}, + plan::ScalarTypeConc::Bag(r) | plan::ScalarTypeConc::Record(r) => get_exposed_keys_record(lp, *r, tableset), + _ => (), + } +} + +pub fn exposed_keys(lp: &plan::Plan) -> HashSet<plan::ImmKey<'_, plan::Table>> { + let mut tableset = HashSet::new(); + for (_, plan::Query{ctx, ..}) in &lp.queries { + let context = lp.get_context(*ctx); + for (_, ty) in &context.params { + get_exposed_keys_scalar(lp, *ty,
&mut tableset) + } + if let Some(ty) = context.get_return_type(lp) { + get_exposed_keys_record(lp, ty, &mut tableset) + } + } + tableset +} diff --git a/crates/emdb/src/backend/mod.rs b/crates/emdb_core/src/backend/mod.rs similarity index 97% rename from crates/emdb/src/backend/mod.rs rename to crates/emdb_core/src/backend/mod.rs index dbe6467..9b3caba 100644 --- a/crates/emdb/src/backend/mod.rs +++ b/crates/emdb_core/src/backend/mod.rs @@ -71,8 +71,8 @@ macro_rules! create_backend { create_backend!( Backend as planviz::PlanViz, - semcheck::SemCheck, - simple::Simple + serialized::Serialized, + interface::Interface ); /// Wrapper for the targets to produce diff --git a/crates/emdb/src/backend/planviz/edges.rs b/crates/emdb_core/src/backend/planviz/edges.rs similarity index 94% rename from crates/emdb/src/backend/planviz/edges.rs rename to crates/emdb_core/src/backend/planviz/edges.rs index 27211a7..5b12ab0 100644 --- a/crates/emdb/src/backend/planviz/edges.rs +++ b/crates/emdb_core/src/backend/planviz/edges.rs @@ -84,6 +84,13 @@ pub struct ScalarToTable { pub scalar: plan::Key<plan::ScalarType>, pub table: plan::Key<plan::Table>, } +#[derive(Clone, Debug)] +pub struct ScalarGetTable { + pub scalar: plan::Key<plan::ScalarType>, + pub table: plan::Key<plan::Table>, + pub field: String, +} + #[derive(Clone, Debug)] pub struct ScalarToScalar { pub from: plan::Key<plan::ScalarType>, pub to: plan::Key<plan::ScalarType>, @@ -130,6 +137,7 @@ pub enum PlanEdge { // scalar types ScalarToRecord, ScalarToTable, + ScalarGetTable, ScalarToScalar, RecordToRecord, @@ -206,8 +214,9 @@ impl GetFeature for plan::ScalarType { plan::ConcRef::Ref(r) => edges.push(ScalarToScalar {from: self_key, to: *r}.into()), plan::ConcRef::Conc(c) => match c { plan::ScalarTypeConc::TableRef(t) => edges.push(ScalarToTable {scalar: self_key, table: *t}.into()), + plan::ScalarTypeConc::TableGet { table, field } => edges.push(ScalarGetTable { scalar: self_key, table: *table, field: field.to_string() }.into()), plan::ScalarTypeConc::Bag(r) | plan::ScalarTypeConc::Record(r) => edges.push(ScalarToRecord {scalar: self_key, record: *r}.into()), - plan::ScalarTypeConc::Rust(_) => (), + plan::ScalarTypeConc::Rust{..} => (), }, } } @@ -304,7 +313,7 @@ impl GetExtraNodeEdges for plan::DeRef { } } -impl GetExtraNodeEdges for plan::ForEach { +impl GetExtraNodeEdges for plan::Lift { fn get_extra_features(&self, self_key: plan::Key<plan::Operator>, edges: &mut Vec<PlanEdge>, config: &DisplayConfig) { edges.push(OperatorToContext{ context: self.inner_ctx, operator: self_key}.into()); } @@ -320,7 +329,9 @@ impl GetExtraNodeEdges for plan::Map {} impl GetExtraNodeEdges for plan::Expand {} impl GetExtraNodeEdges for plan::Fold {} impl GetExtraNodeEdges for plan::Filter {} +impl GetExtraNodeEdges for plan::Combine {} impl GetExtraNodeEdges for plan::Sort {} +impl GetExtraNodeEdges for plan::Count {} impl GetExtraNodeEdges for plan::Assert {} impl GetExtraNodeEdges for plan::Collect {} impl GetExtraNodeEdges for plan::Take {} @@ -582,6 +593,36 @@ impl EdgeStyle for ScalarToTable { } } +impl EdgeStyle for ScalarGetTable { + fn label<'a>(&self) -> dot::LabelText<'a> { + dot::LabelText::label(self.field.clone()) + } + + fn end_arrow(&self) -> dot::Arrow { + dot::Arrow::normal() + } + + fn start_arrow(&self) -> dot::Arrow { + dot::Arrow::none() + } + + fn edge_style(&self) -> dot::Style { + dot::Style::None + } + + fn edge_color<'a>(&self) -> Option<dot::LabelText<'a>> { + Some(dot::LabelText::label("black")) + } + + fn get_side(&self,source_side:bool) -> PlanNode { + if source_side { + PlanNode::ScalarType(self.scalar) + } else { + PlanNode::Table(self.table) + } + } +} + impl EdgeStyle for RecordToRecord { fn end_arrow(&self) ->
dot::Arrow { dot::Arrow::normal() diff --git a/crates/emdb/src/backend/planviz/errors.rs b/crates/emdb_core/src/backend/planviz/errors.rs similarity index 76% rename from crates/emdb/src/backend/planviz/errors.rs rename to crates/emdb_core/src/backend/planviz/errors.rs index 827797a..5448781 100644 --- a/crates/emdb/src/backend/planviz/errors.rs +++ b/crates/emdb_core/src/backend/planviz/errors.rs @@ -7,6 +7,10 @@ pub fn expected_options(backend_name: &Ident, opts_repr: &str) -> Diagnostic { Diagnostic::spanned(backend_name.span(), Level::Error, format!("No options were provided, but are mandatory for {backend_name}. Expected options: {opts_repr}")) } +pub fn expected_path(backend_name: &Ident) -> Diagnostic { + Diagnostic::spanned(backend_name.span(), Level::Error, String::from("No `path` option was provided")) +} + pub fn io_error(backend_name: &Ident, path: Span, error: &Error) -> Diagnostic { Diagnostic::spanned(path, Level::Error, format!("Failed to create new file for {backend_name} with `{error}`")) } \ No newline at end of file diff --git a/crates/emdb/src/backend/planviz/mod.rs b/crates/emdb_core/src/backend/planviz/mod.rs similarity index 57% rename from crates/emdb/src/backend/planviz/mod.rs rename to crates/emdb_core/src/backend/planviz/mod.rs index 86585db..33a4af4 100644 --- a/crates/emdb/src/backend/planviz/mod.rs +++ b/crates/emdb_core/src/backend/planviz/mod.rs @@ -1,92 +1,127 @@ //! # Logical Plan Visualisation //! The debugging plan graph view for emDB. -//! -//! Given the complexity of the [`plan::Plan`], [`crate::analysis`] and +//! +//! Given the complexity of the [`plan::Plan`], [`crate::analysis`] and //! [`crate::optimise`] it is necessary to explore plans graphically. -//! +//! //! ## Live Debugging //! It is recommended to work in a scratch file, with PlanViz implemented. //! - If using vscode, the [graphviz interactive preview extension](vscode:extension/tintinweb.graphviz-interactive-preview) -//! is recommended (open dots file, save in scratch rust file and watch preview +//! is recommended (open the dot file, save the scratch rust file and watch the preview //! automatically update live).
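For orientation, a hypothetical invocation of this backend (the option names `path`, `types`, `ctx`, and `control` come from the parser below; the module name, file path, and on/off values are made up):

emql! {
    impl my_plan as PlanViz {
        path = "./plan.dot", // file the dot graph is rendered to
        types = off,         // toggle type nodes (display_types)
        ctx = on,            // toggle context operators (display_ctx_ops)
        control = off,       // toggle control edges (display_control)
    };
    // ... tables and queries to visualise ...
}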
use std::{fs::File, path::Path}; -use combi::{core::{choice, mapsuc, seq, setrepr}, macros::{choices, seqs}, tokens::{basic::{collectuntil, gettoken, matchident, matchpunct, peekident, peekpunct, syn}, error::{error, expectederr}, TokenDiagnostic, TokenIter, TokenParser}, Combi, Repr}; -use syn::LitStr; -use super::{EMDBBackend, Ident, LinkedList, TokenStream, plan}; -use proc_macro_error::{Diagnostic, Level}; -use crate::utils::misc::singlelist; +use super::{plan, EMDBBackend, Ident, LinkedList, TokenStream}; +use crate::utils::{misc::singlelist, on_off::on_off}; +use combi::{ + core::setrepr, + tokens::{ + basic::{collectuntil, isempty, syn}, + options::{OptEnd, OptField, OptParse}, + TokenDiagnostic, TokenIter, + }, + Combi, Repr, +}; +use dot; +use proc_macro_error::Diagnostic; +use quote::quote; +use syn::LitStr; use typed_generational_arena::StandardArena as GenArena; -use dot; -mod errors; mod edges; +mod errors; mod nodes; -use edges::{PlanEdge, EdgeStyle, get_edges}; -use nodes::{PlanNode, StyleableNode, node_call, get_nodes}; +use edges::{get_edges, EdgeStyle, PlanEdge}; +use nodes::{get_nodes, node_call, PlanNode, StyleableNode}; pub struct PlanViz { out_location: LitStr, - config: DisplayConfig + config: DisplayConfig, } struct DisplayConfig { display_types: bool, display_ctx_ops: bool, - display_control: bool + display_control: bool, } impl EMDBBackend for PlanViz { const NAME: &'static str = "PlanViz"; - fn parse_options(backend_name: &Ident, options: Option<TokenStream>) -> Result<Self, LinkedList<Diagnostic>> { - fn on_off(name: &'static str) -> impl TokenParser<bool> { - mapsuc(seqs!( - matchpunct(','), - matchident(name), - matchpunct('='), - choices!( - peekident("on") => mapsuc(matchident("on"), |_| true), - peekident("off") => mapsuc(matchident("off"), |_| false), - otherwise => error(gettoken, |t| Diagnostic::spanned(t.span(), Level::Error, "Expected `on` or `off`".to_owned())) - ) - ), |(_, (_, (_, opt)))| opt) - } - let parser = expectederr(mapsuc( - expectederr(seqs!( - matchident("path"), - matchpunct('='), - setrepr(syn(collectuntil(peekpunct(','))), ""), - on_off("display_types"), - on_off("display_ctx_ops"), - on_off("display_control") - )), - |(_, (_, (out_location, (display_types, (display_ctx_ops, display_control))))): (_, (_, (LitStr, _)))| PlanViz{ out_location, config: DisplayConfig{display_types,display_ctx_ops, display_control} } - )); + fn parse_options( + backend_name: &Ident, + options: Option<TokenStream>, + ) -> Result<Self, LinkedList<Diagnostic>> { + let parser = setrepr( + ( + OptField::new("path", || { + setrepr(syn(collectuntil(isempty())), "<file path>") + }), + ( + OptField::new("types", on_off), + ( + OptField::new("ctx", on_off), + (OptField::new("control", on_off), OptEnd), + ), + ), + ) + .gen('='), + "path = <file path>, types = <on|off>, ctx = <on|off>, control = <on|off>", + ); if let Some(opts) = options { let (_, res) = parser.comp(TokenIter::from(opts, backend_name.span())); - res.to_result().map_err(TokenDiagnostic::into_list) + let (path, (types, (ctx, (control, _)))) = + res.to_result().map_err(TokenDiagnostic::into_list)?; + + if let Some(out_location) = path { + Ok(PlanViz { + out_location, + config: DisplayConfig { + display_types: types.unwrap_or(false), + display_ctx_ops: ctx.unwrap_or(false), + display_control: control.unwrap_or(false), + }, + }) + } else { + Err(singlelist(errors::expected_path(backend_name))) + } } else { - Err(singlelist(errors::expected_options(backend_name, &format!("{}", Repr(&parser))))) + Err(singlelist(errors::expected_options( + backend_name, + &format!("{}", Repr(&parser)), + ))) } } - fn generate_code(self, impl_name: Ident,
plan: &plan::Plan) -> Result<TokenStream, LinkedList<Diagnostic>> { + fn generate_code( + self, + impl_name: Ident, + plan: &plan::Plan, + ) -> Result<TokenStream, LinkedList<Diagnostic>> { let out_path_str = self.out_location.value(); match File::create(Path::new(&out_path_str)) { Ok(mut open_file) => { - match dot::render(&plan::With { plan, extended: (impl_name.clone(), self.config) }, &mut open_file) { - Ok(()) => { Ok(quote! { -mod #impl_name { - pub const OUT_DIRECTORY: &str = #out_path_str; -} - }) } - Err(e) => Err(singlelist(errors::io_error(&impl_name, self.out_location.span(), &e))) + match dot::render( + &plan::With { + plan, + extended: (impl_name.clone(), self.config), + }, + &mut open_file, + ) { + Ok(()) => Ok(quote! { + mod #impl_name { + pub const OUT_DIRECTORY: &str = #out_path_str; + } + }), + Err(e) => Err(singlelist(errors::io_error( + &impl_name, + self.out_location.span(), + &e, + ))), } - }, + } Err(e) => { let span = self.out_location.span(); Err(singlelist(errors::io_error(&impl_name, span, &e))) @@ -170,4 +205,4 @@ impl<'a> dot::GraphWalk<'a, PlanNode, PlanEdge> for plan::With<'a, (Ident, Displ fn target(&'a self, edge: &PlanEdge) -> PlanNode { edge.get_side(false) } -} \ No newline at end of file +} diff --git a/crates/emdb/src/backend/planviz/nodes.rs b/crates/emdb_core/src/backend/planviz/nodes.rs similarity index 95% rename from crates/emdb/src/backend/planviz/nodes.rs rename to crates/emdb_core/src/backend/planviz/nodes.rs index 0abce57..6c6618b 100644 --- a/crates/emdb/src/backend/planviz/nodes.rs +++ b/crates/emdb_core/src/backend/planviz/nodes.rs @@ -138,7 +138,8 @@ impl StyleableNode for plan::ScalarType { plan::ScalarTypeConc::TableRef(r) => "ref".to_owned(), plan::ScalarTypeConc::Bag(_) => "bag".to_owned(), plan::ScalarTypeConc::Record(_) => "rec".to_owned(), - plan::ScalarTypeConc::Rust(t) => format!("{}", t.to_token_stream()), + plan::ScalarTypeConc::Rust{ type_context: query_context, ty } => format!("{}", ty.to_token_stream()), + plan::ScalarTypeConc::TableGet { table, field } => format!("get {}.{field}", &plan.get_table(*table).name), }), plan::ConcRef::Ref(_) => dot::LabelText::label(""), } @@ -336,6 +337,18 @@ impl OperatorDescription for plan::Fold { } } +impl OperatorDescription for plan::Combine { + fn description(&self,plan: &plan::Plan) -> String { + format!("Combine") + } +} + +impl OperatorDescription for plan::Count { + fn description(&self,plan: &plan::Plan) -> String { + format!("Count") + } +} + impl OperatorDescription for plan::Filter { fn description(&self,plan: &plan::Plan) -> String { format!("Filter") @@ -378,7 +391,7 @@ impl OperatorDescription for plan::GroupBy { } } -impl OperatorDescription for plan::ForEach { +impl OperatorDescription for plan::Lift { fn description(&self,plan: &plan::Plan) -> String { format!("Lift") } diff --git a/crates/emdb_core/src/backend/serialized/closures.rs b/crates/emdb_core/src/backend/serialized/closures.rs new file mode 100644 index 0000000..1dabfb1 --- /dev/null +++ b/crates/emdb_core/src/backend/serialized/closures.rs @@ -0,0 +1,110 @@ +//! Generate the closures needed for use in the database, which capture parameters from the query. +//!
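A self-contained analogy of the closures this module produces (names invented; the generated code aliases `self` as `__internal_self` and builds the closure body from the context's operators):

struct Db {
    credits: i32,
}

fn main() {
    // The closure receives the database as an explicit parameter rather than
    // capturing it, and wraps its result in Ok(..) only if an operator can error.
    let apply = |db: &mut Db, bonus: i32| -> Result<i32, ()> {
        db.credits += bonus; // a mutating operator marks the whole query as `mut`
        Ok(db.credits)       // the context's return value
    };
    let mut db = Db { credits: 0 };
    assert_eq!(apply(&mut db, 5), Ok(5));
}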
+ + +use crate::{ + plan, + utils::misc::{PushMap, PushSet}, +}; +use quote::quote; +use quote_debug::Tokens; +use syn::{ExprClosure, Ident, Path}; + +use super::{namer::{dataflow_fields, DataFlowNaming, SerializedNamer}, operators::OperatorImpl}; +use super::operators::OperatorGen; +use super::tables::GeneratedInfo; +use super::types::generate_scalar_type; + +pub struct ContextGen { + pub code: Tokens<ExprClosure>, + pub can_error: bool, + pub mutates: bool, +} + +/// Generate the code for a given context. +/// - Includes a parameter for aliasing `self` (rather than the closure +/// borrowing `self`) +#[allow(clippy::too_many_arguments)] +pub fn generate_application<'imm, 'brw>( + lp: &'imm plan::Plan, + ctx: plan::Key<plan::Context>, + error_path: &Tokens<Path>, + errors: &mut PushMap<'brw, Ident, Option<Tokens<Path>>>, + mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>, + gen_info: &GeneratedInfo<'imm>, + namer: &SerializedNamer, + operator_impl: &OperatorImpl, +) -> ContextGen { + let context = lp.get_context(ctx); + let SerializedNamer { self_alias, .. } = namer; + let mut context_vals = Vec::new(); + + let error_cnt = errors.count(); + let mut_cnt = mutated_tables.count(); + + let tokens = context + .ordering + .iter() + .map(|op_key| { + lp.get_operator(*op_key).apply( + *op_key, + lp, + namer, + error_path, + errors, + mutated_tables, + gen_info, + &mut context_vals, + operator_impl, + ) + }) + .collect::<Vec<_>>(); + + let (ids, vals): (Vec<_>, Vec<_>) = context_vals.into_iter().unzip(); + + let can_error = errors.count() > error_cnt; + let mutates = mutated_tables.count() > mut_cnt; + + + let params = context.params.iter().map(|(id, ty)| { + let ty = generate_scalar_type(lp, &gen_info.get_types, *ty, namer); + quote! { #id: #ty } + }); + + let inflows = context.inflows.iter().map(|df| { + let DataFlowNaming { holding_var, .. } = dataflow_fields(lp, *df, namer); + quote!(#holding_var) + }); + + let ret_val = if let Some(ret_op) = context.returnflow { + let return_output = namer.operator_return_value_name(ret_op); + if can_error { + quote!(Ok(#return_output)) + } else { + quote!(#return_output) + } + } else if can_error { + quote! (Ok(())) + } else { + quote!() + }; + + let self_mut = if mutates { + quote! {mut} + } else { + quote! {} + }; + + ContextGen { + code: quote! { + |#self_alias: & #self_mut Self , #(#params,)* #(#inflows,)* | { + let ( #(#ids),* ) = ( #(#vals),* ); + #(#tokens;)* + #ret_val + } + } + .into(), + can_error, + mutates, + } +} diff --git a/crates/emdb_core/src/backend/serialized/mod.rs b/crates/emdb_core/src/backend/serialized/mod.rs new file mode 100644 index 0000000..5cb7bf2 --- /dev/null +++ b/crates/emdb_core/src/backend/serialized/mod.rs @@ -0,0 +1,205 @@ +#![warn(dead_code)] +#![warn(unused_variables)] +//! # Simple Serializable Backend +//! A basic backend producing code that uses [`pulpit`] generated tables. +//! - Allows for basic immutability optimisations, and append only tables. +//! - Generates a table object that uses parallelism internally, but only allows +//! queries to execute in parallel if they are read only (normal borrow checker +//!
rules apply) + +use combi::{ + core::{choice, mapsuc}, macros::choices, tokens::{ + basic::{collectuntil, getident, gettoken, isempty, matchident, peekident, syn}, error::error, options::{OptEnd, OptField, OptParse}, TokenDiagnostic, TokenIter, TokenParser + }, Combi +}; +use prettyplease::unparse; +use proc_macro2::TokenStream; +use proc_macro_error::{Diagnostic, Level}; +use queries::QueriesInfo; +use quote::quote; +use std::{collections::LinkedList, fs::File, io::Write, path::Path}; +use syn::{parse2, File as SynFile, Ident, LitStr}; +use pulpit::gen::selector::{TableSelectors, MutabilitySelector, ThunderdomeSelector}; + +use super::{interface::InterfaceTrait, EMDBBackend}; +use crate::utils::{misc::singlelist, on_off::on_off}; +use operators::OperatorImpls; + +mod closures; +pub mod namer; +mod operators; +mod queries; +mod tables; +mod types; + +pub struct Serialized { + debug: Option<LitStr>, + interface: Option<InterfaceTrait>, + public: bool, + ds_name: Option<Ident>, + aggressive_inlining: bool, + operator_impl: OperatorImpls, + table_selector: TableSelectors, +} + +fn operator_impl_parse() -> impl TokenParser<OperatorImpls> { + choices! ( + peekident("Basic") => mapsuc(matchident("Basic"), |_| OperatorImpls::Basic), + peekident("Iter") => mapsuc(matchident("Iter"), |_| OperatorImpls::Iter), + peekident("Parallel") => mapsuc(matchident("Parallel"), |_| OperatorImpls::Parallel), + peekident("Chunk") => mapsuc(matchident("Chunk"), |_| OperatorImpls::Chunk), + otherwise => error(gettoken, |t| Diagnostic::spanned(t.span(), Level::Error, "Invalid Operator Choice".to_owned())) + ) +} + +fn table_select_parse() -> impl TokenParser<TableSelectors> { + choices! ( + peekident("Mutability") => mapsuc(matchident("Mutability"), |_| MutabilitySelector.into()), + peekident("Thunderdome") => mapsuc(matchident("Thunderdome"), |_| ThunderdomeSelector.into()), + otherwise => error(gettoken, |t| Diagnostic::spanned(t.span(), Level::Error, "Invalid Table Selector Choice".to_owned())) + ) +} + +impl EMDBBackend for Serialized { + const NAME: &'static str = "Serialized"; + + fn parse_options( + backend_name: &syn::Ident, + options: Option<TokenStream>, + ) -> Result<Self, LinkedList<Diagnostic>> { + const DEFAULT_OP_IMPL: OperatorImpls = OperatorImpls::Iter; + const DEFAULT_TABLE_SELECTOR: TableSelectors = TableSelectors::MutabilitySelector(MutabilitySelector); + if let Some(opts) = options { + let parser = ( + OptField::new("debug_file", || syn(collectuntil(isempty()))), + ( + OptField::new("interface", || { + mapsuc(getident(), |name| InterfaceTrait { name }) + }), + ( + OptField::new("pub", on_off), + ( + OptField::new("ds_name", getident), + ( + OptField::new("aggressive_inlining", on_off), + ( + OptField::new("op_impl", operator_impl_parse), + ( + OptField::new("table_select", table_select_parse), + OptEnd + ) + ) + ) + ) + ), + ), + ) + .gen('='); + let (_, res) = parser.comp(TokenIter::from(opts, backend_name.span())); + res.to_result() + .map_err(TokenDiagnostic::into_list) + .map(|(debug, (interface, (public, (ds_name, (inline_queries, (operator_impl, (table_selector, ())))))))| Serialized { debug, interface, public: public.unwrap_or(false), ds_name, aggressive_inlining: inline_queries.unwrap_or(false), operator_impl: operator_impl.unwrap_or(DEFAULT_OP_IMPL), table_selector: table_selector.unwrap_or(DEFAULT_TABLE_SELECTOR) }) + } else { + Ok(Self { + debug: None, + interface: None, + public: false, + ds_name: None, + aggressive_inlining: false, + operator_impl: DEFAULT_OP_IMPL, + table_selector: DEFAULT_TABLE_SELECTOR, + }) + } + } + + fn generate_code( + self, + impl_name: syn::Ident, + plan:
&crate::plan::Plan, + ) -> Result> + { + let mut namer = namer::SerializedNamer::new(); + if let Some(name) = self.ds_name { + namer.struct_datastore = name; + } + + let tables::TableWindow { + table_defs, + datastore, + datastore_impl, + database, + table_generated_info, + } = tables::generate_tables(plan, &self.interface, &namer, &self.table_selector, self.aggressive_inlining); + + let record_defs = + types::generate_record_definitions(plan, &table_generated_info.get_types, &namer); + + let operator_impl = self.operator_impl.get_paths(); + + let QueriesInfo { + query_mod, + query_impls, + } = queries::generate_queries(plan, &table_generated_info, &self.interface, &namer, &operator_impl, self.aggressive_inlining); + + let namer::SerializedNamer { mod_tables, .. } = &namer; + + let public_tk = if self.public { + quote!(pub) + } else { + quote!() + }; + + let minister_trait = operator_impl.trait_path; + + let tks = quote! { + #public_tk mod #impl_name { + // lints (generated code not idiomatic, and can propagate confusing/incorrect lints to user code) + #![allow(non_shorthand_field_patterns)] // current name field printing is `fielname: fieldname` + #![allow(unused_variables)] + #![allow(dead_code)] + + use #minister_trait; //TODO: remove and use better operator selection + pub mod #mod_tables { + #(#table_defs)* + } + #query_mod + #(#record_defs)* + #datastore + #datastore_impl + #database + #query_impls + } + }; + + if let Some(debug_path) = self.debug { + debug_output(&debug_path, tks.clone())? + } + + Ok(tks) + } +} + +fn debug_output(debug_path: &LitStr, tks: TokenStream) -> Result<(), LinkedList> { + match parse2::(tks) { + Ok(m) => match File::create(Path::new(&debug_path.value())) { + Ok(mut f) => match f.write_all(unparse(&m).as_bytes()) { + Ok(()) => Ok(()), + Err(e) => Err(singlelist(Diagnostic::spanned( + debug_path.span(), + Level::Error, + format!("Could not write to file: {e}"), + ))), + }, + Err(e) => Err(singlelist(Diagnostic::spanned( + debug_path.span(), + Level::Error, + format!("Could not create file: {e}"), + ))), + }, + Err(e) => Err(singlelist(Diagnostic::spanned( + debug_path.span(), + Level::Error, + format!("Could not parse code as file: {e}"), + ))), + } +} diff --git a/crates/emdb_core/src/backend/serialized/namer.rs b/crates/emdb_core/src/backend/serialized/namer.rs new file mode 100644 index 0000000..97b61b2 --- /dev/null +++ b/crates/emdb_core/src/backend/serialized/namer.rs @@ -0,0 +1,214 @@ +use std::iter::once; + +use proc_macro2::{Span, TokenStream}; +use quote_debug::Tokens; +use syn::{Expr, ExprClosure, Ident, Lifetime, Path, Type}; + +use crate::{ + backend::interface::namer::InterfaceNamer, plan::{self, RecordConc}, utils::misc::{new_id, PushMap} +}; +use quote::{quote, ToTokens}; + +const INTERNAL_FIELD_PREFIX: &str = "__internal_"; + +pub struct SerializedNamer { + pub pulpit: pulpit::gen::namer::CodeNamer, + pub struct_datastore: Ident, + pub struct_database: Ident, + pub mod_tables: Ident, + pub db_lifetime: Tokens, + pub qy_lifetime: Tokens, + pub phantom_field: Ident, + pub mod_queries: Ident, + pub mod_queries_mod_query_enum_error: Ident, + pub operator_error_parameter: Ident, + pub interface: InterfaceNamer, + pub self_alias: Ident, +} + +impl SerializedNamer { + pub fn new() -> Self { + let db_lifetime: Tokens = quote!('db).into(); + Self { + pulpit: pulpit::gen::namer::CodeNamer { + lifetime_imm: db_lifetime.clone(), + ..pulpit::gen::namer::CodeNamer::new(quote!(emdb::dependencies::pulpit).into()) + }, + struct_datastore: 
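For orientation, the options parsed above are what a user writes at the macro invocation site. A hypothetical invocation is sketched below; the `emql!` entry point and exact option syntax are assumptions (not shown in this diff), and when options are omitted the defaults above apply (`Iter` operators, `Mutability` table selector):

```rust
// Hypothetical backend selection; option names mirror `parse_options`.
emdb::emql! {
    impl my_db as Serialized {
        debug_file = "dump/my_db.rs",  // write prettyplease-formatted output
        op_impl = Parallel,            // default: Iter
        table_select = Thunderdome,    // default: Mutability
        aggressive_inlining = on,      // default: off
    };
    // ... tables and queries elided ...
}
```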
new_id("Datastore"), + struct_database: new_id("Database"), + mod_tables: new_id("tables"), + db_lifetime, + qy_lifetime: quote!('qy).into(), + phantom_field: new_id(&format!("{INTERNAL_FIELD_PREFIX}phantomdata")), + mod_queries: new_id("queries"), + mod_queries_mod_query_enum_error: new_id("Error"), + operator_error_parameter: new_id("err"), + interface: InterfaceNamer::new(), + self_alias: new_id(&format!("{INTERNAL_FIELD_PREFIX}self")), + } + } + + pub fn transform_field_name(&self, name: &plan::RecordField) -> Ident { + match name { + plan::RecordField::User(i) => i.clone(), + plan::RecordField::Internal(i) => Ident::new( + &format!("{}{}", INTERNAL_FIELD_PREFIX, i), + Span::call_site(), + ), + } + } + + pub fn pulpit_table_interaction(&self, key: plan::Key) -> Ident { + new_id(&format!("pulpit_access_{}", key.arr_idx())) + } + + pub fn record_name(&self, key: plan::Key) -> Tokens { + new_id(&format!("Record{}", key.arr_idx())) + .into_token_stream() + .into() + } + + pub fn record_name_lifetimes(&self, key: plan::Key) -> Tokens { + self.lifetime_type_alias(new_id(&format!("Record{}", key.arr_idx()))) + } + + fn lifetime_type_alias(&self, id: Ident) -> Tokens { + let Self { + db_lifetime, + qy_lifetime, + .. + } = self; + quote! { + #id<#db_lifetime, #qy_lifetime> + } + .into() + } + + pub fn operator_closure_value_name(&self, key: plan::Key) -> Ident { + new_id(&format!("operator_closure_value_{}", key.arr_idx())) + } + + pub fn operator_return_value_name(&self, key: plan::Key) -> Ident { + new_id(&format!("return_value_{}", key.arr_idx())) + } + + pub fn operator_error_value_name(&self, key: plan::Key) -> Ident { + new_id(&format!("Error{}", key.arr_idx())) + } + + pub fn dataflow_value_name(&self, key: plan::Key) -> Ident { + new_id(&format!("dataflow_value_{}", key.arr_idx())) + } + + pub fn operator_error_variant_name(&self, key: plan::Key) -> Ident { + new_id(&format!("Error{}", key.arr_idx())) + } +} + +pub struct DataFlowNaming<'plan> { + pub holding_var: Ident, + pub stream: bool, + pub data_constructor: Tokens, + pub data_type: Tokens, + pub record_type: &'plan plan::RecordConc, +} + +/// Helper fn for getting the fields needed for accessing dataflow variables +pub fn dataflow_fields<'plan>( + lp: &'plan plan::Plan, + key: plan::Key, + namer: &SerializedNamer, +) -> DataFlowNaming<'plan> { + // NOTE: Previously we included the type of the stream/single, however this + // prevented using streams of types that are `impl Trait`. + let SerializedNamer { + db_lifetime, + qy_lifetime, + .. + } = namer; + let df_conn = lp.get_dataflow(key).get_conn(); + let record_index = lp.get_record_conc_index(df_conn.with.fields); + let record_name = namer.record_name(*record_index); + let record_type = match lp.get_record_type(*record_index) { + plan::ConcRef::Conc(r) => r, + plan::ConcRef::Ref(_) => unreachable!("Index is from lp.get_record_conc_index"), + }; + + DataFlowNaming { + holding_var: namer.dataflow_value_name(key), + stream: df_conn.with.stream, + data_constructor: record_name.clone(), + data_type: quote!(#record_name<#db_lifetime, #qy_lifetime>).into(), + record_type + } +} + +/// Helper fn for generating the construction for an error, and add it to the query's map of +/// error variants. 
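To make the naming scheme concrete: for an illustrative plan where record type 3 flows along dataflow edge 7, the names produced above combine into generated code of roughly this shape. The user field is kept verbatim while internal members get the `__internal_` prefix; the field name, indices, and phantom parameterisation are assumptions for the sketch:

```rust
use std::marker::PhantomData;

// record_name(3), with the db/query lifetimes applied by lifetime_type_alias.
pub struct Record3<'db, 'qy> {
    pub amount: u64,                                              // user field, name unchanged
    pub __internal_phantomdata: PhantomData<(&'db (), &'qy ())>,  // internal field
}

fn example<'db, 'qy>(r: Record3<'db, 'qy>) -> u64 {
    // dataflow_value_name(7): the holding variable for dataflow edge 7.
    let dataflow_value_7 = r;
    dataflow_value_7.amount
}
```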
+pub fn new_error( + op_key: plan::Key, + error_path: &Tokens, + error_inner: Option>, + errors: &mut PushMap<'_, Ident, Option>>, + namer: &SerializedNamer, +) -> Tokens { + let variant_name = namer.operator_error_variant_name(op_key); + let construct_error = if error_inner.is_some() { + let param = &namer.operator_error_parameter; + quote!(Err(#error_path::#variant_name(#param))) + } else { + quote!(Err(#error_path::#variant_name)) + } + .into(); + errors.push(variant_name, error_inner); + construct_error +} + +/// Transfers fields of the same name, including a phantomdata member appended to the end. +pub fn transfer_fields<'brw>( + from: &'brw Ident, + record: &'brw RecordConc, + namer: &'brw SerializedNamer, +) -> impl Iterator + 'brw { + record + .fields + .keys() + .map(move |id| { + let field_name = namer.transform_field_name(id); + quote!(#field_name: #from.#field_name) + }) + .chain(once({ + let phantom_field = &namer.phantom_field; + quote!(#phantom_field: std::marker::PhantomData) + })) +} + +pub fn expose_user_fields<'brw>(record: &'brw plan::RecordConc, namer: &'brw SerializedNamer) -> impl Iterator + 'brw { + let phantomdata: &Ident = &namer.phantom_field; + record.fields.keys().map(|rf| { + let field_name = namer.transform_field_name(rf); + let alias = match rf { + plan::RecordField::User(id) => quote! {#id}, + plan::RecordField::Internal(_) => quote! {_}, + }; + quote! {#field_name: #alias} + }).chain(once(quote! {#phantomdata: _})) +} + +pub fn boolean_predicate(lp: &plan::Plan, predicate: &Expr, dataflow: plan::Key, namer: &SerializedNamer) -> Tokens { + let DataFlowNaming { + data_constructor, + data_type, + record_type, + .. + } = dataflow_fields(lp, dataflow, namer); + + let input_fields = expose_user_fields(record_type, namer); + + quote! 
{ + |#data_constructor { #(#input_fields,)* } : &#data_type | -> bool { + #predicate + } + } + .into() +} \ No newline at end of file diff --git a/crates/emdb_core/src/backend/serialized/operators.rs b/crates/emdb_core/src/backend/serialized/operators.rs new file mode 100644 index 0000000..df5af5d --- /dev/null +++ b/crates/emdb_core/src/backend/serialized/operators.rs @@ -0,0 +1,1567 @@ + +use quote::{quote, ToTokens}; +use quote_debug::Tokens; +use syn::{Expr, Ident, Path, Stmt}; + +use super::{ + closures::ContextGen, + namer::{ + boolean_predicate, dataflow_fields, expose_user_fields, new_error, transfer_fields, + DataFlowNaming, SerializedNamer, + }, + tables::GeneratedInfo, + types::generate_record_name, +}; +use crate::{ + backend::serialized::closures::generate_application, + plan::{self, operator_enum, FoldField}, + utils::misc::{new_id, PushMap, PushSet}, +}; + +pub enum OperatorImpls { + Basic, + Iter, + Parallel, + Chunk, +} + +impl OperatorImpls { + pub fn get_paths(&self) -> OperatorImpl { + match self { + Self::Basic => OperatorImpl { + impl_alias: quote!(emdb::dependencies::minister::basic::Basic).into(), + trait_path: quote!(emdb::dependencies::minister::basic::BasicOps).into(), + }, + Self::Iter => OperatorImpl { + impl_alias: quote!(emdb::dependencies::minister::iter::Iter).into(), + trait_path: quote!(emdb::dependencies::minister::iter::IterOps).into(), + }, + Self::Parallel => OperatorImpl { + impl_alias: quote!(emdb::dependencies::minister::parallel::Parallel).into(), + trait_path: quote!(emdb::dependencies::minister::parallel::ParallelOps).into(), + }, + Self::Chunk => OperatorImpl { + impl_alias: quote!(emdb::dependencies::minister::chunk::Chunk).into(), + trait_path: quote!(emdb::dependencies::minister::chunk::ChunkOps).into(), + }, + } + } +} + +pub struct OperatorImpl { + pub impl_alias: Tokens, + pub trait_path: Tokens, +} + +#[enumtrait::store(trait_operator_gen)] +pub trait OperatorGen { + /// Generate the code for the operator + /// - Needs to update the set of mutated tables + /// - Adds to the available errors + /// NOTE: the behaviour of 'mutates' needs to be the same as for + /// [`crate::analysis::mutability`] as that analysis is used for + /// generating traits that the serialized backend can implement. + /// - Adds to the values required for the context. + #[allow(unused_variables, clippy::too_many_arguments)] + fn apply<'imm, 'brw>( + &self, + self_key: plan::Key, + lp: &'imm plan::Plan, + namer: &SerializedNamer, + error_path: &Tokens, + errors: &mut PushMap<'brw, Ident, Option>>, + mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>, + gen_info: &GeneratedInfo<'imm>, + context_vals: &mut Vec<(Ident, Tokens)>, + op_impl: &OperatorImpl, + ) -> Tokens; +} + +#[enumtrait::impl_trait(trait_operator_gen for operator_enum)] +impl OperatorGen for plan::Operator {} + +// table access +impl OperatorGen for plan::UniqueRef { + fn apply<'imm, 'brw>( + &self, + self_key: plan::Key, + lp: &'imm plan::Plan, + namer: &SerializedNamer, + error_path: &Tokens, + errors: &mut PushMap<'brw, Ident, Option>>, + _mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>, + _gen_info: &GeneratedInfo<'imm>, + _context_vals: &mut Vec<(Ident, Tokens)>, + OperatorImpl { impl_alias, .. }: &OperatorImpl, + ) -> Tokens { + let SerializedNamer { + operator_error_parameter, + mod_tables, + self_alias, + pulpit: + pulpit::gen::namer::CodeNamer { + mod_unique, + mod_unique_struct_notfound, + .. + }, + .. 
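Every operator implementation below leans on one convention: a dataflow edge carries either a stream or a single value, and each operation used by the emitted code comes as a pair (`map`/`map_single`, `error_stream`/`error_single`, export/consume buffers), selected by the `stream` flag of the edge. A minimal sketch of that assumed interface shape (not minister's actual definitions):

```rust
// Sketch of the paired stream/single convention the emitted code relies on.
trait Ops {
    type Stream<T>;
    type Single<T>;
    fn map<A, B>(s: Self::Stream<A>, f: impl FnMut(A) -> B) -> Self::Stream<B>;
    fn map_single<A, B>(s: Self::Single<A>, f: impl FnOnce(A) -> B) -> Self::Single<B>;
    // Collapses per-element Results so an operator can `return` the query's
    // error variant on the first failure.
    fn error_stream<A, E>(s: Self::Stream<Result<A, E>>) -> Result<Self::Stream<A>, E>;
    fn error_single<A, E>(s: Self::Single<Result<A, E>>) -> Result<Self::Single<A>, E>;
}
```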
+ } = namer; + + let DataFlowNaming { + holding_var: input_holding_var, + record_type: input_record_type, + .. + } = dataflow_fields(lp, self.input, namer); + let DataFlowNaming { + holding_var, + stream, + data_constructor, + .. + } = dataflow_fields(lp, self.output, namer); + + let unique_reference = namer.transform_field_name(&self.from); + let new_field = namer.transform_field_name(&self.out); + let table = lp.get_table(self.table); + let table_name = &table.name; + + let error_construct = new_error(self_key, error_path, Some(quote!(super::super::#mod_tables::#table_name::#mod_unique::#mod_unique_struct_notfound).into()), errors, namer); + + // TODO: integrate this into the namer somehow? + let unique_field_access = &table.columns[&self.field] + .cons + .unique + .as_ref() + .unwrap() + .alias; + + let transfer_fields = transfer_fields(&input_holding_var, input_record_type, namer); + + let action = quote! { + let data = #self_alias.#table_name.#unique_field_access(&#input_holding_var.#unique_reference)?; + Ok(#data_constructor { + #new_field: data, + #(#transfer_fields,)* + }) + }; + + let (map_kind, buffer, consume, error_kind) = if stream { + (quote!(map), quote!(export_buffer), quote!(consume_buffer), quote!(error_stream)) + } else { + (quote!(map_single), quote!(export_single), quote!(consume_single), quote!(error_single)) + }; + + quote! { + let #holding_var = { + let result = #impl_alias::#consume(#impl_alias::#buffer(#impl_alias::#map_kind( + #input_holding_var, + |#input_holding_var| { + #action + } + ))); + match #impl_alias::#error_kind(result) { + Ok(val) => val, + Err(#operator_error_parameter) => return #error_construct + } + }; + } + .into() + } +} + +impl OperatorGen for plan::ScanRefs { + fn apply<'imm, 'brw>( + &self, + _self_key: plan::Key, + lp: &'imm plan::Plan, + namer: &SerializedNamer, + _error_path: &Tokens, + _errors: &mut PushMap<'brw, Ident, Option>>, + _mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>, + _gen_info: &GeneratedInfo<'imm>, + _context_vals: &mut Vec<(Ident, Tokens)>, + OperatorImpl { impl_alias, .. }: &OperatorImpl, + ) -> Tokens { + let SerializedNamer { + phantom_field, + self_alias, + pulpit: + pulpit::gen::namer::CodeNamer { + struct_window_method_scan_get, + .. + }, + .. + } = namer; + let table_name = &lp.get_table(self.table).name; + let DataFlowNaming { + holding_var, + data_constructor, + .. + } = dataflow_fields(lp, self.output, namer); + let out_ref_name = namer.transform_field_name(&self.out_ref); + quote! { + let #holding_var = { + let stream_values = #impl_alias::consume_stream( + #self_alias.#table_name.#struct_window_method_scan_get() + ); + #impl_alias::map( + stream_values, + |value| #data_constructor { + #out_ref_name : value, + #phantom_field: std::marker::PhantomData + } + ) + }; + } + .into() + } +} +impl OperatorGen for plan::DeRef { + fn apply<'imm, 'brw>( + &self, + self_key: plan::Key, + lp: &'imm plan::Plan, + namer: &SerializedNamer, + error_path: &Tokens, + errors: &mut PushMap<'brw, Ident, Option>>, + _mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>, + _gen_info: &GeneratedInfo<'imm>, + _context_vals: &mut Vec<(Ident, Tokens)>, + OperatorImpl { impl_alias, .. }: &OperatorImpl, + ) -> Tokens { + let SerializedNamer { + operator_error_parameter, + self_alias, + pulpit: + pulpit::gen::namer::CodeNamer { + struct_window_method_get, + .. + }, + .. + } = namer; + let DataFlowNaming { + holding_var: input_holding, + record_type: input_record, + .. 
+ } = dataflow_fields(lp, self.input, namer); + let DataFlowNaming { + holding_var, + stream, + data_constructor: data_type, + .. + } = dataflow_fields(lp, self.output, namer); + + let table_name = &lp.get_table(self.table).name; + let deref_field = namer.transform_field_name(&self.reference); + let new_field = namer.transform_field_name(&self.named); + let error_variant = namer.operator_error_value_name(self_key); + let inner_type = generate_record_name(lp, self.named_type, namer); + let get_value_id = new_id("get_value"); + + // In order to expand fields from the old type into the new one + let transfer_fields_get_struct = transfer_fields( + &get_value_id, + lp.get_record_type_conc(self.named_type), + namer, + ); + let transfer_fields_input_append = transfer_fields(&input_holding, input_record, namer); + + let (map_kind, buffer, consume, error_kind) = if stream { + (quote!(map),quote!(export_buffer), quote!(consume_buffer), quote!(error_stream)) + } else { + (quote!(map_single), quote!(export_single), quote!(consume_single), quote!(error_single)) + }; + + if self.unchecked { + quote!{ + let #holding_var = { + #impl_alias::#consume(#impl_alias::#buffer(#impl_alias::#map_kind( + #input_holding, + |#input_holding| { + match #self_alias.#table_name.#struct_window_method_get(#input_holding.#deref_field) { + Ok(#get_value_id) => #data_type { + #new_field: #inner_type { + #(#transfer_fields_get_struct,)* + }, + #(#transfer_fields_input_append,)* + }, + Err(_) => unreachable!("This is an unchecked dereference (used internally - e.g. generated by a use") + } + } + ))) + }; + } + } else { + let error_construct = new_error(self_key, error_path, None, errors, namer); + quote!{ + let #holding_var = { + let result = #impl_alias::#map_kind( + #input_holding, + |#input_holding| { + match #self_alias.#table_name.#struct_window_method_get(#input_holding.#deref_field) { + Ok(#get_value_id) => Ok(#data_type { + #new_field: #inner_type { + #(#transfer_fields_get_struct,)* + }, + #(#transfer_fields_input_append,)* + }), + Err(_) => return Err( #error_path::#error_variant ) + } + } + ); + match #impl_alias::#error_kind(result) { + Ok(val) => #impl_alias::#consume(#impl_alias::#buffer(val)), + Err(#operator_error_parameter) => return #error_construct + } + }; + } + }.into() + } +} +impl OperatorGen for plan::Update { + // NOTE: We still need to provide a way to keep the values going into the ouput stream. + // Our design decisions could be to: + // 1. Implement some kind of 'split' which lets the user split the update into owned + // values passed into the DB, and the values to continue. + // 2. Let the user decide how to copy and change the values, + // and hope the rust compiler removes redundant clones (e.g. value + // cloned for insert, but is never used after this) + // For simplicity of implementation, I have chosen (2.) + fn apply<'imm, 'brw>( + &self, + self_key: plan::Key, + lp: &'imm plan::Plan, + namer: &SerializedNamer, + error_path: &Tokens, + errors: &mut PushMap<'brw, Ident, Option>>, + mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>, + _gen_info: &GeneratedInfo<'imm>, + context_vals: &mut Vec<(Ident, Tokens)>, + OperatorImpl { impl_alias, .. }: &OperatorImpl, + ) -> Tokens { + let closure_val = namer.operator_closure_value_name(self_key); + let SerializedNamer { + mod_tables, + operator_error_parameter, + phantom_field, + self_alias, + pulpit: + pulpit::gen::namer::CodeNamer { + mod_update, + mod_update_struct_update, + mod_update_enum_error, + .. + }, + .. 
+ } = namer; + let DataFlowNaming { + data_constructor: input_data_constructor, + record_type: input_record_type, + holding_var: input_holding, + .. + } = dataflow_fields(lp, self.input, namer); + let DataFlowNaming { + holding_var, + stream, + .. + } = dataflow_fields(lp, self.output, namer); + + { + // the update expression closure + let update_type = generate_record_name(lp, self.update_type, namer); + let update_exprs = self.mapping.iter().map(|(name, expr)| { + let field_name = namer.transform_field_name(name); + quote!(#field_name: #expr) + }); + let args_names = input_record_type + .fields + .keys() + .map(|k| { + let field_name = namer.transform_field_name(k); + quote!(#field_name) + }) + .collect::>(); + + context_vals.push((closure_val.clone(), quote! { + |#input_data_constructor { #(#args_names,)* .. } | { + ( + #update_type { #(#update_exprs,)* #phantom_field: std::marker::PhantomData }, + #input_data_constructor { #(#args_names,)* #phantom_field: std::marker::PhantomData } + ) + } + } + .into())); + } + + let update_method = namer.pulpit_table_interaction(self_key); + let table_name = &lp.get_table(self.table).name; + let key_member = namer.transform_field_name(&self.reference); + + let transfer_update_struct = self.mapping.keys().map(|name| { + let field_name = namer.transform_field_name(name); + quote!(#field_name: update_struct.#field_name) + }); + + let error_construct = new_error( + self_key, + error_path, + Some(quote!( + super::super::#mod_tables::#table_name::#mod_update::#update_method::#mod_update_enum_error + ).into()), + errors, + namer + ); + + mutated_tables.push(plan::ImmKey::new(self.table, lp)); + + let (map_kind, buffer, consume, error_kind) = if stream { + (quote!(map_seq), quote!(export_buffer), quote!(consume_buffer), quote!(error_stream)) + } else { + (quote!(map_single), quote!(export_single), quote!(consume_single), quote!(error_single)) + }; + + quote! { + let #holding_var = { + let results = #impl_alias::#map_kind( + #input_holding, + |#input_holding| { + // need to clone to avoid borrow issues + // TODO: determine how closure clonign affects cloning of internals + let (update_struct, continue_struct) = #closure_val.clone()(#input_holding); + + match #self_alias.#table_name.#update_method( + #mod_tables::#table_name::#mod_update::#update_method::#mod_update_struct_update { + #(#transfer_update_struct,)* + }, + continue_struct.#key_member + ) { + Ok(()) => Ok(continue_struct), + Err(#operator_error_parameter) => #error_construct, + } + } + ); + #impl_alias::#consume(#impl_alias::#buffer(#impl_alias::#error_kind(results)?)) + }; + } + .into() + } +} +impl OperatorGen for plan::Insert { + fn apply<'imm, 'brw>( + &self, + self_key: plan::Key, + lp: &'imm plan::Plan, + namer: &SerializedNamer, + error_path: &Tokens, + errors: &mut PushMap<'brw, Ident, Option>>, + mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>, + gen_info: &GeneratedInfo<'imm>, + _context_vals: &mut Vec<(Ident, Tokens)>, + OperatorImpl { impl_alias, .. }: &OperatorImpl, + ) -> Tokens { + let SerializedNamer { + mod_tables, + operator_error_parameter, + phantom_field, + self_alias, + pulpit: + pulpit::gen::namer::CodeNamer { + mod_insert, + mod_insert_enum_error, + mod_insert_struct_insert, + .. + }, + .. + } = namer; + let DataFlowNaming { + holding_var: input_holding, + record_type, + .. + } = dataflow_fields(lp, self.input, namer); + let DataFlowNaming { + holding_var, + stream, + data_constructor, + .. 
+ } = dataflow_fields(lp, self.output, namer); + let table_name = &lp.get_table(self.table).name; + let ref_name = namer.transform_field_name(&self.out_ref); + + mutated_tables.push(plan::ImmKey::new(self.table, lp)); + + let (map_kind, buffer, consume, error_kind) = if stream { + (quote!(map_seq), quote!(export_buffer), quote!(consume_buffer), quote!(error_stream)) + } else { + (quote!(map_single), quote!(export_single), quote!(consume_single), quote!(error_single)) + }; + + let insert_fields = record_type.fields.keys().map(|name| { + let field_name = namer.transform_field_name(name); + quote!(#field_name: #input_holding.#field_name) + }); + + let results_internal = if gen_info.insert_can_error[&plan::Idx::new(self.table, lp)] { + let error_construct = new_error(self_key, error_path, Some(quote!(super::super::#mod_tables::#table_name::#mod_insert::#mod_insert_enum_error).into()), errors, namer); + quote! { + { + let result = #impl_alias::#map_kind( + #input_holding, + |#input_holding| { + Ok(#data_constructor { + #ref_name: #self_alias.#table_name.insert(#mod_tables::#table_name::#mod_insert::#mod_insert_struct_insert { + #(#insert_fields,)* + })?, + #phantom_field: std::marker::PhantomData + }) + } + ); + match #impl_alias::#error_kind(result) { + Ok(val) => val, + Err(#operator_error_parameter) => return #error_construct + } + } + } + } else { + quote! { + #impl_alias::#map_kind( + #input_holding, + |#input_holding| { + #data_constructor { + #ref_name: #self_alias.#table_name.insert(#mod_tables::#table_name::#mod_insert::#mod_insert_struct_insert { + #(#insert_fields,)* + }), + #phantom_field: std::marker::PhantomData + } + } + ) + } + }; + + quote! { + let #holding_var = #impl_alias::#consume(#impl_alias::#buffer(#results_internal)); + } + .into() + } +} +impl OperatorGen for plan::Delete { + fn apply<'imm, 'brw>( + &self, + self_key: plan::Key, + lp: &'imm plan::Plan, + namer: &SerializedNamer, + error_path: &Tokens, + errors: &mut PushMap<'brw, Ident, Option>>, + mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>, + _gen_info: &GeneratedInfo<'imm>, + _context_vals: &mut Vec<(Ident, Tokens)>, + OperatorImpl { impl_alias, .. }: &OperatorImpl, + ) -> Tokens { + let SerializedNamer { + mod_tables, + operator_error_parameter, + self_alias, + pulpit: + pulpit::gen::namer::CodeNamer { + type_key_error, + struct_window_method_delete, + .. + }, + .. + } = namer; + let DataFlowNaming { + holding_var: input_holding, + .. + } = dataflow_fields(lp, self.input, namer); + let DataFlowNaming { + holding_var, + stream, + .. 
+ } = dataflow_fields(lp, self.output, namer); + + mutated_tables.push(plan::ImmKey::new(self.table, lp)); + + let (map_kind, buffer, consume, error_kind) = if stream { + (quote!(map_seq), quote!(export_buffer), quote!(consume_buffer), quote!(error_stream)) + } else { + (quote!(map_single), quote!(export_single), quote!(consume_single), quote!(error_single)) + }; + + let table_name = &lp.get_table(self.table).name; + let key_member = namer.transform_field_name(&self.reference); + let error_construct = new_error( + self_key, + error_path, + Some(quote!(super::super::#mod_tables::#table_name::#type_key_error).into()), + errors, + namer, + ); + + quote!{ + let #holding_var = { + let result = #impl_alias::#map_kind( + #input_holding, + |#input_holding| { + match #self_alias.#table_name.#struct_window_method_delete(#input_holding.#key_member) { + Ok(()) => Ok(#input_holding), + Err(#operator_error_parameter) => #error_construct, + } + } + ); + #impl_alias::#consume(#impl_alias::#buffer(#impl_alias::#error_kind(result)?)) + }; + }.into() + } +} + +// Errors +impl OperatorGen for plan::Assert { + fn apply<'imm, 'brw>( + &self, + self_key: plan::Key, + lp: &'imm plan::Plan, + namer: &SerializedNamer, + error_path: &Tokens, + errors: &mut PushMap<'brw, Ident, Option>>, + _mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>, + _gen_info: &GeneratedInfo<'imm>, + context_vals: &mut Vec<(Ident, Tokens)>, + OperatorImpl { impl_alias, .. }: &OperatorImpl, + ) -> Tokens { + let DataFlowNaming { + holding_var: input_holding, + .. + } = dataflow_fields(lp, self.input, namer); + let DataFlowNaming { + holding_var, + stream, + .. + } = dataflow_fields(lp, self.output, namer); + + let closure_data = namer.operator_closure_value_name(self_key); + + context_vals.push(( + closure_data.clone(), + (boolean_predicate(lp, &self.assert, self.input, namer).into_token_stream()).into(), + )); + + let error_construct = new_error(self_key, error_path, None, errors, namer); + + let all_kind = if stream { + quote!(all) + } else { + quote!(is) + }; + + quote! { + let #holding_var = { + let (result, pass_on) = #impl_alias::#all_kind( + #input_holding, + #closure_data + ) + if result { + pass_on + } else { + return #error_construct; + } + }; + } + .into() + } +} + +impl OperatorGen for plan::Map { + fn apply<'imm, 'brw>( + &self, + self_key: plan::Key, + lp: &'imm plan::Plan, + namer: &SerializedNamer, + _error_path: &Tokens, + _errors: &mut PushMap<'brw, Ident, Option>>, + _mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>, + _gen_info: &GeneratedInfo<'imm>, + context_vals: &mut Vec<(Ident, Tokens)>, + OperatorImpl { impl_alias, .. }: &OperatorImpl, + ) -> Tokens { + let SerializedNamer { + phantom_field, + .. + } = namer; + let DataFlowNaming { + holding_var: input_holding, + data_constructor: input_constructor, + record_type: input_record_type, + .. + } = dataflow_fields(lp, self.input, namer); + let DataFlowNaming { + holding_var, + data_constructor, + stream, + .. + } = dataflow_fields(lp, self.output, namer); + + let closure_data = namer.operator_closure_value_name(self_key); + + let input_fields = expose_user_fields(input_record_type, namer); + + let mapping_fields = self.mapping.iter().map(|(rf, e)| { + let field_name = namer.transform_field_name(rf); + quote! {#field_name: #e} + }); + + context_vals.push(( + closure_data.clone(), + quote! 
{ + |#input_constructor { #(#input_fields,)* }| { + #data_constructor { + #(#mapping_fields,)* + #phantom_field: std::marker::PhantomData + } + } + } + .into(), + )); + + let map_fn = if stream { + quote!(map) + } else { + quote!(map_single) + }; + + quote! { + let #holding_var = #impl_alias::#map_fn( + #input_holding, + #closure_data + ); + } + .into() + } +} +impl OperatorGen for plan::Expand { + fn apply<'imm, 'brw>( + &self, + _self_key: plan::Key, + lp: &'imm plan::Plan, + namer: &SerializedNamer, + _error_path: &Tokens, + _errors: &mut PushMap<'brw, Ident, Option>>, + _mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>, + _gen_info: &GeneratedInfo<'imm>, + _context_vals: &mut Vec<(Ident, Tokens)>, + OperatorImpl { impl_alias, .. }: &OperatorImpl, + ) -> Tokens { + let DataFlowNaming { + holding_var: input_holding, + .. + } = dataflow_fields(lp, self.input, namer); + let DataFlowNaming { + holding_var, + stream, + .. + } = dataflow_fields(lp, self.output, namer); + + let map_fn = if stream { + quote!(map) + } else { + quote!(map_single) + }; + + let expand_field = namer.transform_field_name(&self.field); + + quote! { + let #holding_var = #impl_alias::#map_fn( + #input_holding, + | #input_holding | #input_holding.#expand_field + ); + } + .into() + } +} +impl OperatorGen for plan::Fold { + fn apply<'imm, 'brw>( + &self, + self_key: plan::Key, + lp: &'imm plan::Plan, + namer: &SerializedNamer, + _error_path: &Tokens, + _errors: &mut PushMap<'brw, Ident, Option>>, + _mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>, + _gen_info: &GeneratedInfo<'imm>, + context_vals: &mut Vec<(Ident, Tokens)>, + OperatorImpl { impl_alias, .. }: &OperatorImpl, + ) -> Tokens { + let SerializedNamer { + phantom_field, + .. + } = namer; + let DataFlowNaming { + data_constructor: input_data_constructor, + record_type: input_record_type, + holding_var: input_holding, + .. + } = dataflow_fields(lp, self.input, namer); + let DataFlowNaming { + holding_var, + data_constructor: acc_constructor, + data_type: acc_data_type, + record_type: acc_record_type, + .. + } = dataflow_fields(lp, self.output, namer); + let closure_value = namer.operator_closure_value_name(self_key); + + let (init_fields, update_fields): (Vec<_>, Vec<_>) = self + .fold_fields + .iter() + .map(|(rf, plan::FoldField { initial, update })| { + let field_name = namer.transform_field_name(rf); + ( + quote!(#field_name: #initial), + quote!(#field_name: { #update }), + ) + }) + .unzip(); + + let acc_fields = expose_user_fields(acc_record_type, namer); + let input_fields = expose_user_fields(input_record_type, namer); + + context_vals.push((closure_value.clone(), quote! { + ( + #acc_constructor { + #(#init_fields,)* + #phantom_field: std::marker::PhantomData + }, + |#acc_constructor { #(#acc_fields,)* } : #acc_data_type, #input_data_constructor { #(#input_fields,)* } | { + #acc_constructor { + #(#update_fields,)* + #phantom_field: std::marker::PhantomData + } + } + ) + } + .into())); + + quote! 
{ + let #holding_var = { + let (init, update) = #closure_value; + #impl_alias::fold(#input_holding, init, update) + }; + } + .into() + } +} +impl OperatorGen for plan::Filter { + fn apply<'imm, 'brw>( + &self, + self_key: plan::Key, + lp: &'imm plan::Plan, + namer: &SerializedNamer, + _error_path: &Tokens, + _errors: &mut PushMap<'brw, Ident, Option>>, + _mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>, + _gen_info: &GeneratedInfo<'imm>, + context_vals: &mut Vec<(Ident, Tokens)>, + OperatorImpl { impl_alias, .. }: &OperatorImpl, + ) -> Tokens { + let DataFlowNaming { + holding_var: input_holding, + .. + } = dataflow_fields(lp, self.input, namer); + let DataFlowNaming { + holding_var, + .. + } = dataflow_fields(lp, self.output, namer); + let closure_value = namer.operator_closure_value_name(self_key); + + context_vals.push(( + closure_value.clone(), + (boolean_predicate(lp, &self.predicate, self.input, namer).into_token_stream()).into(), + )); + + quote!{ + let #holding_var = #impl_alias::filter(#input_holding, #closure_value); + }.into() + } +} + +impl OperatorGen for plan::Combine { + fn apply<'imm, 'brw>( + &self, + self_key: plan::Key, + lp: &'imm plan::Plan, + namer: &SerializedNamer, + _error_path: &Tokens, + _errors: &mut PushMap<'brw, Ident, Option>>, + _mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>, + _gen_info: &GeneratedInfo<'imm>, + context_vals: &mut Vec<(Ident, Tokens)>, + OperatorImpl { impl_alias, .. }: &OperatorImpl, + ) -> Tokens { + let SerializedNamer { + phantom_field, + .. + } = namer; + let DataFlowNaming { + holding_var: input_holding, + data_type, + data_constructor, + .. + } = dataflow_fields(lp, self.input, namer); + let DataFlowNaming { + holding_var, + .. + } = dataflow_fields(lp, self.output, namer); + let Self { + left_name, + right_name, + .. + } = self; + let closure_value = namer.operator_closure_value_name(self_key); + + let (field_defaults, field_sets): (Vec<_>, Vec<_>) = self.update_fields.iter().map(|(field, FoldField { initial, update })| { + let field_name = namer.transform_field_name(field); + (quote!(#field_name: #initial), quote!(#field_name: #update)) + }).unzip(); + + context_vals.push(( + closure_value.clone(), + quote! { + ( + #data_constructor { + #(#field_defaults,)* + #phantom_field: std::marker::PhantomData + }, + |#left_name: #data_type, #right_name: #data_type| + #data_constructor { + #(#field_sets,)* + #phantom_field: std::marker::PhantomData + } + ) + } + .into(), + )); + + quote!{ + let #holding_var = { + let (alternative, update) = #closure_value; + #impl_alias::combine(#input_holding, alternative, update) + }; + }.into() + } +} + +impl OperatorGen for plan::Sort { + fn apply<'imm, 'brw>( + &self, + _self_key: plan::Key, + lp: &'imm plan::Plan, + namer: &SerializedNamer, + _error_path: &Tokens, + _errors: &mut PushMap<'brw, Ident, Option>>, + _mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>, + _gen_info: &GeneratedInfo<'imm>, + _context_vals: &mut Vec<(Ident, Tokens)>, + OperatorImpl { impl_alias, .. }: &OperatorImpl, + ) -> Tokens { + let DataFlowNaming { + holding_var: input_holding, + .. + } = dataflow_fields(lp, self.input, namer); + let DataFlowNaming { + holding_var, + .. 
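The `(initial, update)` pair pushed into `context_vals` by `Fold` (and, with an alternative value, by `Combine` below) has ordinary fold semantics; a plain-Rust analogue of what the generated pair computes, for a single accumulator field with illustrative names:

```rust
// Plain-Rust analogue of a generated fold closure pair.
fn main() {
    let (init, update) = (0u64, |acc: u64, row: u64| acc + row);
    let total = [1u64, 2, 3].into_iter().fold(init, update);
    assert_eq!(total, 6);
}
```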
+ } = dataflow_fields(lp, self.output, namer); + + let order_greater = quote!(std::cmp::Ordering::Greater); + let order_equal = quote!(std::cmp::Ordering::Equal); + let order_less = quote!(std::cmp::Ordering::Less); + + let comparisons = self.sort_order.iter().map(|(rf, order)| { + let (gt_result, lt_result) = match order { + plan::SortOrder::Asc => (&order_greater, &order_less), + plan::SortOrder::Desc => (&order_less, &order_greater), + }; + let field_name = namer.transform_field_name(rf); + quote! { + match left.#field_name.cmp(&right.#field_name) { + std::cmp::Ordering::Greater => return #gt_result, + std::cmp::Ordering::Less => return #lt_result, + std::cmp::Ordering::Equal => (), + } + } + }); + + quote!{ + let #holding_var = #impl_alias::sort(#input_holding, |left, right| { + #(#comparisons)* + #order_equal + }); + }.into() + } +} +impl OperatorGen for plan::Take { + fn apply<'imm, 'brw>( + &self, + self_key: plan::Key, + lp: &'imm plan::Plan, + namer: &SerializedNamer, + _error_path: &Tokens, + _errors: &mut PushMap<'brw, Ident, Option>>, + _mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>, + _gen_info: &GeneratedInfo<'imm>, + context_vals: &mut Vec<(Ident, Tokens)>, + OperatorImpl { impl_alias, .. }: &OperatorImpl, + ) -> Tokens { + let DataFlowNaming { + holding_var: input_holding, + .. + } = dataflow_fields(lp, self.input, namer); + let DataFlowNaming { + holding_var, + .. + } = dataflow_fields(lp, self.output, namer); + + let closure_value = namer.operator_closure_value_name(self_key); + + let take_expr = &self.limit; + + context_vals.push(( + closure_value.clone(), + quote! { {let limit: usize = #take_expr; limit} }.into(), + )); + + quote!{ + let #holding_var = #impl_alias::take(#input_holding, #closure_value); + }.into() + } +} +impl OperatorGen for plan::Collect { + fn apply<'imm, 'brw>( + &self, + _self_key: plan::Key, + lp: &'imm plan::Plan, + namer: &SerializedNamer, + _error_path: &Tokens, + _errors: &mut PushMap<'brw, Ident, Option>>, + _mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>, + _gen_info: &GeneratedInfo<'imm>, + _context_vals: &mut Vec<(Ident, Tokens)>, + OperatorImpl { impl_alias, .. }: &OperatorImpl, + ) -> Tokens { + let SerializedNamer { + phantom_field, + .. + } = namer; + let DataFlowNaming { + holding_var: input_holding, + .. + } = dataflow_fields(lp, self.input, namer); + let DataFlowNaming { + holding_var, + data_constructor, + .. + } = dataflow_fields(lp, self.output, namer); + let field = namer.transform_field_name(&self.into); + + // NOTE: Dependent on the collect type being a vector + // - Dependency is also present in the `types::generate_scalar_type` + // method + + quote!{ + let #holding_var = #impl_alias::consume_single( + #data_constructor { + #field: #impl_alias::export_buffer(#input_holding), + #phantom_field: std::marker::PhantomData + } + ); + }.into() + } +} + +impl OperatorGen for plan::Count { + fn apply<'imm, 'brw>( + &self, + _self_key: plan::Key, + lp: &'imm plan::Plan, + namer: &SerializedNamer, + _error_path: &Tokens, + _errors: &mut PushMap<'brw, Ident, Option>>, + _mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>, + _gen_info: &GeneratedInfo<'imm>, + _context_vals: &mut Vec<(Ident, Tokens)>, + OperatorImpl { impl_alias, .. }: &OperatorImpl, + ) -> Tokens { + let SerializedNamer { + phantom_field, + .. + } = namer; + let DataFlowNaming { + holding_var: input_holding, + .. 
+ } = dataflow_fields(lp, self.input, namer); + let DataFlowNaming { + holding_var, + data_constructor, + .. + } = dataflow_fields(lp, self.output, namer); + let field = namer.transform_field_name(&self.out_field); + + quote! { + let #holding_var = #impl_alias::map_single( + #impl_alias::count(#input_holding), + |count| + #data_constructor { + #field: count, + #phantom_field: std::marker::PhantomData + } + ); + } + .into() + } +} + +impl OperatorGen for plan::Join { + fn apply<'imm, 'brw>( + &self, + self_key: plan::Key, + lp: &'imm plan::Plan, + namer: &SerializedNamer, + _error_path: &Tokens, + _errors: &mut PushMap<'brw, Ident, Option>>, + _mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>, + _gen_info: &GeneratedInfo<'imm>, + context_vals: &mut Vec<(Ident, Tokens)>, + OperatorImpl { impl_alias, .. }: &OperatorImpl, + ) -> Tokens { + let SerializedNamer { + phantom_field, + .. + } = namer; + let DataFlowNaming { + holding_var: left_hold_var, + data_type: data_left, + .. + } = dataflow_fields(lp, self.left.dataflow, namer); + let DataFlowNaming { + holding_var: right_hold_var, + data_type: data_right, + .. + } = dataflow_fields(lp, self.right.dataflow, namer); + let DataFlowNaming { + holding_var, + data_constructor, + .. + } = dataflow_fields(lp, self.output, namer); + + let left_field = namer.transform_field_name(&self.left.identifier); + let right_field = namer.transform_field_name(&self.right.identifier); + + let joined = match &self.join_kind { + plan::JoinKind::Inner => match &self.match_kind { + plan::MatchKind::Cross => { + quote! {#impl_alias::join_cross(#left_hold_var, #right_hold_var)} + } + plan::MatchKind::Pred(predicate) => { + let join_pred = namer.operator_closure_value_name(self_key); + + context_vals.push(( + join_pred.clone(), + quote! { + |left: &#data_left, right: &#data_right| -> bool { + #predicate + } + } + .into(), + )); + + quote! {#impl_alias::predicate_join(#left_hold_var, #right_hold_var, #join_pred)} + } + plan::MatchKind::Equi { + left_field, + right_field, + } => { + let left_select = namer.transform_field_name(left_field); + let right_select = namer.transform_field_name(right_field); + quote! { + { + #impl_alias::equi_join(#left_hold_var, #right_hold_var, |left: &#data_left| &left.#left_select, |right: &#data_right| &right.#right_select) + } + } + } + }, + }; + quote! { + let #holding_var = #impl_alias::map(#joined, |(left, right): (#data_left, #data_right)| { + #data_constructor { + #left_field: left, + #right_field: right, + #phantom_field: std::marker::PhantomData + } + }); + } + .into() + } +} +impl OperatorGen for plan::Fork { + fn apply<'imm, 'brw>( + &self, + _self_key: plan::Key, + lp: &'imm plan::Plan, + namer: &SerializedNamer, + _error_path: &Tokens, + _errors: &mut PushMap<'brw, Ident, Option>>, + _mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>, + _gen_info: &GeneratedInfo<'imm>, + _context_vals: &mut Vec<(Ident, Tokens)>, + OperatorImpl { impl_alias, .. }: &OperatorImpl, + ) -> Tokens { + let DataFlowNaming { + holding_var: input_holding, + stream, + .. + } = dataflow_fields(lp, self.input, namer); + + if self.outputs.is_empty() { + unreachable!("Cannot generate fork to no outputs") + } else { + + let fork_op = if stream { + quote!(fork) + } else { + quote!(fork_single) + }; + + let (mut other_outputs_names, mut other_outputs_fork): (Vec<_>, Vec<_>) = self.outputs.iter().map(|df_out| { + let DataFlowNaming { holding_var, .. 
} = dataflow_fields(lp, *df_out, namer);
+                (quote!(#holding_var), quote!(let (#holding_var, temp) = #impl_alias::#fork_op(temp)))
+            }).unzip();
+
+            // NOTE: we do not need the final fork, we can just use temp (even if temp
+            //       types are different, we can shadow in the same scope - thanks
+            //       rust 🦀).
+            other_outputs_fork.pop();
+            let final_name = other_outputs_names.pop();
+
+            quote! {
+                let (#(#other_outputs_names,)* #final_name) = {
+                    let temp = #input_holding;
+                    #(#other_outputs_fork;)*
+                    (#(#other_outputs_names,)* temp)
+                };
+            }
+        }
+        .into()
+    }
+}
+impl OperatorGen for plan::Union {
+    fn apply<'imm, 'brw>(
+        &self,
+        _self_key: plan::Key<plan::Operator>,
+        lp: &'imm plan::Plan,
+        namer: &SerializedNamer,
+        _error_path: &Tokens<Path>,
+        _errors: &mut PushMap<'brw, Ident, Option<Tokens<Path>>>,
+        _mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>,
+        _gen_info: &GeneratedInfo<'imm>,
+        _context_vals: &mut Vec<(Ident, Tokens<Expr>)>,
+        OperatorImpl { impl_alias, .. }: &OperatorImpl,
+    ) -> Tokens<Stmt> {
+        let DataFlowNaming { holding_var, .. } = dataflow_fields(lp, self.output, namer);
+        if self.inputs.is_empty() {
+            unreachable!("Cannot generate union of no data")
+        } else {
+            let mut inflows = self.inputs.iter();
+            let first_input = inflows.next().unwrap();
+            let first_holding_in = dataflow_fields(lp, *first_input, namer).holding_var;
+
+            let body = inflows.fold(quote! {#first_holding_in}, |prev, df| {
+                let var = dataflow_fields(lp, *df, namer).holding_var;
+                quote! {
+                    #impl_alias::union(#prev, #var)
+                }
+            });
+            quote! {
+                let #holding_var = #body;
+            }
+        }
+        .into()
+    }
+}
+impl OperatorGen for plan::Row {
+    fn apply<'imm, 'brw>(
+        &self,
+        self_key: plan::Key<plan::Operator>,
+        lp: &'imm plan::Plan,
+        namer: &SerializedNamer,
+        _error_path: &Tokens<Path>,
+        _errors: &mut PushMap<'brw, Ident, Option<Tokens<Path>>>,
+        _mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>,
+        _gen_info: &GeneratedInfo<'imm>,
+        context_vals: &mut Vec<(Ident, Tokens<Expr>)>,
+        OperatorImpl { impl_alias, .. }: &OperatorImpl,
+    ) -> Tokens<Stmt> {
+        let SerializedNamer {
+            phantom_field,
+            ..
+        } = namer;
+        let DataFlowNaming {
+            holding_var,
+            data_constructor,
+            ..
+        } = dataflow_fields(lp, self.output, namer);
+
+        let data_name = namer.operator_closure_value_name(self_key);
+
+        let data_fields = self.fields.iter().map(|(rf, e)| {
+            let member_name = namer.transform_field_name(rf);
+            quote!(#member_name: #e)
+        });
+
+        context_vals.push((
+            data_name.clone(),
+            quote! {
+                #data_constructor {
+                    #(#data_fields,)*
+                    #phantom_field: std::marker::PhantomData
+                }
+            }
+            .into(),
+        ));
+
+        quote! {
+            let #holding_var = #impl_alias::consume_single(#data_name);
+        }
+        .into()
+    }
+}
+
+impl OperatorGen for plan::Return {
+    fn apply<'imm, 'brw>(
+        &self,
+        self_key: plan::Key<plan::Operator>,
+        lp: &'imm plan::Plan,
+        namer: &SerializedNamer,
+        _error_path: &Tokens<Path>,
+        _errors: &mut PushMap<'brw, Ident, Option<Tokens<Path>>>,
+        _mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>,
+        _gen_info: &GeneratedInfo<'imm>,
+        _context_vals: &mut Vec<(Ident, Tokens<Expr>)>,
+        _operator_impl: &OperatorImpl,
+    ) -> Tokens<Stmt> {
+        let DataFlowNaming { holding_var, .. } = dataflow_fields(lp, self.input, namer);
+        let return_val = namer.operator_return_value_name(self_key);
+        quote!
{ let #return_val = #holding_var; }.into() + } +} +impl OperatorGen for plan::Discard { + fn apply<'imm, 'brw>( + &self, + _self_key: plan::Key, + lp: &'imm plan::Plan, + namer: &SerializedNamer, + _error_path: &Tokens, + _errors: &mut PushMap<'brw, Ident, Option>>, + _mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>, + _gen_info: &GeneratedInfo<'imm>, + _context_vals: &mut Vec<(Ident, Tokens)>, + _operator_impl: &OperatorImpl, + ) -> Tokens { + let DataFlowNaming { holding_var, .. } = dataflow_fields(lp, self.input, namer); + quote! { let _ = #holding_var; }.into() + } +} + +// contexts +impl OperatorGen for plan::GroupBy { + fn apply<'imm, 'brw>( + &self, + self_key: plan::Key, + lp: &'imm plan::Plan, + namer: &SerializedNamer, + error_path: &Tokens, + errors: &mut PushMap<'brw, Ident, Option>>, + mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>, + gen_info: &GeneratedInfo<'imm>, + context_vals: &mut Vec<(Ident, Tokens)>, + operator_impl: &OperatorImpl, + ) -> Tokens { + // scoping out the mutable tables and errors to determine how to generate return and mapping. + let context_closure_var = namer.operator_closure_value_name(self_key); + let OperatorImpl {impl_alias, ..} = operator_impl; + let ContextGen { + code, + can_error, + mutates, + } = generate_application( + lp, + self.inner_ctx, + error_path, + errors, + mutated_tables, + gen_info, + namer, + operator_impl, + ); + + context_vals.push((context_closure_var.clone(), code.into_token_stream().into())); + + let grouping_field = namer.transform_field_name(&self.group_by); + + let SerializedNamer { + phantom_field, + self_alias, + .. + } = namer; + let DataFlowNaming { + holding_var: input_holding, + .. + } = dataflow_fields(lp, self.input, namer); + let DataFlowNaming { + holding_var, + .. + } = dataflow_fields(lp, self.output, namer); + let DataFlowNaming { + data_constructor: inner_data_constructor, + record_type: inner_record_type, + .. + } = dataflow_fields(lp, self.stream_in, namer); + + let inner_fields = inner_record_type.fields.keys().map(|rf| { + let field_name = namer.transform_field_name(rf); + quote!(#field_name: input.#field_name) + }); + + let map_kind = if mutates { + quote!(map_seq) + } else { + quote!(map) + }; + + let final_result = if can_error { + quote!(#impl_alias::error_stream(results)?) + } else { + quote!(results) + }; + + // NOTE: Further opportunity to optimise here + // - Allowing non-mutating, non-erroring to be computed inside the groupby? + // - No longer need to materialise output if we can stream through + + quote! 
{ + let #holding_var = { + let grouped = #impl_alias::group_by( + #input_holding, + |input| { + ( + input.#grouping_field, + #inner_data_constructor { + #(#inner_fields,)* + #phantom_field: std::marker::PhantomData + } + ) + } + ); + let results = #impl_alias::#map_kind( + grouped, + |(grouping, inner_stream)| { + (#context_closure_var)(#self_alias, grouping, inner_stream) + } + ); + #final_result + }; + } + .into() + } +} + +impl OperatorGen for plan::Lift { + fn apply<'imm, 'brw>( + &self, + self_key: plan::Key, + lp: &'imm plan::Plan, + namer: &SerializedNamer, + error_path: &Tokens, + errors: &mut PushMap<'brw, Ident, Option>>, + mutated_tables: &mut PushSet<'brw, plan::ImmKey<'imm, plan::Table>>, + gen_info: &GeneratedInfo<'imm>, + context_vals: &mut Vec<(Ident, Tokens)>, + operator_impl: &OperatorImpl, + ) -> Tokens { + let context_closure_var = namer.operator_closure_value_name(self_key); + let OperatorImpl {impl_alias, ..} = operator_impl; + let ContextGen { + code, + can_error, + mutates, + } = generate_application( + lp, + self.inner_ctx, + error_path, + errors, + mutated_tables, + gen_info, + namer, + operator_impl, + ); + context_vals.push((context_closure_var.clone(), code.into_token_stream().into())); + + let SerializedNamer { + self_alias, + .. + } = namer; + let DataFlowNaming { + holding_var: input_holding, + stream, + .. + } = dataflow_fields(lp, self.input, namer); + let DataFlowNaming { + holding_var, + .. + } = dataflow_fields(lp, self.output, namer); + + let map_kind = if stream { + if mutates { + quote!(map_seq) + } else { + quote!(map) + } + } else { + quote!(map_single) + }; + + let final_result = if can_error { + let error_map = if stream { + quote!(error_stream) + } else { + quote!(error_single) + }; + quote!(#impl_alias::#error_map(results)?) + } else { + quote!(results) + }; + + // NOTE: relies on the namer's mapping of operator names leaving user's + // field names the same. + + let closure_args = lp + .get_context(self.inner_ctx) + .params + .iter() + .map(|(id, _)| quote!(lifted.#id)); + + quote! { + let #holding_var = { + let results = #impl_alias::#map_kind( + #input_holding, + |lifted| { + (#context_closure_var)(#self_alias, #(#closure_args),*) + } + ); + #final_result + }; + } + .into() + } +} diff --git a/crates/emdb_core/src/backend/serialized/queries.rs b/crates/emdb_core/src/backend/serialized/queries.rs new file mode 100644 index 0000000..b4fa4fc --- /dev/null +++ b/crates/emdb_core/src/backend/serialized/queries.rs @@ -0,0 +1,279 @@ +use std::collections::{HashMap, HashSet}; + +use pulpit::gen::namer::CodeNamer; +use quote::quote; +use quote_debug::Tokens; +use syn::{ExprBlock, Ident, ImplItemFn, ItemEnum, ItemImpl, ItemMod, Path}; + +use crate::{ + backend::interface::{namer::InterfaceNamer, InterfaceTrait}, plan, utils::misc::{PushMap, PushSet} +}; + +use super::{ + closures::{generate_application, ContextGen}, namer::SerializedNamer, operators::OperatorImpl, tables::GeneratedInfo, types::generate_scalar_type +}; + +fn generate_errors( + errors: HashMap>>, + SerializedNamer { + mod_queries_mod_query_enum_error, + .. + }: &SerializedNamer, +) -> Option> { + if errors.is_empty() { + None + } else { + let variants = errors.iter().map(|(name, inner)| { + if let Some(path) = inner { + quote!(#name(#path)) + } else { + quote!(#name) + } + }); + Some( + quote! 
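The `generate_errors` helper below assembles, per query, the variants that `new_error` pushed while generating operators. For a query that can miss a unique lookup at operator 3 and fail an assert at operator 7, the emitted enum looks roughly like this; the indices, table name, and module path are illustrative stand-ins, defined locally so the sketch is self-contained:

```rust
// Stand-in for the pulpit-generated error type a payload variant would wrap.
mod tables {
    pub mod customers {
        pub mod unique {
            #[derive(Debug)]
            pub struct NotFound;
        }
    }
}

// Shape of a generated per-query error enum.
#[derive(Debug)]
pub enum Error {
    Error3(tables::customers::unique::NotFound), // unique-lookup miss
    Error7,                                      // failed assert (no payload)
}
```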
{ + #[derive(Debug)] + pub enum #mod_queries_mod_query_enum_error { + #(#variants),* + } + } + .into(), + ) + } +} + +struct CommitInfo { + commits: Tokens, + aborts: Tokens, +} + +fn generate_commits<'imm>( + lp: &'imm plan::Plan, + mutated_tables: HashSet>, + SerializedNamer { + pulpit: + CodeNamer { + struct_window_method_commit, + struct_window_method_abort, + .. + }, + .. + }: &SerializedNamer, +) -> Option { + if mutated_tables.is_empty() { + None + } else { + let (commits, aborts): (Vec<_>, Vec<_>) = mutated_tables + .iter() + .map(|key| { + let table_name = &lp.get_table(**key).name; + ( + quote! { + self.#table_name.#struct_window_method_commit(); + }, + quote! { + self.#table_name.#struct_window_method_abort(); + }, + ) + }) + .unzip(); + Some(CommitInfo { + commits: quote! { { #(#commits;)* } }.into(), + aborts: quote! { { #(#aborts;)* } }.into(), + }) + } +} + +struct QueryMod { + query_mod: Tokens, + query_impl: Tokens, +} + +impl QueryMod { + fn extract(self) -> (Tokens, Tokens) { + (self.query_mod, self.query_impl) + } +} + +fn generate_query<'imm>( + lp: &'imm plan::Plan, + gen_info: &GeneratedInfo<'imm>, + namer: &SerializedNamer, + plan::Query { name, ctx }: &'imm plan::Query, + operator_impl: &OperatorImpl, +) -> QueryMod { + let OperatorImpl { impl_alias, .. } = operator_impl; + let SerializedNamer { + qy_lifetime, + mod_queries, + mod_queries_mod_query_enum_error, + .. + } = namer; + + let context = lp.get_context(*ctx); + let return_type = if let Some(ret) = context.get_return_type(lp) { + let ty = namer.record_name(ret); + quote!(#ty) + } else { + quote!(()) + }; + + let (params_use, params): (Vec<_>, Vec<_>) = context.params.iter().map(|(name, ty_key)| { + let ty = generate_scalar_type(lp, &gen_info.get_types, *ty_key, namer); + (name, quote!(#name: #ty)) + }).unzip(); + + let mut errors = HashMap::new(); + let mut mutated_tables = HashSet::new(); + + let ContextGen { code, .. } = generate_application( + lp, + *ctx, + "e!(#mod_queries::#name::#mod_queries_mod_query_enum_error).into(), + &mut PushMap::new(&mut errors), + &mut PushSet::new(&mut mutated_tables), + gen_info, + namer, + operator_impl + ); + + let run_query = quote!((#code)(self, #(#params_use),* )); + + match ( + generate_errors(errors, namer), + generate_commits(lp, mutated_tables, namer) + ) { + (None, None) => { + QueryMod { + query_mod: quote! { mod #name {} }.into(), + query_impl: quote! { + fn #name<#qy_lifetime>(&#qy_lifetime self, #(#params),* ) -> #return_type { + #run_query + } + } + .into(), + } + }, + (None, Some(CommitInfo { commits, aborts:_ } )) => { + + // NOTE: This case is possible when many inserts (that do not throw errors) occur on a table, + // but nothing else does. In this case we are not optimal - we could avoid transactions + // entirely. + // TODO: Consider this case (e.g. should we add in alloc errors -> in which case there are no error free inserts?) + + QueryMod { + query_mod: quote! { mod #name {} }.into(), + query_impl: quote! 
{ + fn #name<#qy_lifetime>(&#qy_lifetime mut self, #(#params),* ) -> #return_type { + let result = #run_query; + #commits + result + } + } + .into(), + } + }, + (Some(error_enum), None) => { + QueryMod { + query_mod: quote!{ pub mod #name { + #error_enum + } }.into(), + query_impl: quote!{ + fn #name<#qy_lifetime>(&#qy_lifetime self, #(#params),* ) -> Result<#return_type, #mod_queries::#name::#mod_queries_mod_query_enum_error> { + #run_query.map(#impl_alias::export_single) + } + }.into(), + } + } + (Some(error_enum), Some(CommitInfo { commits, aborts })) => { + QueryMod { + query_mod: quote!{ pub mod #name { + #error_enum + } }.into(), + query_impl: quote!{ + fn #name<#qy_lifetime>(&#qy_lifetime mut self, #(#params),* ) -> Result<#return_type, #mod_queries::#name::#mod_queries_mod_query_enum_error> { + match #run_query { + Ok(result) => { + #commits + Ok(#impl_alias::export_single(result)) + }, + Err(e) => { + #aborts + Err(e) + } + } + } + }.into(), + } + } + } +} + +pub struct QueriesInfo { + pub query_mod: Tokens, + + /// If there are no queries, we should not produce an impl block that does not + /// use the [`SerializedNamer::db_lifetime`] as this will cause an error with span + /// [`proc_macro2::Span::call_site`] + pub query_impls: Option>, +} + +// TODO: determine error type +// get if an insert for a table has errors, if so, do thingy + +pub fn generate_queries<'imm>( + lp: &'imm plan::Plan, + gen_info: &GeneratedInfo<'imm>, + interface_trait: &Option, + namer: &'imm SerializedNamer, + operator_impl: &OperatorImpl, + inline_queries: bool, +) -> QueriesInfo { + let SerializedNamer { + db_lifetime, + mod_queries, + struct_database, + struct_datastore, + interface: InterfaceNamer { trait_database, trait_database_type_datastore, ..}, + .. + } = namer; + let (mods, impls): (Vec>, Vec>) = lp + .queries + .iter() + .map(move |(_, query)| generate_query(lp, gen_info, namer, query, operator_impl).extract()) + .unzip(); + + QueriesInfo { + query_mod: quote! { + pub mod #mod_queries { + #(#mods)* + } + } + .into(), + query_impls: if impls.is_empty() { + None + } else { + let (impl_database, modifier, type_ds) = if let Some(InterfaceTrait { name }) = interface_trait { + (quote! { super::#name::#trait_database<#db_lifetime> for }, quote!(), quote!(type #trait_database_type_datastore = #struct_datastore;)) + } else { + (quote! {}, quote!(pub), quote!()) + }; + + let inline_tks = if inline_queries { + quote!(#[inline(always)]) + } else { + quote!() + }; + + Some( + quote! { + impl <#db_lifetime> #impl_database #struct_database<#db_lifetime> { + #type_ds + #(#inline_tks #modifier #impls)* + } + } + .into(), + ) + }, + } +} diff --git a/crates/emdb_core/src/backend/serialized/tables.rs b/crates/emdb_core/src/backend/serialized/tables.rs new file mode 100644 index 0000000..6921a96 --- /dev/null +++ b/crates/emdb_core/src/backend/serialized/tables.rs @@ -0,0 +1,223 @@ +use quote::{quote, ToTokens}; +use quote_debug::Tokens; +use std::collections::HashMap; +use syn::{Ident, ItemImpl, ItemMod, ItemStruct, Type}; +use pulpit::gen::selector::{TableSelectors, SelectorImpl}; +use super::namer::SerializedNamer; +use crate::{backend::interface::{namer::InterfaceNamer, InterfaceTrait, public::exposed_keys}, plan}; + +pub struct GeneratedInfo<'imm> { + pub get_types: HashMap, HashMap>>, + pub insert_can_error: HashMap, bool>, // TODO: hashset instead? 
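The arm above for queries that both mutate and can error produces the full transactional wrapper: commit each mutated table's window on success, abort on failure. A minimal standalone model of that commit-on-Ok / abort-on-Err pattern follows; the table type and its methods are illustrative, not the pulpit API:

```rust
// Pending writes become visible on commit and are dropped on abort.
struct Table {
    rows: Vec<String>,
    pending: Vec<String>,
}

impl Table {
    fn commit(&mut self) {
        let mut staged = std::mem::take(&mut self.pending);
        self.rows.append(&mut staged);
    }
    fn abort(&mut self) {
        self.pending.clear();
    }
}

struct Database {
    users: Table,
}

impl Database {
    // Mirrors the generated query wrapper: run the body, then commit or abort.
    fn new_user(&mut self, name: String, fail: bool) -> Result<(), &'static str> {
        self.users.pending.push(name); // stand-in for the generated query body
        let result: Result<(), &'static str> =
            if fail { Err("assert failed") } else { Ok(()) };
        match result {
            Ok(v) => {
                self.users.commit();
                Ok(v)
            }
            Err(e) => {
                self.users.abort();
                Err(e)
            }
        }
    }
}
```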
+} + +pub struct TableWindow<'imm> { + pub table_defs: Vec>, + pub datastore: Tokens, + pub datastore_impl: Tokens, + pub database: Tokens, + pub table_generated_info: GeneratedInfo<'imm>, +} + +/// Generate the tokens for the tables, and the struct to hold them (in [`TableWindow`]). +/// - Generates the tokens for the [`plan::ScalarType`]s of table fields assuming they are just [`plan::ScalarTypeConc::Rust`] tyes +pub fn generate_tables<'imm>(lp: &'imm plan::Plan, interface_trait: &Option, namer: &SerializedNamer, selector: &TableSelectors, inlining: bool) -> TableWindow<'imm> { + // get the constraints and fields of each table + let mut pulpit_configs = lp + .tables + .iter() + .map(|(key, emdb_table)| { + let pulpit_select = pulpit::gen::selector::SelectOperations { + name: emdb_table.name.clone(), + transactions: true, + deletions: false, + fields: emdb_table + .columns + .iter() + .map(|(field, v)| { + ( + namer.transform_field_name(field), + match lp.get_scalar_type_conc(v.data_type) { + plan::ScalarTypeConc::Rust { + type_context: plan::TypeContext::DataStore, + ty, + } => ty.to_token_stream().into(), + _ => unreachable!("Only Rust types are allowed in the data store"), + }, + ) + }) + .collect(), + uniques: emdb_table + .columns + .iter() + .filter_map(|(field, v)| { + v.cons + .unique + .as_ref() + .map(|a| pulpit::gen::uniques::Unique { + alias: a.alias.clone(), + field: namer.transform_field_name(field), + }) + }) + .collect(), + predicates: emdb_table + .row_cons + .preds + .iter() + .map(|pred| pulpit::gen::predicates::Predicate { + alias: pred.alias.clone(), + tokens: pred.cons.0.to_token_stream().into(), + }) + .collect(), + limit: { + if let Some(plan::Constraint { alias, cons: plan::Limit (expr)}) = &emdb_table.row_cons.limit { + Some(pulpit::gen::limit::Limit { value: pulpit::gen::limit::LimitKind::ConstVal(expr.into_token_stream().into()), alias: alias.clone() }) + } else { + None + } + }, + updates: Vec::new(), + public: true, + }; + + (plan::Idx::new(key, lp), pulpit_select) + }) + .collect::>(); + + // get the updates and deletions + for (key, op) in &lp.operators { + match op { + plan::Operator::Update(plan::Update { table, mapping, .. }) => pulpit_configs + .get_mut(&plan::Idx::new(*table, lp)) + .unwrap() + .updates + .push(pulpit::gen::operations::update::Update { + fields: mapping + .keys() + .map(|rec| namer.transform_field_name(rec)) + .collect(), + alias: namer.pulpit_table_interaction(key), + }), + plan::Operator::Delete(plan::Delete { table, .. }) => { + pulpit_configs + .get_mut(&plan::Idx::new(*table, lp)) + .unwrap() + .deletions = true; + } + _ => (), + } + } + + let SerializedNamer { + pulpit: + ref pulpit_namer @ pulpit::gen::namer::CodeNamer { + struct_table, + struct_window, + type_key, + .. + }, + struct_datastore, + struct_database, + db_lifetime, + mod_tables, + .. 
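+        // (Editor's illustration, not part of the original patch.) For a
+        // hypothetical table `users` with columns `name: String` and `age: u32`,
+        // and a unique constraint on `name` aliased `uniq`, the assembly above
+        // leaves `pulpit_configs` holding roughly:
+        //   SelectOperations { name: users, transactions: true, deletions: false,
+        //                      fields: { name: String, age: u32 },
+        //                      uniques: [Unique { alias: uniq, field: name }],
+        //                      predicates: [], limit: None, updates: [], public: true }
+        // with the operator scan then pushing onto `updates` for each `Update`
+        // operator, and setting `deletions` for each `Delete` operator, that
+        // targets the table.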
+ } = namer; + + let (get_types, gen_data): (HashMap<_, _>, Vec<_>) = pulpit_configs + .into_iter() + .map(|(key, config)| { + let table_impl = selector.select_table(config); + ( + (key, table_impl.op_get_types(pulpit_namer)), + ( + (key, table_impl.insert_can_error()), + table_impl.generate(pulpit_namer, if inlining { vec![pulpit::gen::table::AttrKinds::Inline] } else { vec![] }), + ), + ) + }) + .unzip(); + + let (insert_can_error, table_defs): (HashMap<_, _>, Vec<_>) = gen_data.into_iter().unzip(); + + let table_names = lp + .tables + .iter() + .map(|(_, table)| &table.name) + .collect::>(); + + let datastore_members = table_names + .iter() + .map(|name| quote!(#name: #mod_tables::#name::#struct_table)); + let datastore_members_new = table_names + .iter() + .map(|name| quote!(#name: #mod_tables::#name::#struct_table::new(1024))); + + let (database_members_window_stream, database_members_stream): (Vec<_>, Vec<_>) = table_names + .iter() + .map(|name| (quote!(#name: self.#name.window()), quote!(#name: #mod_tables::#name::#struct_window<#db_lifetime>))).unzip(); + + let (database_members_window, database_members) = if database_members_stream.is_empty() { + (quote!(phantom: std::marker::PhantomData), quote!(phantom: std::marker::PhantomData<&#db_lifetime ()>)) + } else { + (quote!(#(#database_members_window_stream,)*),quote!(#(#database_members_stream,)*)) + }; + + let InterfaceNamer { + trait_datastore, + trait_datastore_method_db, + trait_datastore_method_new, + trait_datastore_type_database, + .. + } = &namer.interface; + + let (impl_datastore, modifiers, key_defs, ds_assoc_db) = if let Some(InterfaceTrait { name }) = interface_trait { + let exposed_table_keys = exposed_keys(lp); + (quote!{ super::#name::#trait_datastore for }, quote!(), exposed_table_keys.into_iter().map(|tablekey| { + let name = &lp.get_table(*tablekey).name; + let key_name = namer.interface.key_name(name); + quote! { type #key_name = #mod_tables::#name::#type_key } + }).collect::>(), quote!(type #trait_datastore_type_database<#db_lifetime> = #struct_database<#db_lifetime>;)) + } else { + (quote!(), quote!(pub), Vec::new(), quote!()) + }; + + + TableWindow { + table_defs, + datastore: quote! { + pub struct #struct_datastore { + #(#datastore_members,)* + } + } + .into(), + datastore_impl: quote! { + impl #impl_datastore #struct_datastore { + #ds_assoc_db + #(#key_defs;)* + + #modifiers fn #trait_datastore_method_new() -> Self { + Self { + #(#datastore_members_new,)* + } + } + + #modifiers fn #trait_datastore_method_db(&mut self) -> #struct_database<'_> { + #struct_database { + #database_members_window + } + } + } + } + .into(), + database: quote! { + pub struct #struct_database<#db_lifetime> { + #database_members + } + } + .into(), + table_generated_info: GeneratedInfo { + get_types, + insert_can_error, + }, + } +} diff --git a/crates/emdb_core/src/backend/serialized/types.rs b/crates/emdb_core/src/backend/serialized/types.rs new file mode 100644 index 0000000..2673abd --- /dev/null +++ b/crates/emdb_core/src/backend/serialized/types.rs @@ -0,0 +1,200 @@ +//! # Generate types for the simple backend +//! +//! ## Type Aliases vs resolving. +//! When translating types from the [`plan::Key`]s to [`plan::RecordType`] and +//! [`plan::ScalarType`], we have two options for generating a name. +//! +//! 1. Generate type aliases from the key and use them, generate a type aliases +//! for every single [`plan::ScalarType`] and [`plan::RecordType`] with type +//! aliases for [`plan::ConcRef::Ref`]. +//! 2. 
Traverse the graph each time to get the actual type (or some more efficient +//! collapsing of references to [`plan::ConcRef::Conc`] types). +//! +//! While (1.) is more efficient for the macro, it leaves more work for the rust +//! compiler, and generates large amounts of type aliases, which make human +//! reading & debugging of the generated code more difficult. +//! +//! On balance I decided to go with (2.), though it complicates some code (need +//! to access the plan, and the [`pulpit::gen::operations::get`] types to gets +//! the tokens for a type) + +use quote::{quote, ToTokens}; +use quote_debug::Tokens; +use std::collections::{HashMap, HashSet}; +use syn::{Ident, ItemStruct, Type}; + +use super::namer::SerializedNamer; +use crate::plan; + +/// Gets all the record types that need to be declared public. +/// - Scalar types do not need this, they are either type aliases (no need to +/// be public, they are exposed to the user as the type they alias) +/// - References to types are aliases, so are also not included. +fn public_record_types(lp: &plan::Plan) -> HashSet> { + fn recursive_collect_record<'imm>( + lp: &'imm plan::Plan, + attrs: &mut HashSet>, + mut key: plan::Key, + ) { + loop { + match lp.get_record_type(key) { + plan::ConcRef::Ref(k) => key = *k, + plan::ConcRef::Conc(record) => { + attrs.insert(plan::ImmKey::new(key, lp)); + for ty_key in record.fields.values() { + recursive_collect_scalar(lp, attrs, *ty_key) + } + return; + } + } + } + } + + fn recursive_collect_scalar<'imm>( + lp: &'imm plan::Plan, + attrs: &mut HashSet>, + mut key: plan::Key, + ) { + loop { + match lp.get_scalar_type(key) { + plan::ConcRef::Ref(k) => key = *k, + plan::ConcRef::Conc(c) => { + // Scalar types are always either aliases, or using types already available to the user, so no need to make them public. + match c { + plan::ScalarTypeConc::Bag(r) | plan::ScalarTypeConc::Record(r) => { + recursive_collect_record(lp, attrs, *r) + } + plan::ScalarTypeConc::TableRef(_) + | plan::ScalarTypeConc::TableGet { .. } => ( + // These are already specified to be public, so no need to additionally make public here + ), + plan::ScalarTypeConc::Rust { .. } => ( + // The user provided types are already available to the user, no need to make public here + ), + } + return; + } + } + } + } + + let mut public_records = HashSet::new(); + for (_, query) in &lp.queries { + if let Some(ret_type) = lp.get_context(query.ctx).get_return_type(lp) { + recursive_collect_record(lp, &mut public_records, ret_type); + } + } + + public_records +} + +/// Generates the tokens for a given scalar type. +/// - Needs to consider the values transformed by [`pulpit::gen::operations::get`] +/// which are determined after the table structure is chosen. +/// - Generates types with lifetimes ([`SerializedNamer::db_lifetime`] and +/// [`SerializedNamer::qy_lifetime`]) usable only in a [`plan::TypeContext::Query`] +/// context. +/// +/// NOTE: In some circumstances the table types are not available (e.g in [`crate::backend::interface`]) +/// so an empty `get_types` map is passed. +pub fn generate_scalar_type<'imm>( + lp: &'imm plan::Plan, + get_types: &HashMap, HashMap>>, + key: plan::Key, + namer: &SerializedNamer, +) -> Tokens { + match lp.get_scalar_type_conc(key) { + plan::ScalarTypeConc::TableRef(tk) => { + let table_name = &lp.get_table(*tk).name; + let SerializedNamer { + pulpit: pulpit::gen::namer::CodeNamer { type_key, .. }, + mod_tables, + .. + } = namer; + quote! 
{ + #mod_tables::#table_name::#type_key + } + .into() + } + plan::ScalarTypeConc::TableGet { table, field } => get_types + .get(&plan::Idx::new(*table, lp)) + .unwrap() + .get(field.get_field()) + .unwrap() + .clone(), + plan::ScalarTypeConc::Bag(r) => { + let rec_name = namer.record_name_lifetimes(*lp.get_record_conc_index(*r)); + quote!(Vec<#rec_name>).into() + } + plan::ScalarTypeConc::Record(r) => { + namer.record_name_lifetimes(*lp.get_record_conc_index(*r)) + } + plan::ScalarTypeConc::Rust { + type_context: _, // can be used on either datastore or query types, wraps in the lifetimes required for query + ty, + } => ty.to_token_stream().into(), + } +} + +/// Gets the name of a record type to allow for its construction +/// - Does not include lifetime parameters, just the struct name. +pub fn generate_record_name( + lp: &plan::Plan, + key: plan::Key, + namer: &SerializedNamer, +) -> Tokens { + let index = lp.get_record_conc_index(key); + namer.record_name(*index) +} + +/// Generates the definitions for record types +/// - structs used to represent [`plan::RecordConc`] +/// - publicity is determined traversing the return type of queries +/// +/// Each record type needs a [`SerializedNamer::phantom_field`] to ensure the query lifetime +/// parameters are bound (we do not avalyse types provided by the user to check for usage). +pub fn generate_record_definitions<'imm>( + lp: &'imm plan::Plan, + get_types: &'imm HashMap, HashMap>>, + namer: &'imm SerializedNamer, +) -> impl Iterator> + 'imm { + let public_records = public_record_types(lp); + + let SerializedNamer { + qy_lifetime, + db_lifetime, + phantom_field, + .. + } = namer; + + lp.record_types.iter().filter_map(move |(key, rec)| { + match rec { + plan::ConcRef::Conc(rec) => { + let name = namer.record_name(key); + let pub_tks = if public_records.contains(&plan::ImmKey::new(key, lp)) { + quote!(pub ) + } else { + quote!() + }; + let members = rec.fields.iter().map( + |(field, ty)| { + let fieldname = namer.transform_field_name(field); + let ty_tks = generate_scalar_type(lp, get_types, *ty, namer); + quote!{ + #pub_tks #fieldname: #ty_tks + } + } + ); + + Some(quote!{ + #[derive(Clone)] + #pub_tks struct #name<#db_lifetime, #qy_lifetime> { + #(#members,)* + #phantom_field: std::marker::PhantomData<(&#db_lifetime (), &#qy_lifetime ())>, + } + }.into()) + }, + plan::ConcRef::Ref(_) => None, + } + }) +} diff --git a/crates/emdb/src/frontend/boss/mod.rs b/crates/emdb_core/src/frontend/boss/mod.rs similarity index 100% rename from crates/emdb/src/frontend/boss/mod.rs rename to crates/emdb_core/src/frontend/boss/mod.rs diff --git a/crates/emdb/src/frontend/emql/ast.rs b/crates/emdb_core/src/frontend/emql/ast.rs similarity index 97% rename from crates/emdb/src/frontend/emql/ast.rs rename to crates/emdb_core/src/frontend/emql/ast.rs index cf989e9..b222a85 100644 --- a/crates/emdb/src/frontend/emql/ast.rs +++ b/crates/emdb_core/src/frontend/emql/ast.rs @@ -34,7 +34,7 @@ pub(super) enum ConstraintExpr { #[derive(Debug)] pub(super) struct Constraint { - pub alias: Option, + pub alias: Ident, pub method_span: Span, pub expr: ConstraintExpr, } diff --git a/crates/emdb/src/frontend/emql/errors.rs b/crates/emdb_core/src/frontend/emql/errors.rs similarity index 72% rename from crates/emdb/src/frontend/emql/errors.rs rename to crates/emdb_core/src/frontend/emql/errors.rs index ae7948e..9f73d68 100644 --- a/crates/emdb/src/frontend/emql/errors.rs +++ b/crates/emdb_core/src/frontend/emql/errors.rs @@ -31,73 +31,62 @@ fn redefinition_error( 
.help(format!("Each {def_type} must have a unique name")) } -pub(super) fn backend_redefined(def: &Ident, original_def: &Ident) -> Diagnostic { +pub fn backend_redefined(def: &Ident, original_def: &Ident) -> Diagnostic { redefinition_error(0, "backend", def, original_def) } -pub(super) fn table_redefined(def: &Ident, original_def: &Ident) -> Diagnostic { +pub fn table_redefined(def: &Ident, original_def: &Ident) -> Diagnostic { redefinition_error(1, "table", def, original_def) } -pub(super) fn table_column_redefined(def: &Ident, original_def: &Ident) -> Diagnostic { +pub fn table_column_redefined(def: &Ident, original_def: &Ident) -> Diagnostic { redefinition_error(2, "table column", def, original_def) } -pub(super) fn table_constraint_alias_redefined(def: &Ident, original_def: &Ident) -> Diagnostic { +pub fn table_constraint_alias_redefined(def: &Ident, original_def: &Ident) -> Diagnostic { redefinition_error(3, "constraint alias", def, original_def) } -pub(super) fn collect_type_alias_redefined(def: &Ident, original_def: &Ident) -> Diagnostic { +pub fn collect_type_alias_redefined(def: &Ident, original_def: &Ident) -> Diagnostic { redefinition_error(4, "collect type alias", def, original_def) } -pub(super) fn table_constraint_duplicate_unique( +pub fn table_constraint_duplicate_unique( col_name: &Ident, method_span: Span, - prev_alias: &Option, + prev_alias: &Ident, ) -> Diagnostic { - let mut diag = emql_error( + emql_error( 5, method_span, format!("Duplicate unique constraint on column `{col_name}`"), - ); - if let Some(alias) = prev_alias { - diag = diag.span_note(alias.span(), format!("previously defined as {alias} here.")); - } - diag + ) + .span_note( + prev_alias.span(), + format!("previously defined as {prev_alias} here."), + ) } -pub(super) fn table_constraint_nonexistent_unique_column( - alias: &Option, +pub fn table_constraint_nonexistent_unique_column( + alias: &Ident, col_name: &Ident, table_name: &Ident, method_span: Span, ) -> Diagnostic { emql_error(6, method_span, format!( - "Column `{col_name}` does not exist in table `{table_name}`, so cannot apply a unique constraint{} to it", if let Some(alias) = alias { - format!(" with alias `{alias}`") - } else { - String::new() - } + "Column `{col_name}` does not exist in table `{table_name}`, so cannot apply a unique constraint `{alias}` to it" )).span_help(table_name.span(), format!("Apply the unique constraint to an available column in {table_name}")) } -pub(super) fn table_constraint_duplicate_limit( - alias: &Option, +pub fn table_constraint_duplicate_limit( + alias: &Ident, table_name: &Ident, method_span: Span, ) -> Diagnostic { emql_error( 7, method_span, - format!( - "Duplicate limit constraint{} on table `{table_name}`", - if let Some(alias) = alias { - format!(" with alias `{alias}`") - } else { - String::new() - } - ), + format!("Duplicate limit constraint `{alias}` on table `{table_name}`"), ) .span_help( table_name.span(), @@ -105,11 +94,11 @@ pub(super) fn table_constraint_duplicate_limit( ) } -pub(super) fn query_redefined(def: &Ident, original_def: &Ident) -> Diagnostic { +pub fn query_redefined(def: &Ident, original_def: &Ident) -> Diagnostic { redefinition_error(8, "query", def, original_def) } -pub(super) fn query_multiple_returns(ret: Span, prev_ret: Span, query: &Ident) -> Diagnostic { +pub fn query_multiple_returns(ret: Span, prev_ret: Span, query: &Ident) -> Diagnostic { emql_error( 9, ret, @@ -118,15 +107,11 @@ pub(super) fn query_multiple_returns(ret: Span, prev_ret: Span, query: &Ident) - 
.span_help(prev_ret, "Previously returned value here".to_string()) } -pub(super) fn query_operator_field_redefined(def: &Ident, original_def: &Ident) -> Diagnostic { +pub fn query_operator_field_redefined(def: &Ident, original_def: &Ident) -> Diagnostic { redefinition_error(10, "field", def, original_def) } -pub(super) fn query_stream_single_connection( - span: Span, - last_span: Span, - stream: bool, -) -> Diagnostic { +pub fn query_stream_single_connection(span: Span, last_span: Span, stream: bool) -> Diagnostic { emql_error( 11, span, @@ -150,11 +135,7 @@ pub(super) fn query_stream_single_connection( ) } -pub(super) fn query_no_data_for_next_operator( - conn_span: Span, - stream: bool, - prev_op: Span, -) -> Diagnostic { +pub fn query_no_data_for_next_operator(conn_span: Span, stream: bool, prev_op: Span) -> Diagnostic { emql_error( 12, prev_op, @@ -173,7 +154,7 @@ pub(super) fn query_no_data_for_next_operator( ) } -pub(super) fn query_early_return(conn_span: Span, stream: bool, ret_op: Span) -> Diagnostic { +pub fn query_early_return(conn_span: Span, stream: bool, ret_op: Span) -> Diagnostic { emql_error(13, ret_op, "Early return statement".to_string()).span_note( conn_span, format!( @@ -187,11 +168,11 @@ pub(super) fn query_early_return(conn_span: Span, stream: bool, ret_op: Span) -> ) } -pub(super) fn query_parameter_redefined(def: &Ident, original_def: &Ident) -> Diagnostic { +pub fn query_parameter_redefined(def: &Ident, original_def: &Ident) -> Diagnostic { redefinition_error(14, "query parameter", def, original_def) } -pub(super) fn query_param_ref_table_not_found(query: &Ident, table_ref: &Ident) -> Diagnostic { +pub fn query_param_ref_table_not_found(query: &Ident, table_ref: &Ident) -> Diagnostic { emql_error( 15, table_ref.span(), @@ -202,7 +183,7 @@ pub(super) fn query_param_ref_table_not_found(query: &Ident, table_ref: &Ident) )) } -pub(super) fn access_field_missing(call: &Ident, field: &Ident, fields: Vec<&Ident>) -> Diagnostic { +pub fn access_field_missing(call: &Ident, field: &Ident, fields: Vec<&Ident>) -> Diagnostic { emql_error( 16, field.span(), @@ -213,7 +194,7 @@ pub(super) fn access_field_missing(call: &Ident, field: &Ident, fields: Vec<&Ide ) } -pub(super) fn query_expected_reference_type_for_update( +pub fn query_expected_reference_type_for_update( lp: &Plan, dt: &Key, reference: &Ident, @@ -231,7 +212,7 @@ pub(super) fn query_expected_reference_type_for_update( )) } -pub(super) fn query_cannot_start_with_operator(op: &Ident) -> Diagnostic { +pub fn query_cannot_start_with_operator(op: &Ident) -> Diagnostic { emql_error( 18, op.span(), @@ -240,7 +221,7 @@ pub(super) fn query_cannot_start_with_operator(op: &Ident) -> Diagnostic { .help("Instead use operators such as `use ..`, `ref ..`, or `unique(..)`".to_string()) } -pub(super) fn query_update_field_not_in_table(table_name: &Ident, field: &Ident) -> Diagnostic { +pub fn query_update_field_not_in_table(table_name: &Ident, field: &Ident) -> Diagnostic { emql_error( 19, field.span(), @@ -252,7 +233,7 @@ pub(super) fn query_update_field_not_in_table(table_name: &Ident, field: &Ident) ) } -pub(super) fn query_update_reference_not_present( +pub fn query_update_reference_not_present( lp: &Plan, reference: &Ident, prev_span: Span, @@ -275,7 +256,7 @@ pub(super) fn query_update_reference_not_present( ) } -pub(super) fn query_insert_field_rust_type_mismatch( +pub fn query_insert_field_rust_type_mismatch( lp: &Plan, call: &Ident, field: &Ident, @@ -289,7 +270,7 @@ pub(super) fn query_insert_field_rust_type_mismatch( 
.span_note(prev_span, format!("Input to `{call}` comes from here")) } -pub(super) fn query_insert_field_type_mismatch( +pub fn query_insert_field_type_mismatch( lp: &Plan, call: &Ident, field: &Ident, @@ -313,7 +294,7 @@ pub(super) fn query_insert_field_type_mismatch( .span_note(prev_span, format!("Input to `{call}` comes from here")) } -pub(super) fn query_insert_field_missing( +pub fn query_insert_field_missing( call: &Ident, table_name: &Ident, field: &Ident, @@ -331,11 +312,7 @@ pub(super) fn query_insert_field_missing( ) } -pub(super) fn query_insert_extra_field( - call: &Ident, - field: &Ident, - table_name: &Ident, -) -> Diagnostic { +pub fn query_insert_extra_field(call: &Ident, field: &Ident, table_name: &Ident) -> Diagnostic { emql_error( 24, call.span(), @@ -345,7 +322,7 @@ pub(super) fn query_insert_extra_field( .span_note(table_name.span(), format!("`{table_name}` defined here")) } -pub(super) fn query_nonexistent_table(call: &Ident, table_used: &Ident) -> Diagnostic { +pub fn query_nonexistent_table(call: &Ident, table_used: &Ident) -> Diagnostic { emql_error(25, table_used.span(), format!( "Table `{table_used}` does not exist in the query so cannot be accessed through `{call}`", )).help(format!( @@ -353,7 +330,7 @@ pub(super) fn query_nonexistent_table(call: &Ident, table_used: &Ident) -> Diagn )) } -pub(super) fn query_delete_field_not_present(call: &Ident, field: &Ident) -> Diagnostic { +pub fn query_delete_field_not_present(call: &Ident, field: &Ident) -> Diagnostic { emql_error( 26, field.span(), @@ -365,7 +342,7 @@ pub(super) fn query_delete_field_not_present(call: &Ident, field: &Ident) -> Dia ) } -pub(super) fn query_delete_field_not_reference( +pub fn query_delete_field_not_reference( lp: &Plan, call: &Ident, field: &Ident, @@ -385,7 +362,7 @@ pub(super) fn query_delete_field_not_reference( .span_help(call.span(), format!("`{field}` ")) } -pub(super) fn query_deref_field_already_exists(new: &Ident, existing: &Ident) -> Diagnostic { +pub fn query_deref_field_already_exists(new: &Ident, existing: &Ident) -> Diagnostic { emql_error(28, new.span(), format!("Field `{new}` already exists")) .span_note(existing.span(), format!("{existing} defined here")) .help(format!( @@ -393,7 +370,7 @@ pub(super) fn query_deref_field_already_exists(new: &Ident, existing: &Ident) -> )) } -pub(super) fn query_reference_field_missing(reference: &Ident) -> Diagnostic { +pub fn query_reference_field_missing(reference: &Ident) -> Diagnostic { emql_error( 29, reference.span(), @@ -405,7 +382,7 @@ pub(super) fn query_reference_field_missing(reference: &Ident) -> Diagnostic { ) } -pub(super) fn query_deref_cannot_deref_rust_type(reference: &Ident, t: &Type) -> Diagnostic { +pub fn query_deref_cannot_deref_rust_type(reference: &Ident, t: &Type) -> Diagnostic { emql_error( 30, reference.span(), @@ -413,7 +390,7 @@ pub(super) fn query_deref_cannot_deref_rust_type(reference: &Ident, t: &Type) -> ) } -pub(super) fn query_deref_cannot_deref_record( +pub fn query_deref_cannot_deref_record( lp: &Plan, reference: &Ident, t: &Key, @@ -431,7 +408,7 @@ pub(super) fn query_deref_cannot_deref_record( ) } -pub(super) fn query_operator_cannot_come_first(call: &Ident) -> Diagnostic { +pub fn query_operator_cannot_come_first(call: &Ident) -> Diagnostic { emql_error( 32, call.span(), @@ -439,7 +416,7 @@ pub(super) fn query_operator_cannot_come_first(call: &Ident) -> Diagnostic { ) } -pub(super) fn query_unique_table_not_found(table: &Ident) -> Diagnostic { +pub fn query_unique_table_not_found(table: &Ident) -> 
Diagnostic { emql_error( 33, table.span(), @@ -450,7 +427,7 @@ pub(super) fn query_unique_table_not_found(table: &Ident) -> Diagnostic { )) } -pub(super) fn query_unique_no_field_in_table(field: &Ident, table_name: &Ident) -> Diagnostic { +pub fn query_unique_no_field_in_table(field: &Ident, table_name: &Ident) -> Diagnostic { emql_error( 34, field.span(), @@ -462,7 +439,7 @@ pub(super) fn query_unique_no_field_in_table(field: &Ident, table_name: &Ident) ) } -pub(super) fn query_unique_field_is_not_unique(field: &Ident, table_name: &Ident) -> Diagnostic { +pub fn query_unique_field_is_not_unique(field: &Ident, table_name: &Ident) -> Diagnostic { emql_error(35, field.span(), format!("Field `{field}` is not unique in table `{table_name}`")) .span_help( table_name.span(), @@ -472,11 +449,7 @@ pub(super) fn query_unique_field_is_not_unique(field: &Ident, table_name: &Ident ) } -pub(super) fn query_use_variable_already_used( - usage: &Ident, - created: Span, - used: Span, -) -> Diagnostic { +pub fn query_use_variable_already_used(usage: &Ident, created: Span, used: Span) -> Diagnostic { emql_error( 36, usage.span(), @@ -486,7 +459,7 @@ pub(super) fn query_use_variable_already_used( .span_error(used, "And consumed here".to_string()) } -pub(super) fn query_invalid_use( +pub fn query_invalid_use( usage: &Ident, tn: &HashMap>, vs: &HashMap, @@ -509,10 +482,7 @@ pub(super) fn query_invalid_use( )) } -pub(super) fn query_invalid_variable_use( - usage: &Ident, - vs: &HashMap, -) -> Diagnostic { +pub fn query_invalid_variable_use(usage: &Ident, vs: &HashMap) -> Diagnostic { let vars = vs .iter() .filter_map(|(var, state)| { @@ -531,7 +501,7 @@ pub(super) fn query_invalid_variable_use( .help(format!("Currently available variables are {vars}")) } -pub(super) fn query_let_variable_already_assigned( +pub fn query_let_variable_already_assigned( assign: &Ident, created: Span, used: Option, @@ -549,7 +519,16 @@ pub(super) fn query_let_variable_already_assigned( } } -pub(super) fn query_deref_cannot_deref_bag_type( +pub fn query_let_variable_shadows_table(assign: &Ident, table: &Ident) -> Diagnostic { + emql_error( + 55, + assign.span(), + format!("variables created by let cannot shadow tables, but `{assign}` does"), + ) + .span_note(table.span(), "Table defined here".to_string()) +} + +pub fn query_deref_cannot_deref_bag_type( lp: &Plan, reference: &Ident, t: &Key, @@ -566,7 +545,7 @@ pub(super) fn query_deref_cannot_deref_bag_type( ), ) } -pub(super) fn query_cannot_return_stream(last: Span, ret: Span) -> Diagnostic { +pub fn query_cannot_return_stream(last: Span, ret: Span) -> Diagnostic { emql_error(41, ret, "Cannot return a stream from a query".to_string()) .span_note( last, @@ -575,7 +554,7 @@ pub(super) fn query_cannot_return_stream(last: Span, ret: Span) -> Diagnostic { .help("Use a `collect` operator to convert the stream into a bag of records".to_string()) } -pub(super) fn query_table_access_nonexisted_columns(table_name: &Ident, col: &Ident) -> Diagnostic { +pub fn query_table_access_nonexisted_columns(table_name: &Ident, col: &Ident) -> Diagnostic { emql_error( 42, col.span(), @@ -584,7 +563,7 @@ pub(super) fn query_table_access_nonexisted_columns(table_name: &Ident, col: &Id .span_note(table_name.span(), format!("{table_name} defined here")) } -pub(super) fn query_invalid_record_type( +pub fn query_invalid_record_type( lp: &Plan, op: &Ident, prev: Span, @@ -608,16 +587,16 @@ pub(super) fn query_invalid_record_type( ) } -pub(super) fn query_no_cust_type_found(t: &Ident) -> Diagnostic { +pub fn 
query_no_cust_type_found(t: &Ident) -> Diagnostic { emql_error(44, t.span(), format!("Cannot find type {t}")) } -pub(super) fn table_query_no_such_field(table: &Ident, t: &Ident) -> Diagnostic { +pub fn table_query_no_such_field(table: &Ident, t: &Ident) -> Diagnostic { emql_error(45, t.span(), format!("no such field `{t}` in `{table}`")) .span_note(table.span(), format!("`{table}` defined here")) } -pub(super) fn query_cannot_append_to_record(new: &Ident, existing: &Ident) -> Diagnostic { +pub fn query_cannot_append_to_record(new: &Ident, existing: &Ident) -> Diagnostic { emql_error( 46, new.span(), @@ -626,12 +605,12 @@ pub(super) fn query_cannot_append_to_record(new: &Ident, existing: &Ident) -> Di .span_note(existing.span(), format!("{existing} defined here")) } -pub(super) fn sort_field_used_twice(field: &Ident, dup_field: &Ident) -> Diagnostic { +pub fn sort_field_used_twice(field: &Ident, dup_field: &Ident) -> Diagnostic { emql_error(47, field.span(), format!("Field `{field}` is used twice in th sort order, sorts can only sort of each field once")) .span_note(dup_field.span(), format!("`{dup_field}` first used here")) } -pub(super) fn union_requires_at_least_one_input(call: &Ident) -> Diagnostic { +pub fn union_requires_at_least_one_input(call: &Ident) -> Diagnostic { emql_error( 48, call.span(), @@ -639,7 +618,7 @@ pub(super) fn union_requires_at_least_one_input(call: &Ident) -> Diagnostic { ) } -pub(super) fn operator_requires_streams(call: &Ident, var: &Ident) -> Diagnostic { +pub fn operator_requires_streams(call: &Ident, var: &Ident) -> Diagnostic { emql_error( 49, var.span(), @@ -647,11 +626,11 @@ pub(super) fn operator_requires_streams(call: &Ident, var: &Ident) -> Diagnostic ) } -pub(super) fn operator_requires_streams2(call: &Ident) -> Diagnostic { +pub fn operator_requires_streams2(call: &Ident) -> Diagnostic { emql_error(50, call.span(), format!("`{call}` input must be a stream")) } -pub(super) fn no_return_in_context(call: &Ident) -> Diagnostic { +pub fn no_return_in_context(call: &Ident) -> Diagnostic { emql_error( 51, call.span(), @@ -659,7 +638,7 @@ pub(super) fn no_return_in_context(call: &Ident) -> Diagnostic { ) } -pub(super) fn union_not_same_type( +pub fn union_not_same_type( lp: &plan::Plan, call: &Ident, var: &Ident, @@ -669,3 +648,34 @@ pub(super) fn union_not_same_type( ) -> Diagnostic { emql_error(52, other_var.span(), format!("`{other_var}` has type `{}` but union requires all inputs to be of the same type `{}` (from `{var}`)", plan::With { plan: lp, extended: other_data_type }, plan::With { plan: lp, extended: data_type })) } + +pub fn query_deref_cannot_deref_table_get( + lp: &plan::Plan, + reference: &Ident, + table: plan::Key, + field: &plan::RecordField, +) -> Diagnostic { + let name = &lp.get_table(table).name; + emql_error(53, reference.span(), format!("Cannot dereference a field directly taken from a table (this is `{name}.{field}`). Try mapping this into a table reference")) + .note(format!("But what if `{name}.{field}` *is* a table reference? When dereferencing values from a table, the returned value is not the same type as the column, it can be optimised (for example it could be optimised into returning an Rc, a Cow, or a reference to the data). 
Rather than automatically copying out the value in these cases, it is left to the user to decide how they want to extract this."))
+}
+
+pub fn query_combine_extra_field(
+    lp: &plan::Plan,
+    call: &Ident,
+    field: &Ident,
+    data_type: &plan::Key,
+) -> Diagnostic {
+    emql_error(54, field.span(), format!("Field `{field}` is not present in the type for `{call}` (same fields in input as output)"))
+        .span_note(field.span(), format!("The type is: `{}`", plan::With { plan: lp, extended: data_type }))
+}
+
+pub fn query_combine_missing_field(
+    lp: &plan::Plan,
+    call: &Ident,
+    field: &Ident,
+    data_type: &plan::Key,
+) -> Diagnostic {
+    emql_error(54, field.span(), format!("Field `{field}` is required but not present in the type for `{call}` (same fields in input as output)"))
+        .span_note(field.span(), format!("The type is: `{}`", plan::With { plan: lp, extended: data_type }))
+}
diff --git a/crates/emdb/src/frontend/emql/mod.rs b/crates/emdb_core/src/frontend/emql/mod.rs
similarity index 93%
rename from crates/emdb/src/frontend/emql/mod.rs
rename to crates/emdb_core/src/frontend/emql/mod.rs
index 3401e62..660ed6b 100644
--- a/crates/emdb/src/frontend/emql/mod.rs
+++ b/crates/emdb_core/src/frontend/emql/mod.rs
@@ -51,7 +51,9 @@
 //! 1. Use the rust API to fully analyse passed expressions
 //!    - provides the guarantee that all logical plans contain valid embedded rust
 //!    - allows for type inference on expressions
+//!
 //! However it has a significant drawback:
+//!
 //! - Cannot analyse code from outside the macro invocation, so cannot use types
 //!   and functions from outside [`emQL`](crate::emql!)
 //! - the API exposes compiler library internals, so is subject to change, not ideal for a
@@ -63,7 +65,9 @@
 //!   everywhere in the crate by rustc
 //! - reduces frontend complexity, and redundant work (analysing code twice, once
 //!   in macro, once for end result)
+//!
 //! However
+//!
 //! - With no backend implementations, no expressions are checked
 //!
 //! I decided to check expression syntax (using syn) in the frontend and use the passthrough design.
@@ -122,6 +126,21 @@
 //! - Allow other attributes to be applied to queries
 //!
 //! Requires adding the doc comment to the plan, to pass on to generated code.
+//!
+//! ### Pass through references
+//! Allow references to be stored in tables, where the lifetime of the reference
+//! matches that of the database.
+//!
+//! ```ignore
+//! table cool {
+//!     name: String,
+//!     very_huge_thing: &Huge // inferred as &'database Huge
+//! }
+//!
+//! query add_new(name: String, huge: &Huge) {
+//!     row(name: String = name, very_huge_thing: &Huge = huge) |> insert(cool);
+//! }
+//! ```
 
 mod ast;
 mod errors;
diff --git a/crates/emdb/src/frontend/emql/operators/mod.rs b/crates/emdb_core/src/frontend/emql/operators/mod.rs
similarity index 87%
rename from crates/emdb/src/frontend/emql/operators/mod.rs
rename to crates/emdb_core/src/frontend/emql/operators/mod.rs
index 70c8394..2c99c25 100644
--- a/crates/emdb/src/frontend/emql/operators/mod.rs
+++ b/crates/emdb_core/src/frontend/emql/operators/mod.rs
@@ -18,9 +18,10 @@ use crate::frontend::emql::parse::{
     fields_assign, fields_expr, functional_style, type_parser_to_punct, ContextRecurHandle,
 };
 use crate::frontend::emql::sem::{
-    add_streams_to_context, ast_typeto_scalar, create_scanref, discard_ends, extract_fields,
-    generate_access, get_all_cols, get_user_fields, linear_builder, update_incomplete,
-    valid_linear_builder, Continue, LinearBuilderState, ReturnVal, StreamContext, VarState,
+    add_streams_to_context, assign_new_var, check_fields_type, create_scanref, discard_ends,
+    extract_fields_ordered, generate_access, get_all_cols, get_user_fields, linear_builder,
+    query_ast_typeto_scalar, update_incomplete, valid_linear_builder, Continue, FieldComparison,
+    LinearBuilderState, ReturnVal, StreamContext, VarState,
 };
 use crate::plan;
 use crate::utils::misc::{result_to_opt, singlelist};
@@ -48,10 +49,12 @@ use syn::Expr;
 trait EMQLOperator: Sized + Debug {
     const NAME: &'static str;
 
+    /// Parse the operator's tokens (taken directly from the stream, after peeking for [`EMQLOperator::NAME`])
     fn build_parser(ctx_recur: ContextRecurHandle) -> impl TokenParser;
 
     /// Convert the operator to a logical plan node
-    /// - `tn` represents the identifier to table mapping
+    /// - Needs to ensure a valid plan is left even on logical errors (to allow
+    ///   other streams, inner contexts to be analysed).
     #[allow(clippy::too_many_arguments)]
     fn build_logical(
         self,
@@ -139,7 +142,9 @@ create_operator!(
     op_take::Take,
     op_fork::Fork,
     op_union::Union,
-    op_foreach::ForEach,
+    op_lift::Lift,
     op_groupby::GroupBy,
-    op_join::Join
+    op_join::Join,
+    op_combine::Combine,
+    op_count::Count
 );
diff --git a/crates/emdb/src/frontend/emql/operators/op_assert.rs b/crates/emdb_core/src/frontend/emql/operators/op_assert.rs
similarity index 100%
rename from crates/emdb/src/frontend/emql/operators/op_assert.rs
rename to crates/emdb_core/src/frontend/emql/operators/op_assert.rs
diff --git a/crates/emdb/src/frontend/emql/operators/op_collect.rs b/crates/emdb_core/src/frontend/emql/operators/op_collect.rs
similarity index 100%
rename from crates/emdb/src/frontend/emql/operators/op_collect.rs
rename to crates/emdb_core/src/frontend/emql/operators/op_collect.rs
diff --git a/crates/emdb_core/src/frontend/emql/operators/op_combine.rs b/crates/emdb_core/src/frontend/emql/operators/op_combine.rs
new file mode 100644
index 0000000..419e734
--- /dev/null
+++ b/crates/emdb_core/src/frontend/emql/operators/op_combine.rs
@@ -0,0 +1,134 @@
+
+use super::*;
+
+#[derive(Debug)]
+pub struct Combine {
+    call: Ident,
+    left_name: Ident,
+    right_name: Ident,
+    fields: Vec<(Ident, (Expr, Expr))>,
+}
+
+impl EMQLOperator for Combine {
+    const NAME: &'static str = "combine";
+
+    fn build_parser(ctx_recur: ContextRecurHandle) -> impl TokenParser {
+        // NOTE: I don't like this syntax, but for some reason using the 'fold' syntax
+        //       here (separated with arrow) causes `rustc 1.80.0-nightly (032af18af 2024-06-02)`
+        //       to SIGSEGV. I don't have time to debug that.
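+        // (Editor's sketch, not part of the original patch.) Since `combine`
+        // lowers to `plan::FoldField { initial, update }` exactly as `fold`
+        // does, its runtime shape appears to be a fold over the input stream:
+        // the first bracketed expression is the start value, and the second
+        // merges the accumulator (bound as `left_name`) with each element
+        // (bound as `right_name`). A minimal standalone illustration for a
+        // single `i64` field, with all names hypothetical:
+        fn _combine_one_field_sketch(
+            items: impl Iterator<Item = i64>,
+            initial: i64,
+            combine: impl Fn(i64, i64) -> i64,
+        ) -> i64 {
+            items.fold(initial, |left, right| combine(left, right))
+        }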
+ mapsuc( + functional_style( + Self::NAME, + seqs!( + matchident("use"), + setrepr(getident(), ""), + matchpunct('+'), + setrepr(getident(), ""), + matchident("in"), + setrepr(listseptrailing( + ',', + mapsuc( + seqs!( + setrepr(getident(), ""), + recovgroup(Delimiter::Bracket, setrepr(syn(collectuntil(isempty())), "")), + matchpunct('='), + recovgroup(Delimiter::Bracket, setrepr(syn(collectuntil(isempty())), "")) + ), + | (field, (default, (_, combine))) | (field, (default, combine)) + ) + ), "bob") + ), + ), + | (call, (_, (left_name, (_, (right_name, (_, fields))))))| Combine { + call, + left_name, + right_name, + fields, + }, + ) + } + + fn build_logical( + self, + lp: &mut plan::Plan, + tn: &HashMap>, + vs: &mut HashMap, + ts: &mut HashMap>, + op_ctx: plan::Key, + cont: Option, + ) -> Result> { + let Self { + call, + left_name, + right_name, + fields, + } = self; + + if let Some(cont) = cont { + linear_builder(lp, op_ctx, cont, |lp, ctx, prev, next_edge| { + let (raw_fields, mut errors) = + extract_fields_ordered(fields, errors::query_operator_field_redefined); + + if !prev.data_type.stream { + errors.push_back(errors::query_stream_single_connection( + call.span(), + prev.last_span, + true, + )) + } + + let FieldComparison { + extra_fields, + missing_fields, + } = check_fields_type( + lp, + prev.data_type.fields, + raw_fields.iter().map(|(id, _)| id), + ); + + for field in extra_fields { + errors.push_back(errors::query_combine_extra_field( + lp, + &call, + field, + &prev.data_type.fields, + )); + } + + for field in missing_fields { + errors.push_back(errors::query_combine_missing_field( + lp, + &call, + field, + &prev.data_type.fields, + )); + } + + if errors.is_empty() { + Ok(LinearBuilderState { + data_out: plan::Data { + fields: prev.data_type.fields, + stream: false, + }, + op: plan::Combine { + input: prev.prev_edge, + left_name, + right_name, + update_fields: raw_fields + .into_iter() + .map(|(id, (initial, update))| (id.into(), plan::FoldField { initial, update})) + .collect(), + output: next_edge, + } + .into(), + call_span: call.span(), + }) + } else { + Err(errors) + } + }) + } else { + Err(singlelist(errors::query_cannot_start_with_operator(&call))) + } + } +} diff --git a/crates/emdb_core/src/frontend/emql/operators/op_count.rs b/crates/emdb_core/src/frontend/emql/operators/op_count.rs new file mode 100644 index 0000000..9d43f00 --- /dev/null +++ b/crates/emdb_core/src/frontend/emql/operators/op_count.rs @@ -0,0 +1,80 @@ +use super::*; + +#[derive(Debug)] +pub struct Count { + call: Ident, + out_field: Ident, +} + +impl EMQLOperator for Count { + const NAME: &'static str = "count"; + + fn build_parser(ctx_recur: ContextRecurHandle) -> impl TokenParser { + mapsuc( + functional_style(Self::NAME, getident()), + |(call, out_field)| Self { call, out_field }, + ) + } + + fn build_logical( + self, + lp: &mut plan::Plan, + tn: &HashMap>, + vs: &mut HashMap, + ts: &mut HashMap>, + op_ctx: plan::Key, + cont: Option, + ) -> Result> { + let Self { call, out_field } = self; + let x: usize = 0; + if let Some(cont) = cont { + linear_builder(lp, op_ctx, cont, |lp, ctx, prev, next_edge| { + if !prev.data_type.stream { + Err(singlelist(errors::query_stream_single_connection( + call.span(), + prev.last_span, + true, + ))) + } else { + // NOTE: This operator needs to produce a usize type. 
+ // - It is not truly a user type, but it is a rust primitive, so we assume it is available + // - It 'kinda' is breaking the separation between implementation and frontend, as we rely + // on the count operator giving us a usize. Ordinarily we ensure only user code (expressions) + // are susceptible to changes in the backend's types + // The span for the out_field is used to ensure type errors propagate to user code, rather than + // `Span::call_site()`. + + let usize_span = out_field.span(); + let out_field = plan::RecordField::User(out_field); + + let size_type = + lp.scalar_types + .insert(plan::ConcRef::Conc(plan::ScalarTypeConc::Rust { + type_context: plan::TypeContext::Query, + ty: syn::parse2(quote::quote_spanned!(usize_span => usize)).unwrap(), + })); + + Ok(LinearBuilderState { + data_out: plan::Data { + fields: lp + .record_types + .insert(plan::ConcRef::Conc(plan::RecordConc { + fields: HashMap::from([(out_field.clone(), size_type)]), + })), + stream: false, + }, + op: plan::Count { + input: prev.prev_edge, + output: next_edge, + out_field, + } + .into(), + call_span: call.span(), + }) + } + }) + } else { + Err(singlelist(errors::query_cannot_start_with_operator(&call))) + } + } +} diff --git a/crates/emdb/src/frontend/emql/operators/op_delete.rs b/crates/emdb_core/src/frontend/emql/operators/op_delete.rs similarity index 100% rename from crates/emdb/src/frontend/emql/operators/op_delete.rs rename to crates/emdb_core/src/frontend/emql/operators/op_delete.rs diff --git a/crates/emdb/src/frontend/emql/operators/op_deref.rs b/crates/emdb_core/src/frontend/emql/operators/op_deref.rs similarity index 83% rename from crates/emdb/src/frontend/emql/operators/op_deref.rs rename to crates/emdb_core/src/frontend/emql/operators/op_deref.rs index 594090c..6d378a5 100644 --- a/crates/emdb/src/frontend/emql/operators/op_deref.rs +++ b/crates/emdb_core/src/frontend/emql/operators/op_deref.rs @@ -61,23 +61,29 @@ impl EMQLOperator for DeRef { plan::ScalarTypeConc::Record(r) => Err(singlelist( errors::query_deref_cannot_deref_record(lp, &reference, r), )), - plan::ScalarTypeConc::Rust(rt) => Err(singlelist( - errors::query_deref_cannot_deref_rust_type(&reference, rt), + plan::ScalarTypeConc::Rust { ty, .. 
} => Err(singlelist( + errors::query_deref_cannot_deref_rust_type(&reference, ty), )), plan::ScalarTypeConc::Bag(b) => Err(singlelist( errors::query_deref_cannot_deref_bag_type(lp, &reference, b), )), + + plan::ScalarTypeConc::TableGet { table, field } => Err(singlelist( + errors::query_deref_cannot_deref_table_get(lp, &reference, *table, field) + )), plan::ScalarTypeConc::TableRef(table_id) => { let table_id_copy = *table_id; let table_name = lp.get_table(*table_id).name.clone(); - let dt = generate_access::dereference(*table_id, lp, named, data_type.fields)?; + let generate_access::DereferenceTypes{outer_record: dt, inner_record} = generate_access::dereference(*table_id, lp, named, data_type.fields)?; let new_type = plan::Data { fields: dt, stream: data_type.stream }; Ok( LinearBuilderState { data_out: new_type, op: plan::DeRef { - input: prev_edge, reference: rec_reference, named: rec_named, table: table_id_copy, output: next_edge }.into(), + input: prev_edge, reference: rec_reference, named: rec_named, table: table_id_copy, output: next_edge, + named_type: inner_record, + unchecked: false, }.into(), call_span: call.span() } ) diff --git a/crates/emdb/src/frontend/emql/operators/op_filter.rs b/crates/emdb_core/src/frontend/emql/operators/op_filter.rs similarity index 100% rename from crates/emdb/src/frontend/emql/operators/op_filter.rs rename to crates/emdb_core/src/frontend/emql/operators/op_filter.rs diff --git a/crates/emdb/src/frontend/emql/operators/op_fold.rs b/crates/emdb_core/src/frontend/emql/operators/op_fold.rs similarity index 93% rename from crates/emdb/src/frontend/emql/operators/op_fold.rs rename to crates/emdb_core/src/frontend/emql/operators/op_fold.rs index 9e6329e..3d9a68f 100644 --- a/crates/emdb/src/frontend/emql/operators/op_fold.rs +++ b/crates/emdb_core/src/frontend/emql/operators/op_fold.rs @@ -58,7 +58,7 @@ impl EMQLOperator for Fold { if let Some(cont) = cont { linear_builder(lp, op_ctx, cont, |lp, mo, prev, next_edge| { - let (raw_fields, mut errors) = extract_fields(fields, errors::query_operator_field_redefined); + let (raw_fields, mut errors) = extract_fields_ordered(fields, errors::query_operator_field_redefined); if !prev.data_type.stream { errors.push_back(errors::query_stream_single_connection(call.span(), prev.last_span, true)) } @@ -67,7 +67,7 @@ impl EMQLOperator for Fold { let mut fold_fields = Vec::new(); for (field, (typ, initial, update)) in raw_fields { - if let Some(scalar_t) = result_to_opt(ast_typeto_scalar(tn, ts, typ, |e| errors::query_nonexistent_table(&call, e), errors::query_no_cust_type_found), &mut errors) { + if let Some(scalar_t) = result_to_opt(query_ast_typeto_scalar(tn, ts, typ, |e| errors::query_nonexistent_table(&call, e), errors::query_no_cust_type_found), &mut errors) { let data_type = lp.scalar_types.insert(scalar_t); type_fields.insert(field.clone().into(), data_type); fold_fields.push((field.into(), plan::FoldField { initial, update })); diff --git a/crates/emdb_core/src/frontend/emql/operators/op_fork.rs b/crates/emdb_core/src/frontend/emql/operators/op_fork.rs new file mode 100644 index 0000000..2a016ff --- /dev/null +++ b/crates/emdb_core/src/frontend/emql/operators/op_fork.rs @@ -0,0 +1,107 @@ +use combi::tokens::derived::listsep; + +use super::*; + +#[derive(Debug)] +pub struct Fork { + call: Ident, + vars: Vec, +} + +impl EMQLOperator for Fork { + const NAME: &'static str = "fork"; + + fn build_parser(ctx_recur: ContextRecurHandle) -> impl TokenParser { + mapsuc( + functional_style( + Self::NAME, + seq( + 
matchident("let"), + listsep(',', setrepr(getident(), "")), + ), + ), + |(call, (_, vars))| Fork { call, vars }, + ) + } + + fn build_logical( + self, + lp: &mut plan::Plan, + tn: &HashMap>, + vs: &mut HashMap, + ts: &mut HashMap>, + op_ctx: plan::Key, + cont: Option, + ) -> Result> { + let Self { call, vars } = self; + if let Some(cont) = cont { + let mut errors = LinkedList::new(); + + + let (var_edges, vars_added): (Vec>, Vec) = vars + .into_iter() + .filter_map(|var| { + let out_edge = lp.dataflow.insert(plan::DataFlow::Null); + if assign_new_var( + var.clone(), + Continue { + data_type: cont.data_type.clone(), + prev_edge: out_edge, + last_span: call.span(), + }, + vs, + tn, + &mut errors, + ) { + Some((out_edge, var)) + } else { + lp.dataflow.remove(out_edge); + None + } + }) + .unzip(); + + if errors.is_empty() { + let fork_op = lp.operators.insert( + plan::Fork { + input: cont.prev_edge, + outputs: var_edges.clone(), + } + .into(), + ); + + for edge in var_edges { + *lp.get_mut_dataflow(edge) = plan::DataFlow::Incomplete { + from: fork_op, + with: cont.data_type.clone(), + } + } + + update_incomplete(lp.get_mut_dataflow(cont.prev_edge), fork_op); + lp.get_mut_context(op_ctx).add_operator(fork_op); + + Ok(StreamContext::Nothing { + last_span: call.span(), + }) + } else { + // NOTE: given we were unable to add all the edge, we need to repair the + // logical plan so that further semantic analysis can continue with + // a valid plan. + // I previously implemented by checking names first, but: + // - fork can duplicate names + // - easier to change with one `assign_new_var` function to do all + // variable assignment + for var in vars_added { + vs.remove(&var); + } + for edge in var_edges { + lp.dataflow.remove(edge); + } + + Err(errors) + } + } else { + Err(singlelist(errors::query_cannot_start_with_operator(&call))) + } + } +} diff --git a/crates/emdb/src/frontend/emql/operators/op_groupby.rs b/crates/emdb_core/src/frontend/emql/operators/op_groupby.rs similarity index 95% rename from crates/emdb/src/frontend/emql/operators/op_groupby.rs rename to crates/emdb_core/src/frontend/emql/operators/op_groupby.rs index d16ab6b..6765559 100644 --- a/crates/emdb/src/frontend/emql/operators/op_groupby.rs +++ b/crates/emdb_core/src/frontend/emql/operators/op_groupby.rs @@ -55,7 +55,7 @@ impl EMQLOperator for GroupBy { let inner_rec = lp.record_types.insert(plan::ConcRef::Conc(in_fields)); let next_edge = lp.dataflow.insert(plan::DataFlow::Null); let stream_in_edge = lp.dataflow.insert(plan::DataFlow::Null); - let inner_ctx = lp.contexts.insert(plan::Context::from_params(vec![(by, grouping_type)])); + let inner_ctx = lp.contexts.insert(plan::Context::from_params(vec![(by, grouping_type)], vec![stream_in_edge])); let groupby_op = lp.operators.insert(plan::GroupBy { @@ -77,10 +77,9 @@ impl EMQLOperator for GroupBy { last_span: call.span(), }; - let mut variables = HashMap::from([ - (in_name.clone(), VarState::Available { created: in_name.span(), state: inner_cont }) - ]); - + let mut variables = HashMap::new(); + assign_new_var(in_name.clone(), inner_cont, &mut variables, tn , &mut errors); + add_streams_to_context(lp, tn, ts, &mut variables, inner_ctx, contents, &call, &mut errors); discard_ends(lp, inner_ctx, variables); lp.get_mut_context(op_ctx).add_operator(groupby_op); diff --git a/crates/emdb/src/frontend/emql/operators/op_insert.rs b/crates/emdb_core/src/frontend/emql/operators/op_insert.rs similarity index 100% rename from crates/emdb/src/frontend/emql/operators/op_insert.rs rename to 
crates/emdb_core/src/frontend/emql/operators/op_insert.rs diff --git a/crates/emdb/src/frontend/emql/operators/op_join.rs b/crates/emdb_core/src/frontend/emql/operators/op_join.rs similarity index 95% rename from crates/emdb/src/frontend/emql/operators/op_join.rs rename to crates/emdb_core/src/frontend/emql/operators/op_join.rs index 294380e..155e2d5 100644 --- a/crates/emdb/src/frontend/emql/operators/op_join.rs +++ b/crates/emdb_core/src/frontend/emql/operators/op_join.rs @@ -130,7 +130,7 @@ impl EMQLOperator for Join { let op_matcher = match matcher { MatchKind::Equi { left_field, right_field } => { check_field(lp, &call, &left_field, left_rec_conc, &mut errors); - check_field(lp, &call, &right_field, left_rec_conc, &mut errors); + check_field(lp, &call, &right_field, right_rec_conc, &mut errors); plan::MatchKind::Equi { left_field: left_field.into(), right_field: right_field.into() } }, @@ -146,8 +146,8 @@ impl EMQLOperator for Join { if errors.is_empty() { let next_edge = lp.dataflow.insert(plan::DataFlow::Null); let join_op = lp.operators.insert(plan::Join { - left: left_cont.prev_edge, - right: right_cont.prev_edge, + left: plan::JoinInput { identifier: left.clone().into(), dataflow: left_cont.prev_edge}, + right: plan::JoinInput { identifier: right.clone().into(), dataflow: right_cont.prev_edge}, match_kind: op_matcher, join_kind, output: next_edge diff --git a/crates/emdb/src/frontend/emql/operators/op_let.rs b/crates/emdb_core/src/frontend/emql/operators/op_let.rs similarity index 55% rename from crates/emdb/src/frontend/emql/operators/op_let.rs rename to crates/emdb_core/src/frontend/emql/operators/op_let.rs index f8d115f..0bfbc5f 100644 --- a/crates/emdb/src/frontend/emql/operators/op_let.rs +++ b/crates/emdb_core/src/frontend/emql/operators/op_let.rs @@ -27,28 +27,11 @@ impl EMQLOperator for Let { ) -> Result> { let Self { call, var_name } = self; if let Some(prev_state) = cont { - if let Some(varstate) = vs.get(&var_name) { - Err(singlelist(match varstate { - VarState::Used { created, used } => { - errors::query_let_variable_already_assigned( - &var_name, - *created, - Some(*used), - ) - } - VarState::Available { created, state } => { - errors::query_let_variable_already_assigned(&var_name, *created, None) - } - })) + let mut errors = LinkedList::new(); + assign_new_var(var_name, prev_state, vs, tn, &mut errors); + if !errors.is_empty() { + Err(errors) } else { - let var_span = var_name.span(); - vs.insert( - var_name, - VarState::Available { - created: var_span, - state: prev_state, - }, - ); Ok(StreamContext::Nothing { last_span: call.span(), }) diff --git a/crates/emdb/src/frontend/emql/operators/op_foreach.rs b/crates/emdb_core/src/frontend/emql/operators/op_lift.rs similarity index 62% rename from crates/emdb/src/frontend/emql/operators/op_foreach.rs rename to crates/emdb_core/src/frontend/emql/operators/op_lift.rs index 5796ef5..a0803bf 100644 --- a/crates/emdb/src/frontend/emql/operators/op_foreach.rs +++ b/crates/emdb_core/src/frontend/emql/operators/op_lift.rs @@ -1,26 +1,19 @@ use super::*; #[derive(Debug)] -pub struct ForEach { +pub struct Lift { call: Ident, - in_name: Ident, contents: Vec } -impl EMQLOperator for ForEach { - const NAME: &'static str = "foreach"; +impl EMQLOperator for Lift { + const NAME: &'static str = "lift"; fn build_parser(ctx_recur: ContextRecurHandle) -> impl TokenParser { mapsuc(functional_style(Self::NAME, - seqs!( - matchident("let"), - setrepr(getident(),""), - matchident("in"), - recovgroup(Delimiter::Brace, setrepr(ctx_recur, "")) - ) 
- ),|(call, (_, (in_name, (_, contents))))| ForEach { + ctx_recur + ),|(call, contents)| Lift { call, - in_name, contents, }) } @@ -34,41 +27,34 @@ impl EMQLOperator for ForEach { op_ctx: plan::Key, cont: Option, ) -> Result> { - let Self { call, in_name, contents } = self; + let Self { call, contents } = self; if let Some(Continue { data_type, prev_edge, last_span }) = cont { let mut errors = LinkedList::new(); let next_edge = lp.dataflow.insert(plan::DataFlow::Null); - let stream_in_edge = lp.dataflow.insert(plan::DataFlow::Null); - let inner_ctx = lp.contexts.insert(plan::Context::from_params(Vec::new())); + let inner_ctx = lp.contexts.insert(plan::Context::from_params(lp.get_record_type_conc(data_type.fields).fields.iter().filter_map(|(field, ty)| { + // NOTE: Here we disallow the use of internal fields in a lift. + // - We lift to provide values to the user's closures, as + // internals cannot be used in user's closures, there is + // no point in lifting. + match field { + plan::RecordField::User(i) => Some((i.clone(), *ty)), + plan::RecordField::Internal(_) => None, + } + }).collect(), Vec::new())); - let foreach_op = lp.operators.insert(plan::ForEach { - input: prev_edge, - stream_in: stream_in_edge, + let foreach_op = lp.operators.insert(plan::Lift { + input: prev_edge, inner_ctx, output: next_edge }.into()); update_incomplete(lp.get_mut_dataflow(prev_edge), foreach_op); - if !data_type.stream { - errors.push_back(errors::operator_requires_streams2(&call)); - } - - let stream_in_data = plan::Data { fields: data_type.fields, stream: false }; - *lp.get_mut_dataflow(stream_in_edge) = plan::DataFlow::Incomplete { from: foreach_op, with: stream_in_data.clone() }; - - let inner_cont = Continue { - data_type: stream_in_data, - prev_edge: stream_in_edge, - last_span: call.span(), - }; - - let mut variables = HashMap::from([ - (in_name.clone(), VarState::Available { created: in_name.span(), state: inner_cont }) - ]); + let mut variables = HashMap::new(); add_streams_to_context(lp, tn, ts, &mut variables, inner_ctx, contents, &call, &mut errors); discard_ends(lp, inner_ctx, variables); + lp.get_mut_context(op_ctx).add_operator(foreach_op); if let Some(out_stream) = lp.get_context(inner_ctx).returnflow { @@ -76,7 +62,7 @@ impl EMQLOperator for ForEach { if let plan::Operator::Return(plan::Return{ input }) = lp.get_operator(out_stream) { let old_data = lp.get_dataflow(*input).get_conn().with.clone(); assert!(!old_data.stream, "return always takes single"); - let new_data = plan::Data { fields: old_data.fields, stream: true }; + let new_data = plan::Data { fields: old_data.fields, stream: data_type.stream }; *lp.get_mut_dataflow(next_edge) = plan::DataFlow::Incomplete { from: foreach_op, with: new_data.clone() }; Ok( StreamContext::Continue(Continue { data_type: new_data, prev_edge: next_edge, last_span: call.span() })) diff --git a/crates/emdb/src/frontend/emql/operators/op_map.rs b/crates/emdb_core/src/frontend/emql/operators/op_map.rs similarity index 90% rename from crates/emdb/src/frontend/emql/operators/op_map.rs rename to crates/emdb_core/src/frontend/emql/operators/op_map.rs index 591323c..0dd03f8 100644 --- a/crates/emdb/src/frontend/emql/operators/op_map.rs +++ b/crates/emdb_core/src/frontend/emql/operators/op_map.rs @@ -32,12 +32,12 @@ impl EMQLOperator for Map { op_ctx, cont, |lp, op_ctx, Continue { data_type, prev_edge, last_span }, next_edge| { - let (fields, mut errors) = extract_fields(new_fields, errors::query_operator_field_redefined); + let (fields, mut errors) = 
extract_fields_ordered(new_fields, errors::query_operator_field_redefined); let mut type_fields = HashMap::new(); let mut expr_fields = Vec::new(); for (field, (ast_type, expr)) in fields { - match ast_typeto_scalar(tn, ts, ast_type, |e| errors::query_nonexistent_table(&call, e), errors::query_no_cust_type_found) { + match query_ast_typeto_scalar(tn, ts, ast_type, |e| errors::query_nonexistent_table(&call, e), errors::query_no_cust_type_found) { Ok(t) => { let t_index = lp.scalar_types.insert(t); type_fields.insert(field.clone().into(), t_index); diff --git a/crates/emdb/src/frontend/emql/operators/op_ref.rs b/crates/emdb_core/src/frontend/emql/operators/op_ref.rs similarity index 100% rename from crates/emdb/src/frontend/emql/operators/op_ref.rs rename to crates/emdb_core/src/frontend/emql/operators/op_ref.rs diff --git a/crates/emdb/src/frontend/emql/operators/op_return.rs b/crates/emdb_core/src/frontend/emql/operators/op_return.rs similarity index 100% rename from crates/emdb/src/frontend/emql/operators/op_return.rs rename to crates/emdb_core/src/frontend/emql/operators/op_return.rs diff --git a/crates/emdb/src/frontend/emql/operators/op_row.rs b/crates/emdb_core/src/frontend/emql/operators/op_row.rs similarity index 90% rename from crates/emdb/src/frontend/emql/operators/op_row.rs rename to crates/emdb_core/src/frontend/emql/operators/op_row.rs index c6f903f..4b0b02a 100644 --- a/crates/emdb/src/frontend/emql/operators/op_row.rs +++ b/crates/emdb_core/src/frontend/emql/operators/op_row.rs @@ -27,13 +27,13 @@ impl EMQLOperator for Row { ) -> Result> { let Self { call, fields } = self; if cont.is_none() { - let (fields, mut errors) = extract_fields(fields, errors::query_operator_field_redefined); + let (fields, mut errors) = extract_fields_ordered(fields, errors::query_operator_field_redefined); let mut type_fields = HashMap::new(); let mut expr_fields = Vec::new(); for (field, (ast_type, expr)) in fields { - match ast_typeto_scalar(tn, ts, ast_type, |e| errors::query_nonexistent_table(&call, e), errors::query_no_cust_type_found) { + match query_ast_typeto_scalar(tn, ts, ast_type, |e| errors::query_nonexistent_table(&call, e), errors::query_no_cust_type_found) { Ok(t) => { let t_index = lp.scalar_types.insert(t); type_fields.insert(field.clone().into(), t_index); diff --git a/crates/emdb/src/frontend/emql/operators/op_sort.rs b/crates/emdb_core/src/frontend/emql/operators/op_sort.rs similarity index 98% rename from crates/emdb/src/frontend/emql/operators/op_sort.rs rename to crates/emdb_core/src/frontend/emql/operators/op_sort.rs index 040d4f2..0bbbcb4 100644 --- a/crates/emdb/src/frontend/emql/operators/op_sort.rs +++ b/crates/emdb_core/src/frontend/emql/operators/op_sort.rs @@ -61,7 +61,7 @@ impl EMQLOperator for Sort { cont, |lp, op_ctx, prev, next_edge| { let rec_type = lp.get_record_type_conc(prev.data_type.fields); - let (raw_fields, mut errors) = extract_fields(fields, errors::sort_field_used_twice); + let (raw_fields, mut errors) = extract_fields_ordered(fields, errors::sort_field_used_twice); let mut sort_order = Vec::new(); for (field, (ordering, _)) in raw_fields { let rec_field = field.clone().into(); diff --git a/crates/emdb/src/frontend/emql/operators/op_take.rs b/crates/emdb_core/src/frontend/emql/operators/op_take.rs similarity index 97% rename from crates/emdb/src/frontend/emql/operators/op_take.rs rename to crates/emdb_core/src/frontend/emql/operators/op_take.rs index 17c247f..2136e24 100644 --- a/crates/emdb/src/frontend/emql/operators/op_take.rs +++ 
b/crates/emdb_core/src/frontend/emql/operators/op_take.rs @@ -40,7 +40,7 @@ impl EMQLOperator for Take { Ok( LinearBuilderState { data_out: data_type, - op: (plan::Take { input: prev_edge, top_n: expr, output: next_edge }.into()), + op: (plan::Take { input: prev_edge, limit: expr, output: next_edge }.into()), call_span: call.span() } ) diff --git a/crates/emdb/src/frontend/emql/operators/op_union.rs b/crates/emdb_core/src/frontend/emql/operators/op_union.rs similarity index 100% rename from crates/emdb/src/frontend/emql/operators/op_union.rs rename to crates/emdb_core/src/frontend/emql/operators/op_union.rs diff --git a/crates/emdb/src/frontend/emql/operators/op_unique.rs b/crates/emdb_core/src/frontend/emql/operators/op_unique.rs similarity index 100% rename from crates/emdb/src/frontend/emql/operators/op_unique.rs rename to crates/emdb_core/src/frontend/emql/operators/op_unique.rs diff --git a/crates/emdb/src/frontend/emql/operators/op_update.rs b/crates/emdb_core/src/frontend/emql/operators/op_update.rs similarity index 97% rename from crates/emdb/src/frontend/emql/operators/op_update.rs rename to crates/emdb_core/src/frontend/emql/operators/op_update.rs index a737ad2..c096530 100644 --- a/crates/emdb/src/frontend/emql/operators/op_update.rs +++ b/crates/emdb_core/src/frontend/emql/operators/op_update.rs @@ -45,7 +45,7 @@ impl EMQLOperator for Update { op_ctx, cont, |lp, op_ctx, prev, next_edge| { - let (raw_fields, mut errors) = extract_fields(fields, errors::query_operator_field_redefined); + let (raw_fields, mut errors) = extract_fields_ordered(fields, errors::query_operator_field_redefined); let raw_table_id = if let Some(sk) = lp.get_record_type_conc(prev.data_type.fields).fields.get(&rec_reference) { if let plan::ScalarTypeConc::TableRef(table) = lp.get_scalar_type_conc(*sk) { Some(*table) } else { @@ -68,7 +68,7 @@ impl EMQLOperator for Update { let table = lp.get_table(table_id); let mut update_record = plan::RecordConc { fields: HashMap::new() }; - for id in nondup_fields.keys() { + for (id,_) in &nondup_fields { match table.columns.get(&id.clone().into()) { Some(col) => { update_record.fields.insert(id.clone().into(), col.data_type); diff --git a/crates/emdb/src/frontend/emql/operators/op_use.rs b/crates/emdb_core/src/frontend/emql/operators/op_use.rs similarity index 90% rename from crates/emdb/src/frontend/emql/operators/op_use.rs rename to crates/emdb_core/src/frontend/emql/operators/op_use.rs index 06d9e1e..b22f23d 100644 --- a/crates/emdb/src/frontend/emql/operators/op_use.rs +++ b/crates/emdb_core/src/frontend/emql/operators/op_use.rs @@ -34,7 +34,10 @@ impl EMQLOperator for Use { let ref_field = plan::RecordField::Internal(0); let rec_field = plan::RecordField::Internal(1); - let table_fields_type = lp.record_types.insert(get_all_cols(lp, *table_id).into()); + let ref_scalar_type = lp.scalar_types.insert(plan::ConcRef::Conc(plan::ScalarTypeConc::TableRef(*table_id))); + + let cols = get_all_cols(lp, *table_id); + let table_fields_type = lp.record_types.insert(cols.into()); let table_fields_scalar_type = lp.scalar_types.insert(plan::ScalarTypeConc::Record(table_fields_type).into()); let scanref_cont = create_scanref(lp, op_ctx, *table_id, ref_field.clone(), call.span()); @@ -47,10 +50,10 @@ impl EMQLOperator for Use { }, next_edge| { LinearBuilderState { data_out: plan::Data { - fields: lp.record_types.insert(plan::RecordConc{ fields: HashMap::from([(rec_field.clone(), table_fields_scalar_type)]) }.into()), + fields: lp.record_types.insert(plan::RecordConc{ fields: 
HashMap::from([(rec_field.clone(), table_fields_scalar_type), (ref_field.clone(), ref_scalar_type) ]) }.into()), stream: true, }, - op: plan::DeRef { input: prev_edge, reference: ref_field, named: rec_field.clone(), table: *table_id, output: next_edge }.into(), + op: plan::DeRef { input: prev_edge, reference: ref_field, named: rec_field.clone(), table: *table_id, output: next_edge, named_type: table_fields_type, unchecked: true }.into(), call_span: call.span(), } } diff --git a/crates/emdb/src/frontend/emql/parse.rs b/crates/emdb_core/src/frontend/emql/parse.rs similarity index 97% rename from crates/emdb/src/frontend/emql/parse.rs rename to crates/emdb_core/src/frontend/emql/parse.rs index ed81329..93437da 100644 --- a/crates/emdb/src/frontend/emql/parse.rs +++ b/crates/emdb_core/src/frontend/emql/parse.rs @@ -221,13 +221,10 @@ fn constraint_parser() -> impl TokenParser { seqs!( matchident(name), recovgroup(Delimiter::Parenthesis, p), - choice( - peekident("as"), - mapsuc(seq(matchident("as"), getident()), |(_, i)| Some(i)), - mapsuc(nothing(), |()| None) - ) + matchident("as"), + getident() ), - |(method, (p, alias))| ast::Constraint { + |(method, (p, (_, alias)))| ast::Constraint { alias, method_span: method.span(), expr: p, diff --git a/crates/emdb/src/frontend/emql/sem.rs b/crates/emdb_core/src/frontend/emql/sem.rs similarity index 78% rename from crates/emdb/src/frontend/emql/sem.rs rename to crates/emdb_core/src/frontend/emql/sem.rs index ac73d15..af0c261 100644 --- a/crates/emdb/src/frontend/emql/sem.rs +++ b/crates/emdb_core/src/frontend/emql/sem.rs @@ -16,7 +16,7 @@ use crate::{ }, errors, }, - plan, + plan::{self, ConcRef, ScalarTypeConc}, }; use proc_macro2::{Ident, Span}; use proc_macro_error::Diagnostic; @@ -134,9 +134,12 @@ fn add_table( duplicate.get_field(), )); } else { - let type_index = lp - .scalar_types - .insert(plan::ConcRef::Conc(plan::ScalarTypeConc::Rust(col_type))); + let type_index = + lp.scalar_types + .insert(plan::ConcRef::Conc(plan::ScalarTypeConc::Rust { + type_context: plan::TypeContext::DataStore, + ty: col_type, + })); columns.insert( col_rf, plan::Column { @@ -160,13 +163,12 @@ fn add_table( expr, } in cons { - if let Some(alias) = &alias { - if let Some(duplicate) = constraint_names.get(alias) { - errs.push_back(errors::table_constraint_alias_redefined(alias, duplicate)); - } else { - constraint_names.insert(alias.clone()); - } + if let Some(duplicate) = constraint_names.get(&alias) { + errs.push_back(errors::table_constraint_alias_redefined(&alias, duplicate)); + } else { + constraint_names.insert(alias.clone()); } + match expr { ConstraintExpr::Unique { field } => { let rf_field = field.clone().into(); @@ -246,27 +248,29 @@ fn add_query( let mut ts = HashMap::new(); // Analyse the query parameters - let (raw_params, mut errors) = extract_fields(params, errors::query_parameter_redefined); - let params = raw_params.into_iter().filter_map(|(name, data_type)| { - match ast_typeto_scalar( - tn, - &mut ts, - data_type, - |e| errors::query_param_ref_table_not_found(&name, e), - errors::query_no_cust_type_found, - ) { - Ok(t) => Some((name, lp.scalar_types.insert(t))), - Err(e) => { - errors.push_back(e); - None + let (raw_params, mut errors) = + extract_fields_ordered(params, errors::query_parameter_redefined); + let params = + raw_params.into_iter().filter_map(|(name, data_type)| { + match query_ast_typeto_scalar( + tn, + &mut ts, + data_type, + |e| errors::query_param_ref_table_not_found(&name, e), + errors::query_no_cust_type_found, + ) { + Ok(t) => 
Some((name, lp.scalar_types.insert(t))), + Err(e) => { + errors.push_back(e); + None + } } - } - }); + }); // Create and populate the query context let op_ctx = lp .contexts - .insert(plan::Context::from_params(params.collect())); + .insert(plan::Context::from_params(params.collect(), Vec::new())); lp.queries.insert(plan::Query { name: name.clone(), ctx: op_ctx, @@ -289,7 +293,7 @@ fn add_query( } /// Add a collection of streams to a context (e.g. a [`Query`], or the inside of -/// a [`plan::ForEach`]) +/// a [`plan::Lift`]) #[allow(clippy::too_many_arguments)] pub fn add_streams_to_context( lp: &mut plan::Plan, @@ -456,7 +460,7 @@ fn recur_stream( } } -/// helper for extracting a map of unique fields by Ident +/// Creates a hashmap from the fields, with errors for duplicate fields pub fn extract_fields( fields: Vec<(Ident, T)>, err_fn: impl Fn(&Ident, &Ident) -> Diagnostic, @@ -474,7 +478,34 @@ pub fn extract_fields( (map_fields, errors) } -pub fn ast_typeto_scalar( +/// Similar to [extract_fields] but maintains the order of fields from the original vector. +/// - duplicates of fields are removed +/// - errors are generated for each duplicate +pub fn extract_fields_ordered( + fields: Vec<(Ident, T)>, + err_fn: impl Fn(&Ident, &Ident) -> Diagnostic, +) -> (Vec<(Ident, T)>, LinkedList) { + let mut errors = LinkedList::new(); + let mut used_names = HashSet::with_capacity(fields.len()); + + let non_dup_fields = fields + .into_iter() + .filter_map(|(id, content)| { + if let Some(other_id) = used_names.get(&id) { + errors.push_back(err_fn(&id, other_id)); + None + } else { + used_names.insert(id.clone()); + Some((id, content)) + } + }) + .collect(); + + (non_dup_fields, errors) +} + +/// Converts an AST type to a scalar type, if a rust type then the [`plan::TypeContext::Query`] context is used. +pub fn query_ast_typeto_scalar( tn: &HashMap>, ts: &mut HashMap>, t: AstType, @@ -482,7 +513,10 @@ pub fn ast_typeto_scalar( cust_err_fn: impl Fn(&Ident) -> Diagnostic, ) -> Result { match t { - AstType::RsType(t) => Ok(plan::ConcRef::Conc(plan::ScalarTypeConc::Rust(t))), + AstType::RsType(ty) => Ok(plan::ConcRef::Conc(plan::ScalarTypeConc::Rust { + type_context: plan::TypeContext::Query, + ty, + })), AstType::TableRef(table_ref) => { if let Some(table_id) = tn.get(&table_ref) { Ok(plan::ConcRef::Conc(plan::ScalarTypeConc::TableRef( @@ -580,7 +614,71 @@ pub fn create_scanref( } } -pub fn get_all_cols(lp: &plan::Plan, table_id: plan::Key) -> plan::RecordConc { +pub struct FieldComparison<'res> { + pub extra_fields: Vec<&'res Ident>, + pub missing_fields: Vec<&'res Ident>, +} + +/// Check if the user defined fields present match a data type. 
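`extract_fields_ordered` above keeps the first occurrence of each field in source order and reports an error for every later duplicate; the PR switches `map`, `row`, `sort` and `update` over to it, so field order is preserved through to code generation. A freestanding sketch of the same pattern, with plain `String`s standing in for `Ident` and `Diagnostic`:

```rust
use std::collections::HashSet;

// Keep the first occurrence of each key in order, reporting later duplicates.
fn extract_ordered<T>(fields: Vec<(String, T)>) -> (Vec<(String, T)>, Vec<String>) {
    let mut errors = Vec::new();
    let mut used_names = HashSet::new();
    let kept = fields
        .into_iter()
        .filter_map(|(id, content)| {
            if used_names.contains(&id) {
                errors.push(format!("field `{id}` redefined"));
                None
            } else {
                used_names.insert(id.clone());
                Some((id, content))
            }
        })
        .collect();
    (kept, errors)
}

fn main() {
    let (kept, errors) = extract_ordered(vec![
        ("a".to_string(), 1),
        ("b".to_string(), 2),
        ("a".to_string(), 3), // duplicate: dropped, error reported
    ]);
    assert_eq!(kept, vec![("a".to_string(), 1), ("b".to_string(), 2)]);
    assert_eq!(errors.len(), 1);
}
```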
+/// INV: the data type has no [`plan::RecordField::Internal`] fields +pub fn check_fields_type<'imm>( + lp: &'imm plan::Plan, + data_type: plan::Key, + fields: impl Iterator, +) -> FieldComparison<'imm> { + let mut keys = lp + .get_record_type_conc(data_type) + .fields + .keys() + .map(|rf| match rf { + plan::RecordField::User(i) => i, + plan::RecordField::Internal(_) => { + unreachable!("Cannot call this method with internal fields") + } + }) + .collect::>(); + + let mut extra_fields = Vec::new(); + let mut missing_fields = Vec::new(); + + for field in fields { + if !keys.remove(field) { + extra_fields.push(field); + } + } + + for field in keys { + missing_fields.push(field); + } + + FieldComparison { + extra_fields, + missing_fields, + } +} + +pub fn get_all_cols(lp: &mut plan::Plan, table_id: plan::Key) -> plan::RecordConc { + // NOTE: cannot use lp.get_table as borrow checker does not know that does + // not borrow from lp.scalar_types which is mutated later + let table = lp.tables.get(table_id).unwrap(); + plan::RecordConc { + fields: table + .columns + .iter() + .map(|(id, plan::Column { cons, data_type })| { + (id.clone(), { + let access = ConcRef::Conc(ScalarTypeConc::TableGet { + table: table_id, + field: id.clone(), + }); + lp.scalar_types.insert(access) + }) + }) + .collect(), + } +} + +pub fn insert_all_cols(lp: &plan::Plan, table_id: plan::Key) -> plan::RecordConc { let table = lp.get_table(table_id); plan::RecordConc { fields: table @@ -614,26 +712,36 @@ pub mod generate_access { ) } + pub struct DereferenceTypes { + pub outer_record: plan::Key, + pub inner_record: plan::Key, + } + pub fn dereference( table_id: plan::Key, lp: &mut plan::Plan, new_field: Ident, include_from: plan::Key, - ) -> Result, LinkedList> { - let inner_record = lp.record_types.insert(get_all_cols(lp, table_id).into()); + ) -> Result> { + let cols = get_all_cols(lp, table_id); + let inner_record = lp.record_types.insert(cols.into()); let scalar_t = lp .scalar_types .insert(plan::ConcRef::Conc(plan::ScalarTypeConc::Record( inner_record, ))); - append_fields(lp, vec![(new_field, scalar_t)], include_from) + let outer_record = append_fields(lp, vec![(new_field, scalar_t)], include_from)?; + Ok(DereferenceTypes { + outer_record, + inner_record, + }) } pub fn insert( table_id: plan::Key, lp: &mut plan::Plan, ) -> plan::Key { - lp.record_types.insert(get_all_cols(lp, table_id).into()) + lp.record_types.insert(insert_all_cols(lp, table_id).into()) } pub fn unique( @@ -790,3 +898,38 @@ pub fn valid_linear_builder( last_span: result.call_span, } } + +pub fn assign_new_var( + var_name: Ident, + state: Continue, + vs: &mut HashMap, + tn: &HashMap>, + errors: &mut LinkedList, +) -> bool { + if let Some((table_name, _)) = tn.get_key_value(&var_name) { + errors.push_back(errors::query_let_variable_shadows_table( + &var_name, table_name, + )); + false + } else if let Some(varstate) = vs.get(&var_name) { + errors.push_back(match varstate { + VarState::Used { created, used } => { + errors::query_let_variable_already_assigned(&var_name, *created, Some(*used)) + } + VarState::Available { created, state } => { + errors::query_let_variable_already_assigned(&var_name, *created, None) + } + }); + false + } else { + let var_span = var_name.span(); + vs.insert( + var_name, + VarState::Available { + created: var_span, + state, + }, + ); + true + } +} diff --git a/crates/emdb/src/frontend/mod.rs b/crates/emdb_core/src/frontend/mod.rs similarity index 100% rename from crates/emdb/src/frontend/mod.rs rename to 
crates/emdb_core/src/frontend/mod.rs diff --git a/crates/emdb/src/frontend/sql/mod.rs b/crates/emdb_core/src/frontend/sql/mod.rs similarity index 100% rename from crates/emdb/src/frontend/sql/mod.rs rename to crates/emdb_core/src/frontend/sql/mod.rs diff --git a/crates/emdb_core/src/lib.rs b/crates/emdb_core/src/lib.rs new file mode 100644 index 0000000..ef847f0 --- /dev/null +++ b/crates/emdb_core/src/lib.rs @@ -0,0 +1,58 @@ +#![allow(dead_code)] +#![allow(unused_variables)] + +extern crate proc_macro; + +mod analysis; +mod backend; +mod frontend; +mod optimise; +mod plan; +mod utils; + +mod macros { + use proc_macro2::TokenStream; + use quote::quote; + use std::collections::LinkedList; + + pub(crate) fn make_impl(tk: TokenStream) -> TokenStream { + match F::from_tokens(tk) { + Err(ds) => { + for d in ds { + d.emit(); + } + TokenStream::new() + } + Ok((lp, bks)) => { + let mut errors = LinkedList::new(); + let impls = bks + .impls + .into_iter() + .filter_map(|(id, backend)| { + match crate::backend::generate_code(backend, id, &lp) { + Ok(code) => Some(code), + Err(mut e) => { + errors.append(&mut e); + None + } + } + }) + .collect::>(); + + for e in errors { + e.emit(); + } + + quote! { + #(#impls)* + } + } + } + } +} + +#[proc_macro_error::proc_macro_error] +#[proc_macro] +pub fn emql(tk: proc_macro::TokenStream) -> proc_macro::TokenStream { + crate::macros::make_impl::(tk.into()).into() +} diff --git a/crates/emdb/src/optimise/mod.rs b/crates/emdb_core/src/optimise/mod.rs similarity index 100% rename from crates/emdb/src/optimise/mod.rs rename to crates/emdb_core/src/optimise/mod.rs diff --git a/crates/emdb_core/src/plan/access.rs b/crates/emdb_core/src/plan/access.rs new file mode 100644 index 0000000..ff03095 --- /dev/null +++ b/crates/emdb_core/src/plan/access.rs @@ -0,0 +1,122 @@ +//! Helpers for accessing [`Plan`] values through keys, and indexes to immutable plans + +use super::*; + +/// All component types can be indexed through a [Key] +/// - No shared mutability, need to have the plan also to use +/// - Checked access for keys to ensure no use after delete +/// - Keys are generational, so no aliasing of old deleted, vs new keys is +/// possible. +pub type Key = Index>; + +/// When a key into an immutable plan is needed, but the plan is not changed: +/// - Can use a borrow to enforce the plan is not mutated +/// - Can ignore the generation count on keys +/// +/// It is a zero cost wrapper (no extra memory used, exists only to supplement +/// usize in type checking). 
+/// ```ignore +/// # fn wrapper<'imm, T>() { +/// assert_eq!(std::mem::size_of::(), std::mem::size_of::>()); +/// # } +/// ``` +// TODO: Make this doctest runnable +pub struct Idx<'imm, T> { + arr_idx: usize, + plan_holder: &'imm (), + _phantom: std::marker::PhantomData, +} + +impl<'imm, T> Idx<'imm, T> { + pub fn new(key: Key, plan: &'imm Plan) -> Self { + Idx { + arr_idx: key.arr_idx(), + plan_holder: &plan._holder, + _phantom: std::marker::PhantomData, + } + } +} + +impl<'imm, T> From<(Key, &'imm Plan)> for Idx<'imm, T> { + fn from((key, plan): (Key, &'imm Plan)) -> Self { + Self::new(key, plan) + } +} + +impl<'imm, T> Clone for Idx<'imm, T> { + fn clone(&self) -> Self { + *self + } +} +impl<'imm, T> Copy for Idx<'imm, T> {} +impl<'imm, T> PartialEq for Idx<'imm, T> { + fn eq(&self, other: &Self) -> bool { + self.arr_idx == other.arr_idx + } +} +impl<'imm, T> Eq for Idx<'imm, T> {} +impl<'imm, T> std::hash::Hash for Idx<'imm, T> { + fn hash(&self, state: &mut H) { + self.arr_idx.hash(state) + } +} +impl<'imm, T> std::ops::Deref for Idx<'imm, T> { + type Target = usize; + fn deref(&self) -> &Self::Target { + &self.arr_idx + } +} + +/// A key with a lifetime binding to prevent mutation of the referenced plan. +/// - Implements hash (unlike [`typed_generational_arena::Index`] (the generation count is not hashable)) +/// TODO: contribute to [`typed_generational_arena::Index`] to fix this. +pub struct ImmKey<'imm, T> { + key: Key, + plan_holder: &'imm (), +} +impl<'imm, T> ImmKey<'imm, T> { + pub fn new(key: Key, plan: &'imm Plan) -> Self { + Self { + key, + plan_holder: &plan._holder, + } + } +} + +impl<'imm, T> From<(Key, &'imm Plan)> for ImmKey<'imm, T> { + fn from((key, plan): (Key, &'imm Plan)) -> Self { + Self::new(key, plan) + } +} + +impl<'imm, T> Clone for ImmKey<'imm, T> { + fn clone(&self) -> Self { + *self + } +} +impl<'imm, T> Copy for ImmKey<'imm, T> {} +impl<'imm, T> PartialEq for ImmKey<'imm, T> { + fn eq(&self, other: &Self) -> bool { + self.key == other.key + } +} +impl<'imm, T> Eq for ImmKey<'imm, T> {} +impl<'imm, T> std::hash::Hash for ImmKey<'imm, T> { + fn hash(&self, state: &mut H) { + self.key.arr_idx().hash(state) + } +} +impl<'imm, T> std::ops::Deref for ImmKey<'imm, T> { + type Target = Key; + fn deref(&self) -> &Self::Target { + &self.key + } +} + +/// A wrapper type for implementing traits on components that need to use the +/// plan for context. +/// - for example printing types requires the logical plan for table ref types +pub struct With<'a, A> { + pub plan: &'a Plan, + pub extended: A, +} diff --git a/crates/emdb/src/plan/mod.rs b/crates/emdb_core/src/plan/mod.rs similarity index 65% rename from crates/emdb/src/plan/mod.rs rename to crates/emdb_core/src/plan/mod.rs index 5302d0a..7f41912 100644 --- a/crates/emdb/src/plan/mod.rs +++ b/crates/emdb_core/src/plan/mod.rs @@ -1,36 +1,29 @@ //! # emDB Logical Plan //! Describes the schema, tables, expressions and operations. +//! +//! ## Shortcomings +//! Heavy usage of indexes, and not grouping queries, contexts, and operators optimally. +//! - More invariants checked and reasoned about, but not enforced by types. +//! - Potentially nesting contexts with dataflow and operators inside. 
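The zero-cost claim for these key wrappers rests on `PhantomData` for the type checking (the unit borrow only adds lifetime checking on top). A minimal, self-contained sketch of the type-marker half, with illustrative component names rather than the emDB definitions:

```rust
use std::marker::PhantomData;

// Illustrative stand-ins for plan component types.
struct Table;
struct DataFlow;

/// A typed index: a plain `usize` plus a zero-sized marker type.
struct TypedIdx<T> {
    arr_idx: usize,
    _phantom: PhantomData<T>,
}

fn main() {
    // PhantomData is zero-sized, so the wrapper costs nothing over usize...
    assert_eq!(std::mem::size_of::<usize>(), std::mem::size_of::<TypedIdx<Table>>());
    assert_eq!(std::mem::size_of::<usize>(), std::mem::size_of::<TypedIdx<DataFlow>>());

    // ...while `TypedIdx<Table>` and `TypedIdx<DataFlow>` stay distinct types, so a
    // table index cannot be passed where a dataflow index is expected.
    let t = TypedIdx::<Table> { arr_idx: 3, _phantom: PhantomData };
    assert_eq!(t.arr_idx, 3);
}
```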
use typed_generational_arena::{Index, NonzeroGeneration, StandardArena as GenArena}; +mod access; mod operators; mod queries; mod tables; mod types; +pub use access::*; pub use operators::*; pub use queries::*; pub use tables::*; pub use types::*; -/// All component types can be indexed through a [Key] -/// - No shared mutability, need to have the plan also to use -/// - Checked access for keys to ensure no use after delete -/// - Keys are generational, so no aliasing of old deleted, vs new keys is -/// possible. -pub type Key = Index>; - -/// A wrapper type for implementing traits on components that need to use the -/// plan for context. -/// - for example printing types requires the logical plan for table ref types -pub struct With<'a, A> { - pub plan: &'a Plan, - pub extended: A, -} - /// The basic logical plan /// - All components can be accessed via [Key] /// - Can be augmented with other data that uses [Key] to reference components +#[allow(clippy::manual_non_exhaustive)] pub struct Plan { pub queries: GenArena, pub contexts: GenArena, @@ -39,6 +32,7 @@ pub struct Plan { pub dataflow: GenArena, pub scalar_types: GenArena, pub record_types: GenArena, + _holder: (), } impl Plan { @@ -51,6 +45,7 @@ impl Plan { dataflow: GenArena::new(), scalar_types: GenArena::new(), record_types: GenArena::new(), + _holder: (), } } }
diff --git a/crates/emdb/src/plan/operators.rs b/crates/emdb_core/src/plan/operators.rs similarity index 69% rename from crates/emdb/src/plan/operators.rs rename to crates/emdb_core/src/plan/operators.rs index 2bcd637..1bacfa0 100644 --- a/crates/emdb/src/plan/operators.rs +++ b/crates/emdb_core/src/plan/operators.rs @@ -5,10 +5,31 @@ //! with different data, as members of the operator) //! - Arena based (allows [analyses](crate::analysis) to index nodes in the //! plan, without requiring additions to it) +//! +//! ## Shortcomings +//! For the side-effecting operators, we want to move some attributes from the +//! stream, but propagate others. +//! +//! Ideally we would have something like: +//! ```text +//! Rec { a, b } -> insert(only take a) -> Rec { ref, b } +//! ``` +//! +//! We could generalise this as a `remap` operator +//! ```text +//! Rec { a, b } -> remap( Rec{a} -> insert() -> return, Rec{b} -> return ) -> Rec { b } +//! ``` +//! +//! In the [super] documentation, the shortcomings note suggests a possible direction: +//! - Could nest a context, but with the operator type restricted to ones that don't affect +//! the cardinality of the output. +//! +//! Potential Improvement: +//! - Strongly type single and stream dataflows. (`Key`, `Key`) use super::{Context, Data, Key, Plan, RecordField, RecordType, Table}; use std::collections::HashMap; -use syn::Expr; +use syn::{Expr, Ident}; /// A complete data flow connection (only type allowed for valid, constructed plans) pub struct DataFlowConn { @@ -42,62 +63,99 @@ impl DataFlow { } }
-/// Apply write to specific columns in a table. -/// - `INV`: mapping and output have the same fields -/// - `INV`: mapping expressions only contain fields from input and globals -/// - `INV`: mapping assignment only contains fields from referenced table +/// Write to columns of a row in the table from a stream/single, specifying: +/// - The values to provide for columns (using references to the input) +/// - The field that contains the table reference to use. +/// +/// Returns the input type; the table is mutated. +/// ```text +/// RECORD -> update(use RECORD.{ .. } and Fields = |&RECORD| { .. } ) -> RECORD +/// ``` pub struct Update { pub input: Key, - /// The field and table referenced - pub reference: RecordField, // + /// `INV`: `reference` field is in the input + pub reference: RecordField, + pub table: Key, - // the expressions for the output type + // `INV`: each field in mapping is in the table. pub mapping: HashMap, + + /// `INV`: All fields in the record are fields in the table pub update_type: Key, + // `INV`: `input.with == output.with` pub output: Key, }
-/// Insert a single row or a stream into a table, the inserted rows -/// are propagated -/// - `INV`: input and output have the same fields -/// - `INV`: input has same fields as table +/// Insert records from a stream/single into a table, producing a stream/single of row +/// references. +/// - the return stream contains references to the inserted values. +/// +/// ```text +/// TABLE::INSERT -> insert(TABLE) -> TABLE::REF +/// ``` pub struct Insert { pub input: Key, pub table: Key, + + /// The single field to place the out_ref in. + /// `INV`: `out_ref` is the only field in `output.with` pub out_ref: RecordField, + pub output: Key, }
-/// Delete a single row or a stream from a table by reference, -/// the deleted rows are propagated -/// - `INV`: input is a stream or single of row references -/// - `INV`: output contains the tuple of removed values, same fields as table +/// Delete rows from a table using a row reference. +/// +/// ```text +/// RECORD -> delete(use RECORD.{ .. } ) -> RECORD +/// ``` pub struct Delete { pub input: Key, + + /// The table reference to delete with. pub reference: RecordField, pub table: Key, + + /// `INV`: `[Self::input].with == output.with` pub output: Key, }
-/// Gets a unique row from a table +/// Borrow a field and use it for unique lookup into a table, to get a row +/// reference. +/// - The column used for the row lookup must have the unique constraint +/// +/// ```text +/// RECORD [-> or |>] unique_ref(use RECORD.{ .. } TABLE at COLUMN ) [-> or |>] RECORD + TABLE::REF +/// ``` pub struct UniqueRef { pub input: Key, + /// the dataflow field to get the unique value from pub from: RecordField, + + /// the table that is being referenced, and the column in that table pub table: Key, pub field: RecordField, + + /// the new field to add to the record pub out: RecordField, pub output: Key, }
-/// Scan a table to generate a stream of table references +/// Scan all refs from a table into a stream. +/// +/// ```text
+/// scan_refs(TABLE) -> TABLE::REF +/// ``` pub struct ScanRefs { pub table: Key, pub out_ref: RecordField, + + /// `INV`: is a stream with a single field (`out_ref`) of table reference to `table` pub output: Key, } @@ -105,22 +163,21 @@ pub struct ScanRefs { /// - `INV`: the 'named' field is not present in the input record pub struct DeRef { pub input: Key, + + /// The input field holding the key pub reference: RecordField, + + /// Specifies that the access should not be checked (e.g. panic on key not found) + pub unchecked: bool, + + /// The field to put the data in pub named: RecordField, + pub named_type: Key, + pub table: Key
, pub output: Key, } -pub enum SortOrder { - Asc, - Desc, -} - -pub struct FoldField { - pub initial: Expr, - pub update: Expr, -} - /// Applying a function over a stream of values /// - `INV`: output fields match mapping fields /// - `INV`: mapping expressions only contain fields from input and globals @@ -139,6 +196,11 @@ pub struct Expand { pub output: Key, } +pub struct FoldField { + pub initial: Expr, + pub update: Expr, +} + /// A fold operation over a stream of values /// - `INV`: initial fields only contain globals /// - `INV`: update expressions only contain fields from input, initial and globals @@ -157,6 +219,11 @@ pub struct Filter { pub output: Key, } +pub enum SortOrder { + Asc, + Desc, +} + /// Sort the input given some keys and ordering /// - `INV`: input and output must have the same fields /// - `INV`: input and output must both be streams @@ -191,8 +258,14 @@ pub struct Collect { /// Take the top n from a stream, discarding the rest pub struct Take { pub input: Key, - pub top_n: Expr, + pub limit: Expr, + pub output: Key, +} + +pub struct Count { + pub input: Key, pub output: Key, + pub out_field: RecordField, } pub enum MatchKind { @@ -211,15 +284,32 @@ pub enum JoinKind { Inner, } +pub struct JoinInput { + pub identifier: RecordField, + pub dataflow: Key, +} + /// Join two streams together pub struct Join { - pub left: Key, - pub right: Key, + pub left: JoinInput, + pub right: JoinInput, pub match_kind: MatchKind, pub join_kind: JoinKind, pub output: Key, } +/// Combine a stream into a single element +pub struct Combine { + pub input: Key, + + pub left_name: Ident, + pub right_name: Ident, + + pub update_fields: Vec<(RecordField, FoldField)>, + + pub output: Key, +} + /// Group by a field and aggregate the results pub struct GroupBy { pub input: Key, @@ -232,16 +322,14 @@ pub struct GroupBy { } /// Run a sub-query for each row in an input stream -pub struct ForEach { +pub struct Lift { pub input: Key, - - pub stream_in: Key, pub inner_ctx: Key, - pub output: Key, } /// Given an operator output, multiply it into multiple outputs +/// Can be either single or stream inputs! 
pub struct Fork { pub input: Key, pub outputs: Vec>, @@ -295,6 +383,8 @@ pub enum Operator { Filter, Sort, Assert, + Combine, + Count, // cardinality set Take, @@ -302,14 +392,14 @@ pub enum Operator { // nested contexts GroupBy, - ForEach, + Lift, // stream join & split Join, Fork, Union, - // control Flow + // control flow Row, Return, Discard, diff --git a/crates/emdb/src/plan/queries.rs b/crates/emdb_core/src/plan/queries.rs similarity index 75% rename from crates/emdb/src/plan/queries.rs rename to crates/emdb_core/src/plan/queries.rs index 4f16e84..61e3ff4 100644 --- a/crates/emdb/src/plan/queries.rs +++ b/crates/emdb_core/src/plan/queries.rs @@ -1,4 +1,4 @@ -use super::{Key, Operator, Plan, ScalarType}; +use super::{DataFlow, Key, Operator, Plan, RecordType, ScalarType}; use proc_macro2::Ident; pub struct Query { @@ -14,6 +14,8 @@ pub struct Context { pub ordering: Vec>, /// The parameters for the context pub params: Vec<(Ident, Key)>, + /// Any streams that are passed into the context + pub inflows: Vec>, /// if the context returns a value, then the return value operator /// INV is a [super::Return] pub returnflow: Option>, @@ -21,10 +23,11 @@ pub struct Context { } impl Context { - pub fn from_params(params: Vec<(Ident, Key)>) -> Self { + pub fn from_params(params: Vec<(Ident, Key)>, inflows: Vec>) -> Self { Context { ordering: Vec::new(), params, + inflows, returnflow: None, discards: Vec::new(), } @@ -45,6 +48,15 @@ impl Context { pub fn add_discard(&mut self, operator: Key) { self.discards.push(operator); } + + pub fn get_return_type(&self, lp: &Plan) -> Option> { + self.returnflow.map(|ret| { + lp.get_dataflow(lp.get_operator(ret).get_return().input) + .get_conn() + .with + .fields + }) + } } impl Plan { diff --git a/crates/emdb/src/plan/tables.rs b/crates/emdb_core/src/plan/tables.rs similarity index 97% rename from crates/emdb/src/plan/tables.rs rename to crates/emdb_core/src/plan/tables.rs index 19dee81..b462626 100644 --- a/crates/emdb/src/plan/tables.rs +++ b/crates/emdb_core/src/plan/tables.rs @@ -11,7 +11,7 @@ use syn::Expr; use super::{Key, Plan, RecordField, ScalarType}; pub struct Constraint { - pub alias: Option, + pub alias: Ident, pub cons: C, } diff --git a/crates/emdb/src/plan/types.rs b/crates/emdb_core/src/plan/types.rs similarity index 80% rename from crates/emdb/src/plan/types.rs rename to crates/emdb_core/src/plan/types.rs index 18bdead..795e1d9 100644 --- a/crates/emdb/src/plan/types.rs +++ b/crates/emdb_core/src/plan/types.rs @@ -23,6 +23,7 @@ //! - two different bags of an equal type are equal (use the same bag data structure) //! - two references to the same table are equal (use the same reference type) //! - two records with the same fields are equal (use the same wrapping record data structure) +//! //! This is the type of equality we use for semantic analysis. //! //! 2. Implementation Type Equality (used for code generation) asks: do the @@ -55,7 +56,7 @@ //! - Cycles are *very bad*, avoid at all cost. Can cause stackoverflow on //! compilation, [`ConcRef`] are assumed to be non-cyclic. 
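A minimal sketch of the `ConcRef` flattening described here, with a plain `Vec` standing in for the generational arena; resolving a chain of `Ref`s terminates precisely because cycles are excluded:

```rust
// Illustrative stand-in: the real plan stores ConcRef nodes in a generational arena.
enum ConcRef<A> {
    Conc(A),    // a concrete node
    Ref(usize), // points at another ConcRef slot
}

// Follow Ref links until a concrete node is reached.
// INV (from the doc above): no cycles, so this loop terminates.
fn get_conc<A>(arena: &[ConcRef<A>], mut idx: usize) -> &A {
    loop {
        match &arena[idx] {
            ConcRef::Conc(a) => return a,
            ConcRef::Ref(next) => idx = *next,
        }
    }
}

fn main() {
    // Slot 2 -> slot 1 -> slot 0, which holds the concrete value.
    let arena = vec![ConcRef::Conc("record"), ConcRef::Ref(0), ConcRef::Ref(1)];
    assert_eq!(*get_conc(&arena, 2), "record");
}
```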
-use super::{GenArena, Key, Plan, Table, With}; +use super::{GenArena, ImmKey, Key, Plan, Table, With}; use proc_macro2::Ident; use quote::ToTokens; use std::{ @@ -71,6 +72,7 @@ pub enum ConcRef { /// A reference to another record/type /// - Used coalescing different records of the same type to point to the same concrete record + /// /// INV: Not self-referential / no recursive types / no cycles Ref(Key>), } @@ -119,12 +121,40 @@ pub struct Data { pub type ScalarType = ConcRef;
+/// Used to label borrows that live as long as the database, or the window into the database. +/// - Specified here as they could be contained within user-specified types that emdb does not analyse. +pub const DB_LIFETIME_ID: &str = "db"; + +/// Used to label borrows that live as long as the reference provided to a query +/// - Specified here as they could be contained within user-specified types that emdb does not analyse. +/// - Allows for values to be borrowed temporarily and accessed from outside a query, for the duration +/// a database is not mutated +pub const QUERY_LIFETIME_ID: &str = "qy"; + +/// Used to label the additional info to consider for rust types +#[derive(PartialEq, Eq, Clone)] +pub enum TypeContext { + /// The type is used within a query context, so needs to have the lifetimes + /// of [`QUERY_LIFETIME_ID`] and [`DB_LIFETIME_ID`] present. + /// + /// The generated code needs to use type aliases like: + /// ``` + /// # type SomeRustTypeTokens = i32; + /// type TypeAlias<'db, 'qy> = SomeRustTypeTokens; + /// ``` + Query, + + /// The type is used in the data store, so does not have access to [`DB_LIFETIME_ID`], + /// and is not contained in any query, so no lifetimes can be applied + DataStore, } + #[derive(PartialEq, Eq, Clone)] pub enum ScalarTypeConc { /// A reference to a row in a table, allows the user to interact with row /// references while still allowing the backend to decide what they are. /// ``` - /// # use emdb::emql; + /// # use emdb_core::emql; /// # emql! { /// # table foos { x: i32 } /// # query foo_query() { @@ -135,15 +165,25 @@ pub enum ScalarTypeConc { /// # } /// ``` /// - Can use different types of references depending on the table implementation - /// chosen (e.g. key with generation, pointer, etc) + /// chosen (e.g. key with generation, pointer, etc) TableRef(Key), + /// A get value for a table member + /// - This is different from the rust type of a column because the backend + /// provided may use the actions on the table to optimise (e.g. to return a + /// reference rather than a copy) + /// - Allows the plan's type system to be backend agnostic + TableGet { + table: Key
, + field: RecordField, + }, + /// A collection of records in a container to be specified by the chosen /// backend. Allows the plan to express the type, without specifying its /// implementation. /// /// ``` - /// # use emdb::emql; + /// # use emdb_core::emql; /// # emql! { /// # table foos { x: i32 } /// # query foo_query() { @@ -156,7 +196,7 @@ pub enum ScalarTypeConc { /// ``` /// /// ``` - /// # use emdb::emql; + /// # use emdb_core::emql; /// # emql! { /// # table foos { x: i32 } /// # query foo_query() { @@ -173,10 +213,21 @@ pub enum ScalarTypeConc { Record(Key), /// A rust type propagated from the user - /// - Can be from the user's code (e.g. a library) + /// - Can be from the user's code (e.g. a library or user defined type) /// - Can be incorrect (need to propagate spans to backend for rustc to /// report) - Rust(Type), + /// + /// For types in a query context, we can consider the lifetimes of borrows + /// (for query), or gets (lifetime for entire database) + /// - Types can include the [`DB_LIFETIME_ID`] lifetime to signify they are tied to the + /// lifetime of the database (e.g. getting an immutable value), or the + /// window used for database access. + /// - Types can use the [`QUERY_LIFETIME_ID`] lifetime to signify they are tied to the + /// lifetime of the query. + /// + /// Note: The lifetimes are not applied outside of a query context, namely + /// in the types used for table members + Rust { type_context: TypeContext, ty: Type }, } /// Check two record types are equal. @@ -219,7 +270,7 @@ pub fn scalar_type_eq(lp: &Plan, t1: &Key, t2: &Key) -> ScalarTypeConc::Bag(r1) | ScalarTypeConc::Record(r1), ScalarTypeConc::Bag(r2) | ScalarTypeConc::Record(r2), ) => record_type_eq(lp, r1, r2), - (ScalarTypeConc::Rust(rt1), ScalarTypeConc::Rust(rt2)) => rt1 == rt2, + (ScalarTypeConc::Rust { ty: rt1, .. }, ScalarTypeConc::Rust { ty: rt2, .. }) => rt1 == rt2, _ => false, } } @@ -290,6 +341,10 @@ impl Plan { pub fn get_record_type_conc(&self, k: Key) -> &RecordConc { self.get_record_type(k).get_conc(&self.record_types) } + + pub fn get_record_conc_index(&self, k: Key) -> ImmKey<'_, RecordType> { + ImmKey::new(get_conc_index(&self.record_types, k), self) + } } impl Display for RecordField { @@ -352,7 +407,15 @@ impl<'a, 'b> Display for With<'a, &'b Key> { extended: r, } .fmt(f), - ScalarTypeConc::Rust(rt) => rt.to_token_stream().fmt(f), + ScalarTypeConc::Rust { ty, .. } => ty.to_token_stream().fmt(f), + ScalarTypeConc::TableGet { table, field } => { + write!( + f, + "get {} from {}", + field, + self.plan.tables.get(*table).unwrap().name + ) + } } } } diff --git a/crates/emdb/src/utils/choose.rs b/crates/emdb_core/src/utils/choose.rs similarity index 100% rename from crates/emdb/src/utils/choose.rs rename to crates/emdb_core/src/utils/choose.rs diff --git a/crates/emdb/src/utils/conster.rs b/crates/emdb_core/src/utils/conster.rs similarity index 99% rename from crates/emdb/src/utils/conster.rs rename to crates/emdb_core/src/utils/conster.rs index e3cb247..f9fd05f 100644 --- a/crates/emdb/src/utils/conster.rs +++ b/crates/emdb_core/src/utils/conster.rs @@ -69,7 +69,7 @@ macro_rules! 
conster { } pub(crate) use conster; -pub(crate) trait Const { +pub trait Const { fn val() -> T; } diff --git a/crates/emdb_core/src/utils/misc.rs b/crates/emdb_core/src/utils/misc.rs new file mode 100644 index 0000000..4046ca9 --- /dev/null +++ b/crates/emdb_core/src/utils/misc.rs @@ -0,0 +1,87 @@ +use std::{ + collections::{HashMap, HashSet, LinkedList}, + hash::Hash, +}; + +use proc_macro2::Span; +use syn::Ident; + +pub(crate) fn singlelist(item: T) -> LinkedList { + let mut list = LinkedList::new(); + list.push_back(item); + list +} + +pub(crate) fn result_to_opt(res: Result, errs: &mut LinkedList) -> Option { + match res { + Ok(o) => Some(o), + Err(e) => { + errs.push_back(e); + None + } + } +} + +pub struct PushMap<'brw, K, V> { + map: &'brw mut HashMap, + push_cnt: usize, +} + +impl<'brw, K: Hash + Eq, V> PushMap<'brw, K, V> { + pub fn new(map: &'brw mut HashMap) -> Self { + Self { map, push_cnt: 0 } + } + + pub fn push(&mut self, key: K, value: V) -> Option { + self.push_cnt += 1; + self.map.insert(key, value) + } + + pub fn count(&self) -> usize { + self.push_cnt + } +} + +pub struct PushSet<'brw, K> { + set: &'brw mut HashSet, + push_cnt: usize, +} + +impl<'brw, K: Hash + Eq> PushSet<'brw, K> { + pub fn new(set: &'brw mut HashSet) -> Self { + Self { set, push_cnt: 0 } + } + + pub fn push(&mut self, key: K) -> bool { + self.push_cnt += 1; + self.set.insert(key) + } + + pub fn count(&self) -> usize { + self.push_cnt + } +} + +pub fn new_id(id: &str) -> Ident { + Ident::new(id, Span::call_site()) +} + +pub struct PushVec<'brw, T> { + vec: &'brw mut Vec, + push_cnt: usize, +} + +impl<'brw, T> PushVec<'brw, T> { + pub fn new(vec: &'brw mut Vec) -> Self { + Self { vec, push_cnt: 0 } + } + + pub fn push(&mut self, item: T) { + self.push_cnt += 1; + self.vec.push(item) + } + + pub fn count(&self) -> usize { + self.push_cnt + } +} diff --git a/crates/emdb_core/src/utils/mod.rs b/crates/emdb_core/src/utils/mod.rs new file mode 100644 index 0000000..dae6bc7 --- /dev/null +++ b/crates/emdb_core/src/utils/mod.rs @@ -0,0 +1,5 @@ +pub mod choose; +pub mod conster; +pub mod misc; +pub mod on_off; +pub mod push; diff --git a/crates/emdb_core/src/utils/on_off.rs b/crates/emdb_core/src/utils/on_off.rs new file mode 100644 index 0000000..9ec3f33 --- /dev/null +++ b/crates/emdb_core/src/utils/on_off.rs @@ -0,0 +1,18 @@ +use combi::{ + core::{choice, mapsuc}, + macros::choices, + tokens::{ + basic::{gettoken, matchident, peekident}, + error::error, + TokenParser, + }, +}; +use proc_macro_error::{Diagnostic, Level}; + +pub fn on_off() -> impl TokenParser { + choices!( + peekident("on") => mapsuc(matchident("on"), |_| true), + peekident("off") => mapsuc(matchident("off"), |_| false), + otherwise => error(gettoken, |t| Diagnostic::spanned(t.span(), Level::Error, "Expected `on` or `off`".to_owned())) + ) +} diff --git a/crates/emdb_core/src/utils/push.rs b/crates/emdb_core/src/utils/push.rs new file mode 100644 index 0000000..c8becfe --- /dev/null +++ b/crates/emdb_core/src/utils/push.rs @@ -0,0 +1,169 @@ +//! ## Push only wrappers for [`HashSet`] and [`HashMap`] +//! allows the tracking of if the structures are mutated in different scopes, +//! while allowing all inserts to be sent to the same `&mut` data structure. 
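The push-only idea in a freestanding sketch: each scope records whether *it* pushed, while every push still lands in the one shared structure (simplified to a single level of scoping over a `Vec`, rather than the `HashMap`/`HashSet` traits below):

```rust
// A push-only view over a shared Vec that records whether this scope pushed.
struct Scope<'brw, T> {
    vec: &'brw mut Vec<T>,
    pushed: bool,
}

impl<'brw, T> Scope<'brw, T> {
    fn new(vec: &'brw mut Vec<T>) -> Self {
        Scope { vec, pushed: false }
    }
    fn push(&mut self, item: T) {
        self.pushed = true;  // mutation is tracked per scope...
        self.vec.push(item); // ...but lands in the shared vector
    }
}

fn main() {
    let mut items = Vec::new();
    {
        let mut scope = Scope::new(&mut items);
        scope.push(1);
        assert!(scope.pushed);
    }
    {
        let scope = Scope::new(&mut items);
        assert!(!scope.pushed); // this scope performed no mutation
    }
    assert_eq!(items, vec![1]);
}
```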
+ +use std::{ + collections::{HashMap, HashSet}, + hash::Hash, + marker::PhantomData, +}; + +pub trait PushMap { + fn scope(&mut self) -> impl PushMap + '_; + fn push(&mut self, key: K, value: V) -> Option; + fn pushed(&self) -> bool; +} + +pub struct PushMapConc { + map: HashMap, + pushed: bool, +} + +impl PushMapConc { + pub fn new(map: HashMap) -> Self { + Self { map, pushed: false } + } + + pub fn extract(self) -> HashMap { + self.map + } +} + +struct RefMap<'brw, K, V, PM: PushMap> { + map: &'brw mut PM, + pushed: bool, + phantom: PhantomData<(K, V)>, +} + +impl PushMap for PushMapConc { + fn scope(&mut self) -> impl PushMap + '_ { + RefMap { + map: self, + pushed: false, + phantom: PhantomData, + } + } + + fn push(&mut self, key: K, value: V) -> Option { + self.pushed = true; + self.map.insert(key, value) + } + + fn pushed(&self) -> bool { + self.pushed + } +} + +impl<'brw, K: Hash + Eq, V, PM: PushMap> PushMap for RefMap<'brw, K, V, PM> { + fn scope(&mut self) -> impl PushMap + '_ { + RefMap { + map: self, + pushed: false, + phantom: PhantomData, + } + } + + fn push(&mut self, key: K, value: V) -> Option { + self.pushed = true; + self.map.push(key, value) + } + + fn pushed(&self) -> bool { + self.pushed + } +} + +pub trait PushSet { + fn scope(&mut self) -> impl PushSet + '_; + fn push(&mut self, key: K) -> bool; + fn pushed(&self) -> bool; +} + +pub struct PushSetConc { + set: HashSet, + pushed: bool, +} + +impl PushSetConc { + pub fn new(set: HashSet) -> Self { + Self { set, pushed: false } + } + + pub fn extract(self) -> HashSet { + self.set + } +} + +struct RefSet<'brw, K, PS: PushSet> { + set: &'brw mut PS, + pushed: bool, + phantom: PhantomData, +} + +impl PushSet for PushSetConc { + fn scope(&mut self) -> impl PushSet + '_ { + RefSet { + set: self, + pushed: false, + phantom: PhantomData, + } + } + + fn push(&mut self, key: K) -> bool { + self.pushed = true; + self.set.insert(key) + } + + fn pushed(&self) -> bool { + self.pushed + } +} + +impl<'brw, K: Hash + Eq, PS: PushSet> PushSet for RefSet<'brw, K, PS> { + fn scope(&mut self) -> impl PushSet + '_ { + RefSet { + set: self, + pushed: false, + phantom: PhantomData, + } + } + + fn push(&mut self, key: K) -> bool { + self.pushed = true; + self.set.push(key) + } + + fn pushed(&self) -> bool { + self.pushed + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn check_pushmap() { + let mut push_map = PushMapConc::new(HashMap::new()); + assert!(!push_map.pushed()); + push_map.push(12, "hello"); + assert!(push_map.pushed()); + { + let mut push_map_1 = push_map.scope(); + assert!(!push_map_1.pushed()); + { + let mut push_map_2 = push_map_1.scope(); + assert!(!push_map_2.pushed()); + push_map_2.push(13, "world"); + assert!(push_map_2.pushed()); + }; + assert!(push_map_1.pushed()); + { + let mut push_map_2 = push_map_1.scope(); + assert!(!push_map_2.pushed()); + push_map_2.push(13, "world"); + assert!(push_map_2.pushed()); + }; + } + } +} diff --git a/crates/minister/Cargo.toml b/crates/minister/Cargo.toml new file mode 100644 index 0000000..97cb3d8 --- /dev/null +++ b/crates/minister/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "minister" +version = "0.1.0" +edition = "2021" + +readme = "README.md" +description = "A crate for operator abstractions" +keywords = ["performance", "traits"] +categories = ["data"] + +repository.workspace = true +homepage.workspace = true +license-file.workspace = true + +[dependencies] +rayon = "1.10.0" + +[dev-dependencies] +divan = { git = "https://github.com/OliverKillane/divan.git", 
branch = "enh/file-output" } + +[[bench]] +name = "iterators" +harness = false \ No newline at end of file
diff --git a/crates/minister/README.md b/crates/minister/README.md new file mode 100644 index 0000000..2c82306 --- /dev/null +++ b/crates/minister/README.md @@ -0,0 +1,8 @@ +## Minister +A library for generating tabular data structures, used to support emDB. + +## Operator Implementation +Provides a macro for generating traits to constrain implementations for operators +(making it easier to verify code generation relying on the operators). + +Provides several operator implementations.
diff --git a/crates/minister/benches/iterators.rs b/crates/minister/benches/iterators.rs new file mode 100644 index 0000000..cd524a5 --- /dev/null +++ b/crates/minister/benches/iterators.rs @@ -0,0 +1,63 @@ +//! ## A small example of iterators versus loops in Rust. +//! In this example we demonstrate both that Rust iterators can be optimised to +//! be as fast as loops (inlined, and converted to iteration). +//! +//! But also, due to in-place collection, iterators can be significantly faster than loops. +//! - See [the `in_place_collect.rs` source code](https://github.com/rust-lang/rust/blob/master/library/alloc/src/vec/in_place_collect.rs) +//! +//! When running `cargo bench` you will see that the loop version requires allocating +//! another vector, and deallocating the input vector. +//! +//! No allocations are performed in the iterator version. + +use divan; + +#[global_allocator] +static ALLOC: divan::AllocProfiler = divan::AllocProfiler::system(); + +fn apply(x: usize) -> usize { + x * 2 +} + +trait Operate { + fn op(values: Vec) -> Vec; } + struct Iters; impl Operate for Iters { + fn op(values: Vec) -> Vec { + values + .into_iter() + .map(apply) + .map(apply) + .map(apply) + .map(apply) + .collect() + } +} + +struct Loops; +impl Operate for Loops { + fn op(values: Vec) -> Vec { + let mut result = Vec::with_capacity(values.len()); + for item in values { + result.push(apply(apply(apply(apply(item))))); + } + result + } +} + +#[divan::bench( + name = "compare_loops_and_iterators", + types = [Iters, Loops], + consts = [1, 128, 8388608] +)] +fn comparison(bencher: divan::Bencher) { + bencher + .with_inputs(|| (0..SIZE).collect::>()) + .bench_local_values(|r| T::op(r)); +} + +fn main() { + divan::main() }
diff --git a/crates/minister/src/basic.rs b/crates/minister/src/basic.rs new file mode 100644 index 0000000..2eb240f --- /dev/null +++ b/crates/minister/src/basic.rs @@ -0,0 +1,329 @@ +#![allow(clippy::ptr_arg)] +use std::collections::HashMap; + +macro_rules! single { + ($data:ty) => { + $data + }; +} +macro_rules! stream { ($data:ty) => { Vec<$data> }; } +super::generate_minister_trait! { BasicOps } + +/// ## An extremely basic push operator implementation. +/// - Designed to be as correct as possible +/// - Simple implementation pushes values between vectors +/// - No extra wrapping - it is literally just vectors +/// +/// This implementation is easy to understand, and very clearly correct. 
+pub struct Basic; + +impl BasicOps for Basic { + fn consume_stream(iter: impl Iterator) -> stream!(Data) + where + Data: Send + Sync, + { + iter.collect() + } + + fn consume_buffer(buff: Vec) -> stream!(Data) + where + Data: Send + Sync, + { + buff + } + + fn consume_single(data: Data) -> single!(Data) + where + Data: Send + Sync, + { + data + } + + fn export_stream(stream: stream!(Data)) -> impl Iterator + where + Data: Send + Sync, + { + stream.into_iter() + } + + fn export_buffer(stream: stream!(Data)) -> Vec + where + Data: Send + Sync, + { + stream + } + fn export_single(single: single!(Data)) -> Data + where + Data: Send + Sync, + { + single + } + + fn error_stream( + stream: stream!(Result), + ) -> Result + where + Data: Send + Sync, + Error: Send + Sync, + { + stream.into_iter().collect::>() + } + + fn error_single( + single: single!(Result), + ) -> Result + where + Data: Send + Sync, + Error: Send + Sync, + { + single + } + + fn map( + stream: stream!(InData), + mapping: impl Fn(InData) -> OutData + Send + Sync, + ) -> stream!(OutData) + where + InData: Send + Sync, + OutData: Send + Sync, + { + stream.into_iter().map(mapping).collect() + } + + fn map_seq( + stream: stream!(InData), + mapping: impl FnMut(InData) -> OutData, + ) -> stream!(OutData) + where + InData: Send + Sync, + OutData: Send + Sync, + { + stream.into_iter().map(mapping).collect() + } + + fn map_single( + single: single!(InData), + mapping: impl FnOnce(InData) -> OutData, + ) -> single!(OutData) + where + InData: Send + Sync, + OutData: Send + Sync, + { + (mapping)(single) + } + + fn filter( + stream: stream!(Data), + predicate: impl Fn(&Data) -> bool + Send + Sync, + ) -> stream!(Data) + where + Data: Send + Sync, + { + stream.into_iter().filter(|data| predicate(data)).collect() + } + + fn all( + stream: stream!(Data), + predicate: impl Fn(&Data) -> bool + Send + Sync, + ) -> (bool, stream!(Data)) + where + Data: Send + Sync, + { + for data in &stream { + if !predicate(data) { + return (false, stream); + } + } + (true, stream) + } + + fn is(single: single!(Data), predicate: impl Fn(&Data) -> bool) -> (bool, single!(Data)) + where + Data: Send + Sync, + { + (predicate(&single), single) + } + + fn count(stream: stream!(Data)) -> single!(usize) + where + Data: Send + Sync, + { + stream.len() + } + + fn fold( + stream: stream!(InData), + initial: Acc, + fold_fn: impl Fn(Acc, InData) -> Acc, + ) -> single!(Acc) + where + InData: Send + Sync, + Acc: Send + Sync, + { + let mut acc = initial; + for data in stream { + acc = fold_fn(acc, data); + } + acc + } + + fn combine( + stream: stream!(Data), + alternative: Data, + combiner: impl Fn(Data, Data) -> Data + Send + Sync, + ) -> single!(Data) + where + Data: Send + Sync + Clone, + { + stream.into_iter().reduce(combiner).unwrap_or(alternative) + } + + fn sort( + mut stream: stream!(Data), + ordering: impl Fn(&Data, &Data) -> std::cmp::Ordering + Send + Sync, + ) -> stream!(Data) + where + Data: Send + Sync, + { + stream.sort_unstable_by(ordering); + stream + } + + fn take(mut stream: stream!(Data), n: usize) -> stream!(Data) + where + Data: Send + Sync, + { + stream.truncate(n); + stream + } + + fn group_by( + stream: stream!(Data), + split: impl Fn(Data) -> (Key, Rest), + ) -> stream!((Key, stream!(Rest))) + where + Data: Send + Sync, + Key: Eq + std::hash::Hash + Send + Sync, + Rest: Send + Sync, + { + let mut groups = HashMap::new(); + for data in stream { + let (k, r) = split(data); + groups.entry(k).or_insert_with(Vec::new).push(r); + } + 
groups.into_iter().collect() + } + + fn cross_join( + left: stream!(LeftData), + right: stream!(RightData), + ) -> stream!((LeftData, RightData)) + where + LeftData: Clone + Send + Sync, + RightData: Clone + Send + Sync, + { + let mut result = Vec::with_capacity(left.len() * right.len()); + for l in left { + for r in &right { + result.push((l.clone(), r.clone())); + } + } + result + } + + /// A very basic optimisation is to hash the smaller side of the join. + fn equi_join( + left: stream!(LeftData), + right: stream!(RightData), + left_split: impl Fn(&LeftData) -> &Key + Send + Sync, + right_split: impl Fn(&RightData) -> &Key + Send + Sync, + ) -> stream!((LeftData, RightData)) + where + Key: Eq + std::hash::Hash + Send + Sync, + LeftData: Clone + Send + Sync, + RightData: Clone + Send + Sync, + { + let mut results = Vec::with_capacity(left.len() * right.len()); + if left.len() < right.len() { + let mut lefts = HashMap::with_capacity(left.len()); + for l in &left { + lefts.entry(left_split(l)).or_insert_with(Vec::new).push(l); + } + for r in right { + if let Some(ls) = lefts.get(right_split(&r)) { + for l in ls { + results.push(((*l).clone(), r.clone())) + } + } + } + } else { + let mut rights = HashMap::with_capacity(right.len()); + for r in &right { + rights + .entry(right_split(r)) + .or_insert_with(Vec::new) + .push(r); + } + for l in left { + if let Some(rs) = rights.get(left_split(&l)) { + for r in rs { + results.push((l.clone(), (*r).clone())) + } + } + } + } + results + } + + fn predicate_join( + left: stream!(LeftData), + right: stream!(RightData), + pred: impl Fn(&LeftData, &RightData) -> bool + Send + Sync, + ) -> stream!((LeftData, RightData)) + where + LeftData: Clone + Send + Sync, + RightData: Clone + Send + Sync, + { + let mut results = Vec::with_capacity(left.len() * right.len()); + for l in &left { + for r in &right { + if pred(l, r) { + results.push((l.clone(), r.clone())); + } + } + } + results + } + + fn union(mut left: stream!(Data), right: stream!(Data)) -> stream!(Data) + where + Data: Send + Sync, + { + left.extend(right); + left + } + + fn fork(stream: stream!(Data)) -> (stream!(Data), stream!(Data)) + where + Data: Clone + Send + Sync, + { + (stream.clone(), stream) + } + + fn fork_single(single: single!(Data)) -> (single!(Data), single!(Data)) + where + Data: Clone + Send + Sync, + { + (single.clone(), single) + } + + fn split( + stream: stream!((LeftData, RightData)), + ) -> (stream!(LeftData), stream!(RightData)) + where + LeftData: Send + Sync, + RightData: Send + Sync, + { + stream.into_iter().unzip() + } +} diff --git a/crates/minister/src/chunk.rs b/crates/minister/src/chunk.rs new file mode 100644 index 0000000..5f884bf --- /dev/null +++ b/crates/minister/src/chunk.rs @@ -0,0 +1,410 @@ +#![allow(clippy::ptr_arg)] +use rayon::{current_num_threads, prelude::*}; +use std::collections::HashMap; + +macro_rules! single { + ($data:ty) => { + $data + }; +} +macro_rules! stream { ($data:ty) => { Vec> }; } +super::generate_minister_trait! { ChunkOps } + +/// ## A Slow (😒) parallel operator implementation that splits streams into chunks of data. 
+pub struct Chunk; + +fn split_chunks( + data_size: usize, + mut input_data: impl Iterator, +) -> Vec> { + let num_cores = current_num_threads(); + let chunk_size = data_size / num_cores; + let mut output_data = Vec::with_capacity(num_cores + 1); + + if let Some(first) = input_data.next() { + let mut first_vec = Vec::with_capacity(chunk_size); + first_vec.push(first); + output_data.push(first_vec); + + let mut current_chunk_size = 1; + for data in input_data { + if current_chunk_size == chunk_size { + let mut next_vec = Vec::with_capacity(chunk_size); + next_vec.push(data); + output_data.push(next_vec); + // restart the count for the newly started chunk (`data` is its first + // element) - without this reset only the first boundary is ever hit, + // and all remaining elements land in the second chunk + current_chunk_size = 1; + } else { + output_data.last_mut().unwrap().push(data); + current_chunk_size += 1; + } + } + output_data + } else { + debug_assert_eq!(data_size, 0); + output_data + } +} + +fn merge_chunks(chunks: Vec>) -> Vec { + chunks.into_iter().flat_map(|v| v.into_iter()).collect() +} + +impl ChunkOps for Chunk { + fn consume_stream(iter: impl Iterator) -> stream!(Data) + where + Data: Send + Sync, + { + let data = iter.collect::>(); + split_chunks(data.len(), data.into_iter()) + } + + fn consume_buffer(buff: Vec) -> stream!(Data) + where + Data: Send + Sync, + { + split_chunks(buff.len(), buff.into_iter()) + } + + fn consume_single(data: Data) -> single!(Data) + where + Data: Send + Sync, + { + data + } + + fn export_stream(stream: stream!(Data)) -> impl Iterator + where + Data: Send + Sync, + { + stream.into_iter().flatten() + } + + fn export_buffer(stream: stream!(Data)) -> Vec + where + Data: Send + Sync, + { + stream.into_iter().flatten().collect() + } + fn export_single(single: single!(Data)) -> Data + where + Data: Send + Sync, + { + single + } + + fn error_stream( + stream: stream!(Result), + ) -> Result + where + Data: Send + Sync, + Error: Send + Sync, + { + let mut output = Vec::with_capacity(stream.len()); + for substream in stream { + output.push(substream.into_iter().collect::, _>>()?); + } + Ok(output) + } + + fn error_single( + single: single!(Result), + ) -> Result + where + Data: Send + Sync, + Error: Send + Sync, + { + single + } + + fn map( + stream: stream!(InData), + mapping: impl Fn(InData) -> OutData + Send + Sync, + ) -> stream!(OutData) + where + InData: Send + Sync, + OutData: Send + Sync, + { + stream + .into_par_iter() + .map(|v| v.into_iter().map(&mapping).collect()) + .collect() + } + + fn map_seq( + stream: stream!(InData), + mut mapping: impl FnMut(InData) -> OutData, + ) -> stream!(OutData) + where + InData: Send + Sync, + OutData: Send + Sync, + { + let mut output = Vec::with_capacity(stream.len()); + for substream in stream { + let mut out_substream = Vec::with_capacity(substream.len()); + for data in substream { + out_substream.push(mapping(data)); + } + output.push(out_substream) + } + output + } + + fn map_single( + single: single!(InData), + mapping: impl FnOnce(InData) -> OutData, + ) -> single!(OutData) + where + InData: Send + Sync, + OutData: Send + Sync, + { + (mapping)(single) + } + + fn filter( + stream: stream!(Data), + predicate: impl Fn(&Data) -> bool + Send + Sync, + ) -> stream!(Data) + where + Data: Send + Sync, + { + stream + .into_par_iter() + .map(|v: Vec| v.into_iter().filter(&predicate).collect()) + .collect() + } + + fn all( + stream: stream!(Data), + predicate: impl Fn(&Data) -> bool + Send + Sync, + ) -> (bool, stream!(Data)) + where + Data: Send + Sync, + { + (stream.par_iter().all(|v| v.iter().all(&predicate)), stream) + } + + fn is(single: single!(Data), predicate: impl Fn(&Data) -> bool) -> (bool, single!(Data)) + where + Data: Send + 
Sync, + { + (predicate(&single), single) + } + + fn count(stream: stream!(Data)) -> single!(usize) + where + Data: Send + Sync, + { + stream.into_par_iter().map(|v| v.len()).sum() + } + + fn fold( + stream: stream!(InData), + initial: Acc, + fold_fn: impl Fn(Acc, InData) -> Acc, + ) -> single!(Acc) + where + InData: Send + Sync, + Acc: Send + Sync, + { + let mut acc = initial; + for substream in stream { + for data in substream { + acc = fold_fn(acc, data); + } + } + acc + } + + fn combine( + stream: stream!(Data), + alternative: Data, + combiner: impl Fn(Data, Data) -> Data + Send + Sync, + ) -> single!(Data) + where + Data: Send + Sync + Clone, + { + stream + .into_par_iter() + .filter_map(|v| v.into_iter().reduce(&combiner)) + .reduce(|| alternative.clone(), &combiner) + } + + fn sort( + stream: stream!(Data), + ordering: impl Fn(&Data, &Data) -> std::cmp::Ordering + Send + Sync, + ) -> stream!(Data) + where + Data: Send + Sync, + { + let mut data = stream + .into_iter() + .flat_map(|v| v.into_iter()) + .collect::>(); + data.par_sort_unstable_by(ordering); + split_chunks(data.len(), data.into_iter()) + } + + fn take(stream: stream!(Data), n: usize) -> stream!(Data) + where + Data: Send + Sync, + { + let mut data = merge_chunks(stream); + data.truncate(n); + split_chunks(data.len(), data.into_iter()) + } + + fn group_by( + stream: stream!(Data), + split: impl Fn(Data) -> (Key, Rest), + ) -> stream!((Key, stream!(Rest))) + where + Data: Send + Sync, + Key: Eq + std::hash::Hash + Send + Sync, + Rest: Send + Sync, + { + let mut groups = HashMap::new(); + for substream in stream { + for data in substream { + let (k, r) = split(data); + groups.entry(k).or_insert_with(Vec::new).push(r); + } + } + split_chunks( + groups.len(), + groups + .into_iter() + .map(|(k, v)| (k, split_chunks(v.len(), v.into_iter()))), + ) + } + + fn cross_join( + left: stream!(LeftData), + right: stream!(RightData), + ) -> stream!((LeftData, RightData)) + where + LeftData: Clone + Send + Sync, + RightData: Clone + Send + Sync, + { + left.into_par_iter() + .map(|ls| { + let mut v = Vec::new(); + for l in ls { + for rs in &right { + for r in rs { + v.push((l.clone(), r.clone())) + } + } + } + v + }) + .collect::>() + } + + /// A very basic optimisation is to hash the smaller side of the join. 
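The "hash the smaller side" strategy, sketched standalone over `(key, value)` pairs rather than the chunked `stream!` types: build a hash table on the smaller input, probe with the larger one (function and names here are illustrative, not the minister API):

```rust
use std::collections::HashMap;

fn equi_join<'a>(
    left: Vec<(u32, &'a str)>,
    right: Vec<(u32, &'a str)>,
) -> Vec<(&'a str, &'a str)> {
    // Build on the smaller side so the hash table stays small.
    let (build, probe, build_is_left) = if left.len() <= right.len() {
        (left, right, true)
    } else {
        (right, left, false)
    };

    let mut table: HashMap<u32, Vec<&str>> = HashMap::with_capacity(build.len());
    for (k, v) in build {
        table.entry(k).or_default().push(v);
    }

    let mut out = Vec::new();
    for (k, pv) in probe {
        if let Some(bvs) = table.get(&k) {
            for bv in bvs {
                // Keep (left, right) order regardless of which side was built.
                out.push(if build_is_left { (*bv, pv) } else { (pv, *bv) });
            }
        }
    }
    out
}

fn main() {
    let left = vec![(1, "a"), (2, "b")];
    let right = vec![(1, "x"), (1, "y"), (3, "z")];
    assert_eq!(equi_join(left, right), vec![("a", "x"), ("a", "y")]);
}
```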
+    fn equi_join(
+        left: stream!(LeftData),
+        right: stream!(RightData),
+        left_split: impl Fn(&LeftData) -> &Key + Send + Sync,
+        right_split: impl Fn(&RightData) -> &Key + Send + Sync,
+    ) -> stream!((LeftData, RightData))
+    where
+        Key: Eq + std::hash::Hash + Send + Sync,
+        LeftData: Clone + Send + Sync,
+        RightData: Clone + Send + Sync,
+    {
+        // NOTE: Not optimised at all, but does maintain balance of chunk sizes
+        let left = merge_chunks(left);
+        let right = merge_chunks(right);
+        let mut results = Vec::with_capacity(left.len() * right.len());
+        if left.len() < right.len() {
+            let mut lefts = HashMap::with_capacity(left.len());
+            for l in &left {
+                lefts.entry(left_split(l)).or_insert_with(Vec::new).push(l);
+            }
+            for r in right {
+                if let Some(ls) = lefts.get(right_split(&r)) {
+                    for l in ls {
+                        results.push(((*l).clone(), r.clone()))
+                    }
+                }
+            }
+        } else {
+            let mut rights = HashMap::with_capacity(right.len());
+            for r in &right {
+                rights
+                    .entry(right_split(r))
+                    .or_insert_with(Vec::new)
+                    .push(r);
+            }
+            for l in left {
+                if let Some(rs) = rights.get(left_split(&l)) {
+                    for r in rs {
+                        results.push((l.clone(), (*r).clone()))
+                    }
+                }
+            }
+        }
+        split_chunks(results.len(), results.into_iter())
+    }
+
+    fn predicate_join(
+        left: stream!(LeftData),
+        right: stream!(RightData),
+        pred: impl Fn(&LeftData, &RightData) -> bool + Send + Sync,
+    ) -> stream!((LeftData, RightData))
+    where
+        LeftData: Clone + Send + Sync,
+        RightData: Clone + Send + Sync,
+    {
+        // NOTE: Can unbalance the chunk sizes
+        left.into_par_iter()
+            .map(|ls| {
+                let mut v = Vec::new();
+                for l in ls {
+                    for rs in &right {
+                        for r in rs {
+                            if pred(&l, r) {
+                                v.push((l.clone(), r.clone()))
+                            }
+                        }
+                    }
+                }
+                v
+            })
+            .collect::>()
+    }
+
+    fn union(mut left: stream!(Data), right: stream!(Data)) -> stream!(Data)
+    where
+        Data: Send + Sync,
+    {
+        left.extend(right);
+        left
+    }
+
+    fn fork(stream: stream!(Data)) -> (stream!(Data), stream!(Data))
+    where
+        Data: Clone + Send + Sync,
+    {
+        (stream.clone(), stream)
+    }
+
+    fn fork_single(single: single!(Data)) -> (single!(Data), single!(Data))
+    where
+        Data: Clone + Send + Sync,
+    {
+        (single.clone(), single)
+    }
+
+    fn split(
+        stream: stream!((LeftData, RightData)),
+    ) -> (stream!(LeftData), stream!(RightData))
+    where
+        LeftData: Send + Sync,
+        RightData: Send + Sync,
+    {
+        stream
+            .into_iter()
+            .map(|inner| inner.into_iter().unzip())
+            .unzip()
+    }
+}
diff --git a/crates/minister/src/iter.rs b/crates/minister/src/iter.rs
new file mode 100644
index 0000000..1b92b9f
--- /dev/null
+++ b/crates/minister/src/iter.rs
@@ -0,0 +1,363 @@
+use std::collections::HashMap;
+
+macro_rules! single {
+    ($data:ty) => {
+        $data
+    };
+}
+macro_rules! stream { ($data:ty) => { impl Iterator }; }
+super::generate_minister_trait! { IterOps }
+
+/// ## Rust Iterator based Operators
+/// Implements a hybrid push-pull operator model.
+/// - **Pull** Uses lazily evaluated rust iterators as the stream type
+/// - **Push** All single values are strictly evaluated, similarly for buffering
+///   operations.
+///
+/// While rust iterators implement a lazily evaluated pull model at a high level, they do not suffer
+/// from the repeated `.next()` calls and option checking in release builds.
+///
+/// In fact, due to in-place collection, iterators can be faster than loops. (See the `iterators` benchmark).
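+///
+/// For example (an assumed illustration, not part of this crate's API), a chained
+/// pipeline compiles to one fused loop over the input rather than one pass per operator:
+/// ```
+/// let out: Vec<i32> = (0..100).map(|x| x * 2).filter(|x| x % 3 == 0).collect();
+/// assert_eq!(out.len(), 34);
+/// ```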
+/// +/// ## Interesting Reads +/// - [Comparing Performance: Loops vs Iterators](https://doc.rust-lang.org/book/ch13-04-performance.html) +pub struct Iter; + +const ASSUME_SIZE: usize = 1024; +fn get_size(left: Option, right: Option) -> usize { + left.unwrap_or(ASSUME_SIZE) * right.unwrap_or(ASSUME_SIZE) +} +fn get_side_size(hint: Option) -> usize { + hint.unwrap_or(ASSUME_SIZE) +} + +impl IterOps for Iter { + fn consume_stream(iter: impl Iterator) -> stream!(Data) + where + Data: Send + Sync, + { + iter + } + + fn consume_buffer(buff: Vec) -> stream!(Data) + where + Data: Send + Sync, + { + buff.into_iter() + } + + fn consume_single(data: Data) -> single!(Data) + where + Data: Send + Sync, + { + data + } + + fn export_stream(stream: stream!(Data)) -> impl Iterator + where + Data: Send + Sync, + { + stream + } + + fn export_buffer(stream: stream!(Data)) -> Vec + where + Data: Send + Sync, + { + stream.collect::>() + } + + fn export_single(single: single!(Data)) -> Data + where + Data: Send + Sync, + { + single + } + + fn error_stream( + stream: stream!(Result), + ) -> Result + where + Data: Send + Sync, + Error: Send + Sync, + { + stream.collect::, _>>().map(Vec::into_iter) + } + + fn error_single( + single: single!(Result), + ) -> Result + where + Data: Send + Sync, + Error: Send + Sync, + { + single + } + + fn map( + stream: stream!(InData), + mapping: impl Fn(InData) -> OutData + Send + Sync, + ) -> stream!(OutData) + where + InData: Send + Sync, + OutData: Send + Sync, + { + stream.map(mapping) + } + + fn map_seq( + stream: stream!(InData), + mapping: impl FnMut(InData) -> OutData, + ) -> stream!(OutData) + where + InData: Send + Sync, + OutData: Send + Sync, + { + stream.map(mapping) + } + + fn map_single( + single: single!(InData), + mapping: impl FnOnce(InData) -> OutData, + ) -> single!(OutData) + where + InData: Send + Sync, + OutData: Send + Sync, + { + mapping(single) + } + + fn filter( + stream: stream!(Data), + predicate: impl Fn(&Data) -> bool + Send + Sync, + ) -> stream!(Data) + where + Data: Send + Sync, + { + stream.filter(predicate) + } + + fn all( + stream: stream!(Data), + predicate: impl Fn(&Data) -> bool + Send + Sync, + ) -> (bool, stream!(Data)) + where + Data: Send + Sync, + { + let vals = stream.collect::>(); + (vals.iter().all(predicate), vals.into_iter()) + } + + fn is(single: single!(Data), predicate: impl Fn(&Data) -> bool) -> (bool, single!(Data)) + where + Data: Send + Sync, + { + (predicate(&single), single) + } + + fn count(stream: stream!(Data)) -> single!(usize) + where + Data: Send + Sync, + { + stream.count() + } + + fn fold( + stream: stream!(InData), + initial: Acc, + fold_fn: impl Fn(Acc, InData) -> Acc, + ) -> single!(Acc) + where + InData: Send + Sync, + Acc: Send + Sync, + { + stream.fold(initial, fold_fn) + } + + fn combine( + stream: stream!(Data), + alternative: Data, + combiner: impl Fn(Data, Data) -> Data + Send + Sync, + ) -> single!(Data) + where + Data: Send + Sync + Clone, + { + stream.reduce(combiner).unwrap_or(alternative) + } + + fn sort( + stream: stream!(Data), + ordering: impl Fn(&Data, &Data) -> std::cmp::Ordering + Send + Sync, + ) -> stream!(Data) + where + Data: Send + Sync, + { + let mut data = stream.collect::>(); + data.sort_unstable_by(ordering); + data.into_iter() + } + + fn take(stream: stream!(Data), n: usize) -> stream!(Data) + where + Data: Send + Sync, + { + stream.take(n) + } + + fn group_by( + stream: stream!(Data), + split: impl Fn(Data) -> (Key, Rest), + ) -> stream!((Key, stream!(Rest))) + where + Data: Send + 
Sync, + Key: Eq + std::hash::Hash + Send + Sync, + Rest: Send + Sync, + { + let mut groups = HashMap::new(); + for data in stream { + let (k, r) = split(data); + groups.entry(k).or_insert_with(Vec::new).push(r); + } + groups.into_iter().map(|(k, v)| (k, v.into_iter())) + } + + fn cross_join( + left: stream!(LeftData), + right: stream!(RightData), + ) -> stream!((LeftData, RightData)) + where + LeftData: Clone + Send + Sync, + RightData: Clone + Send + Sync, + { + let right_vals = right.collect::>(); + let mut result = + Vec::with_capacity(right_vals.len() * left.size_hint().1.unwrap_or(ASSUME_SIZE)); + for l in left { + for r in &right_vals { + result.push((l.clone(), r.clone())); + } + } + result.into_iter() + } + + fn equi_join( + left: stream!(LeftData), + right: stream!(RightData), + left_split: impl Fn(&LeftData) -> &Key + Send + Sync, + right_split: impl Fn(&RightData) -> &Key + Send + Sync, + ) -> stream!((LeftData, RightData)) + where + Key: Eq + std::hash::Hash + Send + Sync, + LeftData: Clone + Send + Sync, + RightData: Clone + Send + Sync, + { + match (left.size_hint().1, right.size_hint().1) { + (Some(left_size), Some(right_size)) if left_size < right_size => { + let mut results = Vec::with_capacity(left_size * right_size); + let mut lefts = HashMap::with_capacity(left_size); + let left = left.collect::>(); + for l in &left { + lefts.entry(left_split(l)).or_insert_with(Vec::new).push(l); + } + for r in right { + if let Some(ls) = lefts.get(right_split(&r)) { + for l in ls { + results.push(((*l).clone(), r.clone())) + } + } + } + results.into_iter() + } + (left_size, right_size) => { + let mut results = Vec::with_capacity(get_size(left_size, right_size)); + let mut rights = HashMap::with_capacity(get_side_size(right_size)); + let right = right.collect::>(); + for r in &right { + rights + .entry(right_split(r)) + .or_insert_with(Vec::new) + .push(r); + } + for l in left { + if let Some(rs) = rights.get(left_split(&l)) { + for r in rs { + results.push((l.clone(), (*r).clone())) + } + } + } + results.into_iter() + } + } + } + + fn predicate_join( + left: stream!(LeftData), + right: stream!(RightData), + pred: impl Fn(&LeftData, &RightData) -> bool + Send + Sync, + ) -> stream!((LeftData, RightData)) + where + LeftData: Clone + Send + Sync, + RightData: Clone + Send + Sync, + { + match (left.size_hint().1, right.size_hint().1) { + (Some(left_size), Some(right_size)) if left_size < right_size => { + let left = left.collect::>(); + let mut results = Vec::with_capacity(left_size * right_size); + for r in right { + for l in &left { + if pred(l, &r) { + results.push((l.clone(), r.clone())); + } + } + } + results.into_iter() + } + (_, right_size) => { + let right = right.collect::>(); + let mut results = Vec::with_capacity(get_side_size(right_size)); + for l in left { + for r in &right { + if pred(&l, r) { + results.push((l.clone(), r.clone())); + } + } + } + results.into_iter() + } + } + } + + fn union(left: stream!(Data), right: stream!(Data)) -> stream!(Data) + where + Data: Send + Sync, + { + left.chain(right) + } + + fn fork(stream: stream!(Data)) -> (stream!(Data), stream!(Data)) + where + Data: Clone + Send + Sync, + { + let data = stream.collect::>(); + let data2 = data.clone(); + (data.into_iter(), data2.into_iter()) + } + + fn fork_single(single: single!(Data)) -> (single!(Data), single!(Data)) + where + Data: Clone + Send + Sync, + { + (single.clone(), single) + } + + fn split( + stream: stream!((LeftData, RightData)), + ) -> (stream!(LeftData), stream!(RightData)) + where + 
LeftData: Send + Sync,
+        RightData: Send + Sync,
+    {
+        let (left, right): (Vec<_>, Vec<_>) = stream.unzip();
+        (left.into_iter(), right.into_iter())
+    }
+}
diff --git a/crates/minister/src/lib.rs b/crates/minister/src/lib.rs
new file mode 100644
index 0000000..ed6d173
--- /dev/null
+++ b/crates/minister/src/lib.rs
@@ -0,0 +1,233 @@
+//! # Minister
+//! A library for implementing stream operators. Used by `emDB` for the physical
+//! implementation of operators.
+//!
+//! > **Note**
+//! > The [parallel] and [chunk] implementations are not optimised & should not be used.
+//! > [iter] is the best performing.
+
+pub mod basic;
+pub mod chunk;
+pub mod iter;
+pub mod parallel;
+
+/// ## Minister Trait Generation
+/// In order to ensure different operator implementations are correct (important for
+/// emDB's code generation), a single interface for Operators is needed.
+///
+/// The requirements are as follows:
+/// 1. A simplified set of operators that are composable for implementing higher level emDB operations.
+/// 2. To allow operators to execute on data in parallel.
+/// 3. To allow operators to define their own types for streams and single values.
+///
+/// Ordinarily this would be satisfied by a single `trait Operator {}`; however, the
+/// requirement to allow implementations to define their own stream types includes defining streams
+/// as any implementation of a trait.
+///
+/// For example, the [iter::Iter] backend uses streams as `impl Iterator`.
+///
+/// To implement as a trait would require being able to define an associated item that is either a
+/// type or a trait.
+/// - This work is at RFC stage as part of the [Impl trait Initiative](https://rust-lang.github.io/impl-trait-initiative/)
+///
+/// Hence instead we generate a trait, substituting the types using other macros (`single!` and `stream!`).
+/// - The `single!` and `stream!` macros need to be defined in the same scope as the trait.
+/// ```
+/// # trait Thunk { type Item; }
+/// # trait ThunkIterator { type Item; }
+/// # use minister::generate_minister_trait;
+/// macro_rules! single { ($data:ty) => { impl Thunk }; }
+/// macro_rules! stream { ($data:ty) => { impl ThunkIterator }; }
+/// generate_minister_trait! { LazyOps }
+/// ```
+///
+/// ## Operator Types
+/// While the operator pattern supported appears to be push based, pull based operators can also be
+/// supported by pushing a lazily evaluated stream.
+/// - While [basic::Basic] is a traditional pull-based operator, [iter::Iter] is sort-of-pull based (with
+///   some pipeline breakage for expanding errors, and notably the ability of the rust compiler to
+///   combine/inline the operations from a pull).
+/// - A fully lazy 'iterators of thunks' implementation is also possible with this pattern.
+///
+/// The push-like pattern makes code generation significantly easier, especially when emDB supports
+/// plans that are DAGs (operators can pull data from and push to any number of sources).
+/// - This is also discussed by [snowflake](https://info.snowflake.net/rs/252-RFO-227/images/Snowflake_SIGMOD.pdf)
+///   as an advantage.
+///
+/// ### Interesting Reads
+/// - [Justin Jaffray: Push vs Pull](https://justinjaffray.com/query-engines-push-vs.-pull/)
+/// - [snowflake paper](https://info.snowflake.net/rs/252-RFO-227/images/Snowflake_SIGMOD.pdf)
+/// - [Push vs Pull-Based Loop Fusion in Query Engines](https://arxiv.org/pdf/1610.09166)
+#[macro_export]
+macro_rules!
generate_minister_trait { + ($trait_name:ident) => { + pub trait $trait_name { + fn consume_stream(iter: impl Iterator) -> stream!(Data) + where + Data: Send + Sync; + fn consume_buffer(buff: Vec) -> stream!(Data) + where + Data: Send + Sync; + fn consume_single(data: Data) -> single!(Data) + where + Data: Send + Sync; + + fn export_stream(stream: stream!(Data)) -> impl Iterator + where + Data: Send + Sync; + fn export_buffer(stream: stream!(Data)) -> Vec + where + Data: Send + Sync; + fn export_single(single: single!(Data)) -> Data + where + Data: Send + Sync; + + fn error_stream( + stream: stream!(Result), + ) -> Result + where + Data: Send + Sync, + Error: Send + Sync; + + fn error_single( + single: single!(Result), + ) -> Result + where + Data: Send + Sync, + Error: Send + Sync; + + fn map( + stream: stream!(InData), + mapping: impl Fn(InData) -> OutData + Send + Sync, + ) -> stream!(OutData) + where + InData: Send + Sync, + OutData: Send + Sync; + + fn map_seq( + stream: stream!(InData), + mapping: impl FnMut(InData) -> OutData, + ) -> stream!(OutData) + where + InData: Send + Sync, + OutData: Send + Sync; + + fn map_single( + single: single!(InData), + mapping: impl FnOnce(InData) -> OutData, + ) -> single!(OutData) + where + InData: Send + Sync, + OutData: Send + Sync; + + fn filter( + stream: stream!(Data), + predicate: impl Fn(&Data) -> bool + Send + Sync, + ) -> stream!(Data) + where + Data: Send + Sync; + + fn all( + stream: stream!(Data), + predicate: impl Fn(&Data) -> bool + Send + Sync, + ) -> (bool, stream!(Data)) + where + Data: Send + Sync; + + fn is( + single: single!(Data), + predicate: impl Fn(&Data) -> bool, + ) -> (bool, single!(Data)) + where + Data: Send + Sync; + + fn count(stream: stream!(Data)) -> single!(usize) + where + Data: Send + Sync; + + fn fold( + stream: stream!(InData), + initial: Acc, + fold_fn: impl Fn(Acc, InData) -> Acc, + ) -> single!(Acc) + where + InData: Send + Sync, + Acc: Send + Sync; + + fn combine( + stream: stream!(Data), + alternative: Data, + combiner: impl Fn(Data, Data) -> Data + Send + Sync, + ) -> single!(Data) + where + Data: Send + Sync + Clone; + + fn sort( + stream: stream!(Data), + ordering: impl Fn(&Data, &Data) -> std::cmp::Ordering + Send + Sync, + ) -> stream!(Data) + where + Data: Send + Sync; + + fn take(stream: stream!(Data), n: usize) -> stream!(Data) + where + Data: Send + Sync; + + fn group_by( + stream: stream!(Data), + split: impl Fn(Data) -> (Key, Rest), + ) -> stream!((Key, stream!(Rest))) + where + Data: Send + Sync, + Key: Eq + std::hash::Hash + Send + Sync, + Rest: Send + Sync; + + fn cross_join( + left: stream!(LeftData), + right: stream!(RightData), + ) -> stream!((LeftData, RightData)) + where + LeftData: Clone + Send + Sync, + RightData: Clone + Send + Sync; + + fn equi_join( + left: stream!(LeftData), + right: stream!(RightData), + left_split: impl Fn(&LeftData) -> &Key + Send + Sync, + right_split: impl Fn(&RightData) -> &Key + Send + Sync, + ) -> stream!((LeftData, RightData)) + where + Key: Eq + std::hash::Hash + Send + Sync, + LeftData: Clone + Send + Sync, + RightData: Clone + Send + Sync; + + fn predicate_join( + left: stream!(LeftData), + right: stream!(RightData), + pred: impl Fn(&LeftData, &RightData) -> bool + Send + Sync, + ) -> stream!((LeftData, RightData)) + where + LeftData: Clone + Send + Sync, + RightData: Clone + Send + Sync; + + fn union(left: stream!(Data), right: stream!(Data)) -> stream!(Data) + where + Data: Send + Sync; + + fn fork(stream: stream!(Data)) -> (stream!(Data), 
stream!(Data)) + where + Data: Clone + Send + Sync; + + fn fork_single(single: single!(Data)) -> (single!(Data), single!(Data)) + where + Data: Clone + Send + Sync; + + fn split( + stream: stream!((LeftData, RightData)), + ) -> (stream!(LeftData), stream!(RightData)) + where + LeftData: Send + Sync, + RightData: Send + Sync; + } + }; +} diff --git a/crates/minister/src/parallel.rs b/crates/minister/src/parallel.rs new file mode 100644 index 0000000..3051068 --- /dev/null +++ b/crates/minister/src/parallel.rs @@ -0,0 +1,350 @@ +use rayon::prelude::*; +use std::collections::HashMap; + +macro_rules! single { + ($data:ty) => { + $data + }; +} +macro_rules! stream { ($data:ty) => { impl ParallelIterator }; } +super::generate_minister_trait! { ParallelOps } + +/// ## A very slow (😒) but maximally parallel implementation with [rayon] +/// - Every single operation that can be made a task is sent to the thread pool (massive +/// contention, and overhead for small tasks). +pub struct Parallel; + +impl ParallelOps for Parallel { + fn consume_stream(iter: impl Iterator) -> stream!(Data) + where + Data: Send + Sync, + { + // TODO: Specialise for Range iterator (for which we can efficiently convert directly) + iter.collect::>().into_par_iter() + } + + fn consume_buffer(buff: Vec) -> stream!(Data) + where + Data: Send + Sync, + { + buff.into_par_iter() + } + + fn consume_single(data: Data) -> single!(Data) + where + Data: Send + Sync, + { + data + } + + fn export_stream(stream: stream!(Data)) -> impl Iterator + where + Data: Send + Sync, + { + stream.collect::>().into_iter() + } + + fn export_buffer(stream: stream!(Data)) -> Vec + where + Data: Send + Sync, + { + stream.collect::>() + } + + fn export_single(single: single!(Data)) -> Data + where + Data: Send + Sync, + { + single + } + + fn error_stream( + stream: stream!(Result), + ) -> Result + where + Data: Send + Sync, + Error: Send + Sync, + { + Ok(stream.collect::, _>>()?.into_par_iter()) + } + + fn error_single( + single: single!(Result), + ) -> Result + where + Data: Send + Sync, + Error: Send + Sync, + { + single + } + + fn map( + stream: stream!(InData), + mapping: impl Fn(InData) -> OutData + Send + Sync, + ) -> stream!(OutData) + where + InData: Send + Sync, + OutData: Send + Sync, + { + stream.map(mapping) + } + + fn map_seq( + stream: stream!(InData), + mapping: impl FnMut(InData) -> OutData, + ) -> stream!(OutData) + where + InData: Send + Sync, + OutData: Send + Sync, + { + // Cannot work in parallel here - mutating data structures! 
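+        // An `FnMut` closure may carry state that it mutates on every call (e.g. a
+        // counter), so the calls must be made one at a time, in order, on a single
+        // thread; only the already-computed results are handed back to rayon.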
+ let data = stream.collect::>(); + data.into_iter() + .map(mapping) + .collect::>() + .into_par_iter() + } + + fn map_single( + single: single!(InData), + mapping: impl FnOnce(InData) -> OutData, + ) -> single!(OutData) + where + InData: Send + Sync, + OutData: Send + Sync, + { + (mapping)(single) + } + + fn filter( + stream: stream!(Data), + predicate: impl Fn(&Data) -> bool + Send + Sync, + ) -> stream!(Data) + where + Data: Send + Sync, + { + stream.filter(predicate) + } + + fn all( + stream: stream!(Data), + predicate: impl Fn(&Data) -> bool + Send + Sync, + ) -> (bool, stream!(Data)) + where + Data: Send + Sync, + { + let vals = stream.collect::>(); + let res = vals.par_iter().all(predicate); + (res, vals.into_par_iter()) + } + + fn is(single: single!(Data), predicate: impl Fn(&Data) -> bool) -> (bool, single!(Data)) + where + Data: Send + Sync, + { + (predicate(&single), single) + } + + fn count(stream: stream!(Data)) -> single!(usize) + where + Data: Send + Sync, + { + stream.count() + } + + fn fold( + stream: stream!(InData), + initial: Acc, + fold_fn: impl Fn(Acc, InData) -> Acc, + ) -> single!(Acc) + where + InData: Send + Sync, + Acc: Send + Sync, + { + let mut acc = initial; + for data in stream.collect::>() { + acc = fold_fn(acc, data); + } + acc + } + + fn combine( + stream: stream!(Data), + alternative: Data, + combiner: impl Fn(Data, Data) -> Data + Send + Sync, + ) -> single!(Data) + where + Data: Send + Sync + Clone, + { + stream.reduce(|| alternative.clone(), combiner) + } + + fn sort( + stream: stream!(Data), + ordering: impl Fn(&Data, &Data) -> std::cmp::Ordering + Send + Sync, + ) -> stream!(Data) + where + Data: Send + Sync, + { + let mut data = stream.collect::>(); + data.par_sort_unstable_by(ordering); + data.into_par_iter() + } + + fn take(stream: stream!(Data), n: usize) -> stream!(Data) + where + Data: Send + Sync, + { + let mut values = stream.collect::>(); + values.truncate(n); + values.into_par_iter() + } + + fn group_by( + stream: stream!(Data), + split: impl Fn(Data) -> (Key, Rest), + ) -> stream!((Key, stream!(Rest))) + where + Data: Send + Sync, + Key: Eq + std::hash::Hash + Send + Sync, + Rest: Send + Sync, + { + // can improve parallelism + let mut groups = HashMap::new(); + for data in stream.collect::>() { + let (k, r) = split(data); + groups.entry(k).or_insert_with(Vec::new).push(r); + } + groups.into_par_iter().map(|(k, v)| (k, v.into_par_iter())) + } + + fn cross_join( + left: stream!(LeftData), + right: stream!(RightData), + ) -> stream!((LeftData, RightData)) + where + LeftData: Clone + Send + Sync, + RightData: Clone + Send + Sync, + { + let left = left.collect::>(); + right + .map(|r| left.par_iter().map(move |l| (l.clone(), r.clone()))) + .flatten() + .collect::>() + .into_par_iter() + } + + fn equi_join( + left: stream!(LeftData), + right: stream!(RightData), + left_split: impl Fn(&LeftData) -> &Key + Send + Sync, + right_split: impl Fn(&RightData) -> &Key + Send + Sync, + ) -> stream!((LeftData, RightData)) + where + Key: Eq + std::hash::Hash + Send + Sync, + LeftData: Clone + Send + Sync, + RightData: Clone + Send + Sync, + { + let left = left.collect::>(); + let right = right.collect::>(); + if left.len() < right.len() { + let mut lefts = HashMap::with_capacity(left.len()); + for l in &left { + lefts.entry(left_split(l)).or_insert_with(Vec::new).push(l); + } + + right + .into_par_iter() + .filter_map(|r| { + lefts.get(right_split(&r)).map(|ls| { + ls.par_iter() + .map(|l| ((*l).clone(), r.clone())) + .collect::>() + .into_par_iter() + }) 
+                })
+                .flatten()
+                .collect::>()
+                .into_par_iter()
+        } else {
+            let mut rights = HashMap::with_capacity(right.len());
+            for r in &right {
+                rights
+                    .entry(right_split(r))
+                    .or_insert_with(Vec::new)
+                    .push(r);
+            }
+            left.into_par_iter()
+                .filter_map(|l| {
+                    rights.get(left_split(&l)).map(|rs| {
+                        rs.par_iter()
+                            .map(|r| (l.clone(), (*r).clone()))
+                            .collect::>()
+                            .into_par_iter()
+                    })
+                })
+                .flatten()
+                .collect::>()
+                .into_par_iter()
+        }
+    }
+
+    fn predicate_join(
+        left: stream!(LeftData),
+        right: stream!(RightData),
+        pred: impl Fn(&LeftData, &RightData) -> bool + Send + Sync,
+    ) -> stream!((LeftData, RightData))
+    where
+        LeftData: Clone + Send + Sync,
+        RightData: Clone + Send + Sync,
+    {
+        let left = left.collect::>();
+        right
+            .map(|r| {
+                let pred2 = &pred;
+                left.par_iter().filter_map(move |l| {
+                    if (pred2)(l, &r) {
+                        Some((l.clone(), r.clone()))
+                    } else {
+                        None
+                    }
+                })
+            })
+            .flatten()
+            .collect::>()
+            .into_par_iter()
+    }
+
+    fn union(left: stream!(Data), right: stream!(Data)) -> stream!(Data)
+    where
+        Data: Send + Sync,
+    {
+        left.chain(right)
+    }
+
+    fn fork(stream: stream!(Data)) -> (stream!(Data), stream!(Data))
+    where
+        Data: Clone + Send + Sync,
+    {
+        let (left, right): (Vec<_>, Vec<_>) = stream.map(|d| (d.clone(), d)).unzip();
+        (left.into_par_iter(), right.into_par_iter())
+    }
+
+    fn fork_single(single: single!(Data)) -> (single!(Data), single!(Data))
+    where
+        Data: Clone + Send + Sync,
+    {
+        (single.clone(), single)
+    }
+
+    fn split(
+        stream: stream!((LeftData, RightData)),
+    ) -> (stream!(LeftData), stream!(RightData))
+    where
+        LeftData: Send + Sync,
+        RightData: Send + Sync,
+    {
+        let (left, right): (Vec<_>, Vec<_>) = stream.map(|(l, r)| (l, r)).unzip();
+        (left.into_par_iter(), right.into_par_iter())
+    }
+}
diff --git a/crates/pulpit/Cargo.toml b/crates/pulpit/Cargo.toml
new file mode 100644
index 0000000..e644782
--- /dev/null
+++ b/crates/pulpit/Cargo.toml
@@ -0,0 +1,46 @@
+[package]
+name = "pulpit"
+version = "0.1.0"
+edition = "2021"
+
+readme = "README.md"
+description = "A library for generating table data structures"
+keywords = ["performance", "traits", "arenas"]
+categories = ["data"]
+
+repository.workspace = true
+homepage.workspace = true
+license-file.workspace = true
+
+[lints.rust]
+unexpected_cfgs = { level = "warn", check-cfg = ['cfg(kani)'] }
+
+[dependencies]
+proc-macro2 = "1.0"
+proc-macro-error = "1.0.4"
+syn = { version = "2.0.45", features = ["full", "extra-traits"] }
+quote = "1.0.33"
+combi = { path = "../combi" }
+typed-generational-arena = "0.2"
+thunderdome = "0.6.1"
+enumtrait = { path = "../enumtrait" }
+pulpit_gen = { path = "../pulpit_gen" }
+pulpit_macro = { path = "../pulpit_macro" }
+assume = "0.5.0"
+
+[dev-dependencies]
+divan = { git = "https://github.com/OliverKillane/divan.git", branch = "enh/file-output" }
+trybuild = "1.0.91"
+glob = "0.3.1"
+
+[[bench]]
+name = "rc_vs_brw"
+harness = false
+
+[[bench]]
+name = "col_vs_tup"
+harness = false
+
+[[bench]]
+name = "pull_arena"
+harness = false
\ No newline at end of file
diff --git a/crates/pulpit/README.md b/crates/pulpit/README.md
new file mode 100644
index 0000000..5c501c2
--- /dev/null
+++ b/crates/pulpit/README.md
@@ -0,0 +1,153 @@
+# Pulpit
+A library for generating tabular data structures, used to support emDB.
+
+Includes both the underlying data structures, the code generation and the macro interface.
+
+## Table Structure
+![alt text](./docs/table_structure.drawio.svg)
+
+Pulpit allows for both entirely nary (one `primary`), entirely decomposed (each column in a different associated), and combinations in between.
+- Indexes supported are currently limited to just the `Unique` index, kept separately from the data storage.
+- Depending on the requirement for transactions, and for the deletion operation, the table structure can be chosen to improve performance.
+- Tables differentiate between a mutable and immutable section for each row. This allows optimisations such as returning references into the table for immutable data.
+- Tables are accessed through a safe interface (either macro generated, or directly through a `primary`)
+
+## Table Windows
+In order to bind the lifetimes of immutable borrows from inside the table data structure to the lifetime for which the table is not moved, a `window` into the table must be built to interact through.
+A full explanation is included in [`crate::value`].
+```rust
+use pulpit::column::*;
+let mut table = PrimaryRetain::::new(1024);
+{
+    let mut window = table.window();
+    // window (and returned references) are valid for this scope
+}
+let mut window_2 = table.window();
+// ...
+```
+
+## Macro Interface
+Macros to generate table implementations (using associateds, with indexes, tracked with a transaction log) are included.
+
+```rust
+#[allow(dead_code)]
+#[derive(Clone)]
+enum RGB {
+    Red,
+    Green,
+    Blue,
+}
+
+pulpit::macros::simple! {
+    fields {
+        name: String,
+        id: usize @ unique(unique_reference_number),
+        age: u8,
+        fav_rgb_colour: crate::RGB,
+    },
+    updates {
+        update_age: [age],
+    },
+    predicates {
+        adults_only: *age > 18,
+        age_cap: *age < 100,
+    },
+    limit {
+        cool_limit: 2000
+    },
+    transactions: on,
+    deletions: on,
+    name: bowling_club
+}
+
+fn main() {
+    // We generate a basic table, and open a window into it
+    let mut x = bowling_club::Table::new(1024);
+    let mut w = x.window();
+
+    // We can then insert some data, which is checked against the predicates and unique constraints
+    let bill_key = w
+        .insert(bowling_club::insert::Insert {
+            id: 0,
+            fav_rgb_colour: RGB::Blue,
+            name: String::from("Bill Bob"),
+            age: 50,
+        })
+        .unwrap();
+
+    // We can also update the data using the update method we described in the macro
+    w.update_age(
+        bowling_club::updates::update_age::Update { age: 51 },
+        bill_key,
+    )
+    .unwrap();
+
+    // The count is correct
+    assert_eq!(w.count(), 1);
+
+    // By committing the data, it can no longer be easily rolled back
+    w.commit();
+
+    // We try with another insert, however the age constraint is breached, so it fails
+    let fred_insert = w.insert(bowling_club::insert::Insert {
+        id: 1,
+        fav_rgb_colour: RGB::Red,
+        name: String::from("Fred Dey"),
+        age: 101,
+    });
+    assert!(matches!(
+        fred_insert,
+        Err(bowling_club::insert::Error::age_cap)
+    ));
+
+    // With an updated age we can now insert
+    let fred_key = w
+        .insert(bowling_club::insert::Insert {
+            id: 1,
+            fav_rgb_colour: RGB::Red,
+            name: String::from("Fred Dey"),
+            age: 30,
+        })
+        .unwrap();
+
+    // We can grab data from the table: as a retaining arena is used for the table, and we do not
+    // update the names, we can pull references to the names that live as long as `w` (the window)
+    let names = vec![w.get(fred_key).unwrap().name, w.get(bill_key).unwrap().name];
+
+    // After deciding fred is not so cool, we roll back and un-insert him
+    assert_eq!(w.count(), 2);
+    w.abort();
+    assert_eq!(w.count(), 1);
+
+    // While the mutable data for the table is removed, the names are still valid & safely
accessible
+    // by these references until the window is destroyed.
+    println!("{} and {}", names[0], names[1]);
+
+    // we can hence discover that fred is no longer present by trying to get his reference_number
+    assert!(matches!(
+        w.unique_reference_number(&1),
+        Err(bowling_club::unique::NotFound)
+    ));
+}
+```
+
+## Language Limitations
+This implementation could be radically simplified with variadic generics.
+- Would allow the column types inside tables to be expressed without macros
+- Would allow the coupling of associated tables with a primary to be expressed without macros.
+
+But alas, it has been stuck in several closed RFCs such as [this one from 2013](https://github.com/rust-lang/rust/issues/10124).
+
+For now we have struct-generating macros like pulpit's and `tuple_impl_for(..)`.
+
+## Potential Improvements
+1. Improving performance by specifying invariants (in particular on rollback, when re-accessing indices) using [`std::hint`].
+2. Adding a table that clears dropped (referenced) data when a window is dropped.
+3. Adding a sorted index.
+4. Adding special associated columns for sets (repetitions of the same object), and indexes
+5. Fixing the lack of macro-provided errors for no fields, or duplicate or nonexistent columns in updates
+6. Use the `ImmGet` type for the `ImmPull` in pullable columns.
+
+## Related Work
+Similar ideas for generating multi-index, in-memory data structures.
+- [Boost Multi-index](https://www.boost.org/doc/libs/1_85_0/libs/multi_index/)
diff --git a/crates/pulpit/benches/append_arena.rs b/crates/pulpit/benches/append_arena.rs
new file mode 100644
index 0000000..d776e65
--- /dev/null
+++ b/crates/pulpit/benches/append_arena.rs
@@ -0,0 +1,2 @@
+//! A comparison of a pulpit-based generational arena against a conventional
+//! generational arena.
diff --git a/crates/pulpit/benches/col_vs_tup.rs b/crates/pulpit/benches/col_vs_tup.rs
new file mode 100644
index 0000000..10a0e88
--- /dev/null
+++ b/crates/pulpit/benches/col_vs_tup.rs
@@ -0,0 +1,170 @@
+//! Compare access performance from separate vectors versus a single vector.
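+//!
+//! This is the struct-of-arrays vs array-of-structs trade-off (a sketch of the two
+//! layouts compared; the names here are illustrative, the benchmark uses `Decomp` and
+//! `Tuple`):
+//! ```
+//! // Decomposed: each column is its own contiguous allocation, so scanning a single
+//! // column touches the minimum amount of memory.
+//! struct Decomposed { a: Vec<u64>, b: Vec<u64> }
+//! // Interleaved: fields are stored together, so whole-row access is a single index.
+//! struct Interleaved { data: Vec<(u64, u64)> }
+//! ```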
+use divan; + +trait Access +where + A: Clone, + B: Clone, +{ + fn new(size_hint: usize) -> Self; + fn get_all(&self, ind: usize) -> Option<(&A, &B)>; + fn get_a(&self, ind: usize) -> Option<&A>; + fn get_b(&self, ind: usize) -> Option<&B>; + fn put(&mut self, ind: usize, val: (A, B)) -> bool; + fn append(&mut self, val: (A, B)) -> usize; +} + +struct Decomp { + a_data: Vec, + b_data: Vec, +} + +impl Access for Decomp +where + A: Clone, + B: Clone, +{ + fn new(size_hint: usize) -> Self { + Self { + a_data: Vec::with_capacity(size_hint), + b_data: Vec::with_capacity(size_hint), + } + } + + fn get_all(&self, ind: usize) -> Option<(&A, &B)> { + if let Some(a) = self.a_data.get(ind) { + unsafe { + let b = self.b_data.get_unchecked(ind); + Some((a, b)) + } + } else { + None + } + } + + fn put(&mut self, ind: usize, (a, b): (A, B)) -> bool { + if let Some(a_r) = self.a_data.get_mut(ind) { + unsafe { + let b_r = self.b_data.get_unchecked_mut(ind); + *a_r = a; + *b_r = b; + true + } + } else { + false + } + } + + fn append(&mut self, (a, b): (A, B)) -> usize { + let next_ind = self.a_data.len(); + self.a_data.push(a); + self.b_data.push(b); + next_ind + } + + fn get_a(&self, ind: usize) -> Option<&A> { + self.a_data.get(ind) + } + + fn get_b(&self, ind: usize) -> Option<&B> { + self.b_data.get(ind) + } +} + +struct Tuple { + data: Vec<(A, B)>, +} + +impl Access for Tuple +where + A: Clone, + B: Clone, +{ + fn new(size_hint: usize) -> Self { + Self { + data: Vec::with_capacity(size_hint), + } + } + + fn get_all(&self, ind: usize) -> Option<(&A, &B)> { + self.data.get(ind).map(|(a, b)| (a, b)) + } + + fn put(&mut self, ind: usize, val: (A, B)) -> bool { + if let Some(d) = self.data.get_mut(ind) { + *d = val; + true + } else { + false + } + } + + fn append(&mut self, val: (A, B)) -> usize { + let ind = self.data.len(); + self.data.push(val); + ind + } + + fn get_a(&self, ind: usize) -> Option<&A> { + self.data.get(ind).map(|(a, _)| a) + } + + fn get_b(&self, ind: usize) -> Option<&B> { + self.data.get(ind).map(|(_, b)| b) + } +} + +#[divan::bench( + name="Comparing push performance of tuple vs decomp", + types=[Tuple, Decomp], +)] +fn push_vals>(bencher: divan::Bencher) { + const ITERS: usize = 100000; + bencher.bench_local(|| { + let mut v = V::new(ITERS); + for i in 0..ITERS { + v.append((i, i)); + } + divan::black_box_drop(v) + }) +} + +#[divan::bench( + name="Comparing get all performance of tuple vs decomp", + types=[Tuple, Decomp], +)] +fn get_vals>(bencher: divan::Bencher) { + const ITERS: usize = 100000; + let mut v = V::new(ITERS); + for i in 0..ITERS { + v.append((i, i)); + } + bencher.bench_local(|| { + for i in 0..ITERS { + divan::black_box_drop(v.get_all(i)); + } + }); + divan::black_box_drop(v) +} + +#[divan::bench( + name="Comparing get just a column performance of tuple vs decomp", + types=[Tuple, Decomp], +)] +fn get_a_vals>(bencher: divan::Bencher) { + const ITERS: usize = 10000000; + let mut v = V::new(ITERS); + for i in 0..ITERS { + v.append((i, i)); + } + bencher.bench_local(|| { + for i in 0..ITERS { + divan::black_box_drop(v.get_a(i)); + } + }); + divan::black_box_drop(v) +} + +fn main() { + divan::main() +} diff --git a/crates/pulpit/benches/pull_arena.rs b/crates/pulpit/benches/pull_arena.rs new file mode 100644 index 0000000..a3b3bdc --- /dev/null +++ b/crates/pulpit/benches/pull_arena.rs @@ -0,0 +1,161 @@ +/// Compare the performance of generational arenas +/// Here we compare insert, followed by sequential access. 
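+///
+/// The generational idea under comparison (a sketch, not pulpit's actual types): each
+/// key carries the generation of the slot it was issued for, so keys into freed and
+/// reused slots can be rejected.
+/// ```
+/// struct Key { index: usize, generation: u32 }
+/// // A lookup is valid only while slot.generation == key.generation; deleting a row
+/// // bumps the slot's generation, invalidating old keys.
+/// ```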
+use divan; +use pulpit::column::{ + Column, Data, Entry, PrimaryGenerationalArena, PrimaryRetain, PrimaryThunderDome, + PrimaryWindow, PrimaryWindowPull, +}; + +/// Sequential insert & access. assumes the user needs to get a value lasting longer than a borrow. +fn workload(to_insert: Vec>) +where + Col: Column, + for<'a> Col::WindowKind<'a>: PrimaryWindowPull<'a, ImmData, MutData>, +{ + let mut col = Col::new(to_insert.len()); + let mut vals = Vec::with_capacity(to_insert.len()); + let mut win = col.window(); + + for val in to_insert { + let (key, _) = win.insert(val); + let Entry { index: _, data } = win.get(key).unwrap(); + vals.push(data); + } + + divan::black_box_drop(win); + divan::black_box_drop(vals); + divan::black_box_drop(col); +} + +/// Sequential insert & access, measures only the borrow access. +fn borrow_only_workload(to_insert: Vec>) +where + Col: Column, + for<'a> Col::WindowKind<'a>: PrimaryWindowPull<'a, ImmData, MutData>, +{ + let mut col = Col::new(to_insert.len()); + let mut win = col.window(); + + for val in to_insert { + let (key, _) = win.insert(val); + divan::black_box_drop(win.brw(key)); + } + + divan::black_box_drop(win); + divan::black_box_drop(col); +} + +#[divan::bench( + name="Basic workload to get (immutable: String, mutable: usize)", + types=[ + PrimaryGenerationalArena, + PrimaryThunderDome, + PrimaryRetain + ], + consts=[64,512,4096], +)] +fn bench_workload(bencher: divan::Bencher) +where + Col: Column, + for<'a> Col::WindowKind<'a>: PrimaryWindowPull<'a, String, usize>, +{ + const ELEMENTS: usize = 100000; + bencher + .counter(divan::counter::ItemsCount::new(ELEMENTS)) + .with_inputs(|| { + let x: Vec> = (0..ELEMENTS) + .map(|i| Data { + imm_data: "a".repeat(STRING_LEN), + mut_data: i, + }) + .collect(); + x + }) + .bench_values(|v: Vec>| workload::(v)) +} + +#[divan::bench( + name="Basic workload to brw (immutable: String, mutable: usize)", + types=[ + PrimaryGenerationalArena, + PrimaryThunderDome, + PrimaryRetain + ], + )] +fn bench_workload_brw(bencher: divan::Bencher) +where + Col: Column, + for<'a> Col::WindowKind<'a>: PrimaryWindowPull<'a, String, usize>, +{ + const STRING_LEN: usize = 128; + const ELEMENTS: usize = 100000; + bencher + .counter(divan::counter::ItemsCount::new(ELEMENTS)) + .with_inputs(|| { + (0..ELEMENTS) + .map(|i| Data { + imm_data: "a".repeat(STRING_LEN), + mut_data: i, + }) + .collect() + }) + .bench_values(|v| borrow_only_workload::(v)) +} + +#[divan::bench( + name="Comparing a workload with no immutable advantage", + types=[ + PrimaryGenerationalArena, + PrimaryThunderDome, + PrimaryRetain + ], +)] +fn bench_workload_no_imm(bencher: divan::Bencher) +where + Col: Column, + for<'a> Col::WindowKind<'a>: PrimaryWindowPull<'a, usize, usize>, +{ + const ELEMENTS: usize = 100000; + bencher + .counter(divan::counter::ItemsCount::new(ELEMENTS)) + .with_inputs(|| { + (0..ELEMENTS) + .map(|i| Data { + imm_data: i, + mut_data: i, + }) + .collect() + }) + .bench_values(|v| workload::(v)) +} + +#[divan::bench( + name="Comparing a workload of zero size types", + types=[ + PrimaryGenerationalArena<(), ()>, + PrimaryThunderDome<(), ()>, + PrimaryRetain<(), (), 1024> + ], +)] +fn bench_workload_zero_size(bencher: divan::Bencher) +where + Col: Column, + for<'a> Col::WindowKind<'a>: PrimaryWindowPull<'a, (), ()>, +{ + const ELEMENTS: usize = 100000; + bencher + .counter(divan::counter::ItemsCount::new(ELEMENTS)) + .with_inputs(|| { + (0..ELEMENTS) + .map(|_| Data { + imm_data: (), + mut_data: (), + }) + .collect() + }) + .bench_values(|v| 
workload::<(), (), Col>(v)) +} + +fn main() { + divan::main() +} diff --git a/crates/pulpit/benches/rc_vs_brw.rs b/crates/pulpit/benches/rc_vs_brw.rs new file mode 100644 index 0000000..77788b1 --- /dev/null +++ b/crates/pulpit/benches/rc_vs_brw.rs @@ -0,0 +1,96 @@ +//! # Benchmarking Reference Counting vs Borrowing +//! The core advantage of the more complex [`pulpit::value`] wrappers is to avoid +//! the need to do reference counting and separate allocations. +//! +//! Here we check the actual cost of reference counting over borrowing for +//! placement and read. + +use divan; +use std::{ops::Deref, rc::Rc}; +type Contained = [u8; 100]; + +trait RcRefCmp { + type Value<'a>: Deref + where + Self: 'a; + + fn push(&mut self, val: Contained); + fn new(vals: Vec) -> Self; + fn get(&self, ind: usize) -> Self::Value<'_>; +} + +struct Refs { + data: Vec, +} + +impl RcRefCmp for Refs { + type Value<'a> = &'a Contained; + + fn new(vals: Vec) -> Self { + Refs { data: vals } + } + + fn get(&self, ind: usize) -> Self::Value<'_> { + &self.data[ind] + } + + fn push(&mut self, val: Contained) { + self.data.push(val); + } +} + +struct Rcs { + data: Vec>, +} + +impl RcRefCmp for Rcs { + type Value<'a> = Rc; + + fn new(vals: Vec) -> Self { + Rcs { + data: vals.into_iter().map(Rc::new).collect(), + } + } + + fn get(&self, ind: usize) -> Rc { + self.data[ind].clone() + } + + fn push(&mut self, val: Contained) { + self.data.push(Rc::new(val)); + } +} + +#[divan::bench( + name="Scanning values", + types=[Rcs, Refs], + consts=[10,100,1000], +)] +fn get_vals(bencher: divan::Bencher) { + let data = (0..SIZE).map(|_| [0; 100]).collect(); + let rcs = C::new(data); + bencher.bench_local(|| { + for i in 0..SIZE { + divan::black_box_drop(rcs.get(i)); + } + }) +} + +#[divan::bench( + name="Pushing values", + types=[Rcs, Refs], + consts=[10,100,1000], +)] +fn push_vals(bencher: divan::Bencher) { + bencher.bench_local(|| { + let mut rcs = C::new(Vec::with_capacity(SIZE)); + for _ in 0..SIZE { + rcs.push([0; 100]); + } + divan::black_box_drop(rcs); + }) +} + +fn main() { + divan::main() +} diff --git a/crates/pulpit/docs/table_structure.drawio.svg b/crates/pulpit/docs/table_structure.drawio.svg new file mode 100644 index 0000000..982c18b --- /dev/null +++ b/crates/pulpit/docs/table_structure.drawio.svg @@ -0,0 +1,192 @@ + + + + + + + + + +
+[table_structure.drawio.svg: diagram of the pulpit table structure, with boxes for "Key", "Primary", "Associated", "Indexes", "Other Data", and "Metadata". Annotations: "Associated columns are indexed by entry (no generation data) with no bounds checks"; "Primary contains generation metadata (such as generation, or row hiding)"; "Updates with tables, allow access to keys". Full SVG markup omitted.]
\ No newline at end of file
diff --git a/crates/pulpit/src/access/mod.rs b/crates/pulpit/src/access/mod.rs
new file mode 100644
index 0000000..0bc08d1
--- /dev/null
+++ b/crates/pulpit/src/access/mod.rs
@@ -0,0 +1,4 @@
+//! Additional indexes for access to table keys.
+
+mod unique;
+pub use unique::*;
diff --git a/crates/pulpit/src/access/unique.rs b/crates/pulpit/src/access/unique.rs
new file mode 100644
index 0000000..50facf2
--- /dev/null
+++ b/crates/pulpit/src/access/unique.rs
@@ -0,0 +1,101 @@
+//! ## Storage of a unique index
+//! Allows for fast access, as well as checking unique constraints.
+//!
+//! ## Efficient Design
+//! An O(1) set of `Wrapper`, which borrows from the table on access.
+//! - No extra data kept
+//! - Can simply access and if valid, convert value to the contained key
+//! - Efficient recompute of the index on update, simply pass index to update.
+//!
+//! The same benefits for immutability as in [`crate::column`] can be applied here.
+//!
+//! ## Correct Design
+//! To save dev time: just `get` the value (cringe but easy).

+use std::{collections::HashMap, hash::Hash};
+
+#[derive(Debug)]
+pub struct MissingUniqueValue;
+#[derive(Debug)]
+pub struct UniqueConflict;
+
+/// A simple wrapper for storing copies of keys and associated unique values in
+/// an index.
+pub struct Unique {
+    mapping: HashMap,
+}
+
+impl Unique {
+    pub fn new(size_hint: usize) -> Self {
+        Self {
+            mapping: HashMap::with_capacity(size_hint),
+        }
+    }
+
+    pub fn lookup(&self, value: &Field) -> Result {
+        match self.mapping.get(value) {
+            Some(k) => Ok(*k),
+            None => Err(MissingUniqueValue),
+        }
+    }
+
+    // TODO: avoid copies
+    pub fn insert(&mut self, field: Field, key: Key) -> Result<(), UniqueConflict> {
+        match self.mapping.insert(field.clone(), key) {
+            Some(old_key) => {
+                *self.mapping.get_mut(&field).unwrap() = old_key;
+                Err(UniqueConflict)
+            }
+            None => Ok(()),
+        }
+    }
+
+    pub fn pull(&mut self, field: &Field) -> Result<(), MissingUniqueValue> {
+        match self.mapping.remove(field) {
+            Some(_) => Ok(()),
+            None => Err(MissingUniqueValue),
+        }
+    }
+
+    /// At the given key, with the given old value, replace it with the new value in `to_insert`.
+    /// - Errors on unique conflict.
+    /// - Otherwise returns the old value.
+    pub fn replace(
+        &mut self,
+        to_insert: &Field,
+        replace: &Field,
+        key: Key,
+    ) -> Result {
+        if to_insert == replace {
+            Ok(replace.clone())
+        } else {
+            let (old_val, old_key) = self.mapping.remove_entry(replace).unwrap();
+            debug_assert!(old_key == key, "Keys for replace do not match");
+
+            match self.mapping.insert(to_insert.clone(), key) {
+                Some(_) => {
+                    *self.mapping.get_mut(to_insert).unwrap() = old_key;
+                    Err(UniqueConflict)
+                }
+                None => Ok(old_val),
+            }
+        }
+    }
+
+    // Replace the old (successfully inserted) value (no copy required)
+    pub fn undo_replace(&mut self, old_val: Field, update: &Field, key: Key) {
+        self.mapping.remove(update).unwrap();
+        let res = self.mapping.insert(old_val, key);
+        debug_assert!(res.is_none(), "Undo replace failed");
+    }
+}
+
+/*
+if !self.additionals..replace(&update.fields, &) {
+}
+
+// to reverse
+self.additionals..replace(&mut , &update.fields)
+
+*/
diff --git a/crates/pulpit/src/column/assoc_blocks.rs b/crates/pulpit/src/column/assoc_blocks.rs
new file mode 100644
index 0000000..0303194
--- /dev/null
+++ b/crates/pulpit/src/column/assoc_blocks.rs
@@ -0,0 +1,181 @@
+use super::*;
+// TODO: optimise by keeping end pointer immediately available
+
+/// An associated, append-only [`Column`] that stores mutable and immutable data together in
+/// blocks, and provides stable references to the immutable part.
+pub struct AssocBlocks {
+    blocks: utils::Blocks, BLOCK_SIZE>,
+}
+
+impl Column
+    for AssocBlocks
+{
+    type WindowKind<'imm> = Window<'imm, AssocBlocks>
+    where
+        Self: 'imm;
+
+    fn new(size_hint: usize) -> Self {
+        AssocBlocks {
+            blocks: utils::Blocks::new(size_hint),
+        }
+    }
+
+    fn window(&mut self) -> Self::WindowKind<'_> {
+        Window { inner: self }
+    }
+}
+
+impl<'imm, ImmData, MutData, const BLOCK_SIZE: usize> AssocWindow<'imm, ImmData, MutData>
+    for Window<'imm, AssocBlocks>
+where
+    MutData: Clone,
+    ImmData: Clone,
+{
+    type ImmGet = &'imm ImmData;
+
+    #[inline(always)]
+    unsafe fn assoc_get(&self, ind: UnsafeIndex) -> Data {
+        unsafe {
+            let Data { imm_data, mut_data } =
+                >::assoc_brw(self, ind);
+            Data {
+                imm_data: transmute::<&ImmData, &'imm ImmData>(imm_data),
+                mut_data: mut_data.clone(),
+            }
+        }
+    }
+
+    #[inline(always)]
+    unsafe fn assoc_brw(&self, ind: UnsafeIndex) -> Data<&ImmData, &MutData> {
+        unsafe {
+            let Data { imm_data, mut_data } = self.inner.blocks.get(ind);
+            Data { imm_data, mut_data }
+        }
+    }
+
+    #[inline(always)]
+    unsafe fn assoc_brw_mut(&mut self, ind: UnsafeIndex) -> Data<&ImmData, &mut MutData> {
+        unsafe {
+            let Data { imm_data, mut_data } = self.inner.blocks.get_mut(ind);
+            Data { imm_data, mut_data }
+        }
+    }
+
+    #[inline(always)]
+    fn assoc_append(&mut self, val: Data) {
+        self.inner.blocks.append(val);
+    }
+
+    #[inline(always)]
+    fn conv_get(get: Self::ImmGet) -> ImmData {
+        get.clone()
+    }
+
+    #[inline(always)]
+    unsafe fn assoc_unppend(&mut self) {
+        self.inner.blocks.unppend();
+    }
+}
+
+impl Keyable
+    for AssocBlocks
+{
+    type Key = UnsafeIndex;
+}
+
+impl<'imm, ImmData, MutData, const BLOCK_SIZE: usize> PrimaryWindow<'imm, ImmData, MutData>
+    for Window<'imm, AssocBlocks>
+where
+    MutData: Clone,
+    ImmData: Clone,
+{
+    type ImmGet = &'imm ImmData;
+    type Col = AssocBlocks;
+
+    #[inline(always)]
+    fn get(&self, key: ::Key) -> Access {
+        // Valid indices are 0..count, so an index equal to count is out of bounds.
+        if key < self.inner.blocks.count() {
+            Ok(Entry {
+                index: key,
+                data: unsafe {
+                    let Data { imm_data, mut_data } = self.inner.blocks.get(key);
+                    Data {
+                        imm_data:
transmute::<&ImmData, &'imm ImmData>(imm_data),
+                        mut_data: mut_data.clone(),
+                    }
+                },
+            })
+        } else {
+            Err(KeyError)
+        }
+    }
+
+    #[inline(always)]
+    fn brw(&self, key: ::Key) -> Access<&ImmData, &MutData> {
+        if key < self.inner.blocks.count() {
+            Ok(Entry {
+                index: key,
+                data: unsafe {
+                    let Data { imm_data, mut_data } = self.inner.blocks.get(key);
+                    Data { imm_data, mut_data }
+                },
+            })
+        } else {
+            Err(KeyError)
+        }
+    }
+
+    #[inline(always)]
+    fn brw_mut(&mut self, key: ::Key) -> Access<&ImmData, &mut MutData> {
+        if key < self.inner.blocks.count() {
+            Ok(Entry {
+                index: key,
+                data: unsafe {
+                    let Data { imm_data, mut_data } = self.inner.blocks.get_mut(key);
+                    Data { imm_data, mut_data }
+                },
+            })
+        } else {
+            Err(KeyError)
+        }
+    }
+
+    #[inline(always)]
+    fn conv_get(get: Self::ImmGet) -> ImmData {
+        get.clone()
+    }
+
+    #[inline(always)]
+    fn scan_brw<'brw>(&'brw self) -> impl Iterator::Key> + 'brw {
+        self.scan_get()
+    }
+
+    #[inline(always)]
+    fn scan_get(&self) -> impl Iterator::Key> + 'static {
+        0..self.inner.blocks.count()
+    }
+
+    #[inline(always)]
+    fn count(&self) -> usize {
+        self.inner.blocks.count()
+    }
+}
+
+impl<'imm, ImmData, MutData, const BLOCK_SIZE: usize> PrimaryWindowApp<'imm, ImmData, MutData>
+    for Window<'imm, AssocBlocks>
+where
+    MutData: Clone,
+    ImmData: Clone,
+{
+    #[inline(always)]
+    fn append(&mut self, val: Data) -> ::Key {
+        let new_ind = self.inner.blocks.count();
+        self.inner.blocks.append(val);
+        new_ind
+    }
+
+    #[inline(always)]
+    unsafe fn unppend(&mut self) {
+        self.inner.blocks.unppend();
+    }
+}
diff --git a/crates/pulpit/src/column/assoc_vec.rs b/crates/pulpit/src/column/assoc_vec.rs
new file mode 100644
index 0000000..2d2b2f1
--- /dev/null
+++ b/crates/pulpit/src/column/assoc_vec.rs
@@ -0,0 +1,102 @@
+use assume::assume;
+
+// TODO: Group together accesses into a hot block, then transfer these to the vector as it grows.
+use super::*;
+
+/// An associated, append-only [`Column`] storing data in a large vector for faster
+/// lookup than [`super::AssocBlocks`], but at the expense of needing copies for [`AssocWindow::assoc_get`].
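+///
+/// Each slot is an `Option` so that [`AssocWindowPull::assoc_pull`] can `take` the value
+/// out, leaving a hole without shifting later rows (this matches the element type of the
+/// `data` field below).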
+pub struct AssocVec {
+    data: Vec>>,
+}
+
+impl Column for AssocVec {
+    type WindowKind<'imm> = Window<'imm, AssocVec>
+    where
+        Self: 'imm;
+
+    fn new(size_hint: usize) -> Self {
+        AssocVec {
+            data: Vec::with_capacity(size_hint),
+        }
+    }
+
+    fn window(&mut self) -> Self::WindowKind<'_> {
+        Window { inner: self }
+    }
+}
+
+impl<'imm, ImmData, MutData> AssocWindow<'imm, ImmData, MutData>
+    for Window<'imm, AssocVec>
+where
+    ImmData: Clone,
+    MutData: Clone,
+{
+    type ImmGet = ImmData;
+
+    #[inline(always)]
+    unsafe fn assoc_get(&self, ind: UnsafeIndex) -> Data {
+        let Data { imm_data, mut_data } = self.assoc_brw(ind);
+        Data {
+            imm_data: imm_data.clone(),
+            mut_data: mut_data.clone(),
+        }
+    }
+
+    #[inline(always)]
+    unsafe fn assoc_brw(&self, ind: UnsafeIndex) -> Data<&ImmData, &MutData> {
+        if let Some(Data { imm_data, mut_data }) = self.inner.data.get_unchecked(ind) {
+            Data { imm_data, mut_data }
+        } else {
+            assume!(unsafe: @unreachable)
+        }
+    }
+
+    #[inline(always)]
+    unsafe fn assoc_brw_mut(&mut self, ind: UnsafeIndex) -> Data<&ImmData, &mut MutData> {
+        if let Some(Data { imm_data, mut_data }) = self.inner.data.get_unchecked_mut(ind) {
+            Data { imm_data, mut_data }
+        } else {
+            assume!(unsafe: @unreachable)
+        }
+    }
+
+    #[inline(always)]
+    fn assoc_append(&mut self, val: Data) {
+        self.inner.data.push(Some(val))
+    }
+
+    #[inline(always)]
+    fn conv_get(get: Self::ImmGet) -> ImmData {
+        get
+    }
+
+    #[inline(always)]
+    unsafe fn assoc_unppend(&mut self) {
+        self.inner.data.pop();
+    }
+}
+
+impl<'imm, ImmData, MutData> AssocWindowPull<'imm, ImmData, MutData>
+    for Window<'imm, AssocVec>
+where
+    ImmData: Clone,
+    MutData: Clone,
+{
+    type ImmPull = ImmData;
+
+    #[inline(always)]
+    unsafe fn assoc_pull(&mut self, ind: UnsafeIndex) -> Data {
+        let val = self.inner.data.get_unchecked_mut(ind);
+        val.take().unwrap()
+    }
+
+    #[inline(always)]
+    unsafe fn assoc_place(&mut self, ind: UnsafeIndex, val: Data) {
+        *self.inner.data.get_unchecked_mut(ind) = Some(val);
+    }
+
+    #[inline(always)]
+    fn conv_pull(pull: Self::ImmPull) -> ImmData {
+        pull
+    }
+}
diff --git a/crates/pulpit/src/column/mod.rs b/crates/pulpit/src/column/mod.rs
new file mode 100644
index 0000000..3976195
--- /dev/null
+++ b/crates/pulpit/src/column/mod.rs
@@ -0,0 +1,854 @@
+//! # Primary and Associated Index Columns
+//! Each pulpit table is composed of a primary column (accessed through user-visible
+//! [keys](Keyable::Key)) and associated columns (accessed without bounds
+//! checks through raw column indexes).
+//!
+//! ## Immutability Advantage
+//! This column interface explicitly supports demarcating parts of each row as
+//! immutable to allow for performance improvements by avoiding copying data.
+//!
+//! - All data is moved on insert (move strictly cheaper than copy)
+//! - All data can be borrowed (avoid copying for short borrow)
+//! - Immutable data gotten with copy or cheaper (from borrow, to reindex, to copy)
+//! - Mutable data gotten must be copied (table can be mutated after)
+//!
+//! For tables supporting [`PrimaryWindowPull`] or [`AssocWindowPull`], the immutable data is retained
+//! - All data can be pulled (mutable by move, immutable by move or cheaper - e.g. cloning [`Rc`](std::rc::Rc))
+//!
+//! This advantage is significant when compared to conventional tables in embedded
+//! database systems which require:
+//! 1. Copy the value over to the database (cannot take ownership of heap memory)
+//! 2. Do database things in the database, cannot guarantee immutability while
+//!    supporting ad-hoc queries, so some copies occur during query execution
+//! 3. Copy the result back over to the user (user cannot safely reference memory
+//!    inside the database)
+//!
+//! ## Referencing Immutable Data
+//! In order to safely return references to immutable data while allowing further
+//! referencing and mutation of the mutable data, we need the following:
+//! 1. Guarantee the referenced data is not mutated (simple to verify)
+//! 2. Guarantee the referenced data is not moved within the column (e.g. like a [`Vec`] reallocating on insert)
+//!    (use different kinds of references)
+//! 3. Limit the reference to the lifetime that the column is not moved (difficult)
+//!
+//! In order to achieve (3.) we need to attach the lifetime of the column to
+//! returned references.
+//!
+//! ### Using the [interior mutability pattern](https://doc.rust-lang.org/reference/interior-mutability.html)
+//! As all references are reads, this is just the lifetime of `&self` in a trait method.
+//!
+//! However, interior mutability removes some of the safety in the user interface: we
+//! need to check mutations, but can no longer rely on the borrow checker to do so.
+//!
+//! Hence use of [`std::cell::RefCell`] or locking the column with [`std::sync::RwLock`].
+//!
+//! ### Using an access Token
+//! By using the lifetime of the borrowed token as a lifetime parameter to the Column
+//! to use in qualifying references, we can control the lifetime of immutable references.
+//!
+//! However, we need to ensure the token does not live longer than the column,
+//! otherwise we can get dangling references.
+//!
+//! ```no_run
+//! struct Token; // Zero-Size Token
+//!
+//! struct Data<'imm, ImmData, MutData> {
+//!     imm_data: ImmData,
+//!     mut_data: MutData,
+//!     tk: &'imm Token // We could just steal the lifetime with a `PhantomData`
+//! }
+//!
+//! impl <'imm, ImmData, MutData> Data<'imm, ImmData, MutData> {
+//!     fn get_imm(&self) -> &'imm ImmData {
+//!         unsafe {
+//!             std::mem::transmute(&self.imm_data)
+//!         }
+//!     }
+//!
+//!     fn get_mut(&mut self) -> &mut MutData {
+//!         &mut self.mut_data
+//!     }
+//! }
+//!
+//! fn test() {
+//!     let tk = Token; // Token lives longer than the data
+//!     let imm_ref;
+//!     {
+//!         let mut data = Data {
+//!             imm_data: 3,
+//!             mut_data: 4,
+//!             tk: &tk
+//!         };
+//!         // Get immutable
+//!         let x1 = data.get_imm();
+//!         // mutable borrow or mutable field does not conflict (GOOD)
+//!         let y1 = data.get_mut();
+//!         // immutable borrow still present without conflict
+//!         let z1 = *x1;
+//!         imm_ref = x1;
+//!     }
+//!     // `tk` lives to here, but `data` did not
+//!     let z2 = *imm_ref; // ERROR! dereferencing dangling reference
+//! }
+//! ```
+//!
+//! ### Using a Window
+//! In order to solve this issue with tokens outliving values, we can instead
+//! flip the roles. Place the data in the token (as `Column`), and allow only one `Window`
+//! into the `Column` (enforced using the borrow checker and a `&mut` of the `Column`)
+//!
+//! This allows for the compiler to check borrows from the safe interface (no
+//! runtime checks as with interior mutability), while preventing any dangling
+//! references (immutable borrows properly qualified).
+//!
+//! This implementation is chosen in the form of [`Column::WindowKind`], which is a
+//! single mutable borrow of the column.
+//!
+//! ## Immutable Value Storage
+//! ### Pullability
+//! The delete operation on tables is expressed through [`PrimaryWindowPull`]/[`AssocWindowPull`], here pulling the value
+//!
+//! ## Immutable Value Storage
+//! ### Pullability
+//! The delete operation on tables is expressed through [`PrimaryWindowPull`]/[`AssocWindowPull`],
+//! which pull the value (ideally a move) from the table for the user.
+//!
+//! This affects references to values: if a value is pulled from a column,
+//! references to it may be invalidated. Solutions include:
+//!
+//! 1. Keeping values alive until the column is destroyed, in a stable allocation (e.g. a box).
+//! 2. Using reference counted values, stored separately.
+//! 3. Rather than getting values, just re-index and borrow later - it is immutable data
+//!    after all, so copy on get.
+//!
+//! ### Pointer Stability
+//! Columns internally may want to reallocate where data is placed, which will
+//! invalidate references to data in the table (see the sketch at the end of this comment).
+//!
+//! To prevent this requires placing the data in some separate stable allocation
+//! that can be referenced, or copying.
+//!
+//! ## Why not separate indexes?
+//! I originally considered having the index entirely separate from the data
+//! storage, however as demonstrated in the `col_vs_tup` benchmark, the cost of
+//! separate inserts (required for an index that needs to keep generations) is high.
+//! - Combining them also allows for other optimisations, such as [`PrimaryRetain`]'s reuse
+//!   of the space for data, and of the mutable data for generation & free-slot storage.
+//!
+//! Note that you can technically still use a separate index using
+//! [`PrimaryAppendAdapter`] or [`PrimaryPullAdapter`].
+//!
+//! ## Potential for Improvement
+//! ### Using limit constraints on a table to determine the [`UnsafeIndex`] size
+//! Requires adding this type of constraint.
+//! - The [`UnsafeIndex`] type should be chosen based on the limit on the number of rows,
+//!   e.g. < 256 rows means a `u8` is all that is required.
+//!
+//! ### Variadic Tuples
+//! A proposed feature for Rust that could dramatically improve the code in
+//! [`crate::gen`] by allowing fields to be represented here at the type level,
+//! rather than being managed by macros.
+//!
+//! ### References in Tables
+//! Allowing users to let the table borrow data for the lifetime of the table efficiently.
+//!
+//! Currently possible, but with the significant caveats that:
+//! 1. The references, when 'gotten', live as long as the window, not their original lifetime.
+//! 2. When returning an `'imm` reference, we do not check if that type is already
+//!    a reference type. By copying rather than re-referencing we can avoid a
+//!    double dereference by the user on access, and allow the lifetime extension
+//!    mentioned in (1.)
+//!
+//! ### A reference counted arena
+//! We need to store [`Rc`](std::rc::Rc)s, and use them as the [`PrimaryWindow::ImmGet`] type.
+//! - Allows us to return [`Rc`](std::rc::Rc)s, even for a [`PrimaryWindowPull`]
+//! - This should use our own allocator, rather than [`std::alloc`] (we know
+//!   type information [`std::alloc`] cannot use).
+//!
+//! ### HashSet backed arena for large types
+//! To avoid duplicates - particularly with large strings.
+//! - As an associated column.
+//!
+//! ### Optimisation Study
+//! We can further optimise the tables with:
+//! - removing missed bounds checks in [`AssocWindow`] implementations
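+//!
+//! The reallocation hazard from *Pointer Stability* above, as a standalone sketch
+//! (plain [`Vec`], nothing from this crate):
+//! ```
+//! let mut v = Vec::with_capacity(1);
+//! v.push(0u64);
+//! let first = &v[0] as *const u64;
+//! v.extend(1..1024); // growth may reallocate, moving every element
+//! let moved = &v[0] as *const u64;
+//! // `first` and `moved` need not be equal: a reference held across the
+//! // extend would now be dangling
+//! let _ = (first, moved);
+//! ```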
+
+use std::{hash::Hash, mem::transmute};
+
+mod assoc_blocks;
+pub use assoc_blocks::*;
+mod assoc_vec;
+pub use assoc_vec::*;
+mod primary_gen_arena;
+pub use primary_gen_arena::*;
+mod primary_append_adapter;
+pub use primary_append_adapter::*;
+mod primary_pull_adapter;
+pub use primary_pull_adapter::*;
+mod primary_retain;
+pub use primary_retain::*;
+mod primary_thunderdome;
+pub use primary_thunderdome::*;
+mod primary_thunderdome_trans;
+pub use primary_thunderdome_trans::*;
+
+/// A single window type holding a mutable reference through which windows for
+/// columns and primary indexes can be generated.
+pub struct Window<'imm, Table> {
+    inner: &'imm mut Table,
+}
+
+/// The trait for describing column construction and windowing.
+pub trait Column {
+    type WindowKind<'imm>
+    where
+        Self: 'imm;
+    fn new(size_hint: usize) -> Self;
+    fn window(&mut self) -> Self::WindowKind<'_>;
+}
+
+/// In order to get the Key (without needing the `'imm` lifetime parameter) it is
+/// kept separate from the window, and referenced through the column in the window.
+pub trait Keyable {
+    type Key: Copy + Eq;
+}
+
+/// The raw column index type (used for unchecked indexes)
+pub type UnsafeIndex = usize;
+
+#[derive(Clone)]
+pub struct Data<ImmData, MutData> {
+    pub imm_data: ImmData,
+    pub mut_data: MutData,
+}
+
+impl<ImmData, MutData> Data<ImmData, MutData> {
+    #[inline(always)]
+    pub fn convert_imm<ImmDataProcessed>(
+        self,
+        trans: impl Fn(ImmData) -> ImmDataProcessed,
+    ) -> Data<ImmDataProcessed, MutData> {
+        let Self { imm_data, mut_data } = self;
+        Data {
+            imm_data: trans(imm_data),
+            mut_data,
+        }
+    }
+}
+
+pub struct Entry<ImmData, MutData> {
+    pub index: UnsafeIndex,
+    pub data: Data<ImmData, MutData>,
+}
+
+pub type Access<ImmData, MutData> = Result<Entry<ImmData, MutData>, KeyError>;
+
+pub enum InsertAction {
+    Place(UnsafeIndex),
+    Append,
+}
+
+/// For safe access to a [`PrimaryWindow`] with an incorrect index.
+#[derive(Debug)]
+pub struct KeyError;
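+
+// Orientation for the core types above (a sketch, not part of this crate's API):
+//
+//     let row = Data { imm_data: String::from("imm"), mut_data: 0u32 };
+//     let row = row.convert_imm(|s| s.len()); // now Data<usize, u32>
+//
+// An `Access<ImmData, MutData>` then wraps an `Entry` (the row's `Data` plus the
+// raw `UnsafeIndex` it occupies) behind the fallible key lookup.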
+
+/// A view into a primary index (bounds checked, and produces [`UnsafeIndex`]es
+/// for access to associated columns).
+pub trait PrimaryWindow<'imm, ImmData, MutData> {
+    /// Getting the immutable value for the lifetime of the [`PrimaryWindow`]
+    /// - Does not conflict with concurrent [`PrimaryWindow::brw`], [`PrimaryWindow::brw_mut`]
+    ///   or any [`PrimaryWindowPull`] operations.
+    type ImmGet: 'imm;
+
+    /// The key type of the backing column, used to get the type needed for keys (which
+    /// does not need the `'imm` lifetime parameter)
+    type Col: Keyable + Column;
+
+    fn get(&self, key: <Self::Col as Keyable>::Key) -> Access<Self::ImmGet, MutData>;
+    fn brw(&self, key: <Self::Col as Keyable>::Key) -> Access<&ImmData, &MutData>;
+    fn brw_mut(&mut self, key: <Self::Col as Keyable>::Key) -> Access<&ImmData, &mut MutData>;
+
+    /// For testing, include a conversion for the immutable value
+    fn conv_get(get: Self::ImmGet) -> ImmData;
+
+    /// Get an iterator over the current keys, guaranteed to be valid for `'brw`
+    /// - For [`PrimaryWindowPull`] this prevents the table being modified
+    /// - For [`PrimaryWindowApp`] the implementation can be optimised, given no deletions can occur.
+    fn scan_brw<'brw>(&'brw self) -> impl Iterator<Item = <Self::Col as Keyable>::Key> + 'brw;
+
+    /// Get an iterator over the current keys that does not keep a borrow of the window.
+    /// - Typically collects keys from [`PrimaryWindow::scan_brw`].
+    /// - Can return other kinds of iterators (e.g. compressed values; for append only tables, ranges)
+    fn scan_get(&self) -> impl Iterator<Item = <Self::Col as Keyable>::Key> + 'static;
+
+    fn count(&self) -> usize;
+}
+
+pub trait PrimaryWindowApp<'imm, ImmData, MutData>: PrimaryWindow<'imm, ImmData, MutData> {
+    fn append(&mut self, val: Data<ImmData, MutData>) -> <Self::Col as Keyable>::Key;
+
+    /// To allow transactions to remove data from the table
+    ///
+    /// # Safety
+    /// - All [`PrimaryWindow::get`] values must not be accessed from this call
+    ///   to when they are dropped.
+    unsafe fn unppend(&mut self);
+}
+
+pub trait PrimaryWindowPull<'imm, ImmData, MutData>: PrimaryWindow<'imm, ImmData, MutData> {
+    /// The immutable data that can be pulled from the table. This is separate from
+    /// [`PrimaryWindow::ImmGet`], and allows for deletions that take ownership of
+    /// contained data.
+    type ImmPull: 'imm;
+
+    /// An insert must track if an old [`UnsafeIndex`] is to be overwritten in
+    /// [`AssocWindowPull`], or if an append is required.
+    fn insert(
+        &mut self,
+        val: Data<ImmData, MutData>,
+    ) -> (<Self::Col as Keyable>::Key, InsertAction);
+
+    /// Pull data from a column (removes it from the column).
+    /// For tables implementing [`PrimaryWindowHide`], this can include hidden
+    /// values.
+    fn pull(&mut self, key: <Self::Col as Keyable>::Key) -> Access<Self::ImmPull, MutData>;
+
+    /// For testing, include a conversion for the immutable value pulled
+    fn conv_pull(pull: Self::ImmPull) -> ImmData;
+}
+
+/// Hides a given key temporarily, until it is revealed or removed.
+/// - Allows for 'deletions' that are not actually enforced until commit.
+/// - Allows the deletion from other associated columns to be postponed till the
+///   end of a transaction.
+pub trait PrimaryWindowHide<'imm, ImmData, MutData>:
+    PrimaryWindowPull<'imm, ImmData, MutData>
+{
+    /// Hide a value from get and brw access.
+    /// - Can be pulled from the table, or revealed (back to a normal row)
+    /// - Cannot be hidden twice
+    fn hide(&mut self, key: <Self::Col as Keyable>::Key) -> Result<(), KeyError>;
+
+    /// Un-hide a value to return it to its normal state
+    /// - Panics if called on a currently available row
+    fn reveal(&mut self, key: <Self::Col as Keyable>::Key) -> Result<(), KeyError>;
+}
+
+pub trait AssocWindow<'imm, ImmData, MutData> {
+    type ImmGet: 'imm;
+
+    /// Get the value of the given [`UnsafeIndex`], that lives as long as the window
+    /// - Not zero cost, but at least as cheap as [`Clone`]
+    /// - The resulting [`AssocWindow::ImmGet`] can be held without blocking concurrent operations.
+    ///
+    /// # Safety
+    /// - No bounds checks applied
+    /// - index assumed to be in a valid state
+    unsafe fn assoc_get(&self, ind: UnsafeIndex) -> Data<Self::ImmGet, MutData>;
+
+    /// Borrow a value from an index in the column for a smaller lifetime
+    /// - Zero cost, a normal reference.
+    ///
+    /// # Safety
+    /// - No bounds checks applied
+    /// - index assumed to be in a valid state
+    unsafe fn assoc_brw(&self, ind: UnsafeIndex) -> Data<&ImmData, &MutData>;
+
+    /// Mutably borrow the mutable part of an index in the column.
+    ///
+    /// # Safety
+    /// - No bounds checks applied
+    /// - index assumed to be in a valid state
+    unsafe fn assoc_brw_mut(&mut self, ind: UnsafeIndex) -> Data<&ImmData, &mut MutData>;
+
+    /// Append a value to the column at the new largest [`UnsafeIndex`].
+    fn assoc_append(&mut self, val: Data<ImmData, MutData>);
+
+    /// To allow transactions to remove data from the table
+    ///
+    /// # Safety
+    /// - All [`AssocWindow::assoc_get`] values must not be accessed from this call
+    ///   to when they are dropped.
+ unsafe fn assoc_unppend(&mut self); + + /// For testing include a conversion for the immutable value + fn conv_get(get: Self::ImmGet) -> ImmData; +} + +pub trait AssocWindowPull<'imm, ImmData, MutData>: AssocWindow<'imm, ImmData, MutData> { + type ImmPull: 'imm; + + /// Pull a value from an index. The index is in an `INVALID` state after + /// this operation. + /// + /// # Safety + /// - No bounds checks + unsafe fn assoc_pull(&mut self, ind: UnsafeIndex) -> Data; + + /// Place a value in an index that is in a `PULLED` state. + /// + /// # Safety + /// - No bounds checks + unsafe fn assoc_place(&mut self, ind: UnsafeIndex, val: Data); + + /// For testing include a conversion for the immutable value pulled + fn conv_pull(pull: Self::ImmPull) -> ImmData; +} + +/// A Simple Generational Index Key +pub struct GenKey { + index: UnsafeIndex, + generation: GenCounter, +} + +impl PartialEq for GenKey { + #[inline(always)] + fn eq(&self, other: &Self) -> bool { + self.index == other.index && self.generation == other.generation + } +} +impl Eq for GenKey {} +impl Clone for GenKey { + #[inline(always)] + fn clone(&self) -> Self { + *self + } +} +impl Copy for GenKey {} +impl Hash for GenKey { + #[inline(always)] + fn hash(&self, state: &mut H) { + self.index.hash(state); + self.generation.hash(state); + } +} +mod utils { + use std::mem::MaybeUninit; + + /// A sequence of allocated blocks providing stable pointers. + pub struct Blocks { + count: usize, + data: Vec; BLOCK_SIZE]>>, + } + + impl Drop for Blocks { + fn drop(&mut self) { + for alive in 0..self.count { + let (block, seq) = quotrem::(alive); + unsafe { + self.data.get_unchecked_mut(block)[seq].assume_init_drop(); + } + } + } + } + + impl Blocks { + #[inline(always)] + pub fn new(size_hint: usize) -> Self { + Blocks { + count: 0, + data: Vec::with_capacity(size_hint / BLOCK_SIZE + 1), + } + } + #[inline(always)] + pub fn count(&self) -> usize { + self.count + } + #[inline(always)] + pub fn append(&mut self, val: Value) -> *mut Value { + let (block, seq) = quotrem::(self.count); + let data_ptr; + unsafe { + if seq == 0 { + self.data + .push(Box::new(MaybeUninit::uninit().assume_init())); + } + data_ptr = self.data.get_unchecked_mut(block)[seq].as_mut_ptr(); + data_ptr.write(val); + } + self.count += 1; + data_ptr + } + + /// Must not be used if references to the value still exist. 
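+        ///
+        /// A sketch of the intended pairing (an assumed rollback usage; `blocks` and
+        /// `val` are hypothetical):
+        /// ```ignore
+        /// let ptr = blocks.append(val); // the value gains a stable address
+        /// // ... the insert is rolled back; no references to `*ptr` remain ...
+        /// unsafe { blocks.unppend() };  // drops the value just appended
+        /// ```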
+ #[inline(always)] + pub unsafe fn unppend(&mut self) { + let (block, seq) = quotrem::(self.count - 1); + self.data.get_unchecked_mut(block)[seq].assume_init_drop(); + self.count -= 1; + } + + #[inline(always)] + pub unsafe fn get(&self, ind: usize) -> &Value { + let (block, seq) = quotrem::(ind); + self.data.get_unchecked(block)[seq].assume_init_ref() + } + + #[inline(always)] + pub unsafe fn get_mut(&mut self, ind: usize) -> &mut Value { + let (block, seq) = quotrem::(ind); + self.data.get_unchecked_mut(block)[seq].assume_init_mut() + } + } + + #[inline(always)] + pub fn quotrem(val: usize) -> (usize, usize) { + (val / DIV, val % DIV) + } +} + +#[cfg(any(test, kani))] +mod verif { + use super::*; + use std::collections::HashMap; + use std::marker::PhantomData; + + trait ReferenceMap { + fn with_capacity(size_hint: usize) -> Self; + fn get(&self, key: &Key) -> Option<&Value>; + fn insert(&mut self, key: Key, value: Value) -> Option; + fn remove(&mut self, key: &Key) -> Option; + fn len(&self) -> usize; + fn get_next_key(&self) -> Option; + } + + impl ReferenceMap for HashMap { + fn with_capacity(size_hint: usize) -> Self { + HashMap::with_capacity(size_hint) + } + + fn get(&self, key: &Key) -> Option<&Value> { + self.get(key) + } + + fn insert(&mut self, key: Key, value: Value) -> Option { + self.insert(key, value) + } + + fn remove(&mut self, key: &Key) -> Option { + self.remove(key) + } + + fn len(&self) -> usize { + self.len() + } + + fn get_next_key(&self) -> Option { + self.keys().next().cloned() + } + } + + // A wrapper to check the correct + struct CheckPrimary<'imm, ImmData, MutData, ColWindow, RefMap> + where + ColWindow: PrimaryWindow<'imm, ImmData, MutData>, + RefMap: + ReferenceMap<::Key, (UnsafeIndex, Data)>, + { + colwindow: ColWindow, + items: RefMap, + phantom: PhantomData<&'imm (ImmData, MutData)>, + } + + impl<'imm, ImmData, MutData, ColWindow, RefMap> + CheckPrimary<'imm, ImmData, MutData, ColWindow, RefMap> + where + RefMap: + ReferenceMap<::Key, (UnsafeIndex, Data)>, + ColWindow: PrimaryWindow<'imm, ImmData, MutData>, + ImmData: Clone + Eq + std::fmt::Debug, + MutData: Clone + Eq + std::fmt::Debug, + ::Key: Eq + Hash, + { + fn new(size_hint: usize, colwindow: ColWindow) -> Self { + Self { + colwindow, + items: RefMap::with_capacity(size_hint), + phantom: PhantomData, + } + } + + fn check_get(&self, key: ::Key) { + if let Some((unsafeindex, data)) = self.items.get(&key) { + let entry = self + .colwindow + .get(key) + .expect("Key unexpectedly missing from column"); + let imm_data = ColWindow::conv_get(entry.data.imm_data); + assert_eq!(imm_data, data.imm_data, "Incorrect immutable data"); + assert_eq!(entry.data.mut_data, data.mut_data, "Incorrect mutable data"); + assert_eq!(entry.index, *unsafeindex, "Incorrect index"); + } else { + let entry = self.colwindow.get(key); + assert!(entry.is_err(), "Key unexpectedly present in column"); + } + } + } + + impl<'imm, ImmData, MutData, ColWindow, RefMap> + CheckPrimary<'imm, ImmData, MutData, ColWindow, RefMap> + where + RefMap: + ReferenceMap<::Key, (UnsafeIndex, Data)>, + ColWindow: PrimaryWindowApp<'imm, ImmData, MutData>, + ImmData: Clone + Eq + std::fmt::Debug, + MutData: Clone + Eq + std::fmt::Debug, + ::Key: Eq + Hash, + { + fn check_append(&mut self, data: Data) { + let key = self.colwindow.append(data.clone()); + let unsafeindex = self.items.len(); + assert!( + self.items + .insert(key, (unsafeindex, data.clone())) + .is_none(), + "Key unexpectedly present in column" + ); + } + } + + impl<'imm, ImmData, MutData, 
ColWindow, RefMap> + CheckPrimary<'imm, ImmData, MutData, ColWindow, RefMap> + where + RefMap: + ReferenceMap<::Key, (UnsafeIndex, Data)>, + ColWindow: PrimaryWindowPull<'imm, ImmData, MutData>, + ImmData: Clone + Eq + std::fmt::Debug, + MutData: Clone + Eq + std::fmt::Debug, + ::Key: Eq + Hash, + { + fn check_pull(&mut self, key: ::Key) { + if let Some((unsafeindex, data)) = self.items.remove(&key) { + let entry = self + .colwindow + .pull(key) + .expect("Key unexpectedly missing from column"); + let imm_data = ColWindow::conv_pull(entry.data.imm_data); + assert_eq!(imm_data, data.imm_data, "Incorrect immutable data"); + assert_eq!(entry.data.mut_data, data.mut_data, "Incorrect mutable data"); + assert_eq!(entry.index, unsafeindex, "Incorrect index"); + } else { + let entry = self.colwindow.pull(key); + assert!(entry.is_err(), "Key unexpectedly present in column"); + } + } + + fn check_insert(&mut self, data: Data) { + let (key, action) = self.colwindow.insert(data.clone()); + match action { + InsertAction::Place(unsafeindex) => { + assert!( + self.items + .insert(key, (unsafeindex, data.clone())) + .is_none(), + "Key unexpectedly present in column" + ); + } + InsertAction::Append => { + let unsafeindex = self.items.len(); + assert!( + self.items + .insert(key, (unsafeindex, data.clone())) + .is_none(), + "Key unexpectedly present in column" + ); + } + } + } + } + + #[cfg(test)] + mod test_verif { + use super::*; + + fn check_primary_pull
() + where + Col: Column, + for<'a> Col::WindowKind<'a>: PrimaryWindowPull<'a, usize, usize>, + for<'a> < as PrimaryWindow<'a, usize, usize>>::Col as Keyable>::Key: + Eq + Hash, + { + const ITERS: usize = 100000; + let mut col = Col::new(ITERS); + let mut check: CheckPrimary<_, _, _, HashMap<_, _>> = + CheckPrimary::new(ITERS, col.window()); + + for n in 0..1024 { + check.check_insert(Data { + imm_data: n, + mut_data: n, + }); + check.check_insert(Data { + imm_data: n, + mut_data: n, + }); + if let Some(next_key) = check.items.get_next_key() { + check.check_get(next_key) + } + if let Some(next_key) = check.items.get_next_key() { + check.check_pull(next_key); + check.check_pull(next_key); + } + } + } + + fn check_primary_app() + where + Col: Column, + for<'a> Col::WindowKind<'a>: PrimaryWindowApp<'a, usize, usize>, + for<'a> < as PrimaryWindow<'a, usize, usize>>::Col as Keyable>::Key: + Eq + Hash, + { + const ITERS: usize = 100000; + let mut col = Col::new(ITERS); + let mut check: CheckPrimary<_, _, _, HashMap<_, _>> = + CheckPrimary::new(ITERS, col.window()); + + for n in 0..1024 { + check.check_append(Data { + imm_data: n, + mut_data: n, + }); + check.check_append(Data { + imm_data: n, + mut_data: n, + }); + if let Some(next_key) = check.items.get_next_key() { + check.check_get(next_key) + } + } + } + + macro_rules! test_pull_impl { + ($name:ident => $col:ty) => { + #[test] + fn $name() { + check_primary_pull::<$col>(); + } + }; + } + + macro_rules! test_app_impl { + ($name:ident => $col:ty) => { + #[test] + fn $name() { + check_primary_app::<$col>(); + } + }; + } + + test_pull_impl!(primary_retain => PrimaryRetain); + test_pull_impl!(gen_arena => PrimaryGenerationalArena); + test_pull_impl!(thunderdome => PrimaryThunderDome); + test_pull_impl!(thunderdome_trans => PrimaryThunderDomeTrans); + + test_app_impl!(assoc_blocks => AssocBlocks); + } + + #[cfg(kani)] + mod kani_verif { + use super::*; + + /// A very simple (and horribly inefficient) map, that is far faster to + /// verify than the (efficient) HashMap. + /// As verification of a [`primaryWindow`] requires tracking with a map, + /// we need to use this. 
+ struct SimpleMap { + data: Vec>, + count: usize, + } + + impl ReferenceMap for SimpleMap { + fn with_capacity(size_hint: usize) -> Self { + Self { + data: Vec::with_capacity(size_hint), + count: 0, + } + } + + fn get(&self, key: &Key) -> Option<&Value> { + self.data.iter().find_map(|entry| { + if let Some((k, v)) = entry { + if k == key { + Some(v) + } else { + None + } + } else { + None + } + }) + } + + fn insert(&mut self, key: Key, value: Value) -> Option { + if let Some(v) = self.get(&key) { + Some(v.clone()) + } else { + self.data.push(Some((key, value))); + self.count += 1; + None + } + } + + fn remove(&mut self, key: &Key) -> Option { + let val = self.data.iter_mut().find_map(|entry| { + if let Some((k, _)) = entry { + if k == key { + Some(entry) + } else { + None + } + } else { + None + } + })?; + + val.take().map(|(_, v)| { + self.count -= 1; + v + }) + } + + fn len(&self) -> usize { + self.count + } + + fn get_next_key(&self) -> Option { + self.data.iter().find_map(|entry| { + if let Some((k, _)) = entry { + Some(k.clone()) + } else { + None + } + }) + } + } + + fn verif_pull() + where + Col: Column, + for<'a> Col::WindowKind<'a>: PrimaryWindowPull<'a, usize, usize>, + for<'a> as PrimaryWindow<'a, usize, usize>>::Key: + kani::Arbitrary + Eq + Hash, + { + let mut col = Col::new(ITERS); + let mut check: CheckPrimary<_, _, _, SimpleMap<_, _>> = + CheckPrimary::new(ITERS, col.window()); + + for n in 0..ITERS { + check.check_insert(Data { + imm_data: n, + mut_data: n, + }); + check.check_insert(Data { + imm_data: n, + mut_data: n, + }); + check.check_pull(kani::any()); + check.check_get(kani::any()); + } + } + + #[kani::proof] + #[kani::unwind(6)] + fn check_id_arena() { + verif_pull::, 5>(); + } + } +} diff --git a/crates/pulpit/src/column/primary_append_adapter.rs b/crates/pulpit/src/column/primary_append_adapter.rs new file mode 100644 index 0000000..dfa6f12 --- /dev/null +++ b/crates/pulpit/src/column/primary_append_adapter.rs @@ -0,0 +1,110 @@ +use super::*; + +/// An adapter used as a primary column when associated columns are all that +/// is needed. 
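+///
+/// A usage sketch (keys are plain `usize` offsets; both data halves are `()`):
+/// ```
+/// use pulpit::column::{Column, Data, PrimaryAppendAdapter, PrimaryWindow, PrimaryWindowApp};
+/// let mut col = PrimaryAppendAdapter::new(8);
+/// let mut window = col.window();
+/// let key = window.append(Data { imm_data: (), mut_data: () });
+/// assert!(window.get(key).is_ok());
+/// assert!(window.get(key + 1).is_err()); // past the appended range
+/// ```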
+pub struct PrimaryAppendAdapter { + max_key: usize, + /// Required as to fit the interface we need to be able to return `&mut ()`, + /// however we cannot do the neat lifetime extension trick of `&()` with `&mut` + mut_val: (), +} + +impl Keyable for PrimaryAppendAdapter { + type Key = usize; +} + +impl Column for PrimaryAppendAdapter { + type WindowKind<'imm> = Window<'imm, PrimaryAppendAdapter> where Self: 'imm; + + fn window(&mut self) -> Self::WindowKind<'_> { + Window { inner: self } + } + + fn new(_: usize) -> Self { + Self { + max_key: 0, + mut_val: (), + } + } +} + +impl<'imm> PrimaryWindow<'imm, (), ()> for Window<'imm, PrimaryAppendAdapter> { + type ImmGet = (); + type Col = PrimaryAppendAdapter; + + #[inline(always)] + fn get(&self, key: ::Key) -> Access { + if key < self.inner.max_key { + Ok(Entry { + index: key, + data: Data { + imm_data: (), + mut_data: (), + }, + }) + } else { + Err(KeyError) + } + } + + #[inline(always)] + fn brw(&self, key: ::Key) -> Access<&(), &()> { + if key < self.inner.max_key { + Ok(Entry { + index: key, + data: Data { + imm_data: &(), + mut_data: &(), + }, + }) + } else { + Err(KeyError) + } + } + + #[inline(always)] + fn brw_mut(&mut self, key: ::Key) -> Access<&(), &mut ()> { + if key < self.inner.max_key { + Ok(Entry { + index: key, + data: Data { + imm_data: &(), + mut_data: &mut self.inner.mut_val, + }, + }) + } else { + Err(KeyError) + } + } + + #[inline(always)] + fn conv_get(_: Self::ImmGet) {} + + #[inline(always)] + fn scan_brw<'brw>(&'brw self) -> impl Iterator::Key> + 'brw { + self.scan_get() + } + + #[inline(always)] + fn scan_get(&self) -> impl Iterator::Key> + 'static { + 0..(self.inner.max_key) + } + + #[inline(always)] + fn count(&self) -> usize { + self.inner.max_key + } +} + +impl<'imm> PrimaryWindowApp<'imm, (), ()> for Window<'imm, PrimaryAppendAdapter> { + #[inline(always)] + fn append(&mut self, _: Data<(), ()>) -> ::Key { + let key = self.inner.max_key; + self.inner.max_key += 1; + key + } + #[inline(always)] + unsafe fn unppend(&mut self) { + self.inner.max_key -= 1; + } +} diff --git a/crates/pulpit/src/column/primary_gen_arena.rs b/crates/pulpit/src/column/primary_gen_arena.rs new file mode 100644 index 0000000..18bd2a3 --- /dev/null +++ b/crates/pulpit/src/column/primary_gen_arena.rs @@ -0,0 +1,136 @@ +use super::*; +use typed_generational_arena::{Arena as GenArena, Index as GenIndex}; + +/// A Primary [`Column`] implemented using the [`typed_generational_arena`]'s [`GenArena`]. +/// - No immutability optimisations. 
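+///
+/// A usage sketch (illustrative `u32` data; the pull invalidates the key's generation):
+/// ```
+/// use pulpit::column::{Column, Data, PrimaryGenerationalArena, PrimaryWindow, PrimaryWindowPull};
+/// let mut col = PrimaryGenerationalArena::<u32, u32>::new(8);
+/// let mut window = col.window();
+/// let (key, _action) = window.insert(Data { imm_data: 1, mut_data: 2 });
+/// window.pull(key).expect("key is live");
+/// assert!(window.get(key).is_err()); // stale generation
+/// ```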
+pub struct PrimaryGenerationalArena { + arena: GenArena>, +} + +impl Keyable for PrimaryGenerationalArena { + type Key = GenIndex, usize, usize>; +} + +impl Column for PrimaryGenerationalArena { + type WindowKind<'imm> = Window<'imm, PrimaryGenerationalArena> + where + Self: 'imm; + + fn new(size_hint: usize) -> Self { + Self { + arena: GenArena::with_capacity(size_hint), + } + } + + fn window(&mut self) -> Self::WindowKind<'_> { + Window { inner: self } + } +} + +impl<'imm, ImmData, MutData> PrimaryWindow<'imm, ImmData, MutData> + for Window<'imm, PrimaryGenerationalArena> +where + ImmData: Clone + 'static, + MutData: Clone + 'static, +{ + type ImmGet = ImmData; + type Col = PrimaryGenerationalArena; + + #[inline(always)] + fn get(&self, key: ::Key) -> Access { + let Entry { + data: Data { imm_data, mut_data }, + index, + } = self.brw(key)?; + Ok(Entry { + index, + data: Data { + imm_data: imm_data.clone(), + mut_data: mut_data.clone(), + }, + }) + } + + #[inline(always)] + fn brw(&self, key: ::Key) -> Access<&ImmData, &MutData> { + match self.inner.arena.get(key) { + Some(Data { imm_data, mut_data }) => Ok(Entry { + data: Data { imm_data, mut_data }, + index: key.to_idx(), + }), + None => Err(KeyError), + } + } + + #[inline(always)] + fn brw_mut(&mut self, key: ::Key) -> Access<&ImmData, &mut MutData> { + match self.inner.arena.get_mut(key) { + Some(Data { imm_data, mut_data }) => Ok(Entry { + data: Data { imm_data, mut_data }, + index: key.to_idx(), + }), + None => Err(KeyError), + } + } + + #[inline(always)] + fn conv_get(get: Self::ImmGet) -> ImmData { + get + } + + #[inline(always)] + fn scan_brw<'brw>(&'brw self) -> impl Iterator::Key> + 'brw { + self.inner.arena.iter().map(|(key, _)| key) + } + + fn scan_get(&self) -> impl Iterator::Key> + 'static { + self.scan_brw().collect::>().into_iter() + } + + #[inline(always)] + fn count(&self) -> usize { + self.inner.arena.len() + } +} + +impl<'imm, ImmData, MutData> PrimaryWindowPull<'imm, ImmData, MutData> + for Window<'imm, PrimaryGenerationalArena> +where + ImmData: Clone + 'static, + MutData: Clone + 'static, +{ + type ImmPull = ImmData; + + #[inline(always)] + fn insert( + &mut self, + val: Data, + ) -> (::Key, InsertAction) { + let curr_max = self.inner.arena.len(); + let key = self.inner.arena.insert(val); + ( + key, + if key.to_idx() == curr_max { + InsertAction::Append + } else { + InsertAction::Place(key.to_idx()) + }, + ) + } + + #[inline(always)] + fn pull(&mut self, key: ::Key) -> Access { + match self.inner.arena.remove(key) { + Some(Data { imm_data, mut_data }) => Ok(Entry { + data: Data { imm_data, mut_data }, + index: key.to_idx(), + }), + None => Err(KeyError), + } + } + + #[inline(always)] + fn conv_pull(pull: Self::ImmPull) -> ImmData { + pull + } +} diff --git a/crates/pulpit/src/column/primary_pull_adapter.rs b/crates/pulpit/src/column/primary_pull_adapter.rs new file mode 100644 index 0000000..fabd1a6 --- /dev/null +++ b/crates/pulpit/src/column/primary_pull_adapter.rs @@ -0,0 +1,258 @@ +use super::*; +use assume::assume; + +#[derive(Clone, Copy)] +enum GenEntry { + Generation(usize), + Hidden(usize), + NextFree(Option), +} + +struct GenInfo { + next_free: Option, + gen_counter: usize, + generations: Vec, + visible_count: usize, +} + +impl GenInfo { + #[inline(always)] + fn lookup_key(&self, key: GenKey) -> Result { + match self.generations.get(key.index) { + Some(GenEntry::Generation(g)) if key.generation == *g => Ok(key.index), + _ => Err(KeyError), + } + } + + #[inline(always)] + fn pull_key(&mut self, key: 
GenKey) -> Result { + if let Some(entry) = self.generations.get_mut(key.index) { + if let GenEntry::Generation(_) = entry { + self.visible_count -= 1; + } + match *entry { + GenEntry::Generation(g) | GenEntry::Hidden(g) if g == key.generation => { + *entry = GenEntry::NextFree(self.next_free); + self.next_free = Some(key.index); + self.gen_counter += 1; + Ok(key.index) + } + _ => Err(KeyError), + } + } else { + Err(KeyError) + } + } + + #[inline(always)] + fn hide_key(&mut self, key: GenKey) -> Result<(), KeyError> { + if let Some(entry) = self.generations.get_mut(key.index) { + match *entry { + GenEntry::Generation(g) if g == key.generation => { + *entry = GenEntry::Hidden(g); + self.visible_count -= 1; + Ok(()) + } + _ => Err(KeyError), + } + } else { + Err(KeyError) + } + } + + #[inline(always)] + fn reveal_key(&mut self, key: GenKey) -> Result<(), KeyError> { + if let Some(entry) = self.generations.get_mut(key.index) { + match *entry { + GenEntry::Hidden(g) if g == key.generation => { + *entry = GenEntry::Generation(g); + self.visible_count += 1; + Ok(()) + } + _ => Err(KeyError), + } + } else { + Err(KeyError) + } + } + + #[inline(always)] + fn scan(&self) -> impl Iterator> + '_ { + self.generations + .iter() + .enumerate() + .filter_map(|(i, e)| match e { + GenEntry::Generation(g) => Some(GenKey { + index: i, + generation: *g, + }), + GenEntry::NextFree(_) | GenEntry::Hidden(_) => None, + }) + } + + #[inline(always)] + fn insert(&mut self) -> (GenKey, InsertAction) { + if let Some(k) = self.next_free { + // TODO: could use unchecked here + let entry = self.generations.get_mut(k).unwrap(); + match *entry { + GenEntry::NextFree(opt) => { + self.next_free = opt; + *entry = GenEntry::Generation(self.gen_counter); + ( + GenKey { + index: k, + generation: self.gen_counter, + }, + InsertAction::Place(k), + ) + } + _ => assume!(unsafe: @unreachable), + } + } else { + let index = self.generations.len(); + self.generations + .push(GenEntry::Generation(self.gen_counter)); + ( + GenKey { + index, + generation: self.gen_counter, + }, + InsertAction::Append, + ) + } + } + + fn count(&self) -> usize { + self.visible_count + } +} + +/// An adapter to allow for associated columns to be used with a primary. 
+/// - Used as the primary column for a table, but with only generation data +/// (no user data) +pub struct PrimaryPullAdapter { + gen: GenInfo, + /// Required as to fit the interface we need to be able to return `&mut ()`, + /// however we cannot do the neat lifetime extension trick of `&()` with `&mut` + mut_val: (), +} + +impl Keyable for PrimaryPullAdapter { + type Key = GenKey; +} + +impl Column for PrimaryPullAdapter { + type WindowKind<'imm> = Window<'imm, PrimaryPullAdapter> where Self: 'imm; + + fn new(size_hint: usize) -> Self { + PrimaryPullAdapter { + gen: GenInfo { + next_free: None, + generations: Vec::with_capacity(size_hint), + gen_counter: 0, + visible_count: 0, + }, + mut_val: (), + } + } + + fn window(&mut self) -> Self::WindowKind<'_> { + Window { inner: self } + } +} + +impl<'imm> PrimaryWindow<'imm, (), ()> for Window<'imm, PrimaryPullAdapter> { + type ImmGet = (); + type Col = PrimaryPullAdapter; + + #[inline(always)] + fn get(&self, key: ::Key) -> Access { + let index = self.inner.gen.lookup_key(key)?; + Ok(Entry { + index, + data: Data { + imm_data: (), + mut_data: (), + }, + }) + } + + #[inline(always)] + fn brw(&self, key: ::Key) -> Access<&(), &()> { + let index = self.inner.gen.lookup_key(key)?; + Ok(Entry { + index, + data: Data { + imm_data: &(), + mut_data: &(), + }, + }) + } + + #[inline(always)] + fn brw_mut(&mut self, key: ::Key) -> Access<&(), &mut ()> { + let index = self.inner.gen.lookup_key(key)?; + Ok(Entry { + index, + data: Data { + imm_data: &(), + mut_data: &mut self.inner.mut_val, + }, + }) + } + + #[inline(always)] + fn conv_get(_: Self::ImmGet) {} + + #[inline(always)] + fn scan_brw<'brw>(&'brw self) -> impl Iterator::Key> + 'brw { + self.inner.gen.scan() + } + + #[inline(always)] + fn scan_get(&self) -> impl Iterator::Key> + 'static { + self.inner.gen.scan().collect::>().into_iter() + } + + #[inline(always)] + fn count(&self) -> usize { + self.inner.gen.count() + } +} + +impl<'imm> PrimaryWindowPull<'imm, (), ()> for Window<'imm, PrimaryPullAdapter> { + type ImmPull = (); + + #[inline(always)] + fn pull(&mut self, key: ::Key) -> Access { + let index = self.inner.gen.pull_key(key)?; + Ok(Entry { + index, + data: Data { + imm_data: (), + mut_data: (), + }, + }) + } + + #[inline(always)] + fn insert(&mut self, _: Data<(), ()>) -> (::Key, InsertAction) { + self.inner.gen.insert() + } + + #[inline(always)] + fn conv_pull(_: Self::ImmPull) {} +} + +impl<'imm> PrimaryWindowHide<'imm, (), ()> for Window<'imm, PrimaryPullAdapter> { + #[inline(always)] + fn hide(&mut self, key: ::Key) -> Result<(), KeyError> { + self.inner.gen.hide_key(key) + } + + #[inline(always)] + fn reveal(&mut self, key: ::Key) -> Result<(), KeyError> { + self.inner.gen.reveal_key(key) + } +} diff --git a/crates/pulpit/src/column/primary_retain.rs b/crates/pulpit/src/column/primary_retain.rs new file mode 100644 index 0000000..0cde262 --- /dev/null +++ b/crates/pulpit/src/column/primary_retain.rs @@ -0,0 +1,434 @@ +use assume::assume; + +use super::*; + +use std::{ + mem::{size_of, ManuallyDrop}, + ptr, +}; + +// TODO: Bench against 0 as missing +/// The next free splot to reuse. 
+/// Note: Cannot be `usize::MAX`
+struct NextFree(Option<usize>);
+type EncodedNextFree = usize;
+
+impl NextFree {
+    #[inline(always)]
+    fn encode(&self) -> usize {
+        if let Some(index) = self.0 {
+            assume!(unsafe: index != EncodedNextFree::MAX, "index is invalid");
+            index
+        } else {
+            EncodedNextFree::MAX
+        }
+    }
+
+    #[inline(always)]
+    fn decode(val: EncodedNextFree) -> Self {
+        NextFree(if val == EncodedNextFree::MAX {
+            None
+        } else {
+            Some(val)
+        })
+    }
+}
+
+struct HiddenData<MutData> {
+    hidden: bool,
+    data: MutData,
+}
+
+union Slot<MutData> {
+    full: ManuallyDrop<HiddenData<MutData>>,
+    next_free: EncodedNextFree,
+}
+
+struct MutEntry<ImmData, MutData> {
+    imm_ptr: PtrGen<ImmData>,
+    mut_data: Slot<MutData>,
+}
+
+impl<ImmData, MutData> Drop for MutEntry<ImmData, MutData> {
+    fn drop(&mut self) {
+        if self.imm_ptr.0.is_null() {
+            unsafe {
+                ManuallyDrop::drop(&mut self.mut_data.full);
+            }
+        }
+    }
+}
+
+/// A generational arena that retains immutable data to allow for immutable,
+/// stable references to be taken.
+///
+/// # Leaks
+/// This arena *retains* immutable data until the arena is dropped, and as a result
+/// it can accumulate large amounts of immutable values.
+/// - Detrimental for large, frequently deleted and inserted tables on machines
+///   with limited memory.
+/// - Not a true leak (i.e. like [`std::mem::forget`]), data is still cleared on
+///   drop. Though if a table is retained for the entire program run, this makes
+///   no difference.
+///
+/// # Generations
+/// The immutable data pointer is used as the generation counter.
+/// - No extra space overhead & we need this pointer anyway when accessing [`PrimaryWindow::get`].
+/// - As each new allocation for a non-zero sized object is unique, this gives
+///   us a value to use for generation.
+/// - For zero sized types we have the same address for every allocation.
+///
+/// This strategy does not work for zero sized types, so in this instance, we
+/// use the immutable data pointer's location as a normal generation counter,
+/// and pass [transmute]-ed references to an internal zero-sized type out.
+///
+/// ```
+/// # use std::mem::{MaybeUninit, size_of};
+/// assert_eq!(size_of::<()>(), 0);
+/// assert_eq!(size_of::<[MaybeUninit<()>; 10]>(), 0);
+/// let x: [MaybeUninit<()>; 10] = [MaybeUninit::new(()); 10];
+/// unsafe {
+///     assert_eq!(x[0].as_ptr(), x[9].as_ptr());
+/// }
+/// ```
+pub struct PrimaryRetain<ImmData, MutData, const BLOCK_SIZE: usize> {
+    mut_data: Vec<MutEntry<ImmData, MutData>>,
+    visible_count: usize,
+    next_free_mut: NextFree,
+    imm_data: utils::Blocks<ImmData, BLOCK_SIZE>,
+    gen_counter: usize,
+    dummy_zero_size: (),
+}
+
+/// ## Concurrency Safe Marker for Key Type
+/// As we use the pointer as a generation counter, and only access through it once
+/// we have matched the generation, it is safe to share these.
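+///
+/// The retention this enables, as a sketch (the pulled key goes stale, yet data
+/// obtained while it was live stays borrowable for the window's lifetime):
+/// ```
+/// use pulpit::column::{Column, Data, PrimaryRetain, PrimaryWindow, PrimaryWindowPull};
+/// let mut col = PrimaryRetain::<String, u32, 1024>::new(8);
+/// let mut window = col.window();
+/// let (key, _) = window.insert(Data { imm_data: String::from("imm"), mut_data: 0 });
+/// let imm: &String = window.get(key).unwrap().data.imm_data;
+/// window.pull(key).expect("key is live");
+/// assert_eq!(imm, "imm"); // the immutable half is retained by the arena
+/// ```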
+pub struct PtrGen(*const ImmData); + +impl Clone for PtrGen { + fn clone(&self) -> Self { + *self + } +} +impl Copy for PtrGen {} +impl PartialEq for PtrGen { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } +} +impl Eq for PtrGen {} +impl Hash for PtrGen { + fn hash(&self, state: &mut H) { + self.0.hash(state); + } +} + +// Wrapping for concurrency (pointers are `!Send + !Sync` by default, we need to +// explicitly mark this as concurrency safe) +unsafe impl Send for PtrGen {} +unsafe impl Sync for PtrGen {} + +impl Keyable + for PrimaryRetain +{ + type Key = GenKey>; +} + +impl Column + for PrimaryRetain +{ + type WindowKind<'imm> = Window<'imm, PrimaryRetain> + where + Self: 'imm; + + fn new(size_hint: usize) -> Self { + PrimaryRetain { + mut_data: Vec::with_capacity(size_hint / BLOCK_SIZE + 1), + imm_data: utils::Blocks::new(size_hint), + visible_count: 0, + next_free_mut: NextFree(None), + gen_counter: 1, + dummy_zero_size: (), + } + } + + fn window(&mut self) -> Self::WindowKind<'_> { + Window { inner: self } + } +} + +impl<'imm, ImmData, MutData, const BLOCK_SIZE: usize> PrimaryWindow<'imm, ImmData, MutData> + for Window<'imm, PrimaryRetain> +where + MutData: Clone + 'static, + ImmData: Clone + 'static, +{ + type ImmGet = &'imm ImmData; + type Col = PrimaryRetain; + + #[inline(always)] + fn get(&self, key: ::Key) -> Access { + let Entry { + index, + data: Data { imm_data, mut_data }, + } = self.brw(key)?; + Ok(Entry { + index, + data: Data { + imm_data: unsafe { transmute::<&ImmData, &'imm ImmData>(imm_data) }, + mut_data: mut_data.clone(), + }, + }) + } + + #[inline(always)] + fn brw(&self, key: ::Key) -> Access<&ImmData, &MutData> { + if let Some(MutEntry { imm_ptr, mut_data }) = self.inner.mut_data.get(key.index) { + unsafe { + if key.generation == *imm_ptr && !mut_data.full.hidden { + Ok(Entry { + index: key.index, + data: Data { + imm_data: if size_of::() == 0 { + transmute::<&(), &ImmData>(&self.inner.dummy_zero_size) + } else { + &*(imm_ptr.0).cast::() + }, + mut_data: &mut_data.full.data, + }, + }) + } else { + Err(KeyError) + } + } + } else { + Err(KeyError) + } + } + + #[inline(always)] + fn brw_mut(&mut self, key: ::Key) -> Access<&ImmData, &mut MutData> { + if let Some(MutEntry { imm_ptr, mut_data }) = self.inner.mut_data.get_mut(key.index) { + unsafe { + if key.generation == *imm_ptr && !mut_data.full.hidden { + Ok(Entry { + index: key.index, + data: Data { + imm_data: if size_of::() == 0 { + transmute::<&(), &ImmData>(&self.inner.dummy_zero_size) + } else { + &*(imm_ptr.0).cast::() + }, + mut_data: &mut mut_data.full.data, + }, + }) + } else { + Err(KeyError) + } + } + } else { + Err(KeyError) + } + } + + #[inline(always)] + fn conv_get(get: Self::ImmGet) -> ImmData { + get.clone() + } + + #[inline(always)] + fn scan_brw<'brw>(&'brw self) -> impl Iterator::Key> + 'brw { + self.inner + .mut_data + .iter() + .enumerate() + .filter_map(|(index, entry)| { + if entry.imm_ptr.0.is_null() { + None + } else { + Some(GenKey { + index, + generation: entry.imm_ptr, + }) + } + }) + } + + #[inline(always)] + fn scan_get(&self) -> impl Iterator::Key> + 'static { + self.scan_brw().collect::>().into_iter() + } + + #[inline(always)] + fn count(&self) -> usize { + self.inner.visible_count + } +} + +impl<'imm, ImmData, MutData, const BLOCK_SIZE: usize> PrimaryWindowPull<'imm, ImmData, MutData> + for Window<'imm, PrimaryRetain> +where + MutData: Clone + 'static, + ImmData: Clone + 'static, +{ + type ImmPull = &'imm ImmData; + + #[inline(always)] + fn insert( + &mut self, + 
Data { imm_data, mut_data }: Data, + ) -> (::Key, InsertAction) { + let imm_ptr = self.inner.imm_data.append(imm_data); + self.inner.visible_count += 1; + if let NextFree(Some(next_free)) = self.inner.next_free_mut { + unsafe { + let mut_entry = self.inner.mut_data.get_unchecked_mut(next_free); + debug_assert!(mut_entry.imm_ptr.0.is_null()); + + let imm_ptr = if size_of::() == 0 { + // For zero sized types, use the generation counter. + let val = self.inner.gen_counter as *const ImmData; + self.inner.gen_counter += 1; + val + } else { + imm_ptr + }; + + self.inner.next_free_mut = NextFree::decode(mut_entry.mut_data.next_free); + *mut_entry = MutEntry { + imm_ptr: PtrGen(imm_ptr), + mut_data: Slot { + full: ManuallyDrop::new(HiddenData { + hidden: false, + data: mut_data, + }), + }, + }; + ( + GenKey { + index: next_free, + generation: PtrGen(imm_ptr), + }, + InsertAction::Place(next_free), + ) + } + } else { + let index = self.inner.mut_data.len(); + self.inner.mut_data.push(MutEntry { + imm_ptr: PtrGen(imm_ptr), + mut_data: Slot { + full: ManuallyDrop::new(HiddenData { + hidden: false, + data: mut_data, + }), + }, + }); + ( + GenKey { + index, + generation: PtrGen(imm_ptr), + }, + InsertAction::Append, + ) + } + } + + #[inline(always)] + fn pull(&mut self, key: ::Key) -> Access { + if let Some(mut_entry) = self.inner.mut_data.get_mut(key.index) { + unsafe { + if key.generation == mut_entry.imm_ptr { + let pull_imm_ref = if size_of::() == 0 { + transmute::<&(), &ImmData>(&self.inner.dummy_zero_size) + } else { + &*(mut_entry.imm_ptr.0).cast::() + }; + let pull_mut_data = ManuallyDrop::take(&mut mut_entry.mut_data.full); + if !pull_mut_data.hidden { + self.inner.visible_count -= 1; + } + *mut_entry = MutEntry { + imm_ptr: PtrGen(ptr::null()), + mut_data: Slot { + next_free: self.inner.next_free_mut.encode(), + }, + }; + self.inner.next_free_mut = NextFree(Some(key.index)); + Ok(Entry { + index: key.index, + data: Data { + imm_data: pull_imm_ref, + mut_data: pull_mut_data.data, + }, + }) + } else { + Err(KeyError) + } + } + } else { + Err(KeyError) + } + } + + fn conv_pull(pull: Self::ImmPull) -> ImmData { + pull.clone() + } +} + +impl<'imm, ImmData, MutData, const BLOCK_SIZE: usize> PrimaryWindowHide<'imm, ImmData, MutData> + for Window<'imm, PrimaryRetain> +where + MutData: Clone + 'static, + ImmData: Clone + 'static, +{ + #[inline(always)] + fn hide(&mut self, key: ::Key) -> Result<(), KeyError> { + if let Some(MutEntry { imm_ptr, mut_data }) = self.inner.mut_data.get_mut(key.index) { + unsafe { + if key.generation == *imm_ptr && !mut_data.full.hidden { + mut_data.full.hidden = true; + self.inner.visible_count -= 1; + Ok(()) + } else { + Err(KeyError) + } + } + } else { + Err(KeyError) + } + } + + #[inline(always)] + fn reveal(&mut self, key: ::Key) -> Result<(), KeyError> { + if let Some(MutEntry { imm_ptr, mut_data }) = self.inner.mut_data.get_mut(key.index) { + unsafe { + if key.generation == *imm_ptr && mut_data.full.hidden { + mut_data.full.hidden = false; + self.inner.visible_count += 1; + Ok(()) + } else { + Err(KeyError) + } + } + } else { + Err(KeyError) + } + } +} + +#[cfg(kani)] +impl kani::Arbitrary + for GenKey, *const ImmData> +{ + fn any() -> Self { + let mut gen_ind: usize = kani::any(); + if gen_ind == 0 { + gen_ind += 1; + } + Self { + index: kani::any(), + generation: (gen_ind as *const ImmData), + phantom: PhantomData, + } + } +} diff --git a/crates/pulpit/src/column/primary_thunderdome.rs b/crates/pulpit/src/column/primary_thunderdome.rs new file mode 100644 index 
0000000..88f274b --- /dev/null +++ b/crates/pulpit/src/column/primary_thunderdome.rs @@ -0,0 +1,136 @@ +use super::*; +use thunderdome::{Arena as ThunderArena, Index as ThunderIndex}; + +/// A Primary [`Column`] implemented using the [`thunderdome`]'s [`ThunderArena`]. +/// - Conforms to the interface (using 8 byte [`UnsafeIndex`] indices) despite being +/// backed by [`u32`] indexed [`ThunderIndex`]s. +pub struct PrimaryThunderDome { + arena: ThunderArena>, +} + +impl Column for PrimaryThunderDome { + type WindowKind<'imm> = Window<'imm, PrimaryThunderDome> + where + Self: 'imm; + + fn new(size_hint: usize) -> Self { + Self { + arena: ThunderArena::with_capacity(size_hint), + } + } + + fn window(&mut self) -> Self::WindowKind<'_> { + Window { inner: self } + } +} + +impl Keyable for PrimaryThunderDome { + type Key = ThunderIndex; +} + +impl<'imm, ImmData, MutData> PrimaryWindow<'imm, ImmData, MutData> + for Window<'imm, PrimaryThunderDome> +where + ImmData: Clone, + MutData: Clone, +{ + type ImmGet = ImmData; + type Col = PrimaryThunderDome; + + #[inline(always)] + fn get(&self, key: ::Key) -> Access { + let Entry { + data: Data { imm_data, mut_data }, + index: _, + } = self.brw(key)?; + Ok(Entry { + index: key.slot() as usize, + data: Data { + imm_data: imm_data.clone(), + mut_data: mut_data.clone(), + }, + }) + } + + #[inline(always)] + fn brw(&self, key: ::Key) -> Access<&ImmData, &MutData> { + match self.inner.arena.get(key) { + Some(Data { imm_data, mut_data }) => Ok(Entry { + index: key.slot() as usize, + data: Data { imm_data, mut_data }, + }), + None => Err(KeyError), + } + } + + #[inline(always)] + fn brw_mut(&mut self, key: ::Key) -> Access<&ImmData, &mut MutData> { + match self.inner.arena.get_mut(key) { + Some(Data { imm_data, mut_data }) => Ok(Entry { + index: key.slot() as usize, + data: Data { imm_data, mut_data }, + }), + None => Err(KeyError), + } + } + + fn conv_get(get: Self::ImmGet) -> ImmData { + get + } + + #[inline(always)] + fn scan_brw<'brw>(&'brw self) -> impl Iterator::Key> + 'brw { + self.inner.arena.iter().map(|(i, _)| i) + } + + #[inline(always)] + fn scan_get(&self) -> impl Iterator::Key> + 'static { + self.scan_brw().collect::>().into_iter() + } + + fn count(&self) -> usize { + self.inner.arena.len() + } +} + +impl<'imm, ImmData, MutData> PrimaryWindowPull<'imm, ImmData, MutData> + for Window<'imm, PrimaryThunderDome> +where + ImmData: Clone, + MutData: Clone, +{ + type ImmPull = ImmData; + + #[inline(always)] + fn insert( + &mut self, + val: Data, + ) -> (::Key, InsertAction) { + let curr_max = self.inner.arena.len(); + let key = self.inner.arena.insert(val); + let index = key.slot() as usize; + ( + key, + if index == curr_max { + InsertAction::Append + } else { + InsertAction::Place(index) + }, + ) + } + + #[inline(always)] + fn pull(&mut self, key: ::Key) -> Access { + match self.inner.arena.remove(key) { + Some(Data { imm_data, mut_data }) => Ok(Entry { + index: key.slot() as usize, + data: Data { imm_data, mut_data }, + }), + None => Err(KeyError), + } + } + + fn conv_pull(pull: Self::ImmPull) -> ImmData { + pull + } +} diff --git a/crates/pulpit/src/column/primary_thunderdome_trans.rs b/crates/pulpit/src/column/primary_thunderdome_trans.rs new file mode 100644 index 0000000..e4b246c --- /dev/null +++ b/crates/pulpit/src/column/primary_thunderdome_trans.rs @@ -0,0 +1,219 @@ +use super::*; +use thunderdome::{Arena as ThunderArena, Index as ThunderIndex}; + +struct TransData { + visible: bool, + mut_data: MutData, +} + +/// A modification on 
[`PrimaryThunderDome`] to allow for transactions by nincluding a 'hide field' +pub struct PrimaryThunderDomeTrans { + arena: ThunderArena>>, + visible_size: usize, +} + +impl Column for PrimaryThunderDomeTrans { + type WindowKind<'imm> = Window<'imm, PrimaryThunderDomeTrans> + where + Self: 'imm; + + #[inline(always)] + fn new(size_hint: usize) -> Self { + Self { + arena: ThunderArena::with_capacity(size_hint), + visible_size: 0, + } + } + + #[inline(always)] + fn window(&mut self) -> Self::WindowKind<'_> { + Window { inner: self } + } +} + +impl Keyable for PrimaryThunderDomeTrans { + type Key = ThunderIndex; +} + +impl<'imm, ImmData, MutData> PrimaryWindow<'imm, ImmData, MutData> + for Window<'imm, PrimaryThunderDomeTrans> +where + ImmData: Clone, + MutData: Clone, +{ + type ImmGet = ImmData; + type Col = PrimaryThunderDomeTrans; + + #[inline(always)] + fn get(&self, key: ::Key) -> Access { + let Entry { + data: Data { imm_data, mut_data }, + index: _, + } = self.brw(key)?; + Ok(Entry { + index: key.slot() as usize, + data: Data { + imm_data: imm_data.clone(), + mut_data: mut_data.clone(), + }, + }) + } + + #[inline(always)] + fn brw(&self, key: ::Key) -> Access<&ImmData, &MutData> { + match self.inner.arena.get(key) { + Some(Data { + imm_data, + mut_data: + TransData { + visible: true, + mut_data, + }, + }) => Ok(Entry { + index: key.slot() as usize, + data: Data { imm_data, mut_data }, + }), + _ => Err(KeyError), + } + } + + #[inline(always)] + fn brw_mut(&mut self, key: ::Key) -> Access<&ImmData, &mut MutData> { + match self.inner.arena.get_mut(key) { + Some(Data { + imm_data, + mut_data: + TransData { + visible: true, + mut_data, + }, + }) => Ok(Entry { + index: key.slot() as usize, + data: Data { imm_data, mut_data }, + }), + _ => Err(KeyError), + } + } + + fn conv_get(get: Self::ImmGet) -> ImmData { + get + } + + #[inline(always)] + fn scan_brw<'brw>(&'brw self) -> impl Iterator::Key> + 'brw { + self.inner.arena.iter().map(|(i, _)| i) + } + + #[inline(always)] + fn scan_get(&self) -> impl Iterator::Key> + 'static { + self.scan_brw().collect::>().into_iter() + } + + fn count(&self) -> usize { + self.inner.arena.len() + } +} + +impl<'imm, ImmData, MutData> PrimaryWindowPull<'imm, ImmData, MutData> + for Window<'imm, PrimaryThunderDomeTrans> +where + ImmData: Clone, + MutData: Clone, +{ + type ImmPull = ImmData; + + #[inline(always)] + fn insert( + &mut self, + Data { imm_data, mut_data }: Data, + ) -> (::Key, InsertAction) { + let curr_max = self.inner.arena.len(); + let key = self.inner.arena.insert(Data { + imm_data, + mut_data: TransData { + visible: true, + mut_data, + }, + }); + self.inner.visible_size += 1; + let index = key.slot() as usize; + ( + key, + if index == curr_max { + InsertAction::Append + } else { + InsertAction::Place(index) + }, + ) + } + + #[inline(always)] + fn pull(&mut self, key: ::Key) -> Access { + match self.inner.arena.remove(key) { + Some(Data { + imm_data, + mut_data: + TransData { + visible: true, + mut_data, + }, + }) => { + self.inner.visible_size -= 1; + Ok(Entry { + index: key.slot() as usize, + data: Data { imm_data, mut_data }, + }) + } + _ => Err(KeyError), + } + } + + fn conv_pull(pull: Self::ImmPull) -> ImmData { + pull + } +} + +impl<'imm, ImmData, MutData> PrimaryWindowHide<'imm, ImmData, MutData> + for Window<'imm, PrimaryThunderDomeTrans> +where + ImmData: Clone, + MutData: Clone, +{ + #[inline(always)] + fn hide(&mut self, key: ::Key) -> Result<(), KeyError> { + match self.inner.arena.get_mut(key) { + Some(Data { + imm_data: _, + 
mut_data: + TransData { + visible, + mut_data: _, + }, + }) if *visible => { + *visible = false; + self.inner.visible_size -= 1; + Ok(()) + } + _ => Err(KeyError), + } + } + + #[inline(always)] + fn reveal(&mut self, key: ::Key) -> Result<(), KeyError> { + match self.inner.arena.get_mut(key) { + Some(Data { + imm_data: _, + mut_data: + TransData { + visible, + mut_data: _, + }, + }) if !*visible => { + *visible = true; + self.inner.visible_size += 1; + Ok(()) + } + _ => Err(KeyError), + } + } +} diff --git a/crates/pulpit/src/lib.rs b/crates/pulpit/src/lib.rs new file mode 100644 index 0000000..930e494 --- /dev/null +++ b/crates/pulpit/src/lib.rs @@ -0,0 +1,14 @@ +#![allow(clippy::needless_doctest_main)] +#![doc = include_str!("../README.md")] + +pub mod access; +pub mod column; +pub mod value; + +pub mod gen { + pub use pulpit_gen::*; +} + +pub mod macros { + pub use pulpit_macro::*; +} diff --git a/crates/pulpit/src/value.rs b/crates/pulpit/src/value.rs new file mode 100644 index 0000000..872bea9 --- /dev/null +++ b/crates/pulpit/src/value.rs @@ -0,0 +1,108 @@ +//! # Independent Mutable & Immutable Borrows +//! Using the window pattern, we can prevent mutable access to a value field, +//! while assigning the correct lifetime to borrows of the immutable field. +//! +//! | Part | Access | +//! |-----------|-------------------------------------------------------------------------------------------| +//! | Immutable | Borrows can last lifetime of object, and are independent from borrows of the mutable side | +//! | Mutable | Normal borrow rules apply | +//! +//! This pattern is used for the [`crate::column`] interfaces. +//! +//! ## Alternative Designs +//! ### Unsafe Interface +//! Using unsafe to provide a pointer to immutable data and exposing this unsafe interface to the user (bad api design) +//! +//! ### Dummy Objects +//! Providing unbounded lifetimes, or lifetimes bound to a separate dummy object. +//! +//! ### Split Data +//! Splitting the immutable data to a separate (and immutable) data structure solves all safety +//! issues elegantly, however requiring separate allocations for the immutable data has performance +//! consequences. +//! +//! ## Pattern Implementation +//! A single data store object contains the data and is mutably borrowed by a window object. +//! - The window holds the only mutable reference, so has exclusive access. +//! - The lifetime of the window is bound to the contained mutable reference, which it can now +//! internally use to supply lifetime bounds for borrows of the immutable part. +//! +//! ## Examples +//! ### Valid Usage +//! The following demonstrates mutable and immutable references being managed independently. +//! +//! ``` +//! # use pulpit::value::{Value, ValueWindow}; +//! # fn test(imm_data: ImmData, mut_data: MutData) { +//! let mut val = Value{imm_data, mut_data}; +//! let mut window = ValueWindow::new_window(&mut val); +//! let (imm_ref, imm_mut_ref) = window.brw(); +//! let mut_ref = window.brw_mut(); +//! +//! let imm_available = imm_ref; // still available +//! let mut_available = mut_ref; // new mutable taken over from imm_mut_ref +//! # } +//! ``` +//! +//! ### Conflicting borrows on the mutable side +//! This demonstrates borrow checking working properly still, but for the mutable member. +//! +//! ```compile_fail,E0502 +//! # use pulpit::value::{Value, ValueWindow}; +//! # fn test2(imm_data: ImmData, mut_data: MutData) { +//! let mut val = Value{imm_data, mut_data}; +//! let mut window = ValueWindow::new_window(&mut val); +//! 
let (imm_ref, imm_mut_ref) = window.brw(); +//! let mut_ref = window.brw_mut(); // ERROR! borrow of mut_ref not possible as imm_mut_ref used later +//! +//! let value_imm = imm_ref; // still available +//! let old_mut_unavailable = imm_mut_ref; +//! # } +//! ``` +//! +//! ### No Dangling references +//! This demonstrates that the references to the immutable part are restricted to be valid by the window's mutable reference. +//! ```compile_fail,E0597 +//! # use pulpit::value::{Value, ValueWindow}; +//! # fn test3(imm_data: ImmData, mut_data: MutData) { +//! let imm_ref_dangling; +//! { +//! let mut val = Value{imm_data, mut_data}; +//! +//! // ERROR! needs to borrow long enough for imm_ref_dangling, but col does not live that long +//! let mut window = ValueWindow::new_window(&mut val); +//! +//! let (imm_ref, imm_mut_ref) = window.brw(); +//! let mut_ref = window.brw_mut(); +//! +//! imm_ref_dangling = imm_ref; +//! } +//! let imm_ref_dangling_unavailable = imm_ref_dangling; +//! # } +//! ``` + +pub struct Value { + pub imm_data: ImmData, + pub mut_data: MutData, +} + +pub struct ValueWindow<'imm, ImmData, MutData> { + data: &'imm mut Value, +} + +impl<'imm, ImmData, MutData> ValueWindow<'imm, ImmData, MutData> { + pub fn new_window(val: &'imm mut Value) -> Self { + ValueWindow { data: val } + } + + pub fn brw<'brw>(&'brw self) -> (&'imm ImmData, &'brw MutData) { + ( + unsafe { std::mem::transmute::<&'brw ImmData, &'imm ImmData>(&self.data.imm_data) }, + &self.data.mut_data, + ) + } + + pub fn brw_mut(&mut self) -> &mut MutData { + &mut self.data.mut_data + } +} diff --git a/crates/pulpit/tests/transactions.rs b/crates/pulpit/tests/transactions.rs new file mode 100644 index 0000000..fe6acdf --- /dev/null +++ b/crates/pulpit/tests/transactions.rs @@ -0,0 +1 @@ +// TODO: Tests for transaction abort diff --git a/crates/pulpit_gen/Cargo.toml b/crates/pulpit_gen/Cargo.toml new file mode 100644 index 0000000..651043b --- /dev/null +++ b/crates/pulpit_gen/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "pulpit_gen" +version = "0.1.0" +edition = "2021" + +readme = "README.md" +description = "A crate for generating pulpit supported data tables" +keywords = ["performance", "traits", "arenas"] +categories = ["data"] + +repository.workspace = true +homepage.workspace = true +license-file.workspace = true + +[dependencies] +proc-macro2 = "1.0" +syn = { version = "2.0.45", features = ["full", "extra-traits"] } +quote = "1.0.33" +bimap = "0.6.3" +enumtrait = { path = "../enumtrait" } +quote_debug = { path = "../quote_debug" } +proc-macro-error = "1.0.4" +combi = { path = "../combi" } + +[dev-dependencies] +prettyplease = "0.2" +divan = { git = "https://github.com/OliverKillane/divan.git", branch = "enh/file-output" } +trybuild = "1.0.91" +glob = "0.3.1" \ No newline at end of file diff --git a/crates/pulpit_gen/README.md b/crates/pulpit_gen/README.md new file mode 100644 index 0000000..34eb2ce --- /dev/null +++ b/crates/pulpit_gen/README.md @@ -0,0 +1,3 @@ +## Pulpit Table Generation +The code generation for [pulpit](./../pulpit), contained in this separate crate +so it can be used for [pulpit_macro](./../pulpit_macro). diff --git a/crates/pulpit_gen/src/columns/assoc_blocks.rs b/crates/pulpit_gen/src/columns/assoc_blocks.rs new file mode 100644 index 0000000..0e11117 --- /dev/null +++ b/crates/pulpit_gen/src/columns/assoc_blocks.rs @@ -0,0 +1,106 @@ +use super::*; + +/// An append only column of blocks, with pointer stability. 
+/// No restrictions on the mutable and immutable +pub struct AssocBlocks { + pub block_size: usize, +} + +impl ColKind for AssocBlocks { + fn derives(&self) -> MutImmut> { + MutImmut { + imm_fields: vec![Ident::new("Clone", Span::call_site())], + mut_fields: vec![Ident::new("Clone", Span::call_site())], + } + } + + fn convert_imm(&self, namer: &CodeNamer, imm_fields: &[Field]) -> ImmConversion { + let CodeNamer { + mod_columns_struct_imm_unpacked, + mod_columns_fn_imm_unpack, + mod_columns_struct_imm, + name_phantom_member, + .. + } = namer; + + if imm_fields.is_empty() { + ImmConversion { + imm_unpacked: quote!{ + pub struct #mod_columns_struct_imm_unpacked<'imm> { + pub #name_phantom_member: std::marker::PhantomData<&'imm ()> + } + }.into(), + unpacker: quote!{ + pub fn #mod_columns_fn_imm_unpack<'imm>(_: &'imm #mod_columns_struct_imm) -> #mod_columns_struct_imm_unpacked<'imm> { + #mod_columns_struct_imm_unpacked { #name_phantom_member: std::marker::PhantomData } + } + }.into() + } + } else { + let field_defs = imm_fields.iter().map(|Field { name, ty }| { + quote! { + pub #name : &'imm #ty + } + }); + let fields = imm_fields.iter().map(|Field { name, ty: _ }| name); + let unpack_fields = fields.clone(); + + ImmConversion { + imm_unpacked: quote!{ + pub struct #mod_columns_struct_imm_unpacked<'imm> { + #(#field_defs),* + } + }.into(), + unpacker: quote!{ + pub fn #mod_columns_fn_imm_unpack<'imm>(#mod_columns_struct_imm { #(#fields),* }: &'imm #mod_columns_struct_imm) -> #mod_columns_struct_imm_unpacked<'imm> { + #mod_columns_struct_imm_unpacked { #(#unpack_fields),* } + } + }.into() + } + } + } + + fn generate_base_type(&self, namer: &CodeNamer) -> Tokens { + let pulpit_path = &namer.pulpit_path; + quote! { #pulpit_path::column::AssocBlocks }.into() + } + + fn generate_generics( + &self, + _namer: &CodeNamer, + imm_type: Tokens, + mut_type: Tokens, + ) -> TokenStream { + let Self { block_size } = self; + quote! { <#imm_type, #mut_type, #block_size> } + } + + fn requires_get_lifetime(&self) -> bool { + true + } + + fn convert_imm_type(&self, field: &Field, namer: &CodeNamer) -> Tokens { + let ty = &field.ty; + let lifetime = &namer.lifetime_imm; + quote!(&#lifetime #ty).into() + } + + fn check_column_application( + &self, + error_span: Span, + _imm_fields: &[Field], + _mut_fields: &[Field], + _transactions: bool, + deletions: bool, + ) -> LinkedList { + if deletions { + LinkedList::from([Diagnostic::spanned( + error_span, + Level::Error, + String::from("AssocBlocks does not support deletions"), + )]) + } else { + LinkedList::new() + } + } +} diff --git a/crates/pulpit_gen/src/columns/assoc_vec.rs b/crates/pulpit_gen/src/columns/assoc_vec.rs new file mode 100644 index 0000000..395c7ad --- /dev/null +++ b/crates/pulpit_gen/src/columns/assoc_vec.rs @@ -0,0 +1,28 @@ +use super::*; + +pub struct AssocVec; + +impl ColKind for AssocVec { + fn derives(&self) -> MutImmut> { + MutImmut { + imm_fields: vec![Ident::new("Clone", Span::call_site())], + mut_fields: vec![Ident::new("Clone", Span::call_site())], + } + } + + fn generate_base_type(&self, namer: &CodeNamer) -> Tokens { + let pulpit_path = &namer.pulpit_path; + quote! 
{ #pulpit_path::column::AssocVec }.into() + } + + fn check_column_application( + &self, + _error_span: Span, + _imm_fields: &[Field], + _mut_fields: &[Field], + _transactions: bool, + _deletions: bool, + ) -> LinkedList { + LinkedList::new() + } +} diff --git a/crates/pulpit_gen/src/columns/mod.rs b/crates/pulpit_gen/src/columns/mod.rs new file mode 100644 index 0000000..493ab36 --- /dev/null +++ b/crates/pulpit_gen/src/columns/mod.rs @@ -0,0 +1,131 @@ +use std::collections::LinkedList; + +use crate::{ + groups::{Field, MutImmut}, + namer::CodeNamer, +}; +use proc_macro2::{Span, TokenStream}; +use proc_macro_error::{Diagnostic, Level}; +use quote::quote; +use quote_debug::Tokens; +use syn::{Ident, ItemFn, ItemStruct, Type}; + +pub struct ImmConversion { + pub imm_unpacked: Tokens, + pub unpacker: Tokens, +} + +// TODO: remove the strongly typed interface and replace with two enum types + +#[enumtrait::store(col_kind_trait)] +pub trait ColKind { + /// Required to check columns can be applied with the values provided. + /// - Adapters take no values + fn check_column_application( + &self, + error_span: Span, + imm_fields: &[Field], + mut_fields: &[Field], + transactions: bool, + deletions: bool, + ) -> LinkedList; + + fn derives(&self) -> MutImmut>; + fn generate_column_type( + &self, + namer: &CodeNamer, + imm_type: Tokens, + mut_type: Tokens, + ) -> Tokens { + let base_type = self.generate_base_type(namer); + let generics = self.generate_generics(namer, imm_type, mut_type); + quote! (#base_type #generics).into() + } + + fn generate_base_type(&self, namer: &CodeNamer) -> Tokens; + + fn generate_generics( + &self, + _namer: &CodeNamer, + imm_type: Tokens, + mut_type: Tokens, + ) -> TokenStream { + quote! {<#imm_type, #mut_type>} + } + + fn requires_get_lifetime(&self) -> bool { + false + } + fn convert_imm(&self, namer: &CodeNamer, imm_fields: &[Field]) -> ImmConversion { + let field_defs = imm_fields.iter().map(|Field { name, ty }| { + quote! { + pub #name : #ty + } + }); + let fields = imm_fields.iter().map(|Field { name, ty: _ }| name); + let unpack_fields = fields.clone(); + + let CodeNamer { + mod_columns_struct_imm_unpacked, + mod_columns_fn_imm_unpack, + mod_columns_struct_imm, + .. + } = namer; + + ImmConversion { + imm_unpacked: quote! { + pub struct #mod_columns_struct_imm_unpacked { + #(#field_defs),* + } + } + .into(), + unpacker: quote! 
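+            // A sketch of what this default unpacker expands to with the
+            // default `CodeNamer` identifiers, for a column with a single
+            // immutable field `x: i32` (the field is illustrative):
+            //
+            //     pub fn imm_unpack(Imm { x }: Imm) -> ImmUnpack {
+            //         ImmUnpack { x }
+            //     }
+            //
+            // The default conversion moves the immutable fields out by value;
+            // pointer-stable columns override `convert_imm` to hand out
+            // `&'imm` references instead.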
{ + pub fn #mod_columns_fn_imm_unpack(#mod_columns_struct_imm { #(#fields),* }: #mod_columns_struct_imm) -> #mod_columns_struct_imm_unpacked { + #mod_columns_struct_imm_unpacked { #(#unpack_fields),* } + } + } + .into(), + } + } + fn convert_imm_type(&self, field: &Field, _namer: &CodeNamer) -> Tokens { + field.ty.clone() + } +} + +mod primary_retain; +pub use primary_retain::*; +mod assoc_vec; +pub use assoc_vec::*; +mod primary_gen_arena; +pub use primary_gen_arena::*; +mod primary_thunderdome; +pub use primary_thunderdome::*; +mod assoc_blocks; +pub use assoc_blocks::*; +mod primary_thunderdome_trans; +pub use primary_thunderdome_trans::*; + +#[enumtrait::quick_enum] +#[enumtrait::quick_from] +#[enumtrait::store(enum_primary)] +pub enum Primary { + AssocBlocks, + PrimaryRetain, + PrimaryThunderdome, + PrimaryThunderDomeTrans, + PrimaryGenArena, +} + +#[enumtrait::impl_trait(col_kind_trait for enum_primary)] +impl ColKind for Primary {} + +#[enumtrait::quick_enum] +#[enumtrait::quick_from] +#[enumtrait::store(enum_associated)] +pub enum Associated { + AssocVec, + AssocBlocks, +} + +#[enumtrait::impl_trait(col_kind_trait for enum_associated)] +impl ColKind for Associated {} diff --git a/crates/pulpit_gen/src/columns/primary_gen_arena.rs b/crates/pulpit_gen/src/columns/primary_gen_arena.rs new file mode 100644 index 0000000..3c79480 --- /dev/null +++ b/crates/pulpit_gen/src/columns/primary_gen_arena.rs @@ -0,0 +1,36 @@ +use super::*; + +pub struct PrimaryGenArena; + +impl ColKind for PrimaryGenArena { + fn derives(&self) -> MutImmut> { + MutImmut { + imm_fields: vec![Ident::new("Clone", Span::call_site())], + mut_fields: vec![Ident::new("Clone", Span::call_site())], + } + } + + fn generate_base_type(&self, namer: &CodeNamer) -> Tokens { + let CodeNamer { pulpit_path, .. } = namer; + quote! { #pulpit_path::column::PrimaryGenerationalArena }.into() + } + + fn check_column_application( + &self, + error_span: Span, + _imm_fields: &[Field], + _mut_fields: &[Field], + transactions: bool, + _deletions: bool, + ) -> LinkedList { + if transactions { + LinkedList::from([Diagnostic::spanned( + error_span, + Level::Error, + String::from("primaryGenArena does not support transactions"), + )]) + } else { + LinkedList::new() + } + } +} diff --git a/crates/pulpit_gen/src/columns/primary_retain.rs b/crates/pulpit_gen/src/columns/primary_retain.rs new file mode 100644 index 0000000..a073784 --- /dev/null +++ b/crates/pulpit_gen/src/columns/primary_retain.rs @@ -0,0 +1,94 @@ +use super::*; + +/// an arena that supports deletions/pulls, but stores immutable data in a separate (pointer stable) arena. +/// - Can use pointer to the immutable data to get the mutable data. +pub struct PrimaryRetain { + pub block_size: usize, +} + +impl ColKind for PrimaryRetain { + fn derives(&self) -> MutImmut> { + MutImmut { + imm_fields: vec![Ident::new("Clone", Span::call_site())], + mut_fields: vec![Ident::new("Clone", Span::call_site())], + } + } + + fn convert_imm(&self, namer: &CodeNamer, imm_fields: &[Field]) -> ImmConversion { + let CodeNamer { + mod_columns_struct_imm_unpacked, + mod_columns_fn_imm_unpack, + mod_columns_struct_imm, + .. + } = namer; + + if imm_fields.is_empty() { + unreachable!("Cannot run on empty fields") + } else { + let field_defs = imm_fields.iter().map(|Field { name, ty }| { + quote! 
{ + pub #name : &'imm #ty + } + }); + + let fields = imm_fields.iter().map(|Field { name, ty: _ }| name); + let unpack_fields = fields.clone(); + + ImmConversion { + imm_unpacked: quote!{ + pub struct #mod_columns_struct_imm_unpacked<'imm> { + #(#field_defs),* + } + }.into(), + unpacker: quote!{ + pub fn #mod_columns_fn_imm_unpack<'imm>(#mod_columns_struct_imm { #(#fields),* }: &'imm #mod_columns_struct_imm) -> #mod_columns_struct_imm_unpacked<'imm> { + #mod_columns_struct_imm_unpacked { #(#unpack_fields),* } + } + }.into() + } + } + } + + fn generate_base_type(&self, namer: &CodeNamer) -> Tokens { + let pulpit_path = &namer.pulpit_path; + quote! { #pulpit_path::column::PrimaryRetain }.into() + } + + fn generate_generics( + &self, + _namer: &CodeNamer, + imm_type: Tokens, + mut_type: Tokens, + ) -> TokenStream { + let Self { block_size } = self; + quote! { <#imm_type, #mut_type, #block_size> } + } + + fn requires_get_lifetime(&self) -> bool { + true + } + + fn convert_imm_type(&self, field: &Field, namer: &CodeNamer) -> Tokens { + let ty = &field.ty; + let lifetime = &namer.lifetime_imm; + quote!(&#lifetime #ty).into() + } + + fn check_column_application( + &self, + _error_span: Span, + imm_fields: &[Field], + _mut_fields: &[Field], + _transactions: bool, + _deletions: bool, + ) -> LinkedList { + if imm_fields.is_empty() { + LinkedList::from([Diagnostic::new( + Level::Error, + String::from("PrimaryRetain requires at least one immutable field"), + )]) + } else { + LinkedList::new() + } + } +} diff --git a/crates/pulpit_gen/src/columns/primary_thunderdome.rs b/crates/pulpit_gen/src/columns/primary_thunderdome.rs new file mode 100644 index 0000000..ce80a53 --- /dev/null +++ b/crates/pulpit_gen/src/columns/primary_thunderdome.rs @@ -0,0 +1,37 @@ +use super::*; + +/// A column that uses `thunderdome` +pub struct PrimaryThunderdome; + +impl ColKind for PrimaryThunderdome { + fn derives(&self) -> MutImmut> { + MutImmut { + imm_fields: vec![Ident::new("Clone", Span::call_site())], + mut_fields: vec![Ident::new("Clone", Span::call_site())], + } + } + + fn generate_base_type(&self, namer: &CodeNamer) -> Tokens { + let CodeNamer { pulpit_path, .. } = namer; + quote! { #pulpit_path::column::PrimaryThunderDome }.into() + } + + fn check_column_application( + &self, + error_span: Span, + _imm_fields: &[Field], + _mut_fields: &[Field], + transactions: bool, + _deletions: bool, + ) -> LinkedList { + if transactions { + LinkedList::from([Diagnostic::spanned( + error_span, + Level::Error, + String::from("PrimaryThunderdome does not support transactions"), + )]) + } else { + LinkedList::new() + } + } +} diff --git a/crates/pulpit_gen/src/columns/primary_thunderdome_trans.rs b/crates/pulpit_gen/src/columns/primary_thunderdome_trans.rs new file mode 100644 index 0000000..b383594 --- /dev/null +++ b/crates/pulpit_gen/src/columns/primary_thunderdome_trans.rs @@ -0,0 +1,29 @@ +use super::*; + +/// A column that uses `thunderdome` +pub struct PrimaryThunderDomeTrans; + +impl ColKind for PrimaryThunderDomeTrans { + fn derives(&self) -> MutImmut> { + MutImmut { + imm_fields: vec![Ident::new("Clone", Span::call_site())], + mut_fields: vec![Ident::new("Clone", Span::call_site())], + } + } + + fn generate_base_type(&self, namer: &CodeNamer) -> Tokens { + let CodeNamer { pulpit_path, .. } = namer; + quote! 
{ #pulpit_path::column::PrimaryThunderDomeTrans }.into() + } + + fn check_column_application( + &self, + _error_span: Span, + _imm_fields: &[Field], + _mut_fields: &[Field], + _transactions: bool, + _deletions: bool, + ) -> LinkedList { + LinkedList::new() + } +} diff --git a/crates/pulpit_gen/src/groups.rs b/crates/pulpit_gen/src/groups.rs new file mode 100644 index 0000000..c0f8c53 --- /dev/null +++ b/crates/pulpit_gen/src/groups.rs @@ -0,0 +1,326 @@ +use std::{collections::HashMap, iter::once}; + +use proc_macro2::TokenStream; +use quote::quote; +use quote_debug::Tokens; +use syn::{Ident, ItemImpl, ItemMod, ItemStruct, ItemType, Type}; + +use crate::{ + columns::{Associated, ColKind, ImmConversion, Primary}, + namer::CodeNamer, +}; + +pub type FieldName = Ident; + +pub struct MutImmut { + pub imm_fields: Data, + pub mut_fields: Data, +} + +pub struct Field { + pub name: FieldName, + pub ty: Tokens, +} + +pub struct Group { + pub col: Col, + pub fields: MutImmut>, +} + +impl Group{ + fn get_type<'a>(&'a self, index: &FieldIndexInner) -> Option<&'a Tokens> { + if index.imm { + &self.fields.imm_fields + } else { + &self.fields.mut_fields + } + .get(index.field_num) + .map(|f| &f.ty) + } + + fn get_members( + &self, + placement: impl Fn(FieldIndexInner) -> FieldIndex, + mapping: &mut HashMap, + ) { + for (ind, field) in self.fields.imm_fields.iter().enumerate() { + mapping.insert( + field.name.clone(), + placement(FieldIndexInner { + imm: true, + field_num: ind, + }), + ); + } + for (ind, field) in self.fields.mut_fields.iter().enumerate() { + mapping.insert( + field.name.clone(), + placement(FieldIndexInner { + imm: false, + field_num: ind, + }), + ); + } + } +} + +pub struct GroupConfig { + pub primary: Group, + pub assoc: Vec>, +} + +impl From for Groups { + fn from(GroupConfig { primary, assoc }: GroupConfig) -> Self { + let mut idents = HashMap::new(); + primary.get_members(FieldIndex::Primary, &mut idents); + for (ind, group) in assoc.iter().enumerate() { + group.get_members( + |index| FieldIndex::Assoc { + assoc_ind: ind, + inner: index, + }, + &mut idents, + ); + } + + Groups { + idents, + primary, + assoc, + } + } +} + +pub struct Groups { + pub idents: HashMap, + pub primary: Group, + pub assoc: Vec>, +} + +impl Groups { + // get type from field + + pub fn get_field_index(&self, field: &FieldName) -> Option<&FieldIndex> { + self.idents.get(field) + } + + pub fn get_type(&self, index: &FieldIndex) -> Option<&Tokens> { + match index { + FieldIndex::Primary(inner) => self.primary.get_type(inner), + FieldIndex::Assoc { assoc_ind, inner } => { + self.assoc.get(*assoc_ind).and_then(|f| f.get_type(inner)) + } + } + } + + pub fn get_typefield(&self, field: &FieldName) -> Option<&Tokens> { + self.get_field_index(field).and_then(|f| self.get_type(f)) + } +} + +pub struct FieldIndexInner { + pub imm: bool, + pub field_num: usize, +} + +pub enum FieldIndex { + Primary(FieldIndexInner), + Assoc { + assoc_ind: usize, + inner: FieldIndexInner, + }, +} + +impl FieldIndex { + pub fn is_imm(&self) -> bool { + match self { + FieldIndex::Primary(inner) => inner.imm, + FieldIndex::Assoc { inner, .. } => inner.imm, + } + } +} + +pub struct GroupsDef { + pub columns_struct: Tokens, + pub columns_impl: Tokens, + pub window_holder_struct: Tokens, +} + +impl Group{ + fn column_type(&self, group_name: Ident, namer: &CodeNamer) -> Tokens { + let CodeNamer { + mod_columns_struct_imm, + mod_columns_struct_mut, + .. 
+ } = namer; + + let MutImmut { + imm_fields: imm_derives, + mut_fields: mut_derives, + } = self.col.derives(); + + fn get_tks(Field { name, ty }: &Field) -> TokenStream { + quote!(pub #name: #ty) + } + + let MutImmut { + imm_fields, + mut_fields, + } = &self.fields; + let imm_fields = imm_fields.iter().map(get_tks); + let mut_fields = mut_fields.iter().map(get_tks); + + let ImmConversion { + imm_unpacked, + unpacker, + } = self.col.convert_imm(namer, &self.fields.imm_fields); + + quote! { + pub mod #group_name { + #[derive(#(#imm_derives),*)] + pub struct #mod_columns_struct_imm { + #(#imm_fields),* + } + + #[derive(#(#mut_derives),*)] + pub struct #mod_columns_struct_mut { + #(#mut_fields),* + } + + #imm_unpacked + + #unpacker + } + } + .into() + } +} + +impl Groups { + pub fn column_types(&self, namer: &CodeNamer) -> Tokens { + let mod_columns = &namer.mod_columns; + + let primary_mod = self + .primary + .column_type(namer.name_primary_column.clone(), namer); + let assoc_mods = self + .assoc + .iter() + .enumerate() + .map(|(ind, grp)| grp.column_type(namer.name_assoc_column(ind), namer)); + quote! { + mod #mod_columns { + //! Column types to be used for storage in each column. + #primary_mod + #(#assoc_mods)* + } + } + .into() + } + + pub fn key_type(&self, namer: &CodeNamer) -> Tokens { + let CodeNamer { + mod_columns, + name_primary_column, + mod_columns_struct_imm, + mod_columns_struct_mut, + pulpit_path, + type_key, + .. + } = namer; + + let primary_type = self.primary.col.generate_column_type( + namer, + quote!(#mod_columns::#name_primary_column::#mod_columns_struct_imm).into(), + quote!(#mod_columns::#name_primary_column::#mod_columns_struct_mut).into(), + ); + quote! { + /// The key for accessing rows (delete, update, get) + pub type #type_key = <#primary_type as #pulpit_path::column::Keyable>::Key; + } + .into() + } + + pub fn columns_definition(&self, namer: &CodeNamer) -> GroupsDef { + let CodeNamer { + mod_columns, + name_primary_column, + mod_columns_struct_imm, + mod_columns_struct_mut, + pulpit_path, + struct_column_holder, + struct_window_holder, + .. + } = namer; + + let num_members = self.assoc.len() + 1; + let mut col_defs = Vec::with_capacity(num_members); + let mut window_defs = Vec::with_capacity(num_members); + let mut converts = Vec::with_capacity(num_members); + let mut news = Vec::with_capacity(num_members); + + for (ty, ty_no_gen, member) in self + .assoc + .iter() + .enumerate() + .map(|(ind, Group { col, fields: _ })| { + let assoc_name = namer.name_assoc_column(ind); + ( + col.generate_column_type( + namer, + quote!(#mod_columns::#assoc_name::#mod_columns_struct_imm).into(), + quote!(#mod_columns::#assoc_name::#mod_columns_struct_mut).into(), + ), + col.generate_base_type(namer), + assoc_name, + ) + }) + .chain(once(( + self.primary.col.generate_column_type( + namer, + quote!(#mod_columns::#name_primary_column::#mod_columns_struct_imm).into(), + quote!(#mod_columns::#name_primary_column::#mod_columns_struct_mut).into(), + ), + self.primary.col.generate_base_type(namer), + name_primary_column.clone(), + ))) + { + col_defs.push(quote!(#member: #ty)); + window_defs + .push(quote!(#member: <#ty as #pulpit_path::column::Column>::WindowKind<'imm>)); + converts.push(quote!(#member: self.#member.window())); + news.push(quote!(#member: #ty_no_gen::new(size_hint))); + } + + GroupsDef { + columns_struct: quote! { + struct #struct_column_holder { + #(#col_defs),* + } + } + .into(), + columns_impl: quote! 
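+            // With the default `CodeNamer` names and a single associated
+            // column, this quote! block expands to roughly the following
+            // (sketch; the column kinds behind `new` are whichever were
+            // selected for the table):
+            //
+            //     impl ColumnHolder {
+            //         fn new(size_hint: usize) -> Self {
+            //             Self {
+            //                 assoc_0: AssocVec::new(size_hint),
+            //                 primary: PrimaryRetain::new(size_hint),
+            //             }
+            //         }
+            //         fn window(&mut self) -> WindowHolder<'_> {
+            //             WindowHolder {
+            //                 assoc_0: self.assoc_0.window(),
+            //                 primary: self.primary.window(),
+            //             }
+            //         }
+            //     }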
{
+                impl #struct_column_holder {
+                    fn new(size_hint: usize) -> Self {
+                        Self {
+                            #(#news),*
+                        }
+                    }
+
+                    fn window(&mut self) -> #struct_window_holder<'_> {
+                        #struct_window_holder {
+                            #(#converts),*
+                        }
+                    }
+                }
+            }
+            .into(),
+            window_holder_struct: quote! {
+                struct #struct_window_holder<'imm> {
+                    #(#window_defs),*
+                }
+            }
+            .into(),
+        }
+    }
+}
diff --git a/crates/pulpit_gen/src/lib.rs b/crates/pulpit_gen/src/lib.rs
new file mode 100644
index 0000000..ba885f7
--- /dev/null
+++ b/crates/pulpit_gen/src/lib.rs
@@ -0,0 +1,12 @@
+#![feature(associated_const_equality)]
+
+pub mod columns;
+pub mod groups;
+pub mod limit;
+pub mod macros;
+pub mod namer;
+pub mod operations;
+pub mod predicates;
+pub mod selector;
+pub mod table;
+pub mod uniques;
diff --git a/crates/pulpit_gen/src/limit.rs b/crates/pulpit_gen/src/limit.rs
new file mode 100644
index 0000000..b16a512
--- /dev/null
+++ b/crates/pulpit_gen/src/limit.rs
@@ -0,0 +1,34 @@
+use quote::quote;
+use quote_debug::Tokens;
+use syn::{Expr, Ident};
+
+pub enum LimitKind {
+    /// Used when the limit provided is known to the gen macro.
+    /// TODO: Perform optimisations on the size of the indices
+    Literal(usize),
+
+    /// Used to provide generic-level information for pulpit columns to use.
+    /// - e.g. A const index for the size of the column
+    /// TODO: Implement a pulpit table that is a single buffer
+    ConstVal(Tokens<Expr>),
+}
+
+pub struct Limit {
+    pub value: LimitKind,
+    pub alias: Ident,
+}
+
+impl Limit {
+    pub fn generate_check(&self) -> Tokens<Expr> {
+        match &self.value {
+            LimitKind::Literal(l) => quote! {#l},
+            LimitKind::ConstVal(expr) => quote! {
+                {
+                    const VALUE: usize = { (#expr) as usize };
+                    VALUE
+                }
+            },
+        }
+        .into()
+    }
+}
diff --git a/crates/pulpit_gen/src/macros/mod.rs b/crates/pulpit_gen/src/macros/mod.rs
new file mode 100644
index 0000000..79875aa
--- /dev/null
+++ b/crates/pulpit_gen/src/macros/mod.rs
@@ -0,0 +1,2 @@
+pub mod raw_interface;
+pub mod simple;
diff --git a/crates/pulpit_gen/src/macros/raw_interface.rs b/crates/pulpit_gen/src/macros/raw_interface.rs
new file mode 100644
index 0000000..e487528
--- /dev/null
+++ b/crates/pulpit_gen/src/macros/raw_interface.rs
@@ -0,0 +1,26 @@
+//! # Direct control over table structure
+//! A raw interface allowing users to choose the data structure used for each
+//! part of the table.
+
+use proc_macro2::TokenStream;
+
+/// Provides raw access to generate table structures
+/// ```ignore
+/// raw_interface!{
+///     primary: {
+///         mut field: type,
+///         field: type,
+///     }
+///     associated: [
+///         {
+///             mut field: type,
+///             field: type,
+///         }
+///     ],
+///     updates: [
+///
+///     ],
+///     deletions,
+///     transactions,
+/// }
+/// ```
+pub fn raw_interface(_: TokenStream) {}
diff --git a/crates/pulpit_gen/src/macros/simple.rs b/crates/pulpit_gen/src/macros/simple.rs
new file mode 100644
index 0000000..6105ce7
--- /dev/null
+++ b/crates/pulpit_gen/src/macros/simple.rs
@@ -0,0 +1,254 @@
+//! # A simple interface for generating tables.
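+//!
+//! An illustrative invocation (all table, field, and alias names are made
+//! up, and this assumes the function below is exposed as a `simple!`
+//! proc-macro by a wrapper crate):
+//! ```ignore
+//! simple! {
+//!     fields {
+//!         id: usize @unique(unique_id),
+//!         score: f32,
+//!     },
+//!     updates {
+//!         update_score: [score],
+//!     },
+//!     predicates {
+//!         positive_score: *score >= 0.0,
+//!     },
+//!     limit { None },
+//!     transactions: on,
+//!     deletions: off,
+//!     name: my_table
+//! }
+//! ```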
+//! TODO: improve this using the new [`combi::tokens::options`] parser
+
+use std::collections::{HashMap, HashSet, LinkedList};
+
+use combi::{
+    core::{choice, mapsuc, nothing, recover, seq, seqdiff},
+    logical::or,
+    macros::{choices, seqs},
+    tokens::{
+        basic::{
+            collectuntil, getident, gettoken, isempty, matchident, matchpunct, peekident,
+            peekpunct, recovgroup, terminal,
+        },
+        derived::listseptrailing,
+        error::{error, expectederr},
+        recovery::until,
+        TokenDiagnostic, TokenIter, TokenParser,
+    },
+    Combi,
+};
+use proc_macro2::{Span, TokenStream};
+use proc_macro_error::{Diagnostic, Level};
+use syn::Ident;
+
+use crate::{
+    groups::Field,
+    limit::{Limit, LimitKind},
+    operations::update::Update,
+    predicates::Predicate,
+    selector::SelectOperations,
+    uniques::Unique,
+};
+
+struct ASTField {
+    field_kind: Field,
+    unique: Option<Ident>,
+}
+
+fn comma_after<T>(inp: impl TokenParser<T>) -> impl TokenParser<T> {
+    mapsuc(
+        seq(recover(inp, until(peekpunct(','))), matchpunct(',')),
+        |(fields, _)| fields,
+    )
+}
+
+fn fields_parser() -> impl TokenParser<Vec<ASTField>> {
+    let unique_parse = mapsuc(
+        seqs!(
+            matchpunct('@'),
+            matchident("unique"),
+            recovgroup(proc_macro2::Delimiter::Parenthesis, getident())
+        ),
+        |(_, (_, id))| Some(id),
+    );
+    let inner = listseptrailing(
+        ',',
+        mapsuc(
+            seqs!(
+                getident(),
+                matchpunct(':'),
+                collectuntil(or(peekpunct(','), peekpunct('@'))),
+                choice(peekpunct('@'), unique_parse, mapsuc(nothing(), |()| None))
+            ),
+            |(name, (_, (ty, unique)))| ASTField {
+                field_kind: Field {
+                    name,
+                    ty: ty.into(),
+                },
+                unique,
+            },
+        ),
+    );
+    expectederr(named_parse(
+        "fields",
+        recovgroup(proc_macro2::Delimiter::Brace, inner),
+    ))
+}
+
+fn parse_on_off(name: &'static str) -> impl TokenParser<bool> {
+    mapsuc(
+        seqs!(
+            matchident(name),
+            matchpunct(':'),
+            choices!(
+                peekident("on") => mapsuc(matchident("on"), |_| true),
+                peekident("off") => mapsuc(matchident("off"), |_| false),
+                otherwise => error(gettoken, |t| Diagnostic::spanned(t.span(), Level::Error, "Expected `on` or `off`".to_owned()))
+            )
+        ),
+        |(_, (_, switch))| switch,
+    )
+}
+
+fn parse_updates() -> impl TokenParser<Vec<Update>> {
+    let update_parser = mapsuc(
+        seqs!(
+            getident(),
+            matchpunct(':'),
+            recovgroup(
+                proc_macro2::Delimiter::Bracket,
+                listseptrailing(',', getident())
+            )
+        ),
+        |(alias, (_, fields))| Update { alias, fields },
+    );
+    named_parse(
+        "updates",
+        recovgroup(
+            proc_macro2::Delimiter::Brace,
+            listseptrailing(',', update_parser),
+        ),
+    )
+}
+
+fn parse_predicates() -> impl TokenParser<Vec<Predicate>> {
+    let predicate_parser = mapsuc(
+        seqs!(getident(), matchpunct(':'), collectuntil(peekpunct(','))),
+        |(alias, (_, tokens))| Predicate {
+            alias,
+            tokens: tokens.into(),
+        },
+    );
+    named_parse(
+        "predicates",
+        recovgroup(
+            proc_macro2::Delimiter::Brace,
+            listseptrailing(',', predicate_parser),
+        ),
+    )
+}
+
+fn parse_limit() -> impl TokenParser<Option<Limit>> {
+    named_parse(
+        "limit",
+        recovgroup(
+            proc_macro2::Delimiter::Brace,
+            choices!
{
+                peekident("None") => mapsuc(matchident("None"), |_| None),
+                otherwise => mapsuc(
+                    seqs!(
+                        getident(),
+                        matchpunct(':'),
+                        collectuntil(isempty())
+                    ),
+                    |(alias, (_, tks))| Some(Limit { value: LimitKind::ConstVal(tks.into()), alias })
+                )
+            },
+        ),
+    )
+}
+
+fn named_parse<T>(name: &'static str, inner: impl TokenParser<T>) -> impl TokenParser<T> {
+    mapsuc(seq(matchident(name), inner), |(_, data)| data)
+}
+
+fn analyse(
+    fields: Vec<ASTField>,
+    updates: Vec<Update>,
+    predicates: Vec<Predicate>,
+    limit: Option<Limit>,
+    transactions: bool,
+    deletions: bool,
+    name: Ident,
+) -> Result<SelectOperations, LinkedList<Diagnostic>> {
+    let mut seen_access_names: HashSet<Ident> = HashSet::new();
+    let mut field_types = HashMap::new();
+    let mut uniques = Vec::new();
+    let mut errors = LinkedList::new();
+
+    let mut add_duplicate = |curr_name: &Ident, prev_name: &Ident| {
+        errors.push_back(
+            Diagnostic::spanned(
+                curr_name.span(),
+                Level::Error,
+                String::from("Duplicate name usage"),
+            )
+            .span_help(prev_name.span(), String::from("Originally here")),
+        );
+    };
+
+    for pred in &predicates {
+        if let Some(prev_name) = seen_access_names.get(&pred.alias) {
+            add_duplicate(&pred.alias, prev_name)
+        } else {
+            seen_access_names.insert(pred.alias.clone());
+        }
+    }
+
+    if let Some(Limit { alias, .. }) = &limit {
+        if let Some(prev_name) = seen_access_names.get(alias) {
+            add_duplicate(alias, prev_name)
+        } else {
+            seen_access_names.insert(alias.clone());
+        }
+    }
+
+    for ASTField { field_kind, unique } in fields {
+        if let Some(alias) = unique {
+            if let Some(name) = seen_access_names.get(&alias) {
+                add_duplicate(&alias, name);
+            } else {
+                seen_access_names.insert(alias.clone());
+            }
+            uniques.push(Unique {
+                alias,
+                field: field_kind.name.clone(),
+            })
+        }
+
+        if let Some(name) = seen_access_names.get(&field_kind.name) {
+            add_duplicate(&field_kind.name, name);
+        } else {
+            seen_access_names.insert(field_kind.name.clone());
+            field_types.insert(field_kind.name, field_kind.ty);
+        }
+    }
+
+    // Report any collected duplicate-name diagnostics rather than discarding them
+    if !errors.is_empty() {
+        return Err(errors);
+    }
+
+    Ok(SelectOperations {
+        name,
+        transactions,
+        deletions,
+        fields: field_types,
+        uniques,
+        predicates,
+        updates,
+        public: false,
+        limit,
+    })
+}
+
+pub fn simple(input: TokenStream) -> Result<SelectOperations, LinkedList<Diagnostic>> {
+    let parser = seqs!(
+        comma_after(fields_parser()),
+        comma_after(parse_updates()),
+        comma_after(parse_predicates()),
+        comma_after(parse_limit()),
+        comma_after(parse_on_off("transactions")),
+        comma_after(parse_on_off("deletions")),
+        mapsuc(
+            seqs!(matchident("name"), matchpunct(':'), getident()),
+            |(_, (_, name))| name
+        )
+    );
+
+    let (_, res) = mapsuc(seqdiff(parser, terminal), |(o, ())| o)
+        .comp(TokenIter::from(input, Span::call_site()));
+    let (fields, (updates, (predicates, (limit, (transactions, (deletions, name)))))) =
+        res.to_result().map_err(TokenDiagnostic::into_list)?;
+    analyse(
+        fields,
+        updates,
+        predicates,
+        limit,
+        transactions,
+        deletions,
+        name,
+    )
+}
diff --git a/crates/pulpit_gen/src/namer.rs b/crates/pulpit_gen/src/namer.rs
new file mode 100644
index 0000000..8dd32f2
--- /dev/null
+++ b/crates/pulpit_gen/src/namer.rs
@@ -0,0 +1,134 @@
+use proc_macro2::Span;
+use quote::quote;
+use quote_debug::Tokens;
+use syn::{Ident, Lifetime, Path};
+
+pub struct CodeNamer {
+    pub lifetime_imm: Tokens<Lifetime>,
+    pub name_primary_column: Ident,
+    pub name_phantom_member: Ident,
+    pub pulpit_path: Tokens<Path>,
+    pub struct_column_holder: Ident,
+    pub type_key_error: Ident,
+    pub type_key: Ident,
+    pub mod_predicates: Ident,
+    pub struct_uniques_holder: Ident,
+    pub struct_table: Ident,
+    pub struct_table_member_uniques: Ident,
+    pub struct_table_member_transactions: Ident,
+    pub
struct_table_member_columns: Ident, + pub mod_columns: Ident, + pub mod_columns_struct_imm: Ident, + pub mod_columns_struct_mut: Ident, + pub mod_columns_struct_imm_unpacked: Ident, + pub mod_columns_fn_imm_unpack: Ident, + pub mod_transactions: Ident, + pub mod_transactions_enum_logitem: Ident, + pub mod_transactions_enum_update: Ident, + pub mod_transactions_struct_data: Ident, + pub mod_transactions_struct_data_member_log: Ident, + pub mod_transactions_struct_data_member_rollback: Ident, + pub mod_transactions_enum_logitem_variant_update: Ident, + pub mod_transactions_enum_logitem_variant_insert: Ident, + pub mod_transactions_enum_logitem_variant_append: Ident, + pub mod_transactions_enum_logitem_variant_delete: Ident, + pub mod_update: Ident, + pub mod_update_struct_update: Ident, + pub mod_update_enum_error: Ident, + pub mod_borrow: Ident, + pub mod_borrow_struct_borrow: Ident, + pub mod_get: Ident, + pub mod_get_struct_get: Ident, + pub mod_unique: Ident, + pub mod_unique_struct_notfound: Ident, + pub mod_insert: Ident, + pub mod_insert_struct_insert: Ident, + pub mod_insert_enum_error: Ident, + pub struct_unique: Ident, + pub struct_window_holder: Ident, + pub struct_window: Ident, + pub struct_window_method_commit: Ident, + pub struct_window_method_abort: Ident, + pub struct_window_method_get: Ident, + pub struct_window_method_borrow: Ident, + pub struct_window_method_insert: Ident, + pub struct_window_method_delete: Ident, + pub struct_window_method_scan_brw: Ident, + pub struct_window_method_scan_get: Ident, + pub struct_window_method_count: Ident, + pub struct_window_method_reverse_insert: Ident, + pub struct_window_method_delete_hidden: Ident, + pub struct_window_method_restore_hidden: Ident, +} + +fn new_id(id: &str) -> Ident { + Ident::new(id, Span::call_site()) +} + +impl CodeNamer { + pub fn pulpit() -> Self { + Self::new(quote!(pulpit).into()) + } + pub fn new(pulpit_path: Tokens) -> Self { + Self { + lifetime_imm: quote! 
{'imm}.into(), + mod_columns: new_id("column_types"), + name_primary_column: new_id("primary"), + name_phantom_member: new_id("phantom"), + mod_columns_struct_imm: new_id("Imm"), + mod_columns_struct_mut: new_id("Mut"), + mod_columns_struct_imm_unpacked: new_id("ImmUnpack"), + mod_columns_fn_imm_unpack: new_id("imm_unpack"), + pulpit_path, + struct_column_holder: new_id("ColumnHolder"), + struct_window_holder: new_id("WindowHolder"), + mod_update_struct_update: new_id("Update"), + mod_update_enum_error: new_id("UpdateError"), + type_key_error: new_id("KeyError"), + struct_table_member_columns: new_id("columns"), + type_key: new_id("Key"), + mod_predicates: new_id("predicates"), + struct_uniques_holder: new_id("Uniques"), + struct_table_member_uniques: new_id("uniques"), + struct_table_member_transactions: new_id("transactions"), + mod_transactions: new_id("transactions"), + mod_transactions_enum_logitem: new_id("LogItem"), + mod_transactions_enum_update: new_id("Updates"), + mod_update: new_id("updates"), + mod_borrow_struct_borrow: new_id("Borrows"), + struct_window: new_id("Window"), + struct_table: new_id("Table"), + mod_borrow: new_id("borrows"), + mod_get: new_id("get"), + mod_get_struct_get: new_id("Get"), + mod_insert: new_id("insert"), + mod_insert_struct_insert: new_id("Insert"), + mod_insert_enum_error: new_id("Error"), + struct_unique: new_id("Uniques"), + mod_transactions_struct_data: new_id("Data"), + mod_transactions_struct_data_member_log: new_id("log"), + mod_transactions_struct_data_member_rollback: new_id("rollback"), + mod_transactions_enum_logitem_variant_update: new_id("Update"), + mod_transactions_enum_logitem_variant_insert: new_id("Insert"), + mod_transactions_enum_logitem_variant_append: new_id("Append"), + mod_transactions_enum_logitem_variant_delete: new_id("Delete"), + struct_window_method_commit: new_id("commit"), + struct_window_method_abort: new_id("abort"), + struct_window_method_get: new_id("get"), + struct_window_method_borrow: new_id("borrow"), + struct_window_method_insert: new_id("insert"), + struct_window_method_delete: new_id("delete"), + struct_window_method_scan_brw: new_id("borrow_indices"), + struct_window_method_scan_get: new_id("get_indices"), + struct_window_method_count: new_id("count"), + struct_window_method_reverse_insert: new_id("reverse_insert"), + struct_window_method_delete_hidden: new_id("delete_hidden"), + struct_window_method_restore_hidden: new_id("restore_hidden"), + mod_unique: new_id("unique"), + mod_unique_struct_notfound: new_id("NotFound"), + } + } + pub fn name_assoc_column(&self, assoc_ind: usize) -> Ident { + Ident::new(&format!("assoc_{assoc_ind}"), Span::call_site()) + } +} diff --git a/crates/pulpit_gen/src/operations/borrow.rs b/crates/pulpit_gen/src/operations/borrow.rs new file mode 100644 index 0000000..0c3fa05 --- /dev/null +++ b/crates/pulpit_gen/src/operations/borrow.rs @@ -0,0 +1,92 @@ +use super::SingleOp; +use crate::{ + groups::{FieldIndex, Groups}, + namer::CodeNamer, +}; +use proc_macro2::TokenStream; +use quote::quote; + +fn generate_borrow_fields<'a>( + groups: &'a Groups, + namer: &'a CodeNamer, +) -> impl Iterator + 'a { + groups.idents.iter().map(|(field_name, field_index)| { + let data = match field_index { + FieldIndex::Primary(_) => namer.name_primary_column.clone(), + FieldIndex::Assoc { + assoc_ind, + inner: _, + } => namer.name_assoc_column(*assoc_ind), + }; + + let imm_access = if field_index.is_imm() { + quote!(imm_data) + } else { + quote!(mut_data) + }; + + quote!(#field_name: 
&#data.#imm_access.#field_name) + }) +} + +pub fn generate(groups: &Groups, namer: &CodeNamer, op_attrs: &TokenStream) -> SingleOp { + let CodeNamer { + type_key, + struct_window, + pulpit_path, + name_primary_column, + type_key_error, + struct_table_member_columns, + mod_borrow, + mod_borrow_struct_borrow, + struct_window_method_borrow, + name_phantom_member, + .. + } = namer; + + let (struct_fields_def, borrowed_fields) = if groups.idents.is_empty() { + ( + quote!(pub #name_phantom_member: std::marker::PhantomData<&'brw ()>), + quote!(#name_phantom_member: std::marker::PhantomData), + ) + } else { + let borrowed_fields = generate_borrow_fields(groups, namer); + let struct_fields = groups.idents.iter().map(|(field_name, field_index)| { + let field_ty = groups.get_type(field_index).unwrap(); + quote!(pub #field_name: &'brw #field_ty) + }); + (quote! {#(#struct_fields),*}, quote!(#(#borrowed_fields),*)) + }; + + let assoc_brws = (0..groups.assoc.len()).map(|ind| { + let name = namer.name_assoc_column(ind); + quote!(let #name = unsafe { self.#struct_table_member_columns.#name.assoc_brw(index) } ) + }); + SingleOp { + op_mod: quote! { + pub mod #mod_borrow { + pub struct #mod_borrow_struct_borrow<'brw> { + #struct_fields_def + } + } + } + .into(), + op_impl: quote! { + impl <'imm> #struct_window<'imm> { + #op_attrs + pub fn #struct_window_method_borrow<'brw>(&'brw self, key: #type_key) -> Result<#mod_borrow::#mod_borrow_struct_borrow<'brw>, #type_key_error> { + let #pulpit_path::column::Entry {index, data: #name_primary_column} = match self.#struct_table_member_columns.#name_primary_column.brw(key) { + Ok(entry) => entry, + Err(_) => return Err(#type_key_error), + }; + #(#assoc_brws;)* + + Ok(#mod_borrow::#mod_borrow_struct_borrow { + #borrowed_fields + }) + } + } + } + .into(), + } +} diff --git a/crates/pulpit_gen/src/operations/count.rs b/crates/pulpit_gen/src/operations/count.rs new file mode 100644 index 0000000..4570cb3 --- /dev/null +++ b/crates/pulpit_gen/src/operations/count.rs @@ -0,0 +1,26 @@ +use super::SingleOpFn; +use crate::namer::CodeNamer; +use proc_macro2::TokenStream; +use quote::quote; + +pub fn generate(namer: &CodeNamer, op_attrs: &TokenStream) -> SingleOpFn { + let CodeNamer { + struct_window_method_count: method_count, + struct_window, + name_primary_column, + struct_table_member_columns: table_member_columns, + .. + } = namer; + + SingleOpFn { + op_impl: quote! 
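+        // With the default `CodeNamer` identifiers this quote! body expands
+        // to (sketch):
+        //
+        //     impl<'imm> Window<'imm> {
+        //         pub fn count(&self) -> usize {
+        //             self.columns.primary.count()
+        //         }
+        //     }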
{ + impl <'imm> #struct_window<'imm> { + #op_attrs + pub fn #method_count(&self) -> usize { + self.#table_member_columns.#name_primary_column.count() + } + } + } + .into(), + } +} diff --git a/crates/pulpit_gen/src/operations/delete.rs b/crates/pulpit_gen/src/operations/delete.rs new file mode 100644 index 0000000..816d521 --- /dev/null +++ b/crates/pulpit_gen/src/operations/delete.rs @@ -0,0 +1,153 @@ +use crate::{ + groups::{FieldIndex, Groups}, + namer::CodeNamer, + operations::SingleOpFn, + uniques::Unique, +}; +use proc_macro2::{Span, TokenStream}; +use quote::quote; +use quote_debug::Tokens; +use syn::{ExprMatch, Ident}; + +/// Should only be called when deletions are enabled for the table +pub fn generate( + namer: &CodeNamer, + groups: &Groups, + uniques: &[Unique], + transactions: bool, + op_attrs: &TokenStream, +) -> SingleOpFn { + let CodeNamer { + type_key_error, + type_key, + struct_window, + name_primary_column, + struct_table_member_columns: table_member_columns, + mod_transactions_enum_logitem, + mod_transactions_enum_logitem_variant_delete, + mod_transactions, + struct_table_member_transactions: table_member_transactions, + mod_transactions_struct_data_member_rollback, + mod_transactions_struct_data_member_log, + struct_window_method_delete: method_delete, + pulpit_path, + struct_window_method_reverse_insert, + struct_window_method_delete_hidden, + struct_window_method_restore_hidden, + struct_table_member_uniques, + struct_window_method_borrow, + .. + } = namer; + let key_ident = Ident::new("key", Span::call_site()); + let index_ident = Ident::new("index", Span::call_site()); + let brw_ident = Ident::new("brw_data", Span::call_site()); + + let unique_deletions = uniques.iter().map(|Unique { alias: _, field }| { + let field_index = groups.get_field_index(field).unwrap(); + let data = match field_index { + FieldIndex::Primary(_) => namer.name_primary_column.clone(), + FieldIndex::Assoc { + assoc_ind, + inner: _, + } => namer.name_assoc_column(*assoc_ind), + }; + + let imm_access = if field_index.is_imm() { + quote!(imm_data) + } else { + quote!(mut_data) + }; + + // POSSIBLE BUG: pulling values does not consider the transformations + // that may need to be applied to immutable values + // (`ImmPull`), or autodereference might take care of + // this - not failing any tests for retain, would fail for + // other wrappings? + quote!(self.#struct_table_member_uniques.#field.pull(&#data.#imm_access.#field).unwrap()) + }); + + let assoc_cols = (0..groups.assoc.len()) + .map(|ind| { + let name = namer.name_assoc_column(ind); + quote!(let #name = unsafe { self.#table_member_columns.#name.assoc_pull(#index_ident) }) + }) + .collect::>(); + + let delete_hard: Tokens = quote!{ + match self.#table_member_columns.#name_primary_column.pull(#key_ident) { + Ok(#pulpit_path::column::Entry{ index: #index_ident, data: #name_primary_column }) => { + #(#assoc_cols;)* + #(#unique_deletions;)* + Ok(()) + }, + Err(_) => Err(#type_key_error), + } + }.into(); + + let op_impl = if transactions { + let transactional = quote! { + if !self.#table_member_transactions.#mod_transactions_struct_data_member_rollback { + self.#table_member_transactions.#mod_transactions_struct_data_member_log.push(#mod_transactions::#mod_transactions_enum_logitem::#mod_transactions_enum_logitem_variant_delete(key)); + } + }; + // We cannot insert into the table while holding the borrow of the row. 
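+    // (A sketch of the resulting restore sequence, with default names and an
+    // illustrative unique field `email` aliased `unique_email`; the reasoning
+    // follows below:
+    //
+    //     let brw_data = self.borrow(key).unwrap();
+    //     let unique_email = brw_data.email.clone();
+    //     self.uniques.email.insert(unique_email, key).unwrap();
+    // )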
+ // - while borrow does not impact the unique indices, the `self.borrow` + // borrows all members of `self` + // - Hence we clone, then place, rather than just `self.uniques.insert(brw.field.clone())` + let get_clone_of_uniques = uniques + .iter() + .map(|Unique { alias, field }| quote!(let #alias = #brw_ident.#field.clone())); + let restore_unique_from_borrow = uniques.iter().map(|Unique { alias, field }| { + quote!(self.#struct_table_member_uniques.#field.insert(#alias, #key_ident).unwrap()) + }); + + quote! { + impl <'imm> #struct_window<'imm> { + fn #struct_window_method_reverse_insert(&mut self, #key_ident: #type_key) { + debug_assert!(self.#table_member_transactions.#mod_transactions_struct_data_member_rollback); + { + #delete_hard + }.unwrap() + } + + fn #struct_window_method_delete_hidden(&mut self, #key_ident: #type_key) { + debug_assert!(!self.#table_member_transactions.#mod_transactions_struct_data_member_rollback); + let #pulpit_path::column::Entry{ index: #index_ident, data } = self.#table_member_columns.#name_primary_column.pull(key).unwrap(); + unsafe { + #(#assoc_cols;)* + } + } + + fn #struct_window_method_restore_hidden(&mut self, #key_ident: #type_key) { + debug_assert!(self.#table_member_transactions.#mod_transactions_struct_data_member_rollback); + self.#table_member_columns.#name_primary_column.reveal(#key_ident).unwrap(); + let #brw_ident = self.#struct_window_method_borrow(#key_ident).unwrap(); + #(#get_clone_of_uniques;)* + #(#restore_unique_from_borrow;)* + } + + #op_attrs + pub fn #method_delete(&mut self, #key_ident: #type_key) -> Result<(), #type_key_error> { + match self.#table_member_columns.#name_primary_column.hide(#key_ident) { + Ok(()) => (), + Err(_) => return Err(#type_key_error), + } + #transactional + Ok(()) + } + } + } + .into() + } else { + quote!{ + impl <'imm> #struct_window<'imm> { + #op_attrs + pub fn #method_delete(&mut self, #key_ident: #type_key) -> Result<(), #type_key_error> { + #delete_hard + } + } + }.into() + }; + + SingleOpFn { op_impl } +} diff --git a/crates/pulpit_gen/src/operations/get.rs b/crates/pulpit_gen/src/operations/get.rs new file mode 100644 index 0000000..ea115e9 --- /dev/null +++ b/crates/pulpit_gen/src/operations/get.rs @@ -0,0 +1,178 @@ +use std::collections::HashMap; + +use super::SingleOp; +use crate::{ + columns::ColKind, + groups::{Field, FieldIndex, FieldName, Group, Groups, MutImmut}, + namer::CodeNamer, +}; +use proc_macro2::TokenStream; +use quote::quote; +use quote_debug::Tokens; +use syn::Type; + +fn generate_get_fields<'a>( + groups: &'a Groups, + namer: &'a CodeNamer, +) -> impl Iterator + 'a { + groups.idents.iter().map(|(field_name, field_index)| { + let data = match field_index { + FieldIndex::Primary(_) => namer.name_primary_column.clone(), + FieldIndex::Assoc { + assoc_ind, + inner: _, + } => namer.name_assoc_column(*assoc_ind), + }; + + let imm_access = if field_index.is_imm() { + quote!(imm_data) + } else { + quote!(mut_data) + }; + + quote!(#field_name: #data.#imm_access.#field_name) + }) +} + +/// Used to generate the field types for get operations on a table +pub fn get_struct_fields<'a>( + groups: &'a Groups, + namer: &'a CodeNamer, +) -> HashMap> { + fn append( + fs: &mut HashMap>, + col: &Col, + fields: &MutImmut>, + namer: &CodeNamer, + ) { + for Field { name, ty } in &fields.mut_fields { + fs.insert(name.clone(), ty.clone()); + } + for field @ Field { name, .. 
} in &fields.imm_fields { + fs.insert(name.clone(), col.convert_imm_type(field, namer)); + } + } + let mut def_fields = HashMap::with_capacity(groups.idents.len()); + append( + &mut def_fields, + &groups.primary.col, + &groups.primary.fields, + namer, + ); + + for Group { col, fields } in &groups.assoc { + append(&mut def_fields, col, fields, namer); + } + def_fields +} + +pub fn generate_get_struct_fields<'a>( + groups: &'a Groups, + namer: &'a CodeNamer, +) -> Vec { + fn append( + fs: &mut Vec, + col: &Col, + fields: &MutImmut>, + namer: &CodeNamer, + ) { + for Field { name, ty } in &fields.mut_fields { + fs.push(quote!(pub #name: #ty)); + } + for field @ Field { name, .. } in &fields.imm_fields { + let ty_trans = col.convert_imm_type(field, namer); + fs.push(quote!(pub #name: #ty_trans)); + } + } + let mut def_fields = Vec::with_capacity(groups.idents.len()); + append( + &mut def_fields, + &groups.primary.col, + &groups.primary.fields, + namer, + ); + + for Group { col, fields } in &groups.assoc { + append(&mut def_fields, col, fields, namer); + } + def_fields +} + +pub fn generate(groups: &Groups, namer: &CodeNamer, op_attrs: &TokenStream) -> SingleOp { + let CodeNamer { + type_key_error, + type_key, + struct_window, + pulpit_path, + name_primary_column, + struct_table_member_columns: table_member_columns, + mod_columns, + mod_columns_fn_imm_unpack, + mod_get, + mod_get_struct_get, + lifetime_imm, + struct_window_method_get: method_get, + name_phantom_member, + .. + } = namer; + + let include_lifetime = groups.primary.col.requires_get_lifetime() + || groups + .assoc + .iter() + .any(|Group { col, fields: _ }| col.requires_get_lifetime()); // TODO: implement + let lifetime = if include_lifetime { + quote!(<#lifetime_imm>) + } else { + quote!() + }; + + let get_struct_fields = generate_get_struct_fields(groups, namer); + + let phantom_get = if include_lifetime && get_struct_fields.is_empty() { + quote!(pub #name_phantom_member: std::marker::PhantomData<&#lifetime_imm ()>) + } else { + quote!() + }; + + let assoc_cols = (0..groups.assoc.len()).map(|ind| { + let name = namer.name_assoc_column(ind); + quote!(let #name = unsafe { self.#table_member_columns.#name.assoc_get(index) }.convert_imm(#mod_columns::#name::#mod_columns_fn_imm_unpack)) + }); + let get_fields_stream = generate_get_fields(groups, namer).collect::>(); + let get_fields = if get_fields_stream.is_empty() { + quote!(#name_phantom_member: std::marker::PhantomData) + } else { + quote!(#(#get_fields_stream,)*) + }; + + SingleOp { + op_mod: quote! { + pub mod #mod_get { + pub struct #mod_get_struct_get #lifetime { + #(#get_struct_fields,)* + #phantom_get + } + } + } + .into(), + op_impl: quote! 
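+            // Expansion sketch with the default `CodeNamer` names, one
+            // associated column, and immutable fields handed out by value:
+            //
+            //     impl<'imm> Window<'imm> {
+            //         pub fn get(&self, key: Key) -> Result<get::Get, KeyError> {
+            //             let pulpit::column::Entry { index, data: primary } =
+            //                 match self.columns.primary.get(key) {
+            //                     Ok(entry) => entry,
+            //                     Err(_) => return Err(KeyError),
+            //                 };
+            //             let primary = primary.convert_imm(column_types::primary::imm_unpack);
+            //             let assoc_0 = unsafe { self.columns.assoc_0.assoc_get(index) }
+            //                 .convert_imm(column_types::assoc_0::imm_unpack);
+            //             Ok(get::Get { /* fields moved out of primary and assoc_0 */ })
+            //         }
+            //     }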
{ + impl <#lifetime_imm> #struct_window<#lifetime_imm> { + #op_attrs + pub fn #method_get(&self, key: #type_key) -> Result<#mod_get::#mod_get_struct_get #lifetime, #type_key_error> { + let #pulpit_path::column::Entry {index, data: #name_primary_column} = match self.#table_member_columns.#name_primary_column.get(key) { + Ok(entry) => entry, + Err(_) => return Err(#type_key_error), + }; + let #name_primary_column = #name_primary_column.convert_imm(#mod_columns::#name_primary_column::#mod_columns_fn_imm_unpack); + #(#assoc_cols;)* + + Ok(#mod_get::#mod_get_struct_get { + #get_fields + }) + } + } + } + .into(), + } +} diff --git a/crates/pulpit_gen/src/operations/insert.rs b/crates/pulpit_gen/src/operations/insert.rs new file mode 100644 index 0000000..fd70808 --- /dev/null +++ b/crates/pulpit_gen/src/operations/insert.rs @@ -0,0 +1,284 @@ +use std::iter::once; + +use super::SingleOp; +use crate::{ + columns::ColKind, + groups::{Field, Group, Groups}, + limit::Limit, + namer::CodeNamer, + predicates::Predicate, + uniques::Unique, +}; +use proc_macro2::{Span, TokenStream}; +use quote::quote; +use quote_debug::Tokens; +use syn::{ExprLet, Ident}; + +pub fn generate_column_assignments( + name: &Ident, + insert_val: &Ident, + group: &Group, + namer: &CodeNamer, +) -> Tokens { + let imm_data_fields = group.fields.imm_fields.iter().map(|Field { name, .. }| { + quote! {#name: #insert_val.#name} + }); + let mut_data_fields = group.fields.mut_fields.iter().map(|Field { name, .. }| { + quote! {#name: #insert_val.#name} + }); + + let CodeNamer { + mod_columns, + mod_columns_struct_imm, + mod_columns_struct_mut, + pulpit_path, + .. + } = namer; + + // TODO: remove extra brackets, as ExprLet parsing of struct literals has + // been fixed in syn (see https://github.com/dtolnay/syn/issues/1670) + quote! { + let #name = (#pulpit_path::column::Data { + imm_data: #mod_columns::#name::#mod_columns_struct_imm { + #(#imm_data_fields,)* + }, + mut_data: #mod_columns::#name::#mod_columns_struct_mut { + #(#mut_data_fields,)* + } + }) + } + .into() +} + +#[allow(clippy::too_many_arguments)] +pub fn generate( + groups: &Groups, + uniques: &[Unique], + predicates: &[Predicate], + namer: &CodeNamer, + limit: &Option, + deletions: bool, + transactions: bool, + op_attrs: &TokenStream, +) -> SingleOp { + let CodeNamer { + type_key, + struct_window, + mod_insert, + mod_insert_struct_insert, + mod_insert_enum_error, + mod_borrow, + mod_borrow_struct_borrow, + mod_predicates, + struct_table_member_uniques: table_member_uniques, + struct_table_member_columns: table_member_columns, + pulpit_path, + name_primary_column, + mod_transactions_enum_logitem, + mod_transactions_enum_logitem_variant_insert, + mod_transactions_enum_logitem_variant_append, + mod_transactions, + struct_table_member_transactions: table_member_transactions, + mod_transactions_struct_data_member_rollback, + mod_transactions_struct_data_member_log, + struct_window_method_insert: method_insert, + name_phantom_member, + struct_window_method_count, + .. + } = namer; + + let insert_val = Ident::new("insert_val", Span::call_site()); + let key_var = Ident::new("key", Span::call_site()); + + let insert_struct_fields = groups.idents.iter().map(|(field_name, field_index)| { + let ty = groups.get_type(field_index); + quote!(pub #field_name: #ty) + }); + + let predicate_args_stream = groups + .idents + .keys() + .map(|k| quote! 
{#k : &#insert_val.#k}) + .collect::>(); + + let predicate_args = if predicate_args_stream.is_empty() { + quote!(#name_phantom_member: std::marker::PhantomData) + } else { + quote!(#(#predicate_args_stream),*) + }; + + let predicate_checks = predicates.iter().map(|Predicate { alias, tokens: _ }| { + quote! { + if !#mod_predicates::#alias(#mod_borrow::#mod_borrow_struct_borrow{ #predicate_args }) { + return Err(#mod_insert::#mod_insert_enum_error::#alias); + } + } + }); + + let mut errors = uniques + .iter() + .map(|Unique { alias, .. }| alias) + .chain( + predicates + .iter() + .map(|Predicate { alias, tokens: _ }| alias), + ) + .collect::>(); + + let limit_cons = if let Some(limit) = limit { + let alias = &limit.alias; + errors.push(alias); + let value = limit.generate_check(); + quote! { + { + if self.#struct_window_method_count() >= #value { + return Err(#mod_insert::#mod_insert_enum_error::#alias); + } + } + } + } else { + quote!() + }; + + let unique_checks = uniques.iter().map(|Unique { alias, field }| { + quote! { + let #alias = match self.#table_member_uniques.#field.lookup(&#insert_val.#field) { + Ok(_) => return Err(#mod_insert::#mod_insert_enum_error::#alias), + Err(_) => #insert_val.#field.clone(), + }; + } + }); + + let unique_updates = uniques.iter().map(|Unique { alias, field }| { + quote! { + self.#table_member_uniques.#field.insert(#alias, #key_var).unwrap(); + } + }); + + let splitting = once(generate_column_assignments( + &namer.name_primary_column.clone(), + &insert_val, + &groups.primary, + namer, + )) + .chain((0..groups.assoc.len()).map(|ind| { + generate_column_assignments( + &namer.name_assoc_column(ind), + &insert_val, + &groups.assoc[ind], + namer, + ) + })); + + let assoc_grps = (0..groups.assoc.len()).map(|ind| namer.name_assoc_column(ind)); + let appends = assoc_grps.clone().map(|grp| { + quote! { + self.#table_member_columns.#grp.assoc_append(#grp); + } + }); + + let (add_action, add_trans) = if deletions { + let places = assoc_grps.map(|grp| { + quote! { + self.#table_member_columns.#grp.assoc_place(index, #grp); + } + }); + ( + quote! { + let (#key_var, action) = self.#table_member_columns.#name_primary_column.insert(#name_primary_column); + match action { + #pulpit_path::column::InsertAction::Place(index) => { + unsafe { + #(#places)* + } + }, + #pulpit_path::column::InsertAction::Append => { + #(#appends)* + } + } + }, + if transactions { + quote! { + if !self.#table_member_transactions.#mod_transactions_struct_data_member_rollback { + self.#table_member_transactions.#mod_transactions_struct_data_member_log.push(#mod_transactions::#mod_transactions_enum_logitem::#mod_transactions_enum_logitem_variant_insert(key)); + } + } + } else { + quote!() + }, + ) + } else { + ( + quote! { + let #key_var = self.#table_member_columns.#name_primary_column.append(#name_primary_column); + #(#appends)* + }, + if transactions { + quote! { + if !self.#table_member_transactions.#mod_transactions_struct_data_member_rollback { + self.#table_member_transactions.#mod_transactions_struct_data_member_log.push(#mod_transactions::#mod_transactions_enum_logitem::#mod_transactions_enum_logitem_variant_append); + } + } + } else { + quote!() + }, + ) + }; + + if errors.is_empty() { + SingleOp { + op_mod: quote! { + pub mod #mod_insert { + pub struct #mod_insert_struct_insert { + #(#insert_struct_fields,)* + } + } + } + .into(), + op_impl: quote! 
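+            // Expansion sketch for this error-free branch (default names,
+            // append-only table, no uniques, predicates, or limit, and
+            // transactions enabled):
+            //
+            //     impl<'imm> Window<'imm> {
+            //         pub fn insert(&mut self, insert_val: insert::Insert) -> Key {
+            //             let primary = pulpit::column::Data { imm_data: /* .. */, mut_data: /* .. */ };
+            //             let key = self.columns.primary.append(primary);
+            //             if !self.transactions.rollback {
+            //                 self.transactions.log.push(transactions::LogItem::Append);
+            //             }
+            //             key
+            //         }
+            //     }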
{ + impl <'imm> #struct_window<'imm> { + #op_attrs + pub fn #method_insert(&mut self, #insert_val: #mod_insert::#mod_insert_struct_insert) -> #type_key { + #(#splitting;)* + #add_action + #add_trans + key + } + } + } + .into(), + } + } else { + SingleOp { + op_mod: quote! { + pub mod #mod_insert { + pub struct #mod_insert_struct_insert { + #(#insert_struct_fields,)* + } + #[derive(Debug)] + pub enum #mod_insert_enum_error { + #(#errors,)* + } + } + } + .into(), + op_impl: quote! { + impl <'imm> #struct_window<'imm> { + #op_attrs + pub fn #method_insert(&mut self, #insert_val: #mod_insert::#mod_insert_struct_insert) -> Result<#type_key, #mod_insert::#mod_insert_enum_error> { + #limit_cons + #(#predicate_checks)* + #(#unique_checks)* + #(#splitting;)* + #add_action + #(#unique_updates)* + #add_trans + + Ok(#key_var) + } + } + } + .into(), + } + } +} diff --git a/crates/pulpit_gen/src/operations/mod.rs b/crates/pulpit_gen/src/operations/mod.rs new file mode 100644 index 0000000..002fada --- /dev/null +++ b/crates/pulpit_gen/src/operations/mod.rs @@ -0,0 +1,21 @@ +pub mod borrow; +pub mod count; +pub mod delete; +pub mod get; +pub mod insert; +pub mod scan; +pub mod transact; +pub mod unique_get; +pub mod update; + +use quote_debug::Tokens; +use syn::{ItemImpl, ItemMod}; + +pub struct SingleOp { + pub op_mod: Tokens, + pub op_impl: Tokens, +} + +pub struct SingleOpFn { + pub op_impl: Tokens, +} diff --git a/crates/pulpit_gen/src/operations/scan.rs b/crates/pulpit_gen/src/operations/scan.rs new file mode 100644 index 0000000..7795f42 --- /dev/null +++ b/crates/pulpit_gen/src/operations/scan.rs @@ -0,0 +1,36 @@ +use proc_macro2::TokenStream; +use quote::quote; + +use crate::namer::CodeNamer; + +use super::SingleOpFn; + +pub fn generate(namer: &CodeNamer, op_attrs: &TokenStream) -> SingleOpFn { + let CodeNamer { + struct_window_method_scan_brw, + struct_window_method_scan_get, + type_key, + struct_window, + name_primary_column, + lifetime_imm, + struct_table_member_columns: table_member_columns, + .. + } = namer; + + SingleOpFn { + op_impl: quote! 
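+        // Expansion sketch with default names (the iterator item type is
+        // assumed to be the table's `Key`):
+        //
+        //     impl<'imm> Window<'imm> {
+        //         pub fn borrow_indices(&self) -> impl Iterator<Item = Key> + '_ {
+        //             self.columns.primary.scan_brw()
+        //         }
+        //         pub fn get_indices(&self) -> impl Iterator<Item = Key> + '_ {
+        //             self.columns.primary.scan_get()
+        //         }
+        //     }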
{ + impl <#lifetime_imm> #struct_window<#lifetime_imm> { + #op_attrs + pub fn #struct_window_method_scan_brw(&self) -> impl Iterator + '_ { + self.#table_member_columns.#name_primary_column.scan_brw() + } + + #op_attrs + pub fn #struct_window_method_scan_get(&self) -> impl Iterator + '_ { + self.#table_member_columns.#name_primary_column.scan_get() + } + } + } + .into(), + } +} diff --git a/crates/pulpit_gen/src/operations/transact.rs b/crates/pulpit_gen/src/operations/transact.rs new file mode 100644 index 0000000..dea7adc --- /dev/null +++ b/crates/pulpit_gen/src/operations/transact.rs @@ -0,0 +1,177 @@ +use super::{update::Update, SingleOp}; +use crate::{groups::Groups, namer::CodeNamer}; +use proc_macro2::TokenStream; +use quote::quote; + +pub fn generate( + groups: &Groups, + updates: &[Update], + namer: &CodeNamer, + deletions: bool, + _transactions: bool, + op_attrs: &TokenStream, +) -> SingleOp { + let CodeNamer { + struct_window, + mod_transactions_struct_data, + mod_transactions_enum_logitem, + mod_transactions_enum_update, + mod_transactions, + mod_update, + mod_update_struct_update, + mod_transactions_enum_logitem_variant_update, + mod_transactions_enum_logitem_variant_insert, + mod_transactions_enum_logitem_variant_append, + mod_transactions_enum_logitem_variant_delete, + struct_table_member_transactions: table_member_transactions, + mod_transactions_struct_data_member_log, + mod_transactions_struct_data_member_rollback, + struct_table_member_columns: table_member_columns, + type_key, + name_primary_column, + struct_window_method_commit: method_commit, + struct_window_method_abort: method_abort, + struct_window_method_delete_hidden, + struct_window_method_reverse_insert, + struct_window_method_restore_hidden, + .. + } = namer; + + let updates_variants = updates.iter().map( + |Update { fields: _, alias }| quote!(#alias(super::#mod_update::#alias::#mod_update_struct_update)), + ); + + let log_variants = if deletions { + quote! { + #mod_transactions_enum_logitem_variant_update(super::#type_key, #mod_transactions_enum_update), + #mod_transactions_enum_logitem_variant_insert(super::#type_key), + #mod_transactions_enum_logitem_variant_delete(super::#type_key), + } + } else { + quote! { + #mod_transactions_enum_logitem_variant_update(super::#type_key, #mod_transactions_enum_update), + #mod_transactions_enum_logitem_variant_append, + } + }; + + let abort_update = updates.iter().map(|Update { fields: _, alias }| { + quote! { + #mod_transactions::#mod_transactions_enum_update::#alias(update) => { + self.#alias(update, key).unwrap(); + } + } + }); + let update_rollback_case = quote! {#mod_transactions::#mod_transactions_enum_logitem::#mod_transactions_enum_logitem_variant_update(key, update) => { + match update { + #(#abort_update,)* + } + }}; + + let op_impl = if deletions { + quote! 
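+        // Deletes are deferred (rows are hidden, not pulled) until commit or
+        // abort, so the generated commit drains the log and applies them.
+        // A sketch with default names:
+        //
+        //     pub fn commit(&mut self) {
+        //         while let Some(entry) = self.transactions.log.pop() {
+        //             if let transactions::LogItem::Delete(key) = entry {
+        //                 self.delete_hidden(key);
+        //             }
+        //         }
+        //     }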
{ + impl <'imm> #struct_window<'imm> { + /// Commit all current changes + /// - Requires concretely applying deletions (which until commit + /// or abort simply hide keys from the table) + #op_attrs + pub fn #method_commit(&mut self) { + debug_assert!(!self.#table_member_transactions.#mod_transactions_struct_data_member_rollback); + while let Some(entry) = self.#table_member_transactions.#mod_transactions_struct_data_member_log.pop() { + match entry { + #mod_transactions::#mod_transactions_enum_logitem::#mod_transactions_enum_logitem_variant_delete(key) => { + self.#struct_window_method_delete_hidden(key); + }, + _ => (), + } + } + } + + /// Undo the transactions applied since the last commit + /// - Requires re-applying all updates, deleting inserts and undoing deletes + /// (deletes' keys are actually just hidden until commit or abort) + #op_attrs + pub fn #method_abort(&mut self) { + self.#table_member_transactions.#mod_transactions_struct_data_member_rollback = true; + while let Some(entry) = self.#table_member_transactions.#mod_transactions_struct_data_member_log.pop() { + match entry { + #mod_transactions::#mod_transactions_enum_logitem::#mod_transactions_enum_logitem_variant_delete(key) => { + self.#struct_window_method_restore_hidden(key); + }, + #mod_transactions::#mod_transactions_enum_logitem::#mod_transactions_enum_logitem_variant_insert(key) => { + self.#struct_window_method_reverse_insert(key); + }, + #update_rollback_case + } + } + self.#table_member_transactions.#mod_transactions_struct_data_member_rollback = false; + } + } + } + .into() + } else { + let assoc_cols = (0..groups.assoc.len()).map(|ind| { + let name = namer.name_assoc_column(ind); + quote!(self.#table_member_columns.#name.assoc_unppend()) + }); + + quote! { + impl <'imm> #struct_window<'imm> { + /// Commit all current changes + /// - Clears the rollback log + #op_attrs + pub fn #method_commit(&mut self) { + debug_assert!(!self.#table_member_transactions.#mod_transactions_struct_data_member_rollback); + self.#table_member_transactions.#mod_transactions_struct_data_member_log.clear() + } + + /// Undo the transactions applied since the last commit + /// - Requires re-applying all updates, deleting inserts and undoing deletes + /// (deletes' keys are actually just hidden until commit or abort) + #op_attrs + pub fn #method_abort(&mut self) { + self.#table_member_transactions.#mod_transactions_struct_data_member_rollback = true; + while let Some(entry) = self.#table_member_transactions.#mod_transactions_struct_data_member_log.pop() { + match entry { + #mod_transactions::#mod_transactions_enum_logitem::#mod_transactions_enum_logitem_variant_append => { + unsafe{ + self.#table_member_columns.#name_primary_column.unppend(); + #(#assoc_cols;)* + } + }, + #update_rollback_case + } + } + self.#table_member_transactions.#mod_transactions_struct_data_member_rollback = false; + } + } + } + .into() + }; + + SingleOp { + op_mod: quote! 
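+        // The module generated here, sketched with default names, deletions
+        // enabled, and a single update alias `update_score` (the alias is
+        // illustrative):
+        //
+        //     mod transactions {
+        //         pub enum Updates {
+        //             update_score(super::updates::update_score::Update),
+        //         }
+        //         pub enum LogItem {
+        //             Update(super::Key, Updates),
+        //             Insert(super::Key),
+        //             Delete(super::Key),
+        //         }
+        //         pub struct Data {
+        //             pub log: Vec<LogItem>,
+        //             pub rollback: bool,
+        //         }
+        //     }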
diff --git a/crates/pulpit_gen/src/operations/unique_get.rs b/crates/pulpit_gen/src/operations/unique_get.rs
new file mode 100644
index 0000000..fc4a1ef
--- /dev/null
+++ b/crates/pulpit_gen/src/operations/unique_get.rs
@@ -0,0 +1,49 @@
+use super::SingleOp;
+use crate::{groups::Groups, namer::CodeNamer, uniques::Unique};
+use proc_macro2::TokenStream;
+use quote::quote;
+
+pub fn generate(
+    groups: &Groups,
+    uniques: &[Unique],
+    namer: &CodeNamer,
+    op_attrs: &TokenStream,
+) -> SingleOp {
+    let CodeNamer {
+        struct_window,
+        struct_table_member_uniques,
+        type_key,
+        mod_unique,
+        mod_unique_struct_notfound,
+        ..
+    } = namer;
+
+    let unique_methods = uniques.iter().map(|Unique { alias, field }| {
+        let ty = groups.get_typefield(field).unwrap();
+        quote! {
+            #op_attrs
+            pub fn #alias(&self, value: &#ty) -> Result<#type_key, #mod_unique::#mod_unique_struct_notfound> {
+                match self.#struct_table_member_uniques.#field.lookup(value) {
+                    Ok(k) => Ok(k),
+                    Err(_) => Err(#mod_unique::#mod_unique_struct_notfound),
+                }
+            }
+        }
+    });
+
+    SingleOp {
+        op_mod: quote! {
+            pub mod #mod_unique {
+                #[derive(Debug)]
+                pub struct #mod_unique_struct_notfound;
+            }
+        }
+        .into(),
+        op_impl: quote! {
+            impl<'imm> #struct_window<'imm> {
+                #(#unique_methods)*
+            }
+        }
+        .into(),
+    }
+}
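Concretely, for `Unique { alias: get_by_email, field: email }` on a `String` field, the generated lookup is roughly the following (identifiers illustrative):

```rust
pub mod unique {
    #[derive(Debug)]
    pub struct NotFound;
}

impl<'imm> Window<'imm> {
    pub fn get_by_email(&self, value: &String) -> Result<Key, unique::NotFound> {
        // Delegates to the unique index, mapping its error to `NotFound`.
        match self.uniques.email.lookup(value) {
            Ok(k) => Ok(k),
            Err(_) => Err(unique::NotFound),
        }
    }
}
```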
diff --git a/crates/pulpit_gen/src/operations/update.rs b/crates/pulpit_gen/src/operations/update.rs
new file mode 100644
index 0000000..48ed3a2
--- /dev/null
+++ b/crates/pulpit_gen/src/operations/update.rs
@@ -0,0 +1,280 @@
+use proc_macro2::{Span, TokenStream};
+use quote::quote;
+use quote_debug::Tokens;
+use syn::{ExprLet, ExprMethodCall, Ident, ImplItemFn, ItemMod, Variant};
+
+use crate::{
+    groups::{FieldIndex, Groups},
+    namer::CodeNamer,
+    predicates::{generate_update_predicate_access, Predicate},
+    uniques::Unique,
+};
+
+use super::SingleOp;
+
+/// An update operation, replacing [`Update::fields`] with new values.
+/// - Named for the user by [`Update::alias`]
+pub struct Update {
+    pub fields: Vec<Ident>,
+    pub alias: Ident,
+}
+
+pub fn generate(
+    updates: &[Update],
+    groups: &Groups,
+    uniques: &[Unique],
+    predicates: &[Predicate],
+    namer: &CodeNamer,
+    transactions: bool,
+    op_attrs: &TokenStream,
+) -> SingleOp {
+    let CodeNamer {
+        mod_update,
+        struct_window,
+        ..
+    } = namer;
+
+    let modules = updates
+        .iter()
+        .map(|update| update.generate_mod(groups, uniques, predicates, namer));
+    let impl_fns = updates.iter().map(|update| {
+        update.generate_trait_impl_fn(namer, groups, uniques, predicates, transactions, op_attrs)
+    });
+
+    SingleOp {
+        op_mod: quote! {
+            pub mod #mod_update {
+                #(#modules)*
+            }
+        }
+        .into(),
+        op_impl: quote! {
+            impl<'imm> #struct_window<'imm> {
+                #(#impl_fns)*
+            }
+        }
+        .into(),
+    }
+}
+
+impl Update {
+    fn generate_mod(
+        &self,
+        groups: &Groups,
+        uniques: &[Unique],
+        predicates: &[Predicate],
+        namer: &CodeNamer,
+    ) -> Tokens<ItemMod> {
+        fn generate_unique_error_variants<'a>(
+            unique_indexes: impl Iterator<Item = &'a Unique>,
+        ) -> Vec<Tokens<Variant>> {
+            unique_indexes
+                .map(|unique| {
+                    let variant = &unique.alias;
+                    quote!(
+                        #variant
+                    )
+                    .into()
+                })
+                .collect()
+        }
+
+        fn generate_predicate_error_variants(predicates: &[Predicate]) -> Vec<Tokens<Variant>> {
+            predicates
+                .iter()
+                .map(|pred| {
+                    let variant = &pred.alias;
+                    quote!(
+                        #variant
+                    )
+                    .into()
+                })
+                .collect()
+        }
+        let CodeNamer {
+            mod_update_enum_error,
+            type_key_error,
+            mod_update_struct_update,
+            ..
+        } = namer;
+
+        let update_name = &self.alias;
+
+        // get the unique error types
+        let unique_indexes = uniques
+            .iter()
+            .filter(|uniq| self.fields.contains(&uniq.field));
+        let unique_errors = generate_unique_error_variants(unique_indexes);
+        let predicate_errors = generate_predicate_error_variants(predicates);
+
+        let struct_fields = self.fields.iter().map(|f| {
+            let ty = groups.get_type(groups.get_field_index(f).unwrap()).unwrap();
+            quote!(#f : #ty)
+        });
+
+        let extra_comma = if unique_errors.is_empty() || predicate_errors.is_empty() {
+            quote!()
+        } else {
+            quote!(,)
+        };
+
+        quote! {
+            pub mod #update_name {
+                #[derive(Debug)]
+                pub enum #mod_update_enum_error {
+                    #type_key_error,
+                    #(#unique_errors),* #extra_comma
+                    #(#predicate_errors),*
+                }
+
+                pub struct #mod_update_struct_update {
+                    #(pub #struct_fields),*
+                }
+            }
+        }
+        .into()
+    }
+
+    fn generate_trait_impl_fn(
+        &self,
+        namer: &CodeNamer,
+        groups: &Groups,
+        uniques: &[Unique],
+        predicates: &[Predicate],
+        transactions: bool,
+        op_attrs: &TokenStream,
+    ) -> Tokens<ImplItemFn> {
+        let CodeNamer {
+            mod_update,
+            mod_update_struct_update,
+            mod_update_enum_error,
+            name_primary_column,
+            struct_table_member_columns: table_member_columns,
+            type_key,
+            mod_predicates,
+            type_key_error,
+            pulpit_path,
+            mod_transactions_enum_logitem,
+            mod_transactions,
+            mod_transactions_enum_logitem_variant_update,
+            struct_table_member_transactions: table_member_transactions,
+            mod_transactions_enum_update,
+            mod_transactions_struct_data_member_rollback,
+            mod_transactions_struct_data_member_log,
+            struct_table_member_uniques: table_member_uniques,
+            ..
+        } = namer;
+
+        let update_var = Ident::new("update", Span::call_site());
+        let update_name = &self.alias;
+
+        // Generate the table access to primary, and all associated!
+        let assoc_brw_muts = (0..groups.assoc.len()).map(|ind| {
+            let name = namer.name_assoc_column(ind);
+            quote!(let #name = unsafe { self.#table_member_columns.#name.assoc_brw_mut(index) })
+        });
+        let table_access = quote! {
+            let #pulpit_path::column::Entry { index, data: #name_primary_column } = match self.#table_member_columns.#name_primary_column.brw_mut(key) {
+                Ok(entry) => entry,
+                Err(_) => return Err(#mod_update::#update_name::#mod_update_enum_error::#type_key_error),
+            };
+            #(#assoc_brw_muts;)*
+        };
+
+        // Pass borrow of all fields to the predicate (check if it will be valid)
+        // needs to include new updated values
+        let predicate_args =
+            generate_update_predicate_access(groups, &self.fields, &update_var, namer);
+        let predicate_checks = predicates.iter().map(|pred| {
+            let pred = &pred.alias;
+            quote! {
+                if !#mod_predicates::#pred(#predicate_args) {
+                    return Err(#mod_update::#update_name::#mod_update_enum_error::#pred);
+                }
+            }
+        });
+
+        let mut undo_prev_fields: Vec<Tokens<ExprMethodCall>> = Vec::new();
+        let mut unique_updates: Vec<Tokens<ExprLet>> = Vec::new();
+        for Unique { alias, field } in uniques
+            .iter()
+            .filter(|uniq| self.fields.contains(&uniq.field))
+        {
+            let field_index = groups.idents.get(field).unwrap();
+            let from_data = match field_index {
+                FieldIndex::Primary(_) => namer.name_primary_column.clone(),
+                FieldIndex::Assoc { assoc_ind, .. } => namer.name_assoc_column(*assoc_ind),
+            };
+
+            let mutability = if field_index.is_imm() {
+                quote!(imm_data)
+            } else {
+                quote!(mut_data)
+            };
+
+            unique_updates.push(quote! {
+                let #alias = match self.#table_member_uniques.#field.replace(&update.#field, &#from_data.#mutability.#field, key) {
+                    Ok(old_val) => old_val,
+                    Err(_) => {
+                        #(#undo_prev_fields;)*
+                        return Err(#mod_update::#update_name::#mod_update_enum_error::#alias)
+                    },
+                }
+            }.into());
+
+            undo_prev_fields.push(
+                quote! {
+                    self.#table_member_uniques.#field.undo_replace(#alias, &update.#field, key)
+                }
+                .into(),
+            )
+        }
+
+        let update_pairs = self.fields.iter().map(|field| {
+            let field_index = groups.idents.get(field).unwrap();
+            let name_id = match field_index {
+                FieldIndex::Primary(_) => namer.name_primary_column.clone(),
+                FieldIndex::Assoc { assoc_ind, .. } => namer.name_assoc_column(*assoc_ind),
+            };
+
+            (field, quote!(#name_id.mut_data.#field))
+        });
+
+        let commit_updates = if transactions {
+            let updates = update_pairs.map(|(field, mut_access)| {
+                quote! {
+                    std::mem::swap(&mut #mut_access, &mut update.#field);
+                }
+            });
+            quote! {
+                let mut update = update;
+                #(#updates;)*
+
+                if !self.#table_member_transactions.#mod_transactions_struct_data_member_rollback {
+                    self.#table_member_transactions.#mod_transactions_struct_data_member_log.push(#mod_transactions::#mod_transactions_enum_logitem::#mod_transactions_enum_logitem_variant_update(key, #mod_transactions::#mod_transactions_enum_update::#update_name(update)));
+                }
+            }
+        } else {
+            let updates = update_pairs.map(|(field, mut_access)| {
+                quote! {
+                    *(&mut #mut_access) = update.#field
+                }
+            });
+            quote! {
+                #(#updates;)*
+            }
+        };
+
+        quote! {
+            #op_attrs
+            pub fn #update_name(&mut self, #update_var: #mod_update::#update_name::#mod_update_struct_update, key: #type_key) -> Result<(), #mod_update::#update_name::#mod_update_enum_error> {
+                #table_access
+                #(#predicate_checks)*
+                #(#unique_updates;)*
+                #commit_updates
+                Ok(())
+            }
+        }
+        .into()
+    }
+}
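For reference, the per-update module produced by `generate_mod` for `Update { alias: set_name, fields: [name] }`, with one unique on `name` (aliased `unique_name`) and one predicate `name_nonempty`, looks roughly like this (identifiers illustrative):

```rust
pub mod set_name {
    #[derive(Debug)]
    pub enum UpdateError {
        KeyError,      // the key did not exist in the table
        unique_name,   // replacing `name` violated the unique index
        name_nonempty, // a predicate rejected the updated row
    }

    pub struct Update {
        pub name: String,
    }
}
```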
diff --git a/crates/pulpit_gen/src/predicates.rs b/crates/pulpit_gen/src/predicates.rs
new file mode 100644
index 0000000..73456f7
--- /dev/null
+++ b/crates/pulpit_gen/src/predicates.rs
@@ -0,0 +1,114 @@
+use std::iter::once;
+
+use quote::quote;
+use quote_debug::Tokens;
+use syn::{Expr, ExprStruct, Ident, ItemFn, ItemMod};
+
+use crate::groups::Field;
+
+use super::{
+    groups::{FieldName, Groups},
+    namer::CodeNamer,
+};
+
+pub struct Predicate {
+    pub alias: Ident,
+    pub tokens: Tokens<Expr>,
+}
+
+impl Predicate {
+    /// Generates the predicate function to be stored in the predicate module.
+    pub fn generate_function(&self, groups: &Groups, namer: &CodeNamer) -> Tokens<ItemFn> {
+        let CodeNamer {
+            mod_borrow,
+            mod_borrow_struct_borrow,
+            ..
+        } = namer;
+        let struct_args = if groups.idents.is_empty() {
+            quote!(..)
+        } else {
+            let args = groups.idents.keys();
+            quote!(#(#args),*)
+        };
+
+        let name = &self.alias;
+        let body = &self.tokens;
+        quote! {
+            #[inline(always)]
+            pub fn #name(super::#mod_borrow::#mod_borrow_struct_borrow { #struct_args }: super::#mod_borrow::#mod_borrow_struct_borrow) -> bool {
+                #body
+            }
+        }
+        .into()
+    }
+}
+
+/// Generate a module containing all predicates.
+pub fn generate(predicates: &[Predicate], groups: &Groups, namer: &CodeNamer) -> Tokens<ItemMod> {
+    let functions = predicates
+        .iter()
+        .map(|pred| pred.generate_function(groups, namer));
+    let mod_predicates = &namer.mod_predicates;
+
+    quote! {
+        mod #mod_predicates {
+            #(#functions)*
+        }
+    }
+    .into()
+}
+
+/// Generates the borrow struct of immutable borrows from a `.brw_mut(..)` method call,
+/// but for fields in `new_fields` it uses the `update_value_name` struct instead.
+/// - Allows for the row to be checked by predicates before it is committed to
+///   the table entry.
+pub fn generate_update_predicate_access(
+    groups: &Groups,
+    new_fields: &[FieldName],
+    update_value_name: &Ident,
+    namer: &CodeNamer,
+) -> Tokens<ExprStruct> {
+    let CodeNamer {
+        mod_borrow,
+        mod_borrow_struct_borrow,
+        name_phantom_member,
+        ..
+    } = namer;
+
+    let accesses = once((namer.name_primary_column.clone(), &groups.primary.fields))
+        .chain(
+            groups
+                .assoc
+                .iter()
+                .enumerate()
+                .map(|(ind, grp)| (namer.name_assoc_column(ind), &grp.fields)),
+        )
+        .flat_map(|(var_name, fields)| {
+            fields
+                .imm_fields
+                .iter()
+                .map(|f| (quote!(imm_data), f))
+                .chain(fields.mut_fields.iter().map(|f| (quote!(mut_data), f)))
+                .map(move |(access, Field { name, ty: _ })| {
+                    if new_fields.contains(name) {
+                        quote!(#name: &#update_value_name.#name)
+                    } else {
+                        quote!(#name: &#var_name.#access.#name)
+                    }
+                })
+        })
+        .collect::<Vec<_>>();
+
+    let access_fields = if accesses.is_empty() {
+        quote!(#name_phantom_member: std::marker::PhantomData)
+    } else {
+        quote!(#(#accesses),*)
+    };
+
+    quote! {
+        #mod_borrow::#mod_borrow_struct_borrow {
+            #access_fields
+        }
+    }
+    .into()
+}
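As a concrete sketch, a predicate `Predicate { alias: adults_only, tokens: quote!(*age >= 18) }` over a table with fields `name` and `age` generates roughly the following (the borrow-module identifiers here are illustrative assumptions):

```rust
mod predicates {
    #[inline(always)]
    pub fn adults_only(
        // Destructures the generated borrow struct: one reference per field.
        super::borrows::Borrows { name, age }: super::borrows::Borrows,
    ) -> bool {
        *age >= 18
    }
}
```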
diff --git a/crates/pulpit_gen/src/selector/mod.rs b/crates/pulpit_gen/src/selector/mod.rs
new file mode 100644
index 0000000..a8b83e0
--- /dev/null
+++ b/crates/pulpit_gen/src/selector/mod.rs
@@ -0,0 +1,83 @@
+//! ## Table Implementation Selection
+//! Provides functions for determining the structure of the [`crate::table::Table`] chosen.
+
+use crate::{
+    limit::Limit, operations::update::Update, predicates::Predicate, table::Table, uniques::Unique,
+};
+use quote_debug::Tokens;
+use std::collections::HashMap;
+use syn::{Ident, Type};
+
+pub struct SelectOperations {
+    pub name: Ident,
+    pub transactions: bool,
+    pub deletions: bool,
+    pub fields: HashMap<Ident, Tokens<Type>>,
+    pub uniques: Vec<Unique>,
+    pub predicates: Vec<Predicate>,
+    pub updates: Vec<Update>,
+    pub limit: Option<Limit>,
+    pub public: bool,
+}
+
+mod mutability;
+pub use mutability::MutabilitySelector;
+mod thunderdome;
+pub use thunderdome::ThunderdomeSelector;
+
+#[enumtrait::store(selector_impl_trait)]
+pub trait SelectorImpl {
+    fn select_table(&self, ops: SelectOperations) -> Table;
+}
+
+#[enumtrait::quick_enum]
+#[enumtrait::quick_from]
+#[enumtrait::store(table_selector_enum)]
+pub enum TableSelectors {
+    MutabilitySelector,
+    ThunderdomeSelector,
+}
+
+#[enumtrait::impl_trait(selector_impl_trait for table_selector_enum)]
+impl SelectorImpl for TableSelectors {}
+
+mod utils {
+    use std::collections::HashMap;
+
+    use quote_debug::Tokens;
+    use syn::{Ident, Type};
+
+    use crate::{
+        groups::{Field, MutImmut},
+        operations::update::Update,
+    };
+
+    pub fn determine_mutability(
+        updates: &[Update],
+        mut fields: HashMap<Ident, Tokens<Type>>,
+    ) -> MutImmut<Vec<Field>> {
+        fn convert_fields(fields: HashMap<Ident, Tokens<Type>>) -> Vec<Field> {
+            fields
+                .into_iter()
+                .map(|(name, ty)| Field { name, ty })
+                .collect()
+        }
+
+        let mut mut_fields = HashMap::new();
+        for Update {
+            fields: update_fields,
+            alias: _,
+        } in updates
+        {
+            for field in update_fields {
+                if let Some(ty) = fields.remove(field) {
+                    mut_fields.insert(field.clone(), ty);
+                }
+            }
+        }
+        MutImmut {
+            imm_fields: convert_fields(fields),
+            mut_fields: convert_fields(mut_fields),
+        }
+    }
+}
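The `enumtrait` attributes above generate the dispatch boilerplate; hand-expanded, the result is roughly equivalent to the following sketch (not the macro's literal output):

```rust
// `quick_enum` wraps each listed type in a variant of the same name,
// `quick_from` derives the `From` conversions for each variant.
pub enum TableSelectors {
    MutabilitySelector(MutabilitySelector),
    ThunderdomeSelector(ThunderdomeSelector),
}

impl SelectorImpl for TableSelectors {
    fn select_table(&self, ops: SelectOperations) -> Table {
        // `impl_trait` delegates each trait method to the wrapped selector.
        match self {
            TableSelectors::MutabilitySelector(s) => s.select_table(ops),
            TableSelectors::ThunderdomeSelector(s) => s.select_table(ops),
        }
    }
}
```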
diff --git a/crates/pulpit_gen/src/selector/mutability.rs b/crates/pulpit_gen/src/selector/mutability.rs
new file mode 100644
index 0000000..3b7f834
--- /dev/null
+++ b/crates/pulpit_gen/src/selector/mutability.rs
@@ -0,0 +1,65 @@
+use crate::{
+    columns::{AssocBlocks, PrimaryRetain, PrimaryThunderDomeTrans, PrimaryThunderdome},
+    groups::{Group, GroupConfig},
+    table::Table,
+};
+
+use super::*;
+
+/// Generates a table data structure using the provided updates to determine
+/// field mutability, and considering use of deletions and transactions.
+/// - Assumes the cost of accumulating unused immutable fields (from
+///   [`PrimaryRetain`]) is offset by the benefit of returning references on `get`
+pub struct MutabilitySelector;
+
+impl SelectorImpl for MutabilitySelector {
+    fn select_table(
+        &self,
+        SelectOperations {
+            name,
+            transactions,
+            deletions,
+            fields,
+            uniques,
+            predicates,
+            updates,
+            public,
+            limit,
+        }: SelectOperations,
+    ) -> Table {
+        let primary_fields = utils::determine_mutability(&updates, fields);
+
+        let prim_col = if deletions {
+            if primary_fields.imm_fields.is_empty() {
+                if transactions {
+                    PrimaryThunderDomeTrans.into()
+                } else {
+                    PrimaryThunderdome.into()
+                }
+            } else {
+                PrimaryRetain { block_size: 1024 }.into()
+            }
+        } else {
+            AssocBlocks { block_size: 1024 }.into()
+        };
+
+        Table {
+            groups: GroupConfig {
+                primary: Group {
+                    col: prim_col,
+                    fields: primary_fields,
+                },
+                assoc: vec![],
+            }
+            .into(),
+            uniques,
+            predicates,
+            updates,
+            limit,
+            name,
+            transactions,
+            deletions,
+            public,
+        }
+    }
+}
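The column choice above can be summarised as a decision table (this mirrors the `prim_col` expression, it adds no new cases):

```rust
// deletions | imm fields | transactions -> primary column chosen
// ----------+------------+--------------+---------------------------------
//    no     |     -      |      -       -> AssocBlocks (append-only)
//    yes    |    none    |     yes      -> PrimaryThunderDomeTrans
//    yes    |    none    |     no       -> PrimaryThunderdome
//    yes    |    some    |      -       -> PrimaryRetain { block_size: 1024 }
```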
diff --git a/crates/pulpit_gen/src/selector/thunderdome.rs b/crates/pulpit_gen/src/selector/thunderdome.rs
new file mode 100644
index 0000000..d50ef9f
--- /dev/null
+++ b/crates/pulpit_gen/src/selector/thunderdome.rs
@@ -0,0 +1,47 @@
+use super::*;
+
+use crate::{
+    columns::PrimaryThunderDomeTrans,
+    groups::{Group, GroupConfig},
+    table::Table,
+};
+
+/// Generates a table structure using thunderdome with transaction support.
+/// - Does not take advantage of mutability.
+pub struct ThunderdomeSelector;
+
+impl SelectorImpl for ThunderdomeSelector {
+    fn select_table(
+        &self,
+        SelectOperations {
+            name,
+            transactions,
+            deletions: _,
+            fields,
+            uniques,
+            predicates,
+            updates,
+            limit,
+            public,
+        }: SelectOperations,
+    ) -> Table {
+        Table {
+            groups: GroupConfig {
+                primary: Group {
+                    col: PrimaryThunderDomeTrans.into(),
+                    fields: utils::determine_mutability(&updates, fields),
+                },
+                assoc: vec![],
+            }
+            .into(),
+            uniques,
+            predicates,
+            updates,
+            name,
+            limit,
+            transactions,
+            deletions: true,
+            public,
+        }
+    }
+}
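Because both selectors implement `SelectorImpl`, and `enumtrait::quick_from` derives the `From` conversions into `TableSelectors`, callers can pick an implementation at runtime. A sketch, assuming `ops: SelectOperations` has already been built:

```rust
use pulpit_gen::selector::{
    MutabilitySelector, SelectOperations, SelectorImpl, TableSelectors, ThunderdomeSelector,
};
use pulpit_gen::table::Table;

fn build(ops: SelectOperations, always_thunderdome: bool) -> Table {
    // `.into()` uses the derived `From<...> for TableSelectors` impls.
    let selector: TableSelectors = if always_thunderdome {
        ThunderdomeSelector.into()
    } else {
        MutabilitySelector.into()
    };
    selector.select_table(ops)
}
```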
diff --git a/crates/pulpit_gen/src/table.rs b/crates/pulpit_gen/src/table.rs
new file mode 100644
index 0000000..076cb09
--- /dev/null
+++ b/crates/pulpit_gen/src/table.rs
@@ -0,0 +1,280 @@
+use std::collections::HashMap;
+
+use crate::{
+    groups::FieldName,
+    limit::Limit,
+    operations::{self, get::get_struct_fields, SingleOpFn},
+    uniques::UniqueDec,
+};
+use proc_macro2::TokenStream;
+use quote::quote;
+use quote_debug::Tokens;
+use syn::{Ident, ItemImpl, ItemMod, ItemStruct, Type};
+
+use super::{
+    groups::{Groups, GroupsDef},
+    namer::CodeNamer,
+    operations::{update::Update, SingleOp},
+    predicates::{self, Predicate},
+    uniques::{self, Unique},
+};
+
+/// Attributes to apply to the publicly accessible operations of the table
+/// - Inlining for example
+///
+/// TODO: In future add statistics (logging method calls)
+pub enum AttrKinds {
+    Inline,
+    Custom(TokenStream),
+}
+
+impl AttrKinds {
+    pub fn to_tokens(&self) -> TokenStream {
+        match self {
+            AttrKinds::Inline => quote!(#[inline(always)]),
+            AttrKinds::Custom(tokens) => quote!(#[#tokens]),
+        }
+    }
+}
+
+pub struct Table {
+    pub groups: Groups,
+    pub uniques: Vec<Unique>,
+    pub predicates: Vec<Predicate>,
+    pub updates: Vec<Update>,
+    pub name: Ident,
+    pub limit: Option<Limit>,
+    pub transactions: bool,
+    pub deletions: bool,
+    pub public: bool,
+}
+
+struct TableDec {
+    table_struct: Tokens<ItemStruct>,
+    table_impl: Tokens<ItemImpl>,
+    window_struct: Tokens<ItemStruct>,
+}
+
+fn generate_table_and_window(transactions: bool, namer: &CodeNamer) -> TableDec {
+    let CodeNamer {
+        struct_window,
+        struct_table,
+        struct_table_member_columns: table_member_columns,
+        struct_column_holder,
+        struct_window_holder,
+        struct_table_member_uniques: table_member_uniques,
+        struct_unique,
+        mod_transactions,
+        mod_transactions_struct_data,
+        struct_table_member_transactions: table_member_transactions,
+        ..
+    } = namer;
+
+    let (trans_table, trans_new, trans_wind, trans_wind_def) = if transactions {
+        (
+            quote!(#table_member_transactions: #mod_transactions::#mod_transactions_struct_data),
+            quote!(#table_member_transactions: #mod_transactions::#mod_transactions_struct_data::new()),
+            quote!(#table_member_transactions: &mut self.#table_member_transactions),
+            quote!(#table_member_transactions: &'imm mut #mod_transactions::#mod_transactions_struct_data),
+        )
+    } else {
+        (quote!(), quote!(), quote!(), quote!())
+    };
+
+    TableDec {
+        table_struct: quote! {
+            pub struct #struct_table {
+                #table_member_columns: #struct_column_holder,
+                #table_member_uniques: #struct_unique,
+                #trans_table
+            }
+        }
+        .into(),
+        table_impl: quote! {
+            impl #struct_table {
+                pub fn new(size_hint: usize) -> Self {
+                    Self {
+                        #table_member_columns: #struct_column_holder::new(size_hint),
+                        #table_member_uniques: #struct_unique::new(size_hint),
+                        #trans_new
+                    }
+                }
+
+                pub fn window(&mut self) -> #struct_window<'_> {
+                    #struct_window {
+                        #table_member_columns: self.#table_member_columns.window(),
+                        #table_member_uniques: &mut self.#table_member_uniques,
+                        #trans_wind
+                    }
+                }
+            }
+        }
+        .into(),
+        window_struct: quote! {
+            pub struct #struct_window<'imm> {
+                #table_member_columns: #struct_window_holder<'imm>,
+                #table_member_uniques: &'imm mut #struct_unique,
+                #trans_wind_def
+            }
+        }
+        .into(),
+    }
+}
+
+impl Table {
+    pub fn op_get_types(&self, namer: &CodeNamer) -> HashMap<FieldName, Tokens<Type>> {
+        get_struct_fields(&self.groups, namer)
+    }
+    pub fn insert_can_error(&self) -> bool {
+        !self.predicates.is_empty() || !self.uniques.is_empty() || self.limit.is_some()
+    }
+    pub fn generate(&self, namer: &CodeNamer, attrs: Vec<AttrKinds>) -> Tokens<ItemMod> {
+        let Self {
+            groups,
+            uniques,
+            predicates,
+            updates,
+            limit,
+            name,
+            public,
+            transactions,
+            deletions,
+        } = self;
+
+        let op_attrs = attrs
+            .iter()
+            .map(AttrKinds::to_tokens)
+            .collect::<TokenStream>();
+
+        let CodeNamer {
+            pulpit_path,
+            type_key_error,
+            ..
+        } = namer;
+
+        let column_types = groups.column_types(namer);
+        let key_type = groups.key_type(namer);
+
+        let GroupsDef {
+            columns_struct,
+            columns_impl,
+            window_holder_struct,
+        } = groups.columns_definition(namer);
+
+        let predicate_mod = predicates::generate(predicates, groups, namer);
+        let UniqueDec {
+            unique_struct,
+            unique_impl,
+        } = uniques::generate(uniques, groups, namer);
+
+        let mut ops_mod_code = vec![
+            operations::borrow::generate(groups, namer, &op_attrs),
+            operations::get::generate(groups, namer, &op_attrs),
+            operations::update::generate(
+                updates,
+                groups,
+                uniques,
+                predicates,
+                namer,
+                *transactions,
+                &op_attrs,
+            ),
+            operations::insert::generate(
+                groups,
+                uniques,
+                predicates,
+                namer,
+                limit,
+                *deletions,
+                *transactions,
+                &op_attrs,
+            ),
+            operations::unique_get::generate(groups, uniques, namer, &op_attrs),
+        ];
+        if *transactions {
+            ops_mod_code.push(operations::transact::generate(
+                groups,
+                updates,
+                namer,
+                *deletions,
+                *transactions,
+                &op_attrs,
+            ))
+        }
+
+        let mut ops_fn_code = vec![
+            operations::count::generate(namer, &op_attrs),
+            operations::scan::generate(namer, &op_attrs),
+        ];
+
+        if *deletions {
+            ops_fn_code.push(operations::delete::generate(
+                namer,
+                groups,
+                uniques,
+                *transactions,
+                &op_attrs,
+            ))
+        }
+
+        let TableDec {
+            table_struct,
+            table_impl,
+            window_struct,
+        } = generate_table_and_window(*transactions, namer);
+
+        let ops_tokens = ops_mod_code
+            .into_iter()
+            .map(|SingleOp { op_mod, op_impl }| {
+                quote! {
+                    #op_mod
+                    #op_impl
+                }
+            })
+            .chain(
+                ops_fn_code
+                    .into_iter()
+                    .map(|SingleOpFn { op_impl }| quote! { #op_impl }),
+            );
+
+        let public_dec = if *public { quote!(pub) } else { quote!() };
+
+        quote! {
+            #public_dec mod #name {
+                #![allow(unused, non_camel_case_types)]
+
+                use #pulpit_path::column::{
+                    PrimaryWindow,
+                    PrimaryWindowApp,
+                    PrimaryWindowPull,
+                    PrimaryWindowHide,
+                    AssocWindow,
+                    AssocWindowPull,
+                    Column,
+                };
+
+                #[derive(Debug)]
+                pub struct #type_key_error;
+
+                #column_types
+
+                #(#ops_tokens)*
+
+                #key_type
+
+                #predicate_mod
+                #unique_struct
+                #unique_impl
+
+                #columns_struct
+                #columns_impl
+                #window_holder_struct
+
+                #table_struct
+                #table_impl
+                #window_struct
+            }
+        }
+        .into()
+    }
+}
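Putting it together, `Table::generate` emits a single module per table; for a table named `users` the overall layout is roughly as follows (a sketch with contents elided and identifiers assumed, not literal generator output):

```rust
pub mod users {
    #![allow(unused, non_camel_case_types)]

    #[derive(Debug)]
    pub struct KeyError;

    // ... column types, key type, operation modules and impls,
    //     predicates, uniques, and column holders ...

    pub struct Table { /* columns, uniques, transaction data */ }

    impl Table {
        pub fn new(size_hint: usize) -> Self {
            todo!("constructs the generated column holders")
        }
        pub fn window(&mut self) -> Window<'_> {
            todo!("mutably borrows the table for a batch of operations")
        }
    }

    pub struct Window<'imm> {
        phantom: std::marker::PhantomData<&'imm ()>, // stand-in for the borrows
    }
}
```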
diff --git a/crates/pulpit_gen/src/uniques.rs b/crates/pulpit_gen/src/uniques.rs
new file mode 100644
index 0000000..924a6ae
--- /dev/null
+++ b/crates/pulpit_gen/src/uniques.rs
@@ -0,0 +1,51 @@
+use quote::quote;
+use quote_debug::Tokens;
+use syn::{Ident, ItemImpl, ItemStruct};
+
+use super::{groups::Groups, namer::CodeNamer};
+
+pub struct Unique {
+    pub alias: Ident,
+    pub field: Ident,
+}
+
+pub struct UniqueDec {
+    pub unique_struct: Tokens<ItemStruct>,
+    pub unique_impl: Tokens<ItemImpl>,
+}
+
+pub fn generate(uniques: &[Unique], groups: &Groups, namer: &CodeNamer) -> UniqueDec {
+    let CodeNamer {
+        pulpit_path,
+        struct_unique,
+        type_key,
+        ..
+    } = namer;
+
+    let unique_fields_def = uniques.iter().map(|Unique { alias: _, field }| {
+        let ty = groups.get_typefield(field).unwrap();
+        quote!(#field: #pulpit_path::access::Unique<#ty, #type_key>)
+    });
+    let unique_fields_impl = uniques.iter().map(
+        |Unique { alias: _, field }| quote!(#field: #pulpit_path::access::Unique::new(size_hint)),
+    );
+
+    UniqueDec {
+        unique_struct: quote! {
+            struct #struct_unique {
+                #(#unique_fields_def),*
+            }
+        }
+        .into(),
+        unique_impl: quote! {
+            impl #struct_unique {
+                fn new(size_hint: usize) -> Self {
+                    Self {
+                        #(#unique_fields_impl),*
+                    }
+                }
+            }
+        }
+        .into(),
+    }
+}
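For a single unique index on an `email: String` field this expands to roughly the following (struct and key names illustrative):

```rust
// One index per unique field, mapping values to their row key.
struct Uniques {
    email: pulpit::access::Unique<String, Key>,
}

impl Uniques {
    fn new(size_hint: usize) -> Self {
        Self {
            email: pulpit::access::Unique::new(size_hint),
        }
    }
}
```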
diff --git a/crates/pulpit_macro/Cargo.toml b/crates/pulpit_macro/Cargo.toml
new file mode 100644
index 0000000..c8f986b
--- /dev/null
+++ b/crates/pulpit_macro/Cargo.toml
@@ -0,0 +1,28 @@
+[package]
+name = "pulpit_macro"
+version = "0.1.0"
+edition = "2021"
+
+readme = "README.md"
+description = "A crate for the macro interface supporting pulpit"
+keywords = ["performance", "traits", "arenas"]
+categories = ["data"]
+
+repository.workspace = true
+homepage.workspace = true
+license-file.workspace = true
+
+[lib]
+proc-macro = true
+
+[dependencies]
+proc-macro2 = "1.0"
+syn = { version = "2.0.45", features = ["full", "extra-traits"] }
+quote = "1.0.33"
+pulpit_gen = { path = "../pulpit_gen" }
+proc-macro-error = "1.0.4"
+
+[dev-dependencies]
+divan = { git = "https://github.com/OliverKillane/divan.git", branch = "enh/file-output" }
+trybuild = "1.0.91"
+glob = "0.3.1"
diff --git a/crates/pulpit_macro/README.md b/crates/pulpit_macro/README.md
new file mode 100644
index 0000000..66e2b9b
--- /dev/null
+++ b/crates/pulpit_macro/README.md
@@ -0,0 +1,4 @@
+## Pulpit Macros
+The macros to be used for [pulpit](./../pulpit). This is a separate crate because
+the rust compiler does not allow non-proc-macro exports from a proc-macro crate,
+so the macros cannot be defined directly inside [pulpit](./../pulpit).
diff --git a/crates/pulpit_macro/src/lib.rs b/crates/pulpit_macro/src/lib.rs
new file mode 100644
index 0000000..8dd2911
--- /dev/null
+++ b/crates/pulpit_macro/src/lib.rs
@@ -0,0 +1,22 @@
+use proc_macro::TokenStream;
+use proc_macro_error::proc_macro_error;
+use pulpit_gen::selector::{MutabilitySelector, SelectorImpl};
+use quote::ToTokens;
+
+#[proc_macro_error]
+#[proc_macro]
+pub fn simple(tokens: TokenStream) -> TokenStream {
+    match pulpit_gen::macros::simple::simple(tokens.into()) {
+        Ok(ts) => MutabilitySelector
+            .select_table(ts)
+            .generate(&pulpit_gen::namer::CodeNamer::pulpit(), vec![])
+            .into_token_stream()
+            .into(),
+        Err(es) => {
+            for e in es {
+                e.emit();
+            }
+            TokenStream::new()
+        }
+    }
+}
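A variation on the expansion above (a sketch, not part of this diff): the macro passes `vec![]` for the attributes, but `AttrKinds::Inline` from `pulpit_gen::table` can be supplied to mark every generated operation `#[inline(always)]`:

```rust
use pulpit_gen::namer::CodeNamer;
use pulpit_gen::selector::{MutabilitySelector, SelectOperations, SelectorImpl};
use pulpit_gen::table::AttrKinds;
use quote::ToTokens;

// `ops` would be the `SelectOperations` parsed from the macro input.
fn expand_inlined(ops: SelectOperations) -> proc_macro2::TokenStream {
    MutabilitySelector
        .select_table(ops)
        .generate(&CodeNamer::pulpit(), vec![AttrKinds::Inline])
        .into_token_stream()
}
```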
diff --git a/crates/quote_debug/Cargo.toml b/crates/quote_debug/Cargo.toml
new file mode 100644
index 0000000..e30f862
--- /dev/null
+++ b/crates/quote_debug/Cargo.toml
@@ -0,0 +1,20 @@
+[package]
+name = "quote_debug"
+version = "0.1.0"
+edition = "2021"
+
+readme = "README.md"
+description = "A crate for wrapping checks on generated tokenstreams"
+keywords = ["macros"]
+categories = ["macros"]
+
+repository.workspace = true
+homepage.workspace = true
+license-file.workspace = true
+
+[dependencies]
+proc-macro2 = "1.0"
+syn = { version = "2.0.45", features = ["full"] }
+quote = "1.0.33"
+
+
diff --git a/crates/quote_debug/README.md b/crates/quote_debug/README.md
new file mode 100644
index 0000000..af950c6
--- /dev/null
+++ b/crates/quote_debug/README.md
@@ -0,0 +1,51 @@
+# Quote Debug
+A crate containing a simple type for checking the syntax of produced tokenstreams when built with [`debug_assertions`](https://doc.rust-lang.org/reference/conditional-compilation.html#debug_assertions).
+
+Incurs no overhead when built optimised ([`debug_assertions`](https://doc.rust-lang.org/reference/conditional-compilation.html#debug_assertions) are disabled).
+
+To get access to all the `syn` types, `syn` needs the `"full"` feature enabled.
+
+For example, the following all pass:
+```rust
+use quote_debug::Tokens;
+use quote::quote;
+use syn::{ExprBlock, ItemEnum, TraitItemFn, Type, ExprLet};
+
+Tokens::<ExprBlock>::from(quote! {
+    {
+        block_of_code();
+        let y = 3;
+    }
+});
+Tokens::<TraitItemFn>::from(quote! {
+    /// stuff
+    fn method(&self) -> i32 {
+        2
+    }
+});
+Tokens::<ItemEnum>::from(quote! {
+    enum Cool {
+        A, B, C
+    }
+});
+Tokens::<Type>::from(quote! {
+    ()
+});
+Tokens::<ExprLet>::from(quote! {
+    let x = 3
+});
+```
+
+While generating invalid syntax fails:
+```rust,should_panic
+use quote_debug::Tokens;
+use quote::quote;
+use syn::ExprBlock;
+
+Tokens::<ExprBlock>::from(quote! {
+    not_in_block; {
+        block_of_code();
+        let y = 3;
+    }
+}); // Panic!
+```
diff --git a/crates/quote_debug/src/lib.rs b/crates/quote_debug/src/lib.rs
new file mode 100644
index 0000000..711a9ca
--- /dev/null
+++ b/crates/quote_debug/src/lib.rs
@@ -0,0 +1,53 @@
+#![doc = include_str!("../README.md")]
+
+use proc_macro2::TokenStream;
+use quote::ToTokens;
+use std::{marker::PhantomData, ops::Deref};
+
+/// A [`TokenStream`] tagged with the `syn` type it should parse as.
+pub struct Tokens<T> {
+    tks: TokenStream,
+    phantom: PhantomData<T>,
+}
+
+impl<T: syn::parse::Parse> From<TokenStream> for Tokens<T> {
+    fn from(value: TokenStream) -> Self {
+        #[cfg(debug_assertions)]
+        {
+            if let Err(err) = syn::parse2::<T>(value.clone()) {
+                panic!(
+                    "Attempted to parse as `{}` but failed with message:\n`{}`\nTokens: `{}`",
+                    std::any::type_name::<T>(),
+                    err,
+                    value
+                )
+            }
+        }
+        Self {
+            tks: value,
+            phantom: PhantomData,
+        }
+    }
+}
+
+impl<T> Clone for Tokens<T> {
+    fn clone(&self) -> Self {
+        Self {
+            tks: self.tks.clone(),
+            phantom: PhantomData,
+        }
+    }
+}
+
+impl<T> Deref for Tokens<T> {
+    type Target = TokenStream;
+
+    fn deref(&self) -> &Self::Target {
+        &self.tks
+    }
+}
+
+impl<T> ToTokens for Tokens<T> {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        self.tks.to_tokens(tokens)
+    }
+}
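Finally, a minimal standalone use of `quote_debug` as it is employed throughout `pulpit_gen` (self-contained sketch):

```rust
use quote::quote;
use quote_debug::Tokens;
use syn::ItemStruct;

fn main() {
    // In debug builds this parse-checks the stream as an `ItemStruct`;
    // in release builds the check is compiled out.
    let decl: Tokens<ItemStruct> = quote!(pub struct KeyError;).into();
    println!("{}", *decl); // Deref exposes the underlying TokenStream
}
```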