diff --git a/.gitignore b/.gitignore index 3d0d3b001..b1ebbb3ae 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,6 @@ tulsigen-* compile_commands.json .cache/ .vscode/* + +# Rust stuff +target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 000000000..2ceda6681 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,296 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + +[[package]] +name = "brotli" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "2.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "jobserver", + "libc", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "cxx" +version = "1.0.110" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7129e341034ecb940c9072817cd9007974ea696844fc4dd582dc1653a7fbe2e8" +dependencies = [ + "cc", + "cxxbridge-flags", + "cxxbridge-macro", + "link-cplusplus", +] + +[[package]] +name = "cxxbridge-flags" +version = "1.0.110" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06fdd177fc61050d63f67f5bd6351fac6ab5526694ea8e359cd9cd3b75857f44" + +[[package]] +name = "cxxbridge-macro" +version = "1.0.110" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "587663dd5fb3d10932c8aecfe7c844db1bcf0aee93eeab08fac13dc1212c2e7f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + +[[package]] +name = "flate2" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "jobserver" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" +dependencies = [ + "libc", +] + +[[package]] +name = "libc" +version = "0.2.150" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" + +[[package]] +name = "link-cplusplus" +version 
= "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d240c6f7e1ba3a28b0249f774e6a9dd0175054b52dfbb61b16eb8505c3785c9" +dependencies = [ + "cc", +] + +[[package]] +name = "lz4" +version = "1.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e9e2dd86df36ce760a60f6ff6ad526f7ba1f14ba0356f8254fb6905e6494df1" +dependencies = [ + "libc", + "lz4-sys", +] + +[[package]] +name = "lz4-sys" +version = "1.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "miniz_oxide" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +dependencies = [ + "adler", +] + +[[package]] +name = "parquet-format-safe" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1131c54b167dd4e4799ce762e1ab01549ebb94d5bdd13e6ec1b467491c378e1f" + +[[package]] +name = "parquet2" +version = "0.17.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "579fe5745f02cef3d5f236bfed216fd4693e49e4e920a13475c6132233283bce" +dependencies = [ + "brotli", + "flate2", + "lz4", + "parquet-format-safe", + "seq-macro", + "snap", + "streaming-decompression", + "xxhash-rust", + "zstd", +] + +[[package]] +name = "parquet_logger" +version = "0.1.0" +dependencies = [ + "cxx", + "parquet2", +] + +[[package]] +name = "pkg-config" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" + +[[package]] +name = "proc-macro2" +version = "1.0.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" +dependencies = [ + 
"unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "seq-macro" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" + +[[package]] +name = "snap" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e9f0ab6ef7eb7353d9119c170a436d1bf248eea575ac42d19d12f4e34130831" + +[[package]] +name = "streaming-decompression" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf6cc3b19bfb128a8ad11026086e31d3ce9ad23f8ea37354b31383a187c44cf3" +dependencies = [ + "fallible-streaming-iterator", +] + +[[package]] +name = "syn" +version = "2.0.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "xxhash-rust" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9828b178da53440fa9c766a3d2f73f7cf5d0ac1fe3980c1e5018d899fd19e07b" + +[[package]] +name = "zstd" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "6.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581" +dependencies = [ + "libc", + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.9+zstd.1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 000000000..755038def --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,6 @@ +[workspace] +members = [ + # The below list must be kept in sync with the crates_repository.manifest + # key in the root WORKSPACE file. + "Source/santad/Logs/EndpointSecurity/ParquetLogger", +] diff --git a/Source/santad/Logs/EndpointSecurity/ParquetLogger/BUILD b/Source/santad/Logs/EndpointSecurity/ParquetLogger/BUILD new file mode 100644 index 000000000..4dab3fc35 --- /dev/null +++ b/Source/santad/Logs/EndpointSecurity/ParquetLogger/BUILD @@ -0,0 +1,54 @@ +load("@crate_index//:defs.bzl", "aliases", "all_crate_deps") +load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library") +load("@rules_rust//rust:defs.bzl", "rust_static_library", "rust_test") +load("//:helper.bzl", "rust_cxx_bridge", "santa_unit_test") + +rust_static_library( + name = "parquet_logger", + srcs = [ + "column_builder.rs", + "cpp_api.rs", + "page_builder.rs", + "parquet_logger.rs", + "table.rs", + "value.rs", + "writer.rs", + ], + aliases = aliases(), + proc_macro_deps = all_crate_deps( + proc_macro = True, + ), + deps = all_crate_deps( + normal = True, + ), +) + +cc_binary( + name = "write_test_file", + srcs = ["write_test_file.cc"], + deps = [":parquet_bridge"], +) + +rust_test( + name = "parquet_logger_test", + crate = ":parquet_logger", +) + +rust_cxx_bridge( + name = "parquet_bridge", + src = "cpp_api.rs", + deps = [":parquet_logger"], +) + +cc_library( + name = "ParquetLogger", + srcs = ["ParquetLogger.cc"], + hdrs = ["ParquetLogger.h"], + deps = [":parquet_bridge"], +) + +santa_unit_test( + name = 
"ParquetLoggerTest", + srcs = ["ParquetLoggerTest.mm"], + deps = [":ParquetLogger"], +) diff --git a/Source/santad/Logs/EndpointSecurity/ParquetLogger/Cargo.toml b/Source/santad/Logs/EndpointSecurity/ParquetLogger/Cargo.toml new file mode 100644 index 000000000..0702250bd --- /dev/null +++ b/Source/santad/Logs/EndpointSecurity/ParquetLogger/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "parquet_logger" +version = "0.1.0" +edition = "2021" +description = "Parquet output support for Santa" + +[lib] +name = "parquet_logger" +path = "parquet_logger.rs" +crate-type = ["cdylib", "staticlib"] + +[dependencies] +parquet2 = "0.17.2" +cxx = "1.0" + +# The release profile is tweaked for binary size. Not all of these options are +# applied by bazel at the moment. +[profile.release] +# Automatically strip symbols from the binary. Note: this seems to have less of +# an effect than just calling strip on the binary after the fact. +strip = true +opt-level = "z" # Optimize for size. +lto = true +codegen-units = 1 # Disable parallel codegen. +panic = "abort" # This matches the behavior of LOG(FATAL). 
diff --git a/Source/santad/Logs/EndpointSecurity/ParquetLogger/ParquetLogger.cc b/Source/santad/Logs/EndpointSecurity/ParquetLogger/ParquetLogger.cc new file mode 100644 index 000000000..5e340ce5b --- /dev/null +++ b/Source/santad/Logs/EndpointSecurity/ParquetLogger/ParquetLogger.cc @@ -0,0 +1,2 @@ +#include "ParquetLogger.h" + diff --git a/Source/santad/Logs/EndpointSecurity/ParquetLogger/ParquetLogger.h b/Source/santad/Logs/EndpointSecurity/ParquetLogger/ParquetLogger.h new file mode 100644 index 000000000..1844b0946 --- /dev/null +++ b/Source/santad/Logs/EndpointSecurity/ParquetLogger/ParquetLogger.h @@ -0,0 +1,6 @@ +#include "Source/santad/Logs/EndpointSecurity/ParquetLogger/gen/cpp_api.rs.h" + +#ifndef SANTA__SANTAD__LOGS_ENDPOINTSECURITY_PARQUETLOGGER_PARQUETLOGGER_H +#define SANTA__SANTAD__LOGS_ENDPOINTSECURITY_PARQUETLOGGER_PARQUETLOGGER_H + +#endif diff --git a/Source/santad/Logs/EndpointSecurity/ParquetLogger/ParquetLoggerTest.mm b/Source/santad/Logs/EndpointSecurity/ParquetLogger/ParquetLoggerTest.mm new file mode 100644 index 000000000..0772f743c --- /dev/null +++ b/Source/santad/Logs/EndpointSecurity/ParquetLogger/ParquetLoggerTest.mm @@ -0,0 +1,57 @@ +#import +#import +#import "ParquetLogger.h" + +@interface ParquetLoggerTest : XCTestCase +@end + +@implementation ParquetLoggerTest + +- (void)testWriteTable { + auto tmp_path = std::filesystem::temp_directory_path(); + try { + auto args = + pedro::wire::table_args_new("test_table", (tmp_path / "test_table.parquet").string()); + + pedro::wire::table_args_add_column(*args, "number", pedro::wire::CxxColumnType::Int32); + pedro::wire::table_args_add_column(*args, "text", pedro::wire::CxxColumnType::ByteArray); + auto table = pedro::wire::table_new(std::move(args)); + + pedro::wire::table_push_i32(*table, 0, 1337); + pedro::wire::table_push_string(*table, 1, "foo"); + + pedro::wire::table_push_i32(*table, 0, 1); + pedro::wire::table_push_i32(*table, 0, 2); + pedro::wire::table_push_i32(*table, 0, 3); + + 
pedro::wire::table_push_string(*table, 1, "bar"); + pedro::wire::table_push_string(*table, 1, "baz"); + pedro::wire::table_push_string(*table, 1, "qux"); + + pedro::wire::table_flush(*table); + pedro::wire::table_end(std::move(table)); + } catch (const std::exception &e) { + // None of this should throw. + XCTAssertFalse(true, "Exception: %s", e.what()); + } + + // The file should exist as expected. + std::filesystem::path testFilePath = tmp_path / "test_table.parquet"; + XCTAssertTrue(std::filesystem::exists(testFilePath)); +} + +- (void)testInvalidFlushThrows { + auto tmp_path = std::filesystem::temp_directory_path(); + auto args = pedro::wire::table_args_new("test_table", (tmp_path / "test_table.parquet").string()); + pedro::wire::table_args_add_column(*args, "number", pedro::wire::CxxColumnType::Int32); + pedro::wire::table_args_add_column(*args, "text", pedro::wire::CxxColumnType::ByteArray); + + auto table = pedro::wire::table_new(std::move(args)); + pedro::wire::table_push_i32(*table, 0, 1); + pedro::wire::table_push_i32(*table, 0, 2); + + // This should throw because we haven't pushed data to all columns. + XCTAssertThrows(pedro::wire::table_flush(*table)); +} + +@end diff --git a/Source/santad/Logs/EndpointSecurity/ParquetLogger/column_builder.rs b/Source/santad/Logs/EndpointSecurity/ParquetLogger/column_builder.rs new file mode 100644 index 000000000..6b56b02eb --- /dev/null +++ b/Source/santad/Logs/EndpointSecurity/ParquetLogger/column_builder.rs @@ -0,0 +1,98 @@ +use crate::{page_builder::PageBuilder, value::Value}; +use parquet2::{ + compression::CompressionOptions, + error::{Error, Result}, + metadata::Descriptor, + page::{CompressedPage, Page}, + write::{Compressor, DynIter, DynStreamingIterator}, +}; + +/// A column is a collection of pages. It can be drained to get compressed pages +/// out. After the compressed pages are written to a file, the column chunk can +/// be reused. 
+pub struct ColumnBuilder { + pages: Vec, + page_size: usize, + descriptor: Descriptor, + compression_options: CompressionOptions, +} + +impl ColumnBuilder { + /// Create a new column builder. Providing invalid options will result in + /// failures on push, not immediately. + pub fn new( + page_size: usize, + descriptor: Descriptor, + compression_options: CompressionOptions, + ) -> Self { + Self { + pages: vec![], + page_size: page_size, + descriptor: descriptor, + compression_options, + } + } + + /// Compress and return the buffered pages so they can be written to a file. + /// + /// WARNING: due to parquet2's iterator-centric design, it's necessary to + /// drain this iterator before calling push again. Otherwise, it's undefined + /// which row group the newly written data will end up, and it could even be + /// dropped. + pub fn drain<'a>(&'a mut self) -> DynStreamingIterator<'a, CompressedPage, Error> { + let pages: Vec> = self + .pages + .drain(..) + .map(|page| Ok(page.into_page())) + .collect(); + let compressor = Compressor::new( + DynIter::new(pages.into_iter()), + self.compression_options, + vec![], + ); + DynStreamingIterator::new(compressor) + } + + /// Append the value to the most recent page. If the page is full, create a + /// new one. + pub fn push(&mut self, value: Value) -> Result<()> { + self.page_builder(value.dyn_size())?.push(value) + } + + /// Return the most recent, partially built page. If the page can't fit the + /// size_hint without going over page_size, a new page is created. + /// + /// This call can only fail if the schema is invalid. 
+ pub fn page_builder(&mut self, size_hint: usize) -> Result<&mut PageBuilder> { + let last_page = match self.pages.last_mut() { + Some(page) => page, + None => { + let mut buffer = vec![]; + buffer.reserve(self.page_size); + self.pages + .push(PageBuilder::new(self.descriptor.clone(), buffer)?); + self.pages.last_mut().unwrap() + } + }; + + if last_page.size() + size_hint > self.page_size { + let mut buffer = vec![]; + buffer.reserve(self.page_size); + self.pages + .push(PageBuilder::new(self.descriptor.clone(), buffer)?); + } + + Ok(self.pages.last_mut().unwrap()) + } + + /// Returns the current size of the column in bytes, as a sum of the sizes + /// of all buffered pages. + pub fn size(&self) -> usize { + self.pages.iter().map(|page| page.size()).sum() + } + + /// Returns the current number of buffered values. + pub fn count(&self) -> usize { + self.pages.iter().map(|page| page.count()).sum() + } +} diff --git a/Source/santad/Logs/EndpointSecurity/ParquetLogger/cpp_api.rs b/Source/santad/Logs/EndpointSecurity/ParquetLogger/cpp_api.rs new file mode 100644 index 000000000..b635e2a67 --- /dev/null +++ b/Source/santad/Logs/EndpointSecurity/ParquetLogger/cpp_api.rs @@ -0,0 +1,145 @@ +//! C++ API for the ParquetLogger. This is a thin wrapper around the Table type. + +use cxx::CxxString; +use parquet2::{ + compression::{BrotliLevel, CompressionOptions}, + error::Error, + metadata::SchemaDescriptor, + schema::{ + types::{ParquetType, PhysicalType, PrimitiveType}, + Repetition, + }, + write::WriteOptions, +}; + +use crate::{ + table::{Options, Table}, + value::Value, + writer::Writer, +}; + +#[cxx::bridge(namespace = "pedro::wire")] +mod ffi { + /// Parquet types supported by the C++ API. 
+ enum CxxColumnType { + Int32, + Int64, + Float, + Double, + ByteArray, + } + + extern "Rust" { + type Table; + type TableArgs; + + fn table_args_new(name: &CxxString, path: &CxxString) -> Box; + fn table_args_add_column( + args: &mut TableArgs, + name: &CxxString, + physical_type: CxxColumnType, + ) -> Result<()>; + + fn table_new(args: Box) -> Result>; + fn table_push_i32(table: &mut Table, column_no: usize, value: i32) -> Result<()>; + fn table_push_i64(table: &mut Table, column_no: usize, value: i64) -> Result<()>; + fn table_push_f32(table: &mut Table, column_no: usize, value: f32) -> Result<()>; + fn table_push_f64(table: &mut Table, column_no: usize, value: f64) -> Result<()>; + fn table_push_bytes(table: &mut Table, column_no: usize, value: &[u8]) -> Result<()>; + fn table_push_string(table: &mut Table, column_no: usize, value: &CxxString) -> Result<()>; + fn table_flush(table: &mut Table) -> Result; + fn table_end(table: Box) -> Result; + } +} + +/// A collection of arguments to construct a Table. Used by the C++ API for a +/// builder pattern. 
+struct TableArgs { + options: Options, + name: String, + path: String, + fields: Vec, +} + +fn cxx_column_type_to_physical_type(physical_type: ffi::CxxColumnType) -> Option { + match physical_type { + ffi::CxxColumnType::Int32 => Some(PhysicalType::Int32), + ffi::CxxColumnType::Int64 => Some(PhysicalType::Int64), + ffi::CxxColumnType::Float => Some(PhysicalType::Float), + ffi::CxxColumnType::Double => Some(PhysicalType::Double), + ffi::CxxColumnType::ByteArray => Some(PhysicalType::ByteArray), + _ => None, + } +} + +fn table_args_new(name: &CxxString, path: &CxxString) -> Box { + Box::new(TableArgs { + options: Options { + write_options: WriteOptions { + write_statistics: true, + version: parquet2::write::Version::V1, + }, + compression_options: CompressionOptions::Brotli(Some(BrotliLevel::try_new(5).unwrap())), + // compression_options: CompressionOptions::Uncompressed, + page_size: 1024, + }, + name: name.to_string(), + path: path.to_string(), + fields: vec![], + }) +} + +fn table_args_add_column( + args: &mut TableArgs, + name: &CxxString, + column_type: ffi::CxxColumnType, +) -> Result<(), Error> { + match cxx_column_type_to_physical_type(column_type) { + None => Err(Error::InvalidParameter("invalid column type".to_string())), + Some(physical_type) => { + let mut field = PrimitiveType::from_physical(name.to_string(), physical_type); + field.field_info.repetition = Repetition::Required; + args.fields.push(ParquetType::PrimitiveType(field)); + Ok(()) + } + } +} + +fn table_new(args: Box) -> Result, Error> { + let schema = SchemaDescriptor::new(args.name, args.fields); + let writer = Writer::open_file(schema.clone(), args.options.write_options, &args.path)?; + Ok(Box::new(Table::new(schema, args.options, writer))) +} + +fn table_push_i32(table: &mut Table, column_no: usize, value: i32) -> Result<(), Error> { + table.push(column_no, Value::I32(value)) +} + +fn table_push_i64(table: &mut Table, column_no: usize, value: i64) -> Result<(), Error> { + 
table.push(column_no, Value::I64(value)) +} + +fn table_push_f32(table: &mut Table, column_no: usize, value: f32) -> Result<(), Error> { + table.push(column_no, Value::F32(value)) +} + +fn table_push_f64(table: &mut Table, column_no: usize, value: f64) -> Result<(), Error> { + table.push(column_no, Value::F64(value)) +} + +fn table_push_bytes(table: &mut Table, column_no: usize, value: &[u8]) -> Result<(), Error> { + table.push(column_no, Value::Bytes(value)) +} + +fn table_push_string(table: &mut Table, column_no: usize, value: &CxxString) -> Result<(), Error> { + table.push(column_no, Value::Bytes(value.as_bytes())) +} + +fn table_flush(table: &mut Table) -> Result { + table.flush() +} + +fn table_end(table: Box
) -> Result { + let (n, _writer) = table.end()?; + Ok(n) +} diff --git a/Source/santad/Logs/EndpointSecurity/ParquetLogger/e2e_test/Dockerfile b/Source/santad/Logs/EndpointSecurity/ParquetLogger/e2e_test/Dockerfile new file mode 100644 index 000000000..e67cbbd2b --- /dev/null +++ b/Source/santad/Logs/EndpointSecurity/ParquetLogger/e2e_test/Dockerfile @@ -0,0 +1,3 @@ +FROM quay.io/jupyter/datascience-notebook:2023-11-17 + +COPY check_parquet_file.py /home/jovyan/work/check_parquet_file.py diff --git a/Source/santad/Logs/EndpointSecurity/ParquetLogger/e2e_test/README.md b/Source/santad/Logs/EndpointSecurity/ParquetLogger/e2e_test/README.md new file mode 100644 index 000000000..ca9d1af12 --- /dev/null +++ b/Source/santad/Logs/EndpointSecurity/ParquetLogger/e2e_test/README.md @@ -0,0 +1,3 @@ +This is a really basic end-to-end test to confirm that the parquet file +generated with `write_test_file.cc` is valid and can be read by Arrow (via +pyarrow via pandas via ipython). diff --git a/Source/santad/Logs/EndpointSecurity/ParquetLogger/e2e_test/check_parquet_file.py b/Source/santad/Logs/EndpointSecurity/ParquetLogger/e2e_test/check_parquet_file.py new file mode 100644 index 000000000..a452a2910 --- /dev/null +++ b/Source/santad/Logs/EndpointSecurity/ParquetLogger/e2e_test/check_parquet_file.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python + +import pandas as pd +import sys + +def expect_eq(got, want): + if got != want: + print(f"Expected {want}, got {got}") + sys.exit(1) + +df = pd.read_parquet(sys.argv[1]) +expect_eq(len(df), 4) + +# Strings should be stored correctly and decode as binary strings (not UTF-8). +expect_eq(df.to_dict()["text"][0], b"Hello, world!") +expect_eq(df.to_dict()["text"][3], b"Good bye, world!") + +# Check that numbers are encoded correctly. 
+expect_eq(df.to_dict()["big_number"][2], 0xcafed00d) + +print("[OK] - example parquet file is valid") +sys.exit(0) diff --git a/Source/santad/Logs/EndpointSecurity/ParquetLogger/e2e_test/e2e_test.sh b/Source/santad/Logs/EndpointSecurity/ParquetLogger/e2e_test/e2e_test.sh new file mode 100755 index 000000000..837556a3b --- /dev/null +++ b/Source/santad/Logs/EndpointSecurity/ParquetLogger/e2e_test/e2e_test.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +set -e + +SCRIPT_DIR=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd ) +ROOT="$(bazel info workspace)" +TMPDIR="$(mktemp -d)" + +# Go to the test directory and build the Docker image with Pandas. +pushd "${SCRIPT_DIR}" > /dev/null +docker build . -t parquet_logger_e2e_test + +# Generate the test file +pushd .. > /dev/null +bazel run :write_test_file -- "${TMPDIR}/test_file.parquet" + +# Run the test in a Docker container +docker run \ + --rm \ + -v "${TMPDIR}:/tmp" \ + parquet_logger_e2e_test \ + /usr/bin/env python /home/jovyan/work/check_parquet_file.py /tmp/test_file.parquet + +popd > /dev/null diff --git a/Source/santad/Logs/EndpointSecurity/ParquetLogger/page_builder.rs b/Source/santad/Logs/EndpointSecurity/ParquetLogger/page_builder.rs new file mode 100644 index 000000000..5faf40e22 --- /dev/null +++ b/Source/santad/Logs/EndpointSecurity/ParquetLogger/page_builder.rs @@ -0,0 +1,203 @@ +use crate::value::Value; + +use parquet2::{ + encoding::Encoding, + error::{Error, Result}, + metadata::Descriptor, + page::{DataPage, DataPageHeader, DataPageHeaderV1, Page}, + schema::types::PhysicalType, + statistics::{serialize_statistics, BinaryStatistics, PrimitiveStatistics}, + types::NativeType, +}; +use std::cmp::PartialOrd; + +/// A page builder serializes primitive values into bytes and appends them to a +/// page (buffer). Implementations are provided for NativeType and &[u8] (byte +/// array). 
+pub struct PageBuilder { + page_builder: InnerBuilder, +} + +impl PageBuilder { + pub fn new(descriptor: Descriptor, buffer: Vec) -> Result { + match descriptor.primitive_type.physical_type { + PhysicalType::ByteArray => Ok(Self { + page_builder: InnerBuilder::ByteArray(ByteArrayPage::new(buffer, descriptor)), + }), + PhysicalType::Int32 => Ok(Self { + page_builder: InnerBuilder::I32(NativePage::new(buffer, descriptor)), + }), + PhysicalType::Int64 => Ok(Self { + page_builder: InnerBuilder::I64(NativePage::new(buffer, descriptor)), + }), + PhysicalType::Float => Ok(Self { + page_builder: InnerBuilder::F32(NativePage::new(buffer, descriptor)), + }), + PhysicalType::Double => Ok(Self { + page_builder: InnerBuilder::F64(NativePage::new(buffer, descriptor)), + }), + _ => Err(Error::FeatureNotSupported(format!( + "Unsupported type: {:?}", + descriptor.primitive_type + ))), + } + } + + pub fn push(&mut self, value: Value) -> Result<()> { + match self.page_builder { + InnerBuilder::ByteArray(ref mut builder) => builder.push(value.as_bytes()?), + InnerBuilder::I32(ref mut builder) => builder.push(value.as_i32()?), + InnerBuilder::I64(ref mut builder) => builder.push(value.as_i64()?), + InnerBuilder::F32(ref mut builder) => builder.push(value.as_f32()?), + InnerBuilder::F64(ref mut builder) => builder.push(value.as_f64()?), + } + Ok(()) + } + + pub fn size(&self) -> usize { + match self.page_builder { + InnerBuilder::ByteArray(ref builder) => builder.size(), + InnerBuilder::I32(ref builder) => builder.size(), + InnerBuilder::I64(ref builder) => builder.size(), + InnerBuilder::F32(ref builder) => builder.size(), + InnerBuilder::F64(ref builder) => builder.size(), + } + } + + pub fn count(&self) -> usize { + match self.page_builder { + InnerBuilder::ByteArray(ref builder) => builder.count, + InnerBuilder::I32(ref builder) => builder.count, + InnerBuilder::I64(ref builder) => builder.count, + InnerBuilder::F32(ref builder) => builder.count, + InnerBuilder::F64(ref builder) => 
builder.count, + } + } + + pub fn into_page(self) -> Page { + match self.page_builder { + InnerBuilder::ByteArray(builder) => builder.into_page(), + InnerBuilder::I32(builder) => builder.into_page(), + InnerBuilder::I64(builder) => builder.into_page(), + InnerBuilder::F32(builder) => builder.into_page(), + InnerBuilder::F64(builder) => builder.into_page(), + } + } +} + +enum InnerBuilder { + ByteArray(ByteArrayPage), + I32(NativePage), + I64(NativePage), + F32(NativePage), + F64(NativePage), +} + +/// Builds a page of variable length by arrays. Used for strings and other +/// blobs. +pub struct ByteArrayPage { + buffer: Vec, + count: usize, + descriptor: Descriptor, +} + +impl ByteArrayPage { + fn new(mut buffer: Vec, descriptor: Descriptor) -> Self { + buffer.clear(); + Self { + buffer: buffer, + count: 0, + descriptor: descriptor, + } + } + + fn push(&mut self, value: &[u8]) { + self.buffer + .extend_from_slice((value.len() as i32).to_le_bytes().as_ref()); + self.buffer.extend_from_slice(value); + self.count += 1; + } + + fn size(&self) -> usize { + self.buffer.len() + } + + fn into_page(self) -> Page { + let statistics = BinaryStatistics { + primitive_type: self.descriptor.primitive_type.clone(), + null_count: Some(0), // No NULLs allowed. + distinct_count: None, // Not worth the cost of counting. + max_value: None, + min_value: None, + }; + + let header = DataPageHeaderV1 { + num_values: self.count as i32, + encoding: Encoding::Plain.into(), + definition_level_encoding: Encoding::Rle.into(), + repetition_level_encoding: Encoding::Rle.into(), + statistics: Some(serialize_statistics(&statistics)), + }; + + Page::Data(DataPage::new( + DataPageHeader::V1(header), + self.buffer, + self.descriptor, + Some(self.count), + )) + } +} + +/// A page of numbers using plain encoding. This is implemented (and fast) for +/// most native numeric types. (Int96 isn't used at the moment.) 
+pub struct NativePage { + buffer: Vec, + count: usize, + statistics: PrimitiveStatistics, + descriptor: Descriptor, +} + +impl NativePage { + fn new(mut buffer: Vec, descriptor: Descriptor) -> Self { + buffer.clear(); + Self { + buffer: buffer, + count: 0, + statistics: PrimitiveStatistics { + primitive_type: descriptor.primitive_type.clone(), + null_count: Some(0), // No NULLs allowed. + distinct_count: None, // Not worth the cost of counting. + max_value: None, + min_value: None, + }, + descriptor: descriptor, + } + } + + fn push(&mut self, value: T) { + self.buffer.extend_from_slice(value.to_le_bytes().as_ref()); + self.count += 1; + // TODO(adam): Keep track of min and max and maybe distinct. + } + + fn size(&self) -> usize { + self.buffer.len() + } + + fn into_page(self) -> Page { + let header = DataPageHeaderV1 { + num_values: self.count as i32, + encoding: Encoding::Plain.into(), + definition_level_encoding: Encoding::Rle.into(), + repetition_level_encoding: Encoding::Rle.into(), + statistics: Some(serialize_statistics(&self.statistics)), + }; + + Page::Data(DataPage::new( + DataPageHeader::V1(header), + self.buffer, + self.descriptor, + Some(self.count), + )) + } +} diff --git a/Source/santad/Logs/EndpointSecurity/ParquetLogger/parquet_logger.rs b/Source/santad/Logs/EndpointSecurity/ParquetLogger/parquet_logger.rs new file mode 100644 index 000000000..8eb4cafc8 --- /dev/null +++ b/Source/santad/Logs/EndpointSecurity/ParquetLogger/parquet_logger.rs @@ -0,0 +1,132 @@ +//! This package provides an opinionated API for producing a Parquet file +//! containing a simple table. It's intended to be easy to use from both Rust +//! and C++ code, and uses Cxx to expose a C++ API. (See cpp_api.rs.) +//! +//! We take the following simplifying assumptions: +//! +//! * All fields are always required (no NULLs). +//! * All fields are simple types: integers, floats and strings. +//! * All files are brotli-compressed. +//! +//! 
The API provides reasonable defaults for many of the knobs Parquet exposes, +//! and doesn't allow overriding most of them. This is intentional - the goal is +//! to be as simple to use as possible. +//! +//! To get started from C++, look at cpp_api.rs. To get started from Rust, look +//! at the Table type in table.rs. +//! +//! # Implementation Notes +//! +//! The API is implemented on top of parquet2, a minimal reimplementation of the +//! official arrow crate. We chose parquet2 for its simplicity, compilation +//! speed and lack of unsafe code. (The official arrow project is extremely +//! large and depends on Boost in C++.) +//! +//! The code structure roughly mirrors that of a parquet file: +//! +//! * Table: represents a parquet file, which consists of one or more row +//! groups. +//! * ColumnBuilder: represents a column chunk in a row group. +//! * PageBuilder: represents a data page in a column chunk. +//! * Value: represents a single scalar (number or byte blob) in a data page. +//! +//! Correctness, including of types, is enforced at runtime. Value, rather than +//! being a generic type, is an enumeration (discriminated union) that can hold +//! any of the supported types. This is done for two reasons: +//! +//! 1. It makes the code easier to understand - multiple layers of generic +//! traits are required for static type checking of column chunks and pages. +//! 2. The Table type must expose a runtime-generic way of setting a cell in a +//! column, and this is the most common way of using the API, so any savings +//! gained from static type checking would be bypassed by the most common +//! code path anyway. +//! +//! # Future Work +//! +//! * Support fixed-length byte arrays. +//! * Reimplement FileWriter to use an arena-style buffer instead of nested +//! iterators. +//! * More work on code size - the build size (in opt) is about 3.5 MiB, which +//! could be reduced further by stripping unused compression code. +//! +//! # References +//! +//! 
* Parquet format thrift specification: +//! https://github.com/apache/parquet-format/blob/master/src/main/thrift/parquet.thrift +//! * Parquet format docs: https://parquet.apache.org/docs/overview/ +//! * parquet2 crate: https://github.com/jorgecarleitao/parquet2 +//! * Pedro's parquet code using C++ Arrow: +//! https://github.com/wowsignal-io/pedro/blob/master/pedro/output/parquet.h +//! * Arrow (mainline parquet code): https://github.com/apache/arrow + +mod column_builder; +mod cpp_api; +mod page_builder; +mod table; +mod value; +mod writer; + +// This is the main public API. +pub use crate::table::Table; + +#[cfg(test)] +mod test { + use crate::{ + table::{Options, Table}, + value::Value, + writer::Writer, + }; + use parquet2::{ + compression::{BrotliLevel, CompressionOptions}, + metadata::SchemaDescriptor, + schema::types::{ParquetType, PhysicalType}, + write::{Version, WriteOptions}, + }; + + #[test] + fn test_write() { + let options = Options { + write_options: WriteOptions { + write_statistics: true, + version: Version::V1, + }, + compression_options: CompressionOptions::Brotli(Some(BrotliLevel::try_new(5).unwrap())), + page_size: 1024, + }; + + let schema = SchemaDescriptor::new( + "schema".to_string(), + vec![ + ParquetType::from_physical("a".to_string(), PhysicalType::Int32), + ParquetType::from_physical("b".to_string(), PhysicalType::Int64), + ParquetType::from_physical("c".to_string(), PhysicalType::ByteArray), + ], + ); + + let writer = Writer::from_memory(schema.clone(), options.write_options, vec![]); + let mut table = Table::new(schema, options, writer); + + for i in 0..1000 { + table.push(0, Value::I32(i)).expect("push failed"); + table + .push(1, Value::I64((i * 2).into())) + .expect("push failed"); + + let s = format!("integer_{}", i); + table + .push(2, Value::Bytes(s.as_bytes())) + .expect("push failed"); + } + table.flush().expect("flush failed"); + let (_, writer) = table.end().expect("end failed"); + + let writer = if let Writer::Memory(w) = 
writer { + w + } else { + panic!("Expected Writer::Memory"); + }; + let result = writer.into_inner(); + assert!(!result.is_empty()); + println!("{:?}", result); + } +} diff --git a/Source/santad/Logs/EndpointSecurity/ParquetLogger/table.rs b/Source/santad/Logs/EndpointSecurity/ParquetLogger/table.rs new file mode 100644 index 000000000..40b48e7af --- /dev/null +++ b/Source/santad/Logs/EndpointSecurity/ParquetLogger/table.rs @@ -0,0 +1,118 @@ +use crate::{ + column_builder::ColumnBuilder, + value::Value, + writer::{write_row_group, Writer}, +}; +use parquet2::{ + compression::CompressionOptions, error::Result, metadata::SchemaDescriptor, write::WriteOptions, +}; + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct Options { + pub write_options: WriteOptions, + pub compression_options: CompressionOptions, + pub page_size: usize, +} + +/// Wraps the API in a type that's easy to expose to C++. (Is not generic and is +/// easy to construct.) +pub struct Table { + columns: Vec, + writer: Writer, +} + +impl Table { + pub fn new(schema: SchemaDescriptor, options: Options, writer: Writer) -> Self { + let columns = schema + .columns() + .iter() + .map(|column| { + ColumnBuilder::new( + options.page_size, + column.descriptor.clone(), + options.compression_options, + ) + }) + .collect::>(); + Self { columns, writer } + } + + /// Pushes a value to the column. The column number is the index the column + /// had in the schema. The type of Value must match the type of the column + /// as specified in new. + /// + /// It is not necessary to push values in order of column number. It is also + /// not required to push one row at a time (it's fine to write column by + /// column). However, the same number of values must be pushed to each + /// column by the time flush is called. + pub fn push(&mut self, column_no: usize, value: Value) -> Result<()> { + self.columns[column_no].push(value) + } + + /// Convenience method for pushing a row of values at a time. See push. 
+ pub fn push_row<'a, I>(&mut self, values: I) -> Result<()> + where + I: Iterator>, + { + for (column_no, value) in values.enumerate() { + self.push(column_no, value)?; + } + Ok(()) + } + + /// Convenience method for pushing a column of values at a time. See push. + pub fn push_column<'a, I>(&mut self, column_no: usize, values: I) -> Result<()> + where + I: Iterator>, + { + for value in values { + self.push(column_no, value)?; + } + Ok(()) + } + + /// Checks that the table is well-formed and returns the number of rows in + /// the buffer. + pub fn validate(&self) -> Result { + match self.columns.len() { + 0 => Err(parquet2::error::Error::OutOfSpec("No columns".to_string())), + _ => { + let n = self.columns[0].count(); + if self.columns.iter().all(|column| column.count() == n) { + Ok(n) + } else { + Err(parquet2::error::Error::OutOfSpec( + "Column counts don't match".to_string(), + )) + } + } + } + } + + /// Flushes a row group to the writer and returns the number of rows + /// flushed. Does nothing if no rows are buffered. + pub fn flush(&mut self) -> Result { + match self.validate() { + Ok(0) => Ok(0), + Ok(n) => { + write_row_group(&mut self.writer, &mut self.columns)?; + Ok(n) + } + Err(e) => Err(e), + } + } + + /// Flushes all buffered data and ends the file, writing the footer. + pub fn end(mut self) -> Result<(u64, Writer)> { + self.flush()?; + let n = self.writer.end()?; + Ok((n, self.writer)) + } + + /// Return the total buffered size of the table in bytes. This does not + /// count bytes already written to disk, or any metadata, header or + /// footer. 
+ pub fn size(&self) -> usize { + self.columns.iter().map(|column| column.size()).sum() + } +} diff --git a/Source/santad/Logs/EndpointSecurity/ParquetLogger/value.rs b/Source/santad/Logs/EndpointSecurity/ParquetLogger/value.rs new file mode 100644 index 000000000..56db564a0 --- /dev/null +++ b/Source/santad/Logs/EndpointSecurity/ParquetLogger/value.rs @@ -0,0 +1,72 @@ +use parquet2::error::Result; + +/// A value can be written to a page in a column chunk in a row group in a +/// parquet file in the house that Jack built. +/// +/// Parquet only supports a handful of physical types. In addition to what's +/// listed in this enum, parquet supports int96 and fixed length arrays, which +/// are not yet implemented here. +pub enum Value<'a> { + I32(i32), + I64(i64), + F32(f32), + F64(f64), + Bytes(&'a [u8]), +} + +impl<'a> Value<'a> { + pub fn dyn_size(&self) -> usize { + match self { + Value::I32(_) => std::mem::size_of::(), + Value::I64(_) => std::mem::size_of::(), + Value::F32(_) => std::mem::size_of::(), + Value::F64(_) => std::mem::size_of::(), + Value::Bytes(b) => b.len(), + } + } + + pub fn as_i32(&self) -> Result { + match self { + Value::I32(v) => Ok(*v), + _ => Err(parquet2::error::Error::InvalidParameter( + "Expected i32".to_string(), + )), + } + } + + pub fn as_i64(&self) -> Result { + match self { + Value::I64(v) => Ok(*v), + _ => Err(parquet2::error::Error::InvalidParameter( + "Expected i64".to_string(), + )), + } + } + + pub fn as_f32(&self) -> Result { + match self { + Value::F32(v) => Ok(*v), + _ => Err(parquet2::error::Error::InvalidParameter( + "Expected f32".to_string(), + )), + } + } + + pub fn as_f64(&self) -> Result { + match self { + Value::F64(v) => Ok(*v), + _ => Err(parquet2::error::Error::InvalidParameter( + "Expected f64".to_string(), + )), + } + } + + pub fn as_bytes(&self) -> Result<&'a [u8]> { + match self { + Value::Bytes(v) => Ok(*v), + _ => Err(parquet2::error::Error::InvalidParameter( + "Expected bytes".to_string(), + )), + } + } +} 
diff --git a/Source/santad/Logs/EndpointSecurity/ParquetLogger/write_test_file.cc b/Source/santad/Logs/EndpointSecurity/ParquetLogger/write_test_file.cc new file mode 100644 index 000000000..a16850f19 --- /dev/null +++ b/Source/santad/Logs/EndpointSecurity/ParquetLogger/write_test_file.cc @@ -0,0 +1,64 @@ +#include <iostream> + +#include "Source/santad/Logs/EndpointSecurity/ParquetLogger/gen/cpp_api.rs.h" + +// This demonstrates the use of the C++ API for parquet_logger, which is +// implemented in Rust. The C++ API uses an opaque type called Table, which is +// returned as a Box (effectively a unique_ptr). +// +// To easily check that the file is valid, run the e2e_test in the same +// directory. +int main(int argc, char* argv[]) { + try { + if (argc < 2) { + std::cout << "Usage: " << argv[0] << " <output_path>" << std::endl; + return 1; + } + + // IMPORTANT: If you change anything about the file, you MUST ALSO change + // check_parquet_file.py, otherwise the e2e test will break. + + // Make me a table builder: + auto args = pedro::wire::table_args_new("test_table", argv[1]); + + // Declare me some columns: + pedro::wire::table_args_add_column(*args, "number", + pedro::wire::CxxColumnType::Int32); + pedro::wire::table_args_add_column(*args, "text", + pedro::wire::CxxColumnType::ByteArray); + pedro::wire::table_args_add_column(*args, "big_number", + pedro::wire::CxxColumnType::Int64); + auto table = pedro::wire::table_new(std::move(args)); + + // Push one row: + pedro::wire::table_push_i32(*table, 0, 1337); + pedro::wire::table_push_string(*table, 1, "Hello, world!"); + pedro::wire::table_push_i64(*table, 2, 0xdeadbeef); + + // You can also push column by column - here three rows at a time. 
+ pedro::wire::table_push_i32(*table, 0, 1); + pedro::wire::table_push_i32(*table, 0, 2); + pedro::wire::table_push_i32(*table, 0, 3); + + pedro::wire::table_push_string(*table, 1, "Hello, world!"); + pedro::wire::table_push_string(*table, 1, "Hello, world!"); + pedro::wire::table_push_string(*table, 1, "Good bye, world!"); + + pedro::wire::table_push_i64(*table, 2, 0xdeadbeef); + pedro::wire::table_push_i64(*table, 2, 0xcafed00d); + pedro::wire::table_push_i64(*table, 2, 0xfeedface); + + // As long as all columns are of equal lengths, we can now flush. (If + // they're not, we'll get an exception.) + pedro::wire::table_flush(*table); + // Flush doesn't write the table footer - it only writes the data in the + // buffer. To finalize the parquet file, we need to end the table. + pedro::wire::table_end(std::move(table)); + + std::cout << "Successfully wrote the example parquet file " << argv[1] << std::endl; + } catch (const std::exception& e) { + std::cout << "Error writing parquet file:" << std::endl << e.what() << std::endl; + return 1; // Report the failure to the caller instead of exiting with 0. + } + return 0; +} diff --git a/Source/santad/Logs/EndpointSecurity/ParquetLogger/writer.rs b/Source/santad/Logs/EndpointSecurity/ParquetLogger/writer.rs new file mode 100644 index 000000000..b4c44074e --- /dev/null +++ b/Source/santad/Logs/EndpointSecurity/ParquetLogger/writer.rs @@ -0,0 +1,54 @@ +use std::fs::File; + +use parquet2::{ + error::{Error, Result}, + metadata::SchemaDescriptor, + page::CompressedPage, + write::{DynIter, DynStreamingIterator, FileWriter, WriteOptions}, +}; + +use crate::column_builder::ColumnBuilder; + +/// Wraps the FileWriter for Table to allow constructing the latter from C++. +/// (FileWriter is generic, but Table cannot be.) 
+pub enum Writer { + Memory(FileWriter>), + File(FileWriter), +} + +impl Writer { + pub fn from_memory(schema: SchemaDescriptor, options: WriteOptions, buffer: Vec) -> Self { + Self::Memory(FileWriter::new(buffer, schema, options, None)) + } + + pub fn from_file(schema: SchemaDescriptor, options: WriteOptions, file: File) -> Self { + Self::File(FileWriter::new(file, schema, options, None)) + } + + pub fn open_file(schema: SchemaDescriptor, options: WriteOptions, path: &str) -> Result { + Ok(Self::from_file(schema, options, File::create(path)?)) + } + + pub fn write<'a>( + &mut self, + row_group: DynIter<'a, Result>>, + ) -> Result<()> { + match self { + Self::Memory(writer) => writer.write(row_group), + Self::File(writer) => writer.write(row_group), + } + } + + pub fn end(&mut self) -> Result { + match self { + Self::Memory(writer) => writer.end(None), + Self::File(writer) => writer.end(None), + } + } +} + +pub fn write_row_group(writer: &mut Writer, columns: &mut Vec) -> Result<()> { + let row_group = columns.iter_mut().map(|column| Ok(column.drain())); + let row_group = DynIter::new(row_group); + writer.write(row_group) +} diff --git a/WORKSPACE b/WORKSPACE index d634670ba..2ac284a44 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -224,3 +224,82 @@ rules_fuzzing_dependencies() load("@rules_fuzzing//fuzzing:init.bzl", "rules_fuzzing_init") rules_fuzzing_init() + +# Rust support: + +# To find additional information on this release or newer ones visit: +# https://github.com/bazelbuild/rules_rust/releases +http_archive( + name = "rules_rust", + sha256 = "36ab8f9facae745c9c9c1b33d225623d976e78f2cc3f729b7973d8c20934ab95", + urls = ["https://github.com/bazelbuild/rules_rust/releases/download/0.31.0/rules_rust-v0.31.0.tar.gz"], +) + +load("@rules_rust//rust:repositories.bzl", "rules_rust_dependencies", "rust_register_toolchains") + +rules_rust_dependencies() + +rust_register_toolchains( + edition = "2021", +) + +load("@rules_rust//crate_universe:repositories.bzl", 
"crate_universe_dependencies") + +crate_universe_dependencies() + +load("@rules_rust//crate_universe:defs.bzl", "crates_repository") + +# If we don't specify which platforms to build, rust_*_library targets will +# select() across platforms that are not guaranteed to exist in the local Bazel, +# which breaks the build. +# +# This is probably a Bazel bug, because rules_rust +# specifies the correct module dependency and Bazel just ignores it and fetches +# an old version. +# +# TODO(the80srobot): Find the right Bazel subproject and file a bug. +RUST_SUPPORTED_PLATFORM_TRIPLES = [ + "i686-apple-darwin", + "x86_64-apple-darwin", + "aarch64-apple-darwin", +] + +crates_repository( + name = "crate_index", + cargo_lockfile = "//:Cargo.lock", + manifests = [ + # Root Cargo file. + "//:Cargo.toml", + # The below this line must be kept in sync with the workspaces listed in + # the root Cargo file. + "//:Source/santad/Logs/EndpointSecurity/ParquetLogger/Cargo.toml", + ], + supported_platform_triples = RUST_SUPPORTED_PLATFORM_TRIPLES, +) + +load("@crate_index//:defs.bzl", "crate_repositories") + +crate_repositories() + +# cxxbridge is a codegen tool for Rust/C++ bindings. To understand why this is +# set up the way it is, read +# http://bazelbuild.github.io/rules_rust/crate_universe.html#binary-dependencies. +http_archive( + name = "cxxbridge-cmd", + build_file = "//external_patches/cxxbridge-cmd:BUILD", + sha256 = "dc5db43c367778010dff55b602f71eccff712b8edf54a3f08687bd1c7cbad6df", + strip_prefix = "cxxbridge-cmd-1.0.110", + type = "tar.gz", + urls = ["https://crates.io/api/v1/crates/cxxbridge-cmd/1.0.110/download"], +) + +# See above for notes. 
+crates_repository( + name = "cxxbridge_cmd_deps", + cargo_lockfile = "//external_patches/cxxbridge-cmd:Cargo.lock", + lockfile = "//external_patches/cxxbridge-cmd:Cargo.Bazel.lock", + manifests = ["@cxxbridge-cmd//:Cargo.toml"], + supported_platform_triples = RUST_SUPPORTED_PLATFORM_TRIPLES, +) +load("@cxxbridge_cmd_deps//:defs.bzl", cxxbridge_cmd_deps = "crate_repositories") +cxxbridge_cmd_deps() diff --git a/docs/development/contributing.md b/docs/development/contributing.md index c1bb7906a..3e0badea0 100644 --- a/docs/development/contributing.md +++ b/docs/development/contributing.md @@ -29,10 +29,49 @@ the tests are complete and passing. ### Code Style -All code submissions should try to match the surrounding code. Wherever possible, -code should adhere to either the -[Google Objective-C Style Guide](https://google.github.io/styleguide/objcguide.xml) -or the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). +Santa is written in a mix of C++, Objective-C and a small amount of Rust. All +code submissions should try to match the surrounding code. We follow the [Google +Objective-C Style Guide](https://google.github.io/styleguide/objcguide.xml), the +[Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html) and +the [Rust Style Guide](https://doc.rust-lang.org/beta/style-guide/index.html). + +Files containing C++ and Objective-C code are named `ClassName.mm` and +`ClassName.h`. Rust code is named `library_name.rs`. The BUILD rules follow the +same naming convention: `ClassName` for the C-family, and `library_name` or +`library_bridge` (if using cxx) for Rust. + +### Using Rust + +Rust support in Santa is experimental, and currently only used for specific +external dependencies written in Rust. + +Adding new Rust libraries requires some extra steps: + +* Each Rust library must have both a `rust_static_library` BUILD target and a + `Cargo.toml` file listing its dependencies. 
(`Cargo.toml` helps rust-analyzer + and VSCode Rust extensions.) +* Each new `Cargo.toml` file must be added to the `workspace.members` key in the + root `Cargo.toml` file AND the `manifests` list in the root `WORKSPACE` file. +* Each `rust_static_library` should be wrapped in a `cc_library`, rather than + depended on directly. (This is quite natural when using `cxx`.) +* Rust code shouldn't be placed just anywhere - generally, each + `rust_static_library` target should be in a separate directory, with its own + `Cargo.toml` file. (This lets rust-analyzer understand the code structure.) + +Additionally, please follow these guidelines: + +* Do not write new Rust code without discussing it with the maintainers first. +* Do not "rewrite it in Rust" for no practical reason. +* Keep the Rust code to the leaves. +* Don't hand over control between Rust and C++ more than absolutely necessary. +* Have C++ call into Rust, not the other way around. +* Use of `Result` types across the FFI is permitted, because it is the only enum + (variant) type Cxx supports and the only good way to indicate failures. If you + do return `Result`, **you MUST wrap the C++ call site in a `try-catch` + block.** +* Run `cargo fix && cargo fmt` before submitting code for review. You may need + to use the nightly toolchain to support all options in `rustfmt.toml`: `rustup run + nightly cargo fmt`. ### The small print Contributions made by corporations are covered by a different agreement than diff --git a/external_patches/cxxbridge-cmd/BUILD b/external_patches/cxxbridge-cmd/BUILD new file mode 100644 index 000000000..a1f5b1aee --- /dev/null +++ b/external_patches/cxxbridge-cmd/BUILD @@ -0,0 +1,20 @@ +load("@cxxbridge_cmd_deps//:defs.bzl", "aliases", "all_crate_deps") +load("@rules_rust//rust:defs.bzl", "rust_binary") + +licenses(["notice"]) + +# cxxbridge is a codegen tool for cxx. 
+# +# Note that you must build this target as @cxxbridge-cmd//:cxxbridge, not as +# //external_patches/cxxbridge-cmd:cxxbridge or any other path. +rust_binary( + name = "cxxbridge", + srcs = glob(["src/**/*.rs"]), + aliases = aliases(), + data = [ + "src/gen/include/cxx.h", + ], + edition = "2021", + visibility = ["//visibility:public"], + deps = all_crate_deps(), +) diff --git a/external_patches/cxxbridge-cmd/Cargo.Bazel.lock b/external_patches/cxxbridge-cmd/Cargo.Bazel.lock new file mode 100644 index 000000000..ef85d37cf --- /dev/null +++ b/external_patches/cxxbridge-cmd/Cargo.Bazel.lock @@ -0,0 +1,788 @@ +{ + "checksum": "3fb76a2b424dc7b23c83784d2b187d9b64c70d142c8ef6738119727803256bdb", + "crates": { + "anstyle 1.0.4": { + "name": "anstyle", + "version": "1.0.4", + "repository": { + "Http": { + "url": "https://crates.io/api/v1/crates/anstyle/1.0.4/download", + "sha256": "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" + } + }, + "targets": [ + { + "Library": { + "crate_name": "anstyle", + "crate_root": "src/lib.rs", + "srcs": [ + "**/*.rs" + ] + } + } + ], + "library_target_name": "anstyle", + "common_attrs": { + "compile_data_glob": [ + "**" + ], + "crate_features": { + "common": [ + "default", + "std" + ], + "selects": {} + }, + "edition": "2021", + "version": "1.0.4" + }, + "license": "MIT OR Apache-2.0" + }, + "clap 4.4.10": { + "name": "clap", + "version": "4.4.10", + "repository": { + "Http": { + "url": "https://crates.io/api/v1/crates/clap/4.4.10/download", + "sha256": "41fffed7514f420abec6d183b1d3acfd9099c79c3a10a06ade4f8203f1411272" + } + }, + "targets": [ + { + "Library": { + "crate_name": "clap", + "crate_root": "src/lib.rs", + "srcs": [ + "**/*.rs" + ] + } + } + ], + "library_target_name": "clap", + "common_attrs": { + "compile_data_glob": [ + "**" + ], + "crate_features": { + "common": [ + "error-context", + "help", + "std", + "suggestions", + "usage" + ], + "selects": {} + }, + "deps": { + "common": [ + { + "id": "clap_builder 
4.4.9", + "target": "clap_builder" + } + ], + "selects": {} + }, + "edition": "2021", + "version": "4.4.10" + }, + "license": "MIT OR Apache-2.0" + }, + "clap_builder 4.4.9": { + "name": "clap_builder", + "version": "4.4.9", + "repository": { + "Http": { + "url": "https://crates.io/api/v1/crates/clap_builder/4.4.9/download", + "sha256": "63361bae7eef3771745f02d8d892bec2fee5f6e34af316ba556e7f97a7069ff1" + } + }, + "targets": [ + { + "Library": { + "crate_name": "clap_builder", + "crate_root": "src/lib.rs", + "srcs": [ + "**/*.rs" + ] + } + } + ], + "library_target_name": "clap_builder", + "common_attrs": { + "compile_data_glob": [ + "**" + ], + "crate_features": { + "common": [ + "error-context", + "help", + "std", + "suggestions", + "usage" + ], + "selects": {} + }, + "deps": { + "common": [ + { + "id": "anstyle 1.0.4", + "target": "anstyle" + }, + { + "id": "clap_lex 0.6.0", + "target": "clap_lex" + }, + { + "id": "strsim 0.10.0", + "target": "strsim" + } + ], + "selects": {} + }, + "edition": "2021", + "version": "4.4.9" + }, + "license": "MIT OR Apache-2.0" + }, + "clap_lex 0.6.0": { + "name": "clap_lex", + "version": "0.6.0", + "repository": { + "Http": { + "url": "https://crates.io/api/v1/crates/clap_lex/0.6.0/download", + "sha256": "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" + } + }, + "targets": [ + { + "Library": { + "crate_name": "clap_lex", + "crate_root": "src/lib.rs", + "srcs": [ + "**/*.rs" + ] + } + } + ], + "library_target_name": "clap_lex", + "common_attrs": { + "compile_data_glob": [ + "**" + ], + "edition": "2021", + "version": "0.6.0" + }, + "license": "MIT OR Apache-2.0" + }, + "codespan-reporting 0.11.1": { + "name": "codespan-reporting", + "version": "0.11.1", + "repository": { + "Http": { + "url": "https://crates.io/api/v1/crates/codespan-reporting/0.11.1/download", + "sha256": "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" + } + }, + "targets": [ + { + "Library": { + "crate_name": 
"codespan_reporting", + "crate_root": "src/lib.rs", + "srcs": [ + "**/*.rs" + ] + } + } + ], + "library_target_name": "codespan_reporting", + "common_attrs": { + "compile_data_glob": [ + "**" + ], + "deps": { + "common": [ + { + "id": "termcolor 1.4.0", + "target": "termcolor" + }, + { + "id": "unicode-width 0.1.11", + "target": "unicode_width" + } + ], + "selects": {} + }, + "edition": "2018", + "version": "0.11.1" + }, + "license": "Apache-2.0" + }, + "cxxbridge-cmd 1.0.110": { + "name": "cxxbridge-cmd", + "version": "1.0.110", + "repository": null, + "targets": [], + "library_target_name": null, + "common_attrs": { + "compile_data_glob": [ + "**" + ], + "deps": { + "common": [ + { + "id": "clap 4.4.10", + "target": "clap" + }, + { + "id": "codespan-reporting 0.11.1", + "target": "codespan_reporting" + }, + { + "id": "proc-macro2 1.0.70", + "target": "proc_macro2" + }, + { + "id": "quote 1.0.33", + "target": "quote" + }, + { + "id": "syn 2.0.39", + "target": "syn" + } + ], + "selects": {} + }, + "edition": "2021", + "version": "1.0.110" + }, + "license": "MIT OR Apache-2.0" + }, + "proc-macro2 1.0.70": { + "name": "proc-macro2", + "version": "1.0.70", + "repository": { + "Http": { + "url": "https://crates.io/api/v1/crates/proc-macro2/1.0.70/download", + "sha256": "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" + } + }, + "targets": [ + { + "Library": { + "crate_name": "proc_macro2", + "crate_root": "src/lib.rs", + "srcs": [ + "**/*.rs" + ] + } + }, + { + "BuildScript": { + "crate_name": "build_script_build", + "crate_root": "build.rs", + "srcs": [ + "**/*.rs" + ] + } + } + ], + "library_target_name": "proc_macro2", + "common_attrs": { + "compile_data_glob": [ + "**" + ], + "crate_features": { + "common": [ + "span-locations" + ], + "selects": {} + }, + "deps": { + "common": [ + { + "id": "proc-macro2 1.0.70", + "target": "build_script_build" + }, + { + "id": "unicode-ident 1.0.12", + "target": "unicode_ident" + } + ], + "selects": {} + }, + 
"edition": "2021", + "version": "1.0.70" + }, + "build_script_attrs": { + "data_glob": [ + "**" + ] + }, + "license": "MIT OR Apache-2.0" + }, + "quote 1.0.33": { + "name": "quote", + "version": "1.0.33", + "repository": { + "Http": { + "url": "https://crates.io/api/v1/crates/quote/1.0.33/download", + "sha256": "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" + } + }, + "targets": [ + { + "Library": { + "crate_name": "quote", + "crate_root": "src/lib.rs", + "srcs": [ + "**/*.rs" + ] + } + } + ], + "library_target_name": "quote", + "common_attrs": { + "compile_data_glob": [ + "**" + ], + "deps": { + "common": [ + { + "id": "proc-macro2 1.0.70", + "target": "proc_macro2" + } + ], + "selects": {} + }, + "edition": "2018", + "version": "1.0.33" + }, + "license": "MIT OR Apache-2.0" + }, + "strsim 0.10.0": { + "name": "strsim", + "version": "0.10.0", + "repository": { + "Http": { + "url": "https://crates.io/api/v1/crates/strsim/0.10.0/download", + "sha256": "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + } + }, + "targets": [ + { + "Library": { + "crate_name": "strsim", + "crate_root": "src/lib.rs", + "srcs": [ + "**/*.rs" + ] + } + } + ], + "library_target_name": "strsim", + "common_attrs": { + "compile_data_glob": [ + "**" + ], + "edition": "2015", + "version": "0.10.0" + }, + "license": "MIT" + }, + "syn 2.0.39": { + "name": "syn", + "version": "2.0.39", + "repository": { + "Http": { + "url": "https://crates.io/api/v1/crates/syn/2.0.39/download", + "sha256": "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" + } + }, + "targets": [ + { + "Library": { + "crate_name": "syn", + "crate_root": "src/lib.rs", + "srcs": [ + "**/*.rs" + ] + } + } + ], + "library_target_name": "syn", + "common_attrs": { + "compile_data_glob": [ + "**" + ], + "crate_features": { + "common": [ + "clone-impls", + "full", + "parsing", + "printing", + "quote" + ], + "selects": {} + }, + "deps": { + "common": [ + { + "id": "proc-macro2 1.0.70", 
+ "target": "proc_macro2" + }, + { + "id": "quote 1.0.33", + "target": "quote" + }, + { + "id": "unicode-ident 1.0.12", + "target": "unicode_ident" + } + ], + "selects": {} + }, + "edition": "2021", + "version": "2.0.39" + }, + "license": "MIT OR Apache-2.0" + }, + "termcolor 1.4.0": { + "name": "termcolor", + "version": "1.4.0", + "repository": { + "Http": { + "url": "https://crates.io/api/v1/crates/termcolor/1.4.0/download", + "sha256": "ff1bc3d3f05aff0403e8ac0d92ced918ec05b666a43f83297ccef5bea8a3d449" + } + }, + "targets": [ + { + "Library": { + "crate_name": "termcolor", + "crate_root": "src/lib.rs", + "srcs": [ + "**/*.rs" + ] + } + } + ], + "library_target_name": "termcolor", + "common_attrs": { + "compile_data_glob": [ + "**" + ], + "deps": { + "common": [], + "selects": { + "cfg(windows)": [ + { + "id": "winapi-util 0.1.6", + "target": "winapi_util" + } + ] + } + }, + "edition": "2018", + "version": "1.4.0" + }, + "license": "Unlicense OR MIT" + }, + "unicode-ident 1.0.12": { + "name": "unicode-ident", + "version": "1.0.12", + "repository": { + "Http": { + "url": "https://crates.io/api/v1/crates/unicode-ident/1.0.12/download", + "sha256": "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + } + }, + "targets": [ + { + "Library": { + "crate_name": "unicode_ident", + "crate_root": "src/lib.rs", + "srcs": [ + "**/*.rs" + ] + } + } + ], + "library_target_name": "unicode_ident", + "common_attrs": { + "compile_data_glob": [ + "**" + ], + "edition": "2018", + "version": "1.0.12" + }, + "license": "(MIT OR Apache-2.0) AND Unicode-DFS-2016" + }, + "unicode-width 0.1.11": { + "name": "unicode-width", + "version": "0.1.11", + "repository": { + "Http": { + "url": "https://crates.io/api/v1/crates/unicode-width/0.1.11/download", + "sha256": "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" + } + }, + "targets": [ + { + "Library": { + "crate_name": "unicode_width", + "crate_root": "src/lib.rs", + "srcs": [ + "**/*.rs" + ] + } + } + ], + 
"library_target_name": "unicode_width", + "common_attrs": { + "compile_data_glob": [ + "**" + ], + "crate_features": { + "common": [ + "default" + ], + "selects": {} + }, + "edition": "2015", + "version": "0.1.11" + }, + "license": "MIT/Apache-2.0" + }, + "winapi 0.3.9": { + "name": "winapi", + "version": "0.3.9", + "repository": { + "Http": { + "url": "https://crates.io/api/v1/crates/winapi/0.3.9/download", + "sha256": "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" + } + }, + "targets": [ + { + "Library": { + "crate_name": "winapi", + "crate_root": "src/lib.rs", + "srcs": [ + "**/*.rs" + ] + } + }, + { + "BuildScript": { + "crate_name": "build_script_build", + "crate_root": "build.rs", + "srcs": [ + "**/*.rs" + ] + } + } + ], + "library_target_name": "winapi", + "common_attrs": { + "compile_data_glob": [ + "**" + ], + "deps": { + "common": [ + { + "id": "winapi 0.3.9", + "target": "build_script_build" + } + ], + "selects": { + "i686-pc-windows-gnu": [ + { + "id": "winapi-i686-pc-windows-gnu 0.4.0", + "target": "winapi_i686_pc_windows_gnu" + } + ], + "x86_64-pc-windows-gnu": [ + { + "id": "winapi-x86_64-pc-windows-gnu 0.4.0", + "target": "winapi_x86_64_pc_windows_gnu" + } + ] + } + }, + "edition": "2015", + "version": "0.3.9" + }, + "build_script_attrs": { + "data_glob": [ + "**" + ] + }, + "license": "MIT/Apache-2.0" + }, + "winapi-i686-pc-windows-gnu 0.4.0": { + "name": "winapi-i686-pc-windows-gnu", + "version": "0.4.0", + "repository": { + "Http": { + "url": "https://crates.io/api/v1/crates/winapi-i686-pc-windows-gnu/0.4.0/download", + "sha256": "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + } + }, + "targets": [ + { + "Library": { + "crate_name": "winapi_i686_pc_windows_gnu", + "crate_root": "src/lib.rs", + "srcs": [ + "**/*.rs" + ] + } + }, + { + "BuildScript": { + "crate_name": "build_script_build", + "crate_root": "build.rs", + "srcs": [ + "**/*.rs" + ] + } + } + ], + "library_target_name": 
"winapi_i686_pc_windows_gnu", + "common_attrs": { + "compile_data_glob": [ + "**" + ], + "deps": { + "common": [ + { + "id": "winapi-i686-pc-windows-gnu 0.4.0", + "target": "build_script_build" + } + ], + "selects": {} + }, + "edition": "2015", + "version": "0.4.0" + }, + "build_script_attrs": { + "data_glob": [ + "**" + ] + }, + "license": "MIT/Apache-2.0" + }, + "winapi-util 0.1.6": { + "name": "winapi-util", + "version": "0.1.6", + "repository": { + "Http": { + "url": "https://crates.io/api/v1/crates/winapi-util/0.1.6/download", + "sha256": "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" + } + }, + "targets": [ + { + "Library": { + "crate_name": "winapi_util", + "crate_root": "src/lib.rs", + "srcs": [ + "**/*.rs" + ] + } + } + ], + "library_target_name": "winapi_util", + "common_attrs": { + "compile_data_glob": [ + "**" + ], + "deps": { + "common": [], + "selects": { + "cfg(windows)": [ + { + "id": "winapi 0.3.9", + "target": "winapi" + } + ] + } + }, + "edition": "2021", + "version": "0.1.6" + }, + "license": "Unlicense/MIT" + }, + "winapi-x86_64-pc-windows-gnu 0.4.0": { + "name": "winapi-x86_64-pc-windows-gnu", + "version": "0.4.0", + "repository": { + "Http": { + "url": "https://crates.io/api/v1/crates/winapi-x86_64-pc-windows-gnu/0.4.0/download", + "sha256": "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + } + }, + "targets": [ + { + "Library": { + "crate_name": "winapi_x86_64_pc_windows_gnu", + "crate_root": "src/lib.rs", + "srcs": [ + "**/*.rs" + ] + } + }, + { + "BuildScript": { + "crate_name": "build_script_build", + "crate_root": "build.rs", + "srcs": [ + "**/*.rs" + ] + } + } + ], + "library_target_name": "winapi_x86_64_pc_windows_gnu", + "common_attrs": { + "compile_data_glob": [ + "**" + ], + "deps": { + "common": [ + { + "id": "winapi-x86_64-pc-windows-gnu 0.4.0", + "target": "build_script_build" + } + ], + "selects": {} + }, + "edition": "2015", + "version": "0.4.0" + }, + "build_script_attrs": { + 
"data_glob": [ + "**" + ] + }, + "license": "MIT/Apache-2.0" + } + }, + "binary_crates": [], + "workspace_members": { + "cxxbridge-cmd 1.0.110": "" + }, + "conditions": { + "aarch64-apple-darwin": [ + "aarch64-apple-darwin" + ], + "cfg(windows)": [], + "i686-apple-darwin": [ + "i686-apple-darwin" + ], + "i686-pc-windows-gnu": [], + "x86_64-apple-darwin": [ + "x86_64-apple-darwin" + ], + "x86_64-pc-windows-gnu": [] + } +} diff --git a/external_patches/cxxbridge-cmd/Cargo.lock b/external_patches/cxxbridge-cmd/Cargo.lock new file mode 100644 index 000000000..77f2946bf --- /dev/null +++ b/external_patches/cxxbridge-cmd/Cargo.lock @@ -0,0 +1,143 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "anstyle" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" + +[[package]] +name = "clap" +version = "4.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fffed7514f420abec6d183b1d3acfd9099c79c3a10a06ade4f8203f1411272" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63361bae7eef3771745f02d8d892bec2fee5f6e34af316ba556e7f97a7069ff1" +dependencies = [ + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_lex" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" + +[[package]] +name = "codespan-reporting" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" +dependencies = [ + "termcolor", + "unicode-width", +] + +[[package]] +name = "cxxbridge-cmd" +version = "1.0.110" +dependencies = [ + 
"clap", + "codespan-reporting", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "syn" +version = "2.0.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "termcolor" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff1bc3d3f05aff0403e8ac0d92ced918ec05b666a43f83297ccef5bea8a3d449" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-width" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = 
"0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/foo b/foo new file mode 100644 index 000000000..5e10239c4 Binary files /dev/null and b/foo differ diff --git a/helper.bzl b/helper.bzl index 00fa3bd6d..4465619c4 100644 --- a/helper.bzl +++ b/helper.bzl @@ -1,7 +1,9 @@ """This module defines some helper rules.""" +load("@bazel_skylib//rules:run_binary.bzl", "run_binary") load("@build_bazel_rules_apple//apple:macos.bzl", "macos_unit_test") load("@build_bazel_rules_apple//apple:resources.bzl", "apple_resource_group") +load("@rules_cc//cc:defs.bzl", "cc_library") def run_command(name, cmd, **kwargs): """A rule to run a command.""" @@ -53,3 +55,39 @@ def santa_unit_test( data = data, visibility = ["//:__subpackages__"], ) + +def rust_cxx_bridge(name, src, deps = []): + """ + Generates a cc_library target for interop with Rust code. + + More details: https://cxx.rs/build/bazel.html + + Args: + name: By convention, RUST_LIBRARY_bridge. The cc_library will be named + the same. + src: Rust (.rs) file with a #[cxx::bridge] section. + deps: Passed through to the cc_library target. 
+ """ + out_h = "gen/%s.h" % src + out_cc = "gen/%s.cc" % src + + run_binary( + name = "%s/generated" % name, + srcs = [src], + outs = [out_h, out_cc], + args = [ + "$(location %s)" % src, + "-o", + "$(location %s)" % out_h, + "-o", + "$(location %s)" % out_cc, + ], + tool = "@cxxbridge-cmd//:cxxbridge", + ) + + cc_library( + name = name, + srcs = [out_cc], + hdrs = [out_h], + deps = deps, + ) diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 000000000..e01d06811 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,3 @@ +merge_imports = true +max_width = 100 +reorder_imports = true