diff --git a/x_read.py b/x_read.py
index 1c7255ead..6000e8134 100644
--- a/x_read.py
+++ b/x_read.py
@@ -11,7 +11,8 @@ def turbodbc_read_sql(
 
 
 def mssql_connect_turbodbc() -> turbodbc.connection.Connection:
-    odbc_string = "Driver={libtdsodbc.so};Server=localhost,1433;Database=turbodbc;Encrypt=yes;TrustServerCertificate=no;UID=sa;PWD=QuantCo123"
+    # Trim trailing whitespace/semicolons so the ';' and '=' splits below stay valid.
+    odbc_string = Path("./conn_str_odbc.txt").read_text(encoding="utf-8").strip().rstrip(";")
     conn_string_components = {
         x.split("=")[0].lower(): x.split("=")[1] for x in odbc_string.split(";")
     }
@@ -23,7 +24,25 @@ def mssql_connect_turbodbc() -> turbodbc.connection.Connection:
     )
 
 
+query_ops = Path("./query_ops.sql").read_text(encoding="utf-8")
+query_icd = Path("./query_icd.sql").read_text(encoding="utf-8")
+
+which = input("Which query: ").strip()
+query = (
+    query_ops if which == "ops" else
+    query_icd if which == "icd" else
+    f"SELECT * FROM simeon_benchmark_turbodbc.dbo.{which}"
+)
+# The table fallback is never empty, so also reject blank input, which would
+# otherwise produce a dangling "dbo." reference; a blank query file is rejected too.
+if not which or not query.strip():
+    raise ValueError(which)
+print("Using", query)
+
 connection = mssql_connect_turbodbc()
 start = perf_counter()
-turbodbc_read_sql("SELECT * FROM turbodbc.dbo.single_big", connection)
+res = turbodbc_read_sql(query, connection)
 print("Elapsed seconds:", perf_counter() - start)
+
+tag = input("Output tag: ")
+res.to_parquet(f"./{which}_{tag}.parquet")
diff --git a/x_write.py b/x_write.py
index a7977a325..0d20176c5 100644
--- a/x_write.py
+++ b/x_write.py
@@ -1,21 +1,68 @@
 import sqlalchemy as sa
+from pathlib import Path
+from typing import List, Union
+
+
 
 print("Hey")
 
-ROWS = 1_000_000
 AT_ONCE = 1
 
-eng = sa.create_engine("mssql+pyodbc://sa:QuantCo123@localhost:1433/turbodbc?driver=libtdsodbc.so")
+# Strip surrounding whitespace so a trailing newline in the file does not end up in the URL.
+eng = sa.create_engine(Path("./x_conn_str_sa.txt").read_text(encoding="utf-8").strip())
+
+
+def writeit(table_name: str, row_values: List[Union[str, int]], types: List[str], nrows: int):
+    """Recreate ``table_name`` and fill it with copies of one constant row.
+
+    The table is dropped if it exists and recreated with one column per entry
+    of ``types`` (named ``v_0``, ``v_1``, ...). One INSERT carrying ``AT_ONCE``
+    copies of the row is issued per step of ``range(0, nrows, AT_ONCE)``, all
+    inside a single transaction; progress is printed in 10 percent steps.
+
+    Values are embedded as single-quoted SQL literals without escaping, so
+    only pass trusted constants.
+    """
+    with eng.begin() as transaction:
+        transaction.execute(sa.text(f"DROP TABLE IF EXISTS {table_name}"))
+
+        stmt_types = ",\n".join([f"v_{idx} {type_}" for idx, type_ in enumerate(types)])
+        transaction.execute(sa.text(f"CREATE TABLE {table_name} ({stmt_types})"))
+
+        # The row never changes, so build the INSERT statement once, not per iteration.
+        stmt_values = ",".join([f"'{value}'" for value in row_values])
+        param = ", ".join([f"({stmt_values}) "] * AT_ONCE)
+        insert_stmt = sa.text(f"INSERT INTO {table_name} VALUES {param}")
+
+        last = None
+        for idx in range(0, nrows, AT_ONCE):
+            percentage = int(idx/nrows * 100)
+            if percentage % 10 == 0 and percentage != last:
+                last = percentage
+                print(percentage)
+
+            transaction.execute(insert_stmt)
+
 
-with eng.begin() as transaction:
-    transaction.execute(sa.text("DELETE FROM single_big"))
-    last = None
-    for idx in range(0, ROWS, AT_ONCE):
-        percentage = int(idx/ROWS * 100)
-        if percentage % 10 == 0 and percentage != last:
-            last = percentage
-            print(percentage)
+which = input("Which input: ")
 
-        value = "a" * 8_000
-        param = ", ".join([f"('{value}') "] * AT_ONCE)
-        transaction.execute(sa.text(f"INSERT INTO single_big VALUES {param}"))
+if which == "varchar":
+    writeit("varchar_100k", ["a" * 8000], ["VARCHAR(8000)"], 100_000)
+elif which == "char":
+    writeit("char_10m", ["a" * 80], ["CHAR(80)"], 10_000_000)
+elif which == "tinyint":
+    writeit("tinyint_50m", [123], ["TINYINT"], 50_000_000)
+elif which == "date":
+    writeit("date_20m", ["2023-10-04"], ["DATE"], 20_000_000)
+elif which == "all":
+    writeit("all_100k", ["v" * 8000, "c" * 8000, 123, "2023-10-04"], ["VARCHAR(8000)", "CHAR(8000)", "TINYINT", "DATE"], 100_000)
+elif which == "varchar_many":
+    writeit("varchar_many_10k", ["a" * 80] * 100, ["VARCHAR(80)"] * 100, 10_000)
+elif which == "tinyint_many":
+    writeit("tinyint_many_100k", [123] * 100, ["TINYINT"] * 100, 100_000)
+elif which == "date_many":
+    writeit("date_many_100k", ["2023-10-04"] * 100, ["DATE"] * 100, 100_000)
+elif which == "char_many":
+    writeit("char_many_100k", ["a" * 80] * 100, ["CHAR(80)"] * 100, 100_000)
+else:
+    raise ValueError(which)