From 7086bbc4ec45de4f99a35efce04742dfbad9f652 Mon Sep 17 00:00:00 2001
From: Simeon Stoykov
Date: Thu, 5 Oct 2023 09:56:10 +0200
Subject: [PATCH] temporary benchmark files

x_read.py: add smoke(), which touches a marker file and waits for Enter
so a debugger can be attached before the benchmark runs; read the ODBC
connection string from ./x_conn_str_odbc.txt instead of hard-coding it;
choose the query from the last command-line argument (or stdin): "ops"
and "icd" load a .sql file, anything else is used as a table name.

x_write.py: read the SQLAlchemy URL from ./x_conn_str_sa.txt and replace
the single hard-coded insert loop with writeit(), which drops and
recreates a table and fills it with identical rows inside one
transaction. The table to generate is chosen on stdin.
---
Review notes (ignored by git am):
- Line breaks and indentation were reconstructed from a
  whitespace-collapsed copy of this patch. Blank context lines were
  placed using the hunk line counts; confirm against the target files
  before applying.
- writeit(): the nrows annotation was corrected from str to int (it is
  passed to range() and every caller passes an int). The post-image blob
  id on the x_write.py index line was not recomputed.
- mssql_connect_turbodbc() prints the parsed connection string, which
  presumably includes the password -- TODO confirm this is intended.

 x_read.py  | 33 +++++++++++++++++++++++++++--
 x_write.py | 61 ++++++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 79 insertions(+), 15 deletions(-)

diff --git a/x_read.py b/x_read.py
index 1c7255ead..544559e39 100644
--- a/x_read.py
+++ b/x_read.py
@@ -1,6 +1,18 @@
 import turbodbc
 from time import perf_counter
 
+from pathlib import Path
+import sys
+
+
+def smoke():
+    # Leaves a sign and allows us to attach to the process
+    smoke_file = Path("smoke_hey_im_here")
+    smoke_file.touch(exist_ok=True)
+    with open(smoke_file):
+        input("Press enter to continue after attaching to the process")
+
+
 
 def turbodbc_read_sql(
     query: str, turbodbc_connection: turbodbc.connection.Connection
@@ -11,10 +23,11 @@ def turbodbc_read_sql(
 
 
 def mssql_connect_turbodbc() -> turbodbc.connection.Connection:
-    odbc_string = "Driver={libtdsodbc.so};Server=localhost,1433;Database=turbodbc;Encrypt=yes;TrustServerCertificate=no;UID=sa;PWD=QuantCo123"
+    odbc_string = Path("./x_conn_str_odbc.txt").read_text(encoding="utf-8").strip("\n")
     conn_string_components = {
         x.split("=")[0].lower(): x.split("=")[1] for x in odbc_string.split(";")
     }
+    print(conn_string_components)
     return turbodbc.connect(
         **conn_string_components,
         turbodbc_options=turbodbc.make_options(
@@ -23,7 +36,23 @@ def mssql_connect_turbodbc() -> turbodbc.connection.Connection:
     )
 
 
+smoke()
+
+
+which = input("Which query: ") if len(sys.argv) < 2 else sys.argv[-1]
+query = (
+    Path("./query_ops.sql").read_text(encoding="utf-8") if which == "ops" else
+    Path("./query_icd.sql").read_text(encoding="utf-8") if which == "icd" else
+    f"SELECT * FROM {which}"
+)
+if not query:
+    raise ValueError(which)
+print("Using", query)
+
 connection = mssql_connect_turbodbc()
 start = perf_counter()
-turbodbc_read_sql("SELECT * FROM turbodbc.dbo.single_big", connection)
+res = turbodbc_read_sql(query, connection)
 print("Elapsed seconds:", perf_counter() - start)
+
+# tag = input("Output tag: ")
+# res.to_parquet(f"./{which}_{tag}.parquet")
diff --git a/x_write.py b/x_write.py
index a7977a325..965682b86 100644
--- a/x_write.py
+++ b/x_write.py
@@ -1,21 +1,56 @@
 import sqlalchemy as sa
 
+from pathlib import Path
+from typing import List
+
+
 print("Hey")
 
-ROWS = 1_000_000
 AT_ONCE = 1
 
-eng = sa.create_engine("mssql+pyodbc://sa:QuantCo123@localhost:1433/turbodbc?driver=libtdsodbc.so")
+eng = sa.create_engine(Path("./x_conn_str_sa.txt").read_text(encoding="utf-8").strip("\n"))
+
+
+def writeit(table_name: str, row_values: List[str], types: List[str], nrows: int):
+    with eng.begin() as transaction:
+        transaction.execute(sa.text(f"DROP TABLE IF EXISTS {table_name}"))
+
+        stmt_types = ",\n".join([f"v_{idx} {type_}" for idx, type_ in enumerate(types)])
+        transaction.execute(sa.text(f"CREATE TABLE {table_name} ({stmt_types})"))
+
+        last = None
+        for idx in range(0, nrows, AT_ONCE):
+            percentage = int(idx/nrows * 100)
+            if percentage % 10 == 0 and percentage != last:
+                last = percentage
+                print(percentage)
+
+            stmt_values = ",".join([f"'{value}'" for value in row_values])
+            param = ", ".join([f"({stmt_values}) "] * AT_ONCE)
+            transaction.execute(sa.text(f"INSERT INTO {table_name} VALUES {param}"))
+
 
-with eng.begin() as transaction:
-    transaction.execute(sa.text("DELETE FROM single_big"))
-    last = None
-    for idx in range(0, ROWS, AT_ONCE):
-        percentage = int(idx/ROWS * 100)
-        if percentage % 10 == 0 and percentage != last:
-            last = percentage
-            print(percentage)
+which = input("Which input: ")
 
-        value = "a" * 8_000
-        param = ", ".join([f"('{value}') "] * AT_ONCE)
-        transaction.execute(sa.text(f"INSERT INTO single_big VALUES {param}"))
+if which == "varchar":
+    writeit("varchar_100k", ["a" * 8000], ["VARCHAR(8000)"], 100_000)
+elif which == "varchar_80":
+    writeit("varchar_100k_80", ["a" * 80], ["VARCHAR(8000)"], 100_000)
+elif which == "char":
+    writeit("char_1m", ["a" * 80], ["CHAR(80)"], 1_000_000)
+elif which == "tinyint":
+    writeit("tinyint_50m", [123], ["TINYINT"], 50_000_000)
+elif which == "date":
+    writeit("date_20m", ["2023-10-04"], ["DATE"], 20_000_000)
+elif which == "all":
+    writeit("all_100k", ["v" * 8000, "c" * 8000, 123, "2023-10-04"], ["VARCHAR(8000)", "CHAR(8000)", "TINYINT", "DATE"], 100_000)
+elif which == "varchar_many":
+    writeit("varchar_many_100k", ["a" * 80] * 100, ["VARCHAR(80)"] * 100, 100_000)
+elif which == "tinyint_many":
+    writeit("tinyint_many_100k", [123] * 100, ["TINYINT"] * 100, 100_000)
+elif which == "date_many":
+    writeit("date_many_100k", ["2023-10-04"] * 100, ["DATE"] * 100, 100_000)
+elif which == "char_many":
+    writeit("char_many_100k", ["a" * 80] * 100, ["CHAR(80)"] * 100, 100_000)
+else:
+    raise ValueError(which)