diff --git a/README.md b/README.md index c4d3030..f69afd9 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ popd ``` ### table初期データ作成 -https://github.com/project-tsurugi/jogasaki-benchmarks/tree/master/tpc-c-datagen/README.md 参照 +https://github.com/project-tsurugi/tsubakuro-examples/tree/master/tools/tpc-c-datagen/README.md 参照 ### ベンチマーク実行 ベンチマーク実行は、tsurugidbを起動し、table初期データをloadした後にクライアントプログラムを実行する。
diff --git a/tools/cmake/CompileOptions.cmake b/tools/cmake/CompileOptions.cmake
new file mode 100644
index 0000000..4708b86
--- /dev/null
+++ b/tools/cmake/CompileOptions.cmake
@@ -0,0 +1,49 @@
+# Project-wide compiler configuration: language standard, debug flags,
+# optional sanitizer/coverage instrumentation and the warning-policy helper.
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_EXTENSIONS OFF)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# Keep frame pointers in the debuggable configurations.
+string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer")
+string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -fno-omit-frame-pointer")
+
+# Comma-separated value for -fsanitize=; extended per option and compiler.
+set(sanitizers "address")
+if(ENABLE_UB_SANITIZER)
+    # NOTE: UB check requires instrumented libstdc++
+    string(APPEND sanitizers ",undefined")
+endif()
+if(CMAKE_CXX_COMPILER_ID MATCHES "^(Clang|AppleClang)$")
+    string(APPEND sanitizers ",nullability")
+elseif(NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+    # gcc needs nothing extra; anything that is neither gcc nor clang is rejected
+    message(FATAL_ERROR "unsupported compiler ${CMAKE_CXX_COMPILER_ID}")
+endif()
+
+if(ENABLE_SANITIZER)
+    string(APPEND CMAKE_CXX_FLAGS_DEBUG
+        " -fsanitize=${sanitizers}"
+        " -fno-sanitize=alignment"
+        " -fno-sanitize-recover=${sanitizers}")
+endif()
+if(ENABLE_COVERAGE)
+    string(APPEND CMAKE_CXX_FLAGS_DEBUG " --coverage")
+endif()
+
+# set_compile_options(<target_name>): apply the strict warning policy to one target.
+function(set_compile_options target_name)
+    target_compile_options(${target_name}
+        PRIVATE -Wall -Wextra -Werror)
+endfunction()
+
+if(TRACY_ENABLE)
+    message("trace enabled")
+    add_definitions(-DTRACY_ENABLE)
+    add_definitions(-DTRACY_NO_SAMPLING)
+
+    # tracy code has many unused 
variables/parameters
+    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wno-unused-parameter -Wno-maybe-uninitialized")
+    set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -Wno-unused-parameter -Wno-maybe-uninitialized")
+    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Wno-unused-parameter -Wno-maybe-uninitialized")
+endif()
diff --git a/tools/cmake/Config.cmake.in b/tools/cmake/Config.cmake.in
new file mode 100644
index 0000000..978ab20
--- /dev/null
+++ b/tools/cmake/Config.cmake.in
@@ -0,0 +1 @@
+include("${CMAKE_CURRENT_LIST_DIR}/@package_name@-targets.cmake")
diff --git a/tools/cmake/Findgflags.cmake b/tools/cmake/Findgflags.cmake
new file mode 100644
index 0000000..f0af39a
--- /dev/null
+++ b/tools/cmake/Findgflags.cmake
@@ -0,0 +1,27 @@
+# Find module for gflags: defines the imported target gflags::gflags and
+# sets gflags_FOUND. Returns immediately when the target already exists.
+if(TARGET gflags::gflags)
+    return()
+endif()
+
+find_library(gflags_LIBRARY_FILE NAMES gflags)
+find_path(gflags_INCLUDE_DIR NAMES gflags/gflags.h)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(gflags DEFAULT_MSG
+    gflags_LIBRARY_FILE
+    gflags_INCLUDE_DIR)
+
+if(gflags_LIBRARY_FILE AND gflags_INCLUDE_DIR)
+    set(gflags_FOUND ON)
+    # UNKNOWN, not SHARED: find_library() may return a static archive as well as a shared object
+    add_library(gflags::gflags UNKNOWN IMPORTED)
+    set_target_properties(gflags::gflags PROPERTIES
+        IMPORTED_LOCATION "${gflags_LIBRARY_FILE}"
+        INTERFACE_INCLUDE_DIRECTORIES "${gflags_INCLUDE_DIR}")
+else()
+    set(gflags_FOUND OFF)
+endif()
+
+unset(gflags_LIBRARY_FILE CACHE)
+unset(gflags_INCLUDE_DIR CACHE)
diff --git a/tools/cmake/Findglog.cmake b/tools/cmake/Findglog.cmake
new file mode 100644
index 0000000..4b35db6
--- /dev/null
+++ b/tools/cmake/Findglog.cmake
@@ -0,0 +1,24 @@
+if(TARGET glog::glog)
+    return()
+endif()
+
+find_library(glog_LIBRARY_FILE NAMES glog)
+find_path(glog_INCLUDE_DIR NAMES glog/logging.h)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(glog DEFAULT_MSG
+    glog_LIBRARY_FILE
+    glog_INCLUDE_DIR)
+
+if(glog_LIBRARY_FILE AND glog_INCLUDE_DIR)
+    set(glog_FOUND ON)
+    
add_library(glog::glog UNKNOWN IMPORTED)
+    set_target_properties(glog::glog PROPERTIES
+        IMPORTED_LOCATION "${glog_LIBRARY_FILE}"
+        INTERFACE_INCLUDE_DIRECTORIES "${glog_INCLUDE_DIR}")
+else()
+    set(glog_FOUND OFF)
+endif()
+
+unset(glog_LIBRARY_FILE CACHE)
+unset(glog_INCLUDE_DIR CACHE)
diff --git a/tools/cmake/Findmpdecpp.cmake b/tools/cmake/Findmpdecpp.cmake
new file mode 100644
index 0000000..23d17b7
--- /dev/null
+++ b/tools/cmake/Findmpdecpp.cmake
@@ -0,0 +1,34 @@
+# Find module for mpdecimal's C++ binding: defines the imported target mpdecpp
+# (libmpdec++ plus its libmpdec dependency) and sets mpdecpp_FOUND.
+if(TARGET mpdecpp)
+    return()
+endif()
+
+find_path(mpdecpp_INCLUDE_DIR NAMES decimal.hh)
+find_library(mpdecpp_LIBRARY_FILE NAMES mpdec++)
+find_library(mpdec_LIBRARY_FILE NAMES mpdec)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(mpdecpp DEFAULT_MSG
+    mpdecpp_INCLUDE_DIR
+    mpdecpp_LIBRARY_FILE
+    mpdec_LIBRARY_FILE
+    )
+
+if(mpdecpp_INCLUDE_DIR AND mpdecpp_LIBRARY_FILE AND mpdec_LIBRARY_FILE)
+    set(mpdecpp_FOUND ON)
+    # UNKNOWN, not SHARED: find_library() may return a static archive as well as a shared object
+    add_library(mpdecpp UNKNOWN IMPORTED)
+    # The libmpdec dependency is set as a property because target_link_libraries()
+    # on an IMPORTED target is only accepted by CMake 3.11 and newer.
+    set_target_properties(mpdecpp PROPERTIES
+        IMPORTED_LOCATION "${mpdecpp_LIBRARY_FILE}"
+        INTERFACE_LINK_LIBRARIES "${mpdec_LIBRARY_FILE}"
+        INTERFACE_INCLUDE_DIRECTORIES "${mpdecpp_INCLUDE_DIR}")
+else()
+    set(mpdecpp_FOUND OFF)
+endif()
+
+unset(mpdecpp_INCLUDE_DIR CACHE)
+unset(mpdecpp_LIBRARY_FILE CACHE)
+unset(mpdec_LIBRARY_FILE CACHE)
diff --git a/tools/cmake/InstallOptions.cmake b/tools/cmake/InstallOptions.cmake
new file mode 100644
index 0000000..f70afc1
--- /dev/null
+++ b/tools/cmake/InstallOptions.cmake
@@ -0,0 +1,90 @@
+# install_custom(<target_name> <export_name>)
+# Installs the target into the given export set, optionally forces an
+# INSTALL_RPATH, and installs the headers of targets that declare interface include directories.
+function(install_custom target_name export_name)
+    install(
+        TARGETS
+            ${target_name}
+        EXPORT
+            ${export_name}
+        LIBRARY
+            DESTINATION ${CMAKE_INSTALL_LIBDIR}
+            COMPONENT Runtime
+        ARCHIVE
+            DESTINATION ${CMAKE_INSTALL_LIBDIR}/${export_name}
+            COMPONENT Development
+        RUNTIME
+            DESTINATION ${CMAKE_INSTALL_BINDIR}
+            COMPONENT Runtime
+    )
+    # Add INSTALL_RPATH from CMAKE_INSTALL_PREFIX and CMAKE_PREFIX_PATH
+    # The default behavior of CMake omits RUNPATH if it is 
already in CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES.
+    if (FORCE_INSTALL_RPATH)
+        get_target_property(target_type ${target_name} TYPE)
+        if (target_type STREQUAL "SHARED_LIBRARY"
+                OR target_type STREQUAL "EXECUTABLE")
+            get_property(rpath TARGET ${target_name} PROPERTY INSTALL_RPATH)
+            # get_property() yields an empty value when INSTALL_RPATH is unset; get_target_property() would yield "rpath-NOTFOUND", which the appends below would turn into a bogus RPATH entry
+            # add ${CMAKE_INSTALL_PREFIX}/lib if it is not in system link directories
+            get_filename_component(p "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}" ABSOLUTE)
+            list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${p}" is_system)
+            if (is_system STREQUAL "-1")
+                list(APPEND rpath "${p}")
+            endif()
+
+            # add each ${CMAKE_PREFIX_PATH}/lib
+            foreach (p IN LISTS CMAKE_PREFIX_PATH)
+                get_filename_component(p "${p}/${CMAKE_INSTALL_LIBDIR}" ABSOLUTE)
+                list(APPEND rpath "${p}")
+            endforeach()
+
+            if (rpath)
+                set_target_properties(${target_name} PROPERTIES
+                    INSTALL_RPATH "${rpath}")
+            endif()
+
+            # add other than */lib paths
+            set_target_properties(${target_name} PROPERTIES
+                INSTALL_RPATH_USE_LINK_PATH ON)
+        endif()
+    endif()
+    # Install include files of interface libraries manually
+    # INTERFACE_INCLUDE_DIRECTORIES must contain the following entries:
+    # - one or more `$<BUILD_INTERFACE:...>` paths (may be absolute paths on source-tree)
+    # - just one `$<INSTALL_INTERFACE:...>` path (must be a relative path from the install prefix)
+    # then, this copies files in the BUILD_INTERFACE paths onto INSTALL_INTERFACE path
+    # e.g. 
+    #   add_library(shakujo-interface INTERFACE)
+    #   target_include_directories(shakujo-interface INTERFACE
+    #       $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+    #       $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/shakujo>)
+    get_target_property(
+        _includes
+        ${target_name} INTERFACE_INCLUDE_DIRECTORIES
+    )
+    if (_includes)
+        unset(_build_dir)
+        unset(_install_dir)
+        foreach (f IN LISTS _includes)
+            if (f MATCHES "^\\$<BUILD_INTERFACE:(.+)>$")
+                list(APPEND _build_dir ${CMAKE_MATCH_1})
+            elseif (f MATCHES "^\\$<INSTALL_INTERFACE:(.+)>$")
+                set(_install_dir ${CMAKE_MATCH_1})
+            else()
+                message(FATAL_ERROR "invalid include specification (${target_name}): ${f}")
+            endif()
+        endforeach()
+        if (NOT _build_dir)
+            message(FATAL_ERROR "${target_name} must declare \$<BUILD_INTERFACE:...> in INTERFACE_INCLUDE_DIRECTORIES")
+        endif()
+        if (NOT _install_dir)
+            message(FATAL_ERROR "${target_name} must declare \$<INSTALL_INTERFACE:...> in INTERFACE_INCLUDE_DIRECTORIES")
+        endif()
+        install(
+            DIRECTORY ${_build_dir}/
+            DESTINATION ${_install_dir}
+            COMPONENT Development
+            PATTERN "doxygen.h" EXCLUDE
+        )
+    endif()
+endfunction()
diff --git a/tools/tpc-c-datagen/.gitignore b/tools/tpc-c-datagen/.gitignore
new file mode 100644
index 0000000..0750d86
--- /dev/null
+++ b/tools/tpc-c-datagen/.gitignore
@@ -0,0 +1,26 @@
+# CMake
+/build*
+/*-config.cmake
+
+# MS Office
+~*
+
+# Mac
+.DS_Store
+
+# VSCode
+/.vscode
+
+# CLion
+cmake-build-*
+.idea
+*.tokens
+
+# work in progress files
+*.wip
+
+#GLOBAL
+GPATH
+GRTAGS
+GTAGS
+HTML/
diff --git a/tools/tpc-c-datagen/CMakeLists.txt b/tools/tpc-c-datagen/CMakeLists.txt
new file mode 100644
index 0000000..aba0d1b
--- /dev/null
+++ b/tools/tpc-c-datagen/CMakeLists.txt
@@ -0,0 +1,81 @@
+# Copyright 2020-2021 tsurugi project.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+cmake_minimum_required(VERSION 3.10)
+
+project(tpcc-datagen
+    VERSION 0.0.1
+    DESCRIPTION "TPC-C table initial data generator for tsurugidb"
+    LANGUAGES CXX)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_EXTENSIONS OFF)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/../cmake")
+
+option(ENABLE_SANITIZER "enable sanitizer on debug build" ON)
+option(ENABLE_UB_SANITIZER "enable undefined behavior sanitizer on debug build" OFF)
+option(ENABLE_COVERAGE "enable coverage on debug build" OFF)
+option(BUILD_DOCUMENTS "build documents" ON)
+option(ENABLE_GOOGLE_PERFTOOLS "Enable Google Perftools" OFF)
+option(TRACY_ENABLE "enable tracy profiler" OFF)
+
+if (ENABLE_GOOGLE_PERFTOOLS)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DENABLE_GOOGLE_PERFTOOLS")
+endif()
+
+# the NOT DEFINED guard already protects a user-supplied value, so no FORCE is needed
+if(NOT DEFINED SHARKSFIN_IMPLEMENTATION)
+    set(
+        SHARKSFIN_IMPLEMENTATION "memory"
+        CACHE STRING
+        "sharksfin target name to link"
+    )
+endif()
+
+find_package(Doxygen)
+find_package(glog REQUIRED)
+find_package(gflags REQUIRED)
+find_package(Threads REQUIRED)
+find_package(Boost
+        COMPONENTS filesystem thread system container stacktrace_backtrace
+        REQUIRED
+        )
+find_package(mpdecpp REQUIRED)
+include(GNUInstallDirs)
+include(CMakePackageConfigHelpers)
+include(CompileOptions)
+include(InstallOptions)
+
+# add_subdirectory(third_party) # should be before enable_testing()
+
+set(export_name "tpcc-datagen")
+set(package_name "tpcc-datagen")
+
+configure_file(
+    ${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Config.cmake.in
+    
${CMAKE_CURRENT_BINARY_DIR}/../${package_name}-config.cmake + @ONLY +) + +# install( +# EXPORT ${package_name} +# NAMESPACE ${package_name}- +# FILE ${package_name}-targets.cmake +# DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${package_name} +# EXPORT_LINK_INTERFACE_LIBRARIES +# ) + +add_subdirectory(src) diff --git a/tools/tpc-c-datagen/LICENSE b/tools/tpc-c-datagen/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/tools/tpc-c-datagen/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/tools/tpc-c-datagen/README.md b/tools/tpc-c-datagen/README.md new file mode 100644 index 0000000..3bbd1b5 --- /dev/null +++ b/tools/tpc-c-datagen/README.md @@ -0,0 +1,29 @@ +# tpc-c-datagen - tpc-c table initial data generator program for tsurugidb + +## Requirements + +* CMake `>= 3.16` +* C++ Compiler `>= C++17` + +## build and install +### build + +```sh +mkdir -p build +cd build +cmake -G Ninja -DCMAKE_BUILD_TYPE=Release .. +cmake --build . +``` + +### install + +```sh +cmake --build . --target install +``` + +## run +```sh +tpcc-datagen -w ${warehouse} -o ${directory} +``` + +where \${warehouse} is the number of warehouses, \${directory} is the directory name where the created data will be stored. 
diff --git a/tools/tpc-c-datagen/docs/tpcc-datagen_ja.md b/tools/tpc-c-datagen/docs/tpcc-datagen_ja.md new file mode 100644 index 0000000..607b293 --- /dev/null +++ b/tools/tpc-c-datagen/docs/tpcc-datagen_ja.md @@ -0,0 +1,41 @@ +# TPC-C ベンチマーク初期データ作成プログラム コマンドライン仕様 +2023.05.18 +horikawa + +## NAME +tpcc-datagen + +## SYNOPSIS +tpcc-datagen [-w NUM] [-o DIR] [-f] + +## DESCRIPTION +* TPC-Cベンチマークの初期データをCSV形式で作成する。 +* 初期データファイルは以下の通りに作成する。 + * -oで指定したディレクトリを作成する。 + * その下に各表名のディレクトリを作成する。 + * その下にwarehouse別のCSVファイルを作成する。 + * 以下にw=2として作成したファイルを示す +``` +db + WAREHOUSE + 1.csv + + 2.csv + + DISTRICT + 1.csv + + 2.csv + + CUSTOMER + 1.csv + + 2.csv + + NEW_ORDER + 1.csv + + 2.csv + + ORDERS + 1.csv + + 2.csv + + ORDER_LINE + 1.csv + + 2.csv + + STOCK + 1.csv + + 2.csv + + HISTORY + 1.csv + 2.csv + + ITEM + 1.csv (ITEM表はwarehouse数とは関係なく、全warehouseに共通しているので、1.csvのみが作成される) +``` + +## OPTIONS +* -w NUM 作成するTPC-Cベンチマーク初期データのwarehouse数を指定する。default値は1。 +* -o DIR 作成するTPC-Cベンチマーク初期データを書き込むディレクトリ名を指定する。defaultは'db'。 +* -f `-o DIR`で指定したディレクトリが存在する場合は、ディレクトリに存在するファイルを消去してからTPC-Cベンチマーク初期データを作成する。`-f`を指定せず、かつ、`-o DIR`で指定したディレクトリが存在する場合は、初期データを作成せず、ディレクトリが既に存在する旨のエラーとして終了する。 \ No newline at end of file diff --git a/tools/tpc-c-datagen/src/CMakeLists.txt b/tools/tpc-c-datagen/src/CMakeLists.txt new file mode 100644 index 0000000..0a627cc --- /dev/null +++ b/tools/tpc-c-datagen/src/CMakeLists.txt @@ -0,0 +1,50 @@ +# Copyright 2018-2021 tsurugi project. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+file(GLOB SOURCES  # every .cpp file of this directory is compiled into the tool
+    "*.cpp"
+)
+
+# remove this because tateyama includes TracyClient.cpp
+#
+#if(TRACY_ENABLE)
+#    file(GLOB TRACY_CLIENT
+#        "../../third_party/tracy/TracyClient.cpp"
+#    )
+#    list(APPEND SOURCES ${TRACY_CLIENT})
+#endif()
+
+add_executable(tpcc
+    ${SOURCES}
+)
+
+set_target_properties(tpcc  # the target is named tpcc, the produced binary tpcc-datagen
+    PROPERTIES
+        RUNTIME_OUTPUT_NAME "tpcc-datagen"
+)
+
+target_include_directories(tpcc
+    PRIVATE .
+)
+
+target_link_libraries(tpcc
+    PRIVATE Threads::Threads
+    PRIVATE gflags::gflags
+    PRIVATE Boost::filesystem
+    PRIVATE glog::glog
+)
+
+set_compile_options(tpcc)  # applies the warning flags set up by the set_compile_options() helper
+
+install_custom(tpcc ${export_name})  # export_name is expected to be set by the including CMakeLists.txt
diff --git a/tools/tpc-c-datagen/src/csv_file.h b/tools/tpc-c-datagen/src/csv_file.h
new file mode 100644
index 0000000..4db617e
--- /dev/null
+++ b/tools/tpc-c-datagen/src/csv_file.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2023-2023 tsurugi project.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */
+#pragma once
+
+#include <time.h>
+
+#include <cerrno>
+#include <cstddef>
+#include <cstdint>
+#include <iostream>
+#include <stdexcept>
+#include <string>
+#include <string_view>
+
+#include <boost/filesystem.hpp>
+#include <boost/filesystem/fstream.hpp>
+
+namespace tpcc {
+
+    /**
+     * @brief Sequential writer for one CSV file named "<n>.csv".
+     * @details Fields are appended with the set_* functions and a row is finished
+     * with end_of_row(). Separators are emitted lazily, just before the next field
+     * is written. Values are written verbatim: no quoting or escaping is applied.
+     */
+    class csv_file {
+    public:
+        /**
+         * @brief Opens "<path>/<n>.csv" for writing.
+         * @details When the open fails because the open-file limit is reached (or the
+         * cause cannot be determined) the open is retried once per second.
+         * @param path directory that receives the file
+         * @param n number used as the file name stem
+         * @throws std::runtime_error when the open fails for any other reported reason
+         */
+        csv_file(const boost::filesystem::path& path, std::size_t n) {
+            const boost::filesystem::path file = path / (std::to_string(n) + ".csv");
+            while (true) {
+                errno = 0;
+                ofs_.open(file);
+                if (ofs_.is_open()) {
+                    return;
+                }
+                // NOTE(review): relies on the stream implementation leaving errno set by the failed open
+                const int cause = errno;
+                ofs_.clear();
+                if (cause != 0 && cause != EMFILE && cause != ENFILE) {
+                    // waiting cannot cure e.g. a missing directory or a permission problem
+                    throw std::runtime_error("cannot open " + file.string());
+                }
+                if (!already_notified_) {
+                    std::cerr << "reached open file limit, consider increasing open file limit" << std::endl;
+                    already_notified_ = true;
+                }
+                struct timespec delay = {1, 0};
+                nanosleep(&delay, nullptr);
+            }
+        }
+
+        /** @brief Appends an integer field; the column name argument is ignored. */
+        void set_int8(std::string_view, std::int64_t v) {
+            separator();
+            ofs_ << std::dec << v;
+        }
+        /** @brief Appends a floating point field in fixed notation with two decimals. */
+        void set_float8(std::string_view, double v) {
+            separator();
+            ofs_.precision(2);
+            ofs_ << std::fixed << std::showpoint << v;
+        }
+        /** @brief Appends a text field verbatim. */
+        void set_character(std::string_view, std::string_view v) {
+            separator();
+            ofs_ << v;
+        }
+        /** @brief Appends an empty field. */
+        void set_null(std::string_view) {
+            separator();
+        }
+        /** @brief Marks the current row as complete; the line break is written lazily. */
+        void end_of_row() {
+            state_ = new_line;
+        }
+        /** @brief Terminates the last row (if anything was written) and closes the file. */
+        void close() {
+            if (state_ != brand_new) {
+                // also ends a row that was left open, instead of appending a stray ','
+                ofs_ << '\n';
+                state_ = brand_new;
+            }
+            ofs_.close();
+        }
+
+    private:
+        boost::filesystem::ofstream ofs_;
+        enum {brand_new, cont, new_line} state_{brand_new};
+        bool already_notified_{};
+
+        /** Emits what has to precede the next field: nothing, a comma, or a line break. */
+        void separator() {
+            switch (state_) {
+            case brand_new: break;
+            case cont: ofs_ << ","; break;
+            case new_line: ofs_ << '\n'; break;  // '\n' instead of std::endl: no flush per row
+            }
+            state_ = cont;
+        }
+    };
+
+}  // namespace tpcc
diff --git a/tools/tpc-c-datagen/src/main.cpp b/tools/tpc-c-datagen/src/main.cpp
new file mode 100644
index 0000000..78239db
--- /dev/null
+++ b/tools/tpc-c-datagen/src/main.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2018-2021 tsurugi project.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstddef>
+#include <cstdint>
+#include <exception>
+#include <iostream>
+#include <limits>
+#include <thread>
+#include <vector>
+
+#include <boost/filesystem.hpp>
+#include <gflags/gflags.h>
+
+#include "tpcc_common.h"
+
+DEFINE_uint32(w, 1, "Database size (TPC-C scale factor).");  //NOLINT
+DEFINE_bool(f, false, "Overwrite location.");  //NOLINT
+DEFINE_string(o, "db", "database location on file system");  // NOLINT
+DEFINE_bool(display_progress, false, "Display progress of data generation");  //NOLINT
+
+namespace tpcc {
+
+std::uint16_t scale::warehouses = 1U;
+
+/**
+ * @brief Parses the command line, prepares the output directory and generates all tables.
+ * @details The ITEM table and every warehouse are generated on their own thread;
+ * all threads are joined before returning.
+ * @return 0 on success, 1 when -w is out of range or the output directory cannot be prepared
+ */
+int driver_main(int argc, char **argv)
+{
+    gflags::SetUsageMessage("TPC-C data generator");
+    gflags::ParseCommandLineFlags(&argc, &argv, true);
+
+    // scale::warehouses is 16 bits wide: reject larger values instead of truncating them silently
+    constexpr std::uint32_t max_warehouses = std::numeric_limits<std::uint16_t>::max();
+    if (FLAGS_w > max_warehouses) {
+        std::cerr << "-w must not exceed " << max_warehouses << std::endl;
+        return 1;
+    }
+    scale::warehouses = static_cast<std::uint16_t>(FLAGS_w);
+
+    const boost::filesystem::path top(FLAGS_o);
+    try {
+        tpcc_gen_initialize(top);
+    } catch (const std::exception& e) {
+        std::cerr << e.what() << std::endl;
+        return 1;
+    }
+
+    std::vector<std::thread> threads;
+    threads.reserve(static_cast<std::size_t>(scale::warehouses) + 1U);
+    threads.emplace_back([&top]() { tpcc_items_gen(top); });
+    // 32-bit counter: a 16-bit one would wrap around and never terminate for 65535 warehouses
+    for (std::uint32_t wid = 1; wid <= scale::warehouses; wid++) {
+        threads.emplace_back([&top, wid]() { tpcc_warehouse_gen(top, static_cast<std::uint16_t>(wid)); });
+    }
+
+    for (auto &t : threads) {
+        t.join();
+    }
+
+    return 0;
+}
+
+}  // namespace tpcc
+
+int main(int argc, char **argv) {
+    return tpcc::driver_main(argc, argv);
+}
diff --git a/tools/tpc-c-datagen/src/tpcc_common.h b/tools/tpc-c-datagen/src/tpcc_common.h
new file mode 100644
index 0000000..95bcfe8
--- /dev/null
+++ b/tools/tpc-c-datagen/src/tpcc_common.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright 2018-2021 tsurugi project. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "tpcc_schema.h" + +namespace tpcc { + +namespace scale { +// cf. from tpc-c_V5.11.0.pdf Page 117 +// #define MAXITEMS 100000 +// #define CUST_PER_DIST 3000 +// #define DIST_PER_WARE 10 +// #define ORD_PER_DIST 3000 + + /** Number of warehouses. Does not grow dynamically */ + extern std::uint16_t warehouses; + + /** Number of items per warehouse. Does not grow dynamically */ + static constexpr inline std::uint32_t items = 100000U; + + /** Number of districts per warehouse. Does not grow dynamically */ + static constexpr inline std::uint8_t districts = 10U; + + /** Number of customers per district. Does not grow dynamically */ + static constexpr inline std::uint32_t customers = 3000U; + + /** Number of orders per district. Does grow dynamically. */ + static constexpr inline std::uint32_t orders = 3000U; + + /** Number of orderlines per order. Does not grow dynamically. */ + static constexpr inline std::uint16_t max_ol_count = 15U; + static constexpr inline std::uint8_t min_ol_count = 5U; + static constexpr inline std::uint16_t max_ol = max_ol_count + 1U; + + /** Number of variations of last names. Does not grow dynamically. 
*/
+    static constexpr inline std::uint32_t lnames = 1000U;
+}  // namespace scale
+
+
+    // random
+    /**
+     * @brief Random source for the data generator, seeded from std::random_device.
+     * @details Each instance owns its own engine; instances are not synchronized.
+     */
+    class randomGeneratorClass {
+    private:
+        std::mt19937 mt;
+
+    public:
+        randomGeneratorClass() {
+            std::random_device rnd;
+            mt.seed(rnd());
+        }
+        /** @brief Returns a uniformly distributed value in [low, high]. */
+        unsigned int uniformWithin(unsigned int low, unsigned int high)
+        {
+            // unsigned distribution: the bounds are unsigned and must not be narrowed to int
+            std::uniform_int_distribution<unsigned int> randlh(low, high);
+            return randlh(mt);
+        }
+        /**
+         * @brief Returns a non-uniformly distributed value in [x, y].
+         * @details Computes (((rand(0,A) | rand(x,y)) + C) % (y - x + 1)) + x where
+         * C is drawn from [0, A] on every call.
+         */
+        unsigned int nonUniformWithin(unsigned int A, unsigned int x, unsigned int y)
+        {
+            unsigned int C = uniformWithin(0, A);
+            return (((uniformWithin(0, A) | uniformWithin(x, y)) + C) % (y - x + 1)) + x;
+        }
+        /**
+         * @brief Fills the five address fields with random NUL-terminated strings.
+         * @details Buffers must hold 21 bytes (str1, str2, city), 3 bytes (state)
+         * and 10 bytes (zip) respectively.
+         */
+        void MakeAddress(char *str1, char *str2, char *city, char *state, char *zip)
+        {
+            MakeAlphaString(10,20,str1); /* Street 1*/
+            MakeAlphaString(10,20,str2); /* Street 2*/
+            MakeAlphaString(10,20,city); /* City */
+            MakeAlphaString(2,2,state); /* State */
+            MakeNumberString(9,9,zip); /* Zip */
+        }
+        /**
+         * @brief Writes a random string of lower-case letters with a length in [min, max].
+         * @param str destination; must have room for max + 1 bytes
+         * @return the length of the generated string (without the terminator)
+         */
+        int MakeAlphaString(int min, int max, char *str)
+        {
+            const char character = 'a';
+
+            int length = static_cast<int>(uniformWithin(min, max));
+            for (int i = 0; i < length; ++i) {
+                *str++ = static_cast<char>(character + uniformWithin(0, 25));
+            }
+            *str = '\0';  // NOLINT
+            return length;
+        }
+        /**
+         * @brief Writes a random string of decimal digits with a length in [min, max].
+         * @param str destination; must have room for max + 1 bytes
+         * @return the length of the generated string (without the terminator)
+         */
+        int MakeNumberString(int min, int max, char *str)
+        {
+            const char character = '0';
+
+            int length = static_cast<int>(uniformWithin(min, max));
+            for (int i = 0; i < length; ++i) {
+                *str++ = static_cast<char>(character + uniformWithin(0, 9));
+            }
+            *str = '\0';  // NOLINT
+            return length;
+        }
+        /** @brief Signed convenience wrapper around uniformWithin(). */
+        inline int RandomNumber(int low, int high) {
+            return static_cast<int>(uniformWithin(low, high)); }
+    };
+
+
+    // for data generation
+    /**
+     * @brief Writes the local time of (now - deltam minutes) into buf.
+     * @details At most sizeof(TIMESTAMP) bytes are written. buf becomes an empty
+     * string when the time cannot be converted or strftime() reports that the
+     * text does not fit.
+     */
+    static inline void
+    gettimestamp(char *buf, int deltam=0)
+    {
+        time_t now_t = time(nullptr);
+        now_t -= deltam * 60L;
+        struct tm now{};
+        size_t result = 0;
+        // localtime_r: localtime() returns a shared static buffer and is not thread-safe
+        if (localtime_r(&now_t, &now) != nullptr) {
+            result = strftime(buf, sizeof(TIMESTAMP), "%Y-%m-%e (%a) %H:%M:%S", &now);
+        }
+        *(buf + result) = '\0';  // NOLINT
+    }
+
+    static inline
+    void getdatestamp(char *buf, int deltad=0)
+    {
+        time_t now = time(nullptr);
+        now -= deltad * (24L * 60L * 60L);
+        struct tm *timeptr 
= localtime(&now);; + strftime(buf,12,"%Y-%m-%d",timeptr); + } + + static inline + void Lastname(int num, char *name) + { + const static std::array n = + {"BAR", "OUGHT", "ABLE", "PRI", "PRES", + "ESE", "ANTI", "CALLY", "ATION", "EING"}; + strcpy(name,(n.at(num/100))); + strcat(name,(n.at((num/10)%10))); + strcat(name,(n.at(num%10))); + } + + template + class Permutation { + std::array cid_array{}; + public: + Permutation() { + for (unsigned int i = 0; i < N; i++) cid_array.at(i) = false; + }; + Permutation(const Permutation& other) = default; + Permutation(Permutation&& other) = default; + Permutation& operator=(const Permutation& other) = default; + Permutation& operator=(Permutation&& other) = default; + ~Permutation() = default; + + int get_permutation(randomGeneratorClass *randomGenerator) + { + while (true) { + uint32_t r = randomGenerator->RandomNumber(0L, scale::customers -1); + if (cid_array.at(r)) { /* This number already taken */ + continue; + } + cid_array.at(r) = true; /* mark taken */ + return r+1; + } + } + }; + + void tpcc_gen_initialize(boost::filesystem::path); + int tpcc_items_gen(boost::filesystem::path); + int tpcc_warehouse_gen(boost::filesystem::path, std::uint16_t); + +} // namespace tpcc diff --git a/tools/tpc-c-datagen/src/tpcc_load.cpp b/tools/tpc-c-datagen/src/tpcc_load.cpp new file mode 100644 index 0000000..e4ead68 --- /dev/null +++ b/tools/tpc-c-datagen/src/tpcc_load.cpp @@ -0,0 +1,781 @@ +#include +#include +#include +#include + +#include +#include + +#include "tpcc_common.h" +#include "tpcc_schema.h" +#include "csv_file.h" + +DECLARE_bool(display_progress); //NOLINT +DECLARE_bool(f); //NOLINT + +namespace tpcc { + +/* Functions */ +int generate_Items(boost::filesystem::path); // item +int generate_Ware(boost::filesystem::path, uint32_t); // warehouse, Stock(), District(), thread +int generate_Ord(boost::filesystem::path, uint32_t); // Orders() for all districts, thread, use no randomGeneratorClass +int 
generate_Cust(boost::filesystem::path, uint32_t); // Customer() for all districts, thread, use no randomGeneratorClass +int Stock(boost::filesystem::path, uint32_t); // stock, thread +int District(boost::filesystem::path, randomGeneratorClass *, uint32_t); // district +int Customer(uint32_t, uint32_t, csv_file&, csv_file&); // customer +int Orders(uint32_t, uint32_t, csv_file&, csv_file&, csv_file&); // orders, new_order, order_line + +/* Global SQL Variables */ +// EXEC SQL BEGIN DECLARE SECTION; +// char timestamp[20]; +static TIMESTAMP timestamp; +// long count_ware; +// static uint32_t count_ware; +// EXEC SQL END DECLARE SECTION; +/* Global Variables */ +// static int i; + +void tpcc_gen_initialize(boost::filesystem::path db) { + gettimestamp(static_cast(timestamp)); + + boost::system::error_code error; + if (const bool result = boost::filesystem::exists(db, error); result && !error) { + if (!FLAGS_f) { + throw std::runtime_error(db.string() + " already exists"); + } + boost::filesystem::remove_all(db); + } + + if (const bool result = boost::filesystem::create_directory(db, error); !result || error) { + throw std::runtime_error(std::string("cannot create ") + db.string()); + } + for ( auto& e : {"WAREHOUSE", "DISTRICT", "CUSTOMER", "NEW_ORDER", "ORDERS", "ORDER_LINE", "ITEM", "STOCK", "HISTORY"} ) { + boost::filesystem::path table(e); + if (const bool result = boost::filesystem::create_directory(db / table, error); !result || error) { + throw std::runtime_error(std::string("cannot create ") + (db / table).string()); + } + } +} + +int tpcc_items_gen(boost::filesystem::path db) +{ + generate_Items(db); + return 0; +} + +int tpcc_warehouse_gen(boost::filesystem::path db, std::uint16_t w_id) +{ + auto threadWare = std::thread([&db, w_id](){generate_Ware(db, w_id);}); + auto threadCust = std::thread([&db, w_id](){generate_Cust(db, w_id);}); + auto threadOrd = std::thread([&db, w_id](){generate_Ord(db, w_id);}); + auto threadStock = std::thread([&db, 
w_id](){Stock(db, w_id);}); + + threadWare.join(); + threadCust.join(); + threadOrd.join(); + threadStock.join(); + + return 0; +} + +/*==================================================================+ + | ROUTINE NAME + | Items + | DESCRIPTION + | generate_s the Item table + | ARGUMENTS + | path to root directory + +==================================================================*/ +int generate_Items(boost::filesystem::path db) +{ + csv_file f(db / boost::filesystem::path("ITEM"), 1); + std::unique_ptr randomGenerator = std::make_unique(); + // EXEC SQL BEGIN DECLARE SECTION; + uint32_t i_id; + // char i_name[24]; + VARCHAR24 i_name; + // float i_price; + double i_price; + // char i_data[50]; + VARCHAR50 i_data; + // EXEC SQL END DECLARE SECTION; + int idatasiz; + // EXEC SQL WHENEVER SQLERROR GOTO sqlerr; + if (FLAGS_display_progress) printf("Loading Item \n"); // NOLINT + + std::array orig_for_items = {}; + uint32_t pos; + + for (i_id=1; i_id<= scale::items; i_id++) { orig_for_items.at(i_id) = 0; } + for (uint32_t i=0; i< scale::items /10; i++) { + do { + pos = randomGenerator->RandomNumber(1L, scale::items); + } while (orig_for_items.at(pos) == 1); + orig_for_items.at(pos) = 1; + } + + for (i_id=1; i_id<= scale::items; i_id++) { + /* Generate Item Data */ + randomGenerator->MakeAlphaString(14, 24, i_name); + i_price=(static_cast(randomGenerator->RandomNumber(100L,10000L)))/100.0; + idatasiz=randomGenerator->MakeAlphaString(26,50, static_cast(i_data)); + if (orig_for_items.at(i_id) == 1) + { + uint32_t pos = randomGenerator->RandomNumber(0L,idatasiz-8); + i_data[pos]='o'; + i_data[pos+1]='r'; + i_data[pos+2]='i'; + i_data[pos+3]='g'; + i_data[pos+4]='i'; + i_data[pos+5]='n'; + i_data[pos+6]='a'; + i_data[pos+7]='l'; + } + f.set_int8("i_id", static_cast(i_id)); + f.set_character("i_name", i_name); + f.set_float8("i_price", i_price); + f.set_character("i_data", i_data); + f.end_of_row(); + if (FLAGS_display_progress) { + if ((i_id % 100) == 0) { + 
printf("i"); // NOLINT + // EXEC SQL COMMIT WORK; + if ( !(i_id % 5000) ) printf(" %d\n",i_id); // NOLINT + } + } + } + f.close(); + if (FLAGS_display_progress) printf("Item Done. \n"); // NOLINT + return 0; +} + +/*==================================================================+ + | ROUTINE NAME + | generate_Ware + | DESCRIPTION + | generate_s the Warehouse table + | generate_s District as Warehouses are created + | ARGUMENTS + | path to root directory, warehouse id + +==================================================================*/ +int generate_Ware(boost::filesystem::path db, uint32_t w_id) +{ + csv_file f(db / boost::filesystem::path("WAREHOUSE"), w_id); + std::unique_ptr randomGenerator = std::make_unique(); + + // EXEC SQL BEGIN DECLARE SECTION; + // uint32_t w_id; + // char w_name[10]; + VARCHAR10 w_name; + // char w_street_1[20]; + VARCHAR20 w_street_1; + // char w_street_2[20]; + VARCHAR20 w_street_2; + // char w_city[20]; + VARCHAR20 w_city; + // char w_state[2]; + CHAR2 w_state; + // char w_zip[9]; + CHAR9 w_zip; + // float w_tax; + double w_tax; + // float w_ytd; + double w_ytd; + // EXEC SQL END DECLARE SECTION; + // EXEC SQL WHENEVER SQLERROR GOTO sqlerr; + if (FLAGS_display_progress) printf("Loading Warehouse \n"); // NOLINT + + /* Generate Warehouse Data */ + randomGenerator->MakeAlphaString( 6, 10, static_cast(w_name)); + randomGenerator->MakeAddress(static_cast(w_street_1), static_cast(w_street_2), static_cast(w_city), static_cast(w_state), static_cast(w_zip)); + w_tax=(static_cast(randomGenerator->RandomNumber(10L,20L)))/100.0; + w_ytd=3000000.00; + // if ( option_debug ) + // printf( "WID = %d, Name= %16s, Tax = %5.2f\n", + // w_id, w_name, w_tax ); + // EXEC SQL INSERT INTO + // warehouse (w_id, w_name, + // w_street_1, w_street_2, w_city, w_state, w_zip, + // w_tax, w_ytd) + // values (:w_id, :w_name, + // :w_street_1, :w_street_2, :w_city, :w_state, + // :w_zip, :w_tax, :w_ytd); + /** Make Rows associated with Warehouse **/ + 
f.set_int8("w_id", static_cast(w_id)); + f.set_character("w_name", w_name); + f.set_character("w_street_1", w_street_1); + f.set_character("w_street_2", w_street_2); + f.set_character("w_city", w_city); + f.set_character("w_state", w_state); + f.set_character("w_zip", w_zip); + f.set_float8("w_tax", w_tax); + f.set_float8("w_ytd", w_ytd); + f.end_of_row(); + f.close(); + + District(db, randomGenerator.get(), w_id); + + return 0; +} + +/*==================================================================+ + | ROUTINE NAME + | generate_Cust + | DESCRIPTION + | generate_s the Customer Table + | ARGUMENTS + | path to root directory, warehouse id + +==================================================================*/ +int generate_Cust(boost::filesystem::path db, uint32_t w_id) +{ + csv_file fc(db / boost::filesystem::path("CUSTOMER"), w_id); + csv_file fh(db / boost::filesystem::path("HISTORY"), w_id); + + // EXEC SQL BEGIN DECLARE SECTION; + // EXEC SQL END DECLARE SECTION; + // uint32_t w_id; + uint32_t d_id; + + // EXEC SQL WHENEVER SQLERROR GOTO sqlerr; + for (d_id=1L; d_id<= scale::districts; d_id++) { + Customer(d_id,w_id, fc, fh); + } + fc.close(); + fh.close(); + + /* Just in case */ + return 0; +} + +/*==================================================================+ + | ROUTINE NAME + | generate_Ord + | DESCRIPTION + | generate_s the Orders and Order_Line Tables + | ARGUMENTS + | path to root directory, warehouse id + +==================================================================*/ +int generate_Ord(boost::filesystem::path db, uint32_t w_id) +{ + csv_file fno(db / boost::filesystem::path("NEW_ORDER"), w_id); + csv_file fo(db / boost::filesystem::path("ORDERS"), w_id); + csv_file fol(db / boost::filesystem::path("ORDER_LINE"), w_id); + // EXEC SQL BEGIN DECLARE SECTION; + // uint32_t w_id; + // float w_tax; + // float d_tax; + uint32_t d_id; + // EXEC SQL END DECLARE SECTION; + // EXEC SQL WHENEVER SQLERROR GOTO sqlerr; + for (d_id=1L; d_id <= 
scale::districts; d_id++) { + Orders(d_id, w_id, fno, fo, fol); + // EXEC SQL COMMIT WORK; + /* Just in case */ + } + fno.close(); + fo.close(); + fol.close(); + return 0; +} + +/*==================================================================+ + | ROUTINE NAME + | Stock + | DESCRIPTION + | generate_s the Stock table + | ARGUMENTS + | path to root directory, warehouse id + +==================================================================*/ +int Stock(boost::filesystem::path db, uint32_t w_id) +{ + csv_file f(db / boost::filesystem::path("STOCK"), w_id); + std::unique_ptr randomGenerator = std::make_unique(); + + std::array orig_for_stock = {}; + uint32_t pos; + unsigned int i; + + for (i=0; i<= scale::items; i++) { orig_for_stock.at(i)=0; } + for (i=0; i< scale::items /10; i++) { + do { + pos=randomGenerator->RandomNumber(1L, scale::items); + } while (orig_for_stock.at(pos) == 1); + orig_for_stock.at(pos) = 1; + } + + // EXEC SQL BEGIN DECLARE SECTION; + uint32_t s_i_id = 1; + uint32_t s_w_id; + uint32_t s_quantity; + // char s_dist_01[24]; + VARCHAR50 s_dist_01; + // char s_dist_02[24]; + VARCHAR50 s_dist_02; + // char s_dist_03[24]; + VARCHAR50 s_dist_03; + // char s_dist_04[24]; + VARCHAR50 s_dist_04; + // char s_dist_05[24]; + VARCHAR50 s_dist_05; + // char s_dist_06[24]; + VARCHAR50 s_dist_06; + // char s_dist_07[24]; + VARCHAR50 s_dist_07; + // char s_dist_08[24]; + VARCHAR50 s_dist_08; + // char s_dist_09[24]; + VARCHAR50 s_dist_09; + // char s_dist_10[24]; + VARCHAR50 s_dist_10; + // char s_data[50]; + VARCHAR50 s_data; + // EXEC SQL END DECLARE SECTION; + int sdatasiz; + // EXEC SQL WHENEVER SQLERROR GOTO sqlerr; + if (FLAGS_display_progress) printf("generating Stock for i_id=%d, w_id=%d\n", s_i_id, w_id); // NOLINT + + for (s_i_id = 1; s_i_id <= scale::items; s_i_id++) { + s_w_id = w_id; + /* Generate Stock Data */ + s_quantity=randomGenerator->RandomNumber(10L,100L); + randomGenerator->MakeAlphaString(24,24,static_cast(s_dist_01)); + 
randomGenerator->MakeAlphaString(24,24,static_cast(s_dist_02)); + randomGenerator->MakeAlphaString(24,24,static_cast(s_dist_03)); + randomGenerator->MakeAlphaString(24,24,static_cast(s_dist_04)); + randomGenerator->MakeAlphaString(24,24,static_cast(s_dist_05)); + randomGenerator->MakeAlphaString(24,24,static_cast(s_dist_06)); + randomGenerator->MakeAlphaString(24,24,static_cast(s_dist_07)); + randomGenerator->MakeAlphaString(24,24,static_cast(s_dist_08)); + randomGenerator->MakeAlphaString(24,24,static_cast(s_dist_09)); + randomGenerator->MakeAlphaString(24,24,static_cast(s_dist_10)); + sdatasiz=randomGenerator->MakeAlphaString(26,50,static_cast(s_data)); + if (orig_for_stock.at(s_i_id) == 1) + { + uint32_t pos=randomGenerator->RandomNumber(0L,sdatasiz-8); + s_data[pos]='o'; + s_data[pos+1]='r'; + s_data[pos+2]='i'; + s_data[pos+3]='g'; + s_data[pos+4]='i'; + s_data[pos+5]='n'; + s_data[pos+6]='a'; + s_data[pos+7]='l'; + } + // EXEC SQL INSERT INTO + // stock (s_i_id, s_w_id, s_quantity, + // s_dist_01, s_dist_02, s_dist_03, s_dist_04, s_dist_05, + // s_dist_06, s_dist_07, s_dist_08, s_dist_09, s_dist_10, + // s_data, s_ytd, s_cnt_order, s_cnt_remote) + // values (:s_i_id, :s_w_id, :s_quantity, + // :s_dist_01, :s_dist_02, :s_dist_03, :s_dist_04, :s_dist_05, + // :s_dist_06, :s_dist_07, :s_dist_08, :s_dist_09, :s_dist_10, + // :s_data, 0, 0, 0); + f.set_int8("s_i_id", static_cast(s_i_id)); + f.set_int8("s_w_id", static_cast(s_w_id)); + f.set_int8("s_quantity", static_cast(s_quantity)); + f.set_character("s_dist_01", s_dist_01); + f.set_character("s_dist_02", s_dist_02); + f.set_character("s_dist_03", s_dist_03); + f.set_character("s_dist_04", s_dist_04); + f.set_character("s_dist_05", s_dist_05); + f.set_character("s_dist_06", s_dist_06); + f.set_character("s_dist_07", s_dist_07); + f.set_character("s_dist_08", s_dist_08); + f.set_character("s_dist_09", s_dist_09); + f.set_character("s_dist_10", s_dist_10); + f.set_character("s_data", s_data); + f.end_of_row(); + } 
+ + if (FLAGS_display_progress) { + if ((s_i_id % 100) == 0) { + // EXEC SQL COMMIT WORK; + printf("s"); // NOLINT + if ( !(s_i_id % 5000) ) printf(" %d\n",s_i_id); // NOLINT + } + } + + if (FLAGS_display_progress) printf("Stock Done.\n"); // NOLINT + + return 0; +} + +/*==================================================================+ + | ROUTINE NAME + | District + | DESCRIPTION + | generate_s the District table + | ARGUMENTS + | path to root directory, random generator, warehouse id + +==================================================================*/ +int District(boost::filesystem::path db, randomGeneratorClass *randomGenerator, uint32_t w_id) +{ + csv_file f(db / boost::filesystem::path("DISTRICT"), w_id); + + // EXEC SQL BEGIN DECLARE SECTION; + uint32_t d_id; + uint32_t d_w_id; + // char d_name[10]; + VARCHAR10 d_name; + // char d_street_1[20]; + VARCHAR20 d_street_1; + // char d_street_2[20]; + VARCHAR20 d_street_2; + // char d_city[20]; + VARCHAR20 d_city; + // char d_state[2]; + CHAR2 d_state; + // char d_zip[9]; + CHAR9 d_zip; + // float d_tax; + double d_tax; + // float d_ytd; + double d_ytd; + uint32_t d_next_o_id; + // EXEC SQL END DECLARE SECTION; + // EXEC SQL WHENEVER SQLERROR GOTO sqlerr; + + if (FLAGS_display_progress) printf("Loading District\n"); // NOLINT + d_w_id=w_id; + d_ytd=30000.0; + d_next_o_id= scale::orders +1; + + for (d_id=1; d_id <= scale::districts; d_id++) { + /* Generate District Data */ + randomGenerator->MakeAlphaString(6L,10L,static_cast(d_name)); + randomGenerator->MakeAddress(static_cast(d_street_1), static_cast(d_street_2), static_cast(d_city), static_cast(d_state), static_cast(d_zip)); + d_tax=(static_cast(randomGenerator->RandomNumber(10L,20L)))/100.0; + // EXEC SQL INSERT INTO + // district (d_id, d_w_id, d_name, + // d_street_1, d_street_2, d_city, d_state, d_zip, + // d_tax, d_ytd, d_next_o_id) + // values (:d_id, :d_w_id, :d_name, + // :d_street_1, :d_street_2, :d_city, :d_state, :d_zip, + // :d_tax, :d_ytd, 
:d_next_o_id); + f.set_int8("d_id", static_cast(d_id)); + f.set_int8("d_w_id", static_cast(d_w_id)); + f.set_character("d_name", d_name); + f.set_character("d_street_1", d_street_1); + f.set_character("d_street_2", d_street_2); + f.set_character("d_city", d_city); + f.set_character("d_state", d_state); + f.set_character("d_zip", d_zip); + f.set_float8("d_tax", d_tax); + f.set_float8("d_ytd", d_ytd); + f.set_int8("d_next_o_id", static_cast(d_next_o_id)); + f.end_of_row(); + } + f.close(); + return 0; +} + +/*==================================================================+ + | ROUTINE NAME + | Customer + | DESCRIPTION + | generate_s Customer Table + | Also inserts corresponding history record + | ARGUMENTS + | path to root directory, district id, warehouse id + +==================================================================*/ +int Customer(uint32_t d_id, uint32_t w_id, csv_file& fc, csv_file& fh) +{ + std::unique_ptr randomGenerator = std::make_unique(); + + // EXEC SQL BEGIN DECLARE SECTION; + uint32_t c_id; + uint32_t c_d_id; + uint32_t c_w_id; + // char c_first[16]; + VARCHAR16 c_first; + // char c_middle[2]; + CHAR2 c_middle; + // char c_last[16]; + VARCHAR16 c_last; + // char c_street_1[20]; + VARCHAR20 c_street_1; + // char c_street_2[20]; + VARCHAR20 c_street_2; + // char c_city[20]; + VARCHAR20 c_city; + // char c_state[2]; + CHAR2 c_state; + // char c_zip[9]; + CHAR9 c_zip; + // char c_phone[16]; + CHAR16 c_phone; + // char c_since[11]; + VARCHAR10 c_since; + // char c_credit[2]; + CHAR2 c_credit; + uint32_t c_credit_lim; + // float c_discount; + double c_discount; + // float c_balance; + double c_balance; + // char c_data[500]; + VARCHAR500 c_data; + // float h_amount; + double h_amount; + // char h_data[24]; + VARCHAR24 h_data; + // EXEC SQL END DECLARE SECTION; + // EXEC SQL WHENEVER SQLERROR GOTO sqlerr; + if (FLAGS_display_progress) printf("generate_ing Customer for d_id=%d, w_id=%d\n", d_id, w_id); // NOLINT + + for (c_id = 1; c_id <= 
scale::customers; c_id++) { + /* Generate Customer Data */ + c_d_id=d_id; + c_w_id=w_id; + randomGenerator->MakeAlphaString( 8, 16, static_cast(c_first) ); + c_middle[0]='O'; c_middle[1]='E'; c_middle[2]='\0'; + if (c_id <= 1000) { + Lastname(c_id-1,static_cast(c_last)); + } else { + Lastname(randomGenerator->nonUniformWithin(255,0,999),static_cast(c_last)); + } + randomGenerator->MakeAddress( static_cast(c_street_1), static_cast(c_street_2), static_cast(c_city), static_cast(c_state), static_cast(c_zip) ); + randomGenerator->MakeNumberString( 16, 16, static_cast(c_phone) ); + if (randomGenerator->RandomNumber(0L,1L) > 0) { + c_credit[0]='G'; + } else { + c_credit[0]='B'; + } + c_credit[1]='C'; c_credit[2]='\0'; + c_credit_lim=50000; + c_discount=(static_cast(randomGenerator->RandomNumber(0L,50L)))/100.0; + c_balance= -10.0; + getdatestamp(static_cast(c_since), randomGenerator->RandomNumber(1L,365L*50L)); + randomGenerator->MakeAlphaString(300,500,static_cast(c_data)); + + // EXEC SQL INSERT INTO + // customer (c_id, c_d_id, c_w_id, + // c_first, c_middle, c_last, + // c_street_1, c_street_2, c_city, c_state, c_zip, + // c_phone, c_since, c_credit, + // c_credit_lim, c_discount, c_balance, c_data, + // c_ytd_payment, c_cnt_payment, c_cnt_delivery) + // values (:c_id, :c_d_id, :c_w_id, + // :c_first, :c_middle, :c_last, + // :c_street_1, :c_street_2, :c_city, :c_state, :c_zip, + // :c_phone, :timestamp, :c_credit, + // :c_credit_lim, :c_discount, :c_balance, :c_data, + // 10.0, 1, 0) ; + fc.set_int8("c_id", static_cast(c_id)); + fc.set_int8("c_d_id", static_cast(c_d_id)); + fc.set_int8("c_w_id", static_cast(c_w_id)); + fc.set_character("c_first", c_first); + fc.set_character("c_middle", c_middle); + fc.set_character("c_last", c_last); + fc.set_character("c_street_1", c_street_1); + fc.set_character("c_street_2", c_street_2); + fc.set_character("c_city", c_city); + fc.set_character("c_state", c_state); + fc.set_character("c_zip", c_zip); + fc.set_character("c_phone", 
c_phone); + fc.set_character("c_since", c_since); + fc.set_character("c_credit", c_credit); + fc.set_float8("c_credit_lim", static_cast(c_credit_lim)); + fc.set_float8("c_discount", c_discount); + fc.set_float8("c_balance", c_balance); + fc.set_character("c_data", c_data); + fc.end_of_row(); + + h_amount=10.0; + randomGenerator->MakeAlphaString(12,24,static_cast(h_data)); + // EXEC SQL INSERT INTO + // history (h_c_id, h_c_d_id, h_c_w_id, + // h_w_id, h_d_id, h_date, h_amount, h_data) + // values (:c_id, :c_d_id, :c_w_id, + // :c_w_id, :c_d_id, :timestamp, :h_amount, :h_data); + // if ( option_debug ) + // printf( "CID = %d, LST = %s, P# = %s\n", + // c_id, c_last, c_phone ); + fh.set_int8("h_c_id", c_id); + fh.set_int8("h_c_d_id", c_d_id); + fh.set_int8("h_c_w_id", c_w_id); + fh.set_int8("h_w_id", c_w_id); + fh.set_int8("h_d_id", c_d_id); + fh.set_character("h_date", timestamp); + fh.set_float8("h_amount", h_amount); + fh.set_character("h_data", h_data); + fh.end_of_row(); + + if (FLAGS_display_progress) { + if ((c_id % 100) == 0) { + // EXEC SQL COMMIT WORK; + printf("c"); // NOLINT + if ( !(c_id % 1000) ) printf(" %d\n",c_id); // NOLINT + } + } + } + + if (FLAGS_display_progress) printf("Customer Done.\n"); // NOLINT + return 0; +} + +/*==================================================================+ + | ROUTINE NAME + | Orders + | DESCRIPTION + | generate_s the Orders table + | Also loads the Order_Line table on the fly + | ARGUMENTS + | path to root directory, district id, warehouse id + +==================================================================*/ +int Orders(uint32_t d_id, uint32_t w_id, csv_file& fno, csv_file& fo, csv_file& fol) +{ + std::unique_ptr randomGenerator = std::make_unique(); + + // EXEC SQL BEGIN DECLARE SECTION; + uint32_t o_id = 1; + uint32_t o_c_id; + uint32_t o_d_id; + uint32_t o_w_id; + uint32_t o_carrier_id; + uint32_t o_ol_cnt; + uint32_t ol; + uint32_t ol_i_id; + uint32_t ol_supply_w_id; + uint32_t ol_quantity; + // long 
ol_amount; + // float ol_amount; + double ol_amount; + // char ol_dist_info[24]; + VARCHAR24 ol_dist_info; + // float i_price; + // float c_discount; + // EXEC SQL END DECLARE SECTION; + // EXEC SQL WHENEVER SQLERROR GOTO sqlerr; + if (FLAGS_display_progress) printf("generateing Orders for d_id=%d, w_id=%d\n", d_id, w_id); // NOLINT + o_d_id=d_id; + o_w_id=w_id; + Permutation permutation; + + for (o_id = 1; o_id <= scale::orders; o_id++) { + /* Generate Order Data */ + o_c_id=permutation.get_permutation(randomGenerator.get()); + o_carrier_id=randomGenerator->RandomNumber(1L,10L); + o_ol_cnt=randomGenerator->RandomNumber(5L,15L); + if (o_id > ((scale::orders * 7) / 10)) /* the last 900 orders have not been delivered) */ + { + // EXEC SQL INSERT INTO + // orders (o_id, o_c_id, o_d_id, o_w_id, + // o_entry_d, o_carrier_id, o_ol_cnt, o_all_local) + // values (:o_id, :o_c_id, :o_d_id, :o_w_id, + // :timestamp, NULL, :o_ol_cnt, 1); + // to set o_carrir_id NULL, we does not include this column in the SQL + fo.set_int8("o_id", static_cast(o_id)); + fo.set_int8("o_c_id", static_cast(o_c_id)); + fo.set_int8("o_d_id", static_cast(o_d_id)); + fo.set_int8("o_w_id", static_cast(o_w_id)); + fo.set_character("o_entry_d", timestamp); + fo.set_null("o_carrier_id"); + fo.set_int8("o_ol_cnt", static_cast(o_ol_cnt)); + fo.set_int8("o_all_local", static_cast(1)); + fo.end_of_row(); + + // EXEC SQL INSERT INTO + // new_order (no_o_id, no_d_id, no_w_id) + // values (:o_id, :o_d_id, :o_w_id); + fno.set_int8("no_o_id", static_cast(o_id)); + fno.set_int8("no_d_id", static_cast(o_d_id)); + fno.set_int8("no_w_id", static_cast(o_w_id)); + fno.end_of_row(); + } + else + // EXEC SQL INSERT INTO + // orders (o_id, o_c_id, o_d_id, o_w_id, + // o_entry_d, o_carrier_id, o_ol_cnt, o_all_local) + // values (:o_id, :o_c_id, :o_d_id, :o_w_id, + // :timestamp, :o_carrier_id, :o_ol_cnt, 1); + { + fo.set_int8("o_id", static_cast(o_id)); + fo.set_int8("o_c_id", static_cast(o_c_id)); + fo.set_int8("o_d_id", 
static_cast(o_d_id)); + fo.set_int8("o_w_id", static_cast(o_w_id)); + fo.set_character("o_entry_d", timestamp); + fo.set_int8("o_carrier_id", static_cast(o_carrier_id)); + fo.set_int8("o_ol_cnt", static_cast(o_ol_cnt)); + fo.set_int8("o_all_local", static_cast(1)); + fo.end_of_row(); + } + + // if ( option_debug ) + // printf( "OID = %d, CID = %d, DID = %d, WID = %d\n", + // o_id, o_c_id, o_d_id, o_w_id); + TIMESTAMP datetime; gettimestamp(static_cast(datetime), randomGenerator->RandomNumber(1L,90L*24L*60L)); + for (ol=1; ol<=o_ol_cnt; ol++) { + /* Generate Order Line Data */ + ol_i_id=randomGenerator->RandomNumber(1L, scale::items); + ol_supply_w_id=o_w_id; + ol_quantity=5; + ol_amount=0.0; + + randomGenerator->MakeAlphaString(24,24,static_cast(ol_dist_info)); + + if (o_id > ((scale::orders * 7) / 10)) + { + // EXEC SQL INSERT INTO + // order_line (ol_o_id, ol_d_id, ol_w_id, ol_number, + // ol_i_id, ol_supply_w_id, ol_quantity, ol_amount, + // ol_dist_info, ol_delivery_d) + // values (:o_id, :o_d_id, :o_w_id, :ol, + // :ol_i_id, :ol_supply_w_id, :ol_quantity, :ol_amount, + // :ol_dist_info, NULL); + // to set ol_delivery_d NULL, we does not include this column in the SQL + fol.set_int8("ol_o_id", static_cast(o_id)); + fol.set_int8("ol_d_id", static_cast(o_d_id)); + fol.set_int8("ol_w_id", static_cast(o_w_id)); + fol.set_int8("ol_number", static_cast(ol)); + fol.set_int8("ol_i_id", static_cast(ol_i_id)); + fol.set_int8("ol_supply_w_id", static_cast(ol_supply_w_id)); + fol.set_int8("ol_quantity", static_cast(ol_quantity)); + fol.set_float8("ol_amount", ol_amount); + fol.set_character("ol_dist_info", ol_dist_info); + fol.set_character("ol_delivery_d", {}); + fol.end_of_row(); + } + else + // EXEC SQL INSERT INTO + // order_line (ol_o_id, ol_d_id, ol_w_id, ol_number, + // ol_i_id, ol_supply_w_id, ol_quantity, + // (float)(RandomNumber(10L, 10000L))/100.0, + // ol_dist_info, ol_delivery_d) + // values (:o_id, :o_d_id, :o_w_id, :ol, + // :ol_i_id, :ol_supply_w_id, 
:ol_quantity, + // :ol_amount, + // :ol_dist_info, datetime); + { + ol_amount = (static_cast(randomGenerator->RandomNumber(10L, 10000L)))/100.0; + fol.set_int8("ol_o_id", static_cast(o_id)); + fol.set_int8("ol_d_id", static_cast(o_d_id)); + fol.set_int8("ol_w_id", static_cast(o_w_id)); + fol.set_int8("ol_number", static_cast(ol)); + fol.set_int8("ol_i_id", static_cast(ol_i_id)); + fol.set_int8("ol_supply_w_id", static_cast(ol_supply_w_id)); + fol.set_int8("ol_quantity", static_cast(ol_quantity)); + fol.set_float8("ol_amount", ol_amount); + fol.set_character("ol_dist_info", ol_dist_info); + fol.set_character("ol_delivery_d", datetime); + fol.end_of_row(); + } + // if ( option_debug ) + // printf( "OL = %d, IID = %d, QUAN = %d, AMT = %8.2f\n", + // ol, ol_i_id, ol_quantity, ol_amount); + } + + if (FLAGS_display_progress) { + if ((o_id % 100) == 0) { + printf("o"); // NOLINT + // EXEC SQL COMMIT WORK; + if ( !(o_id % 1000) ) printf(" %d\n",o_id); // NOLINT + } + } + // EXEC SQL COMMIT WORK; + + } + return 0; +} + +} // namespace tpcc diff --git a/tools/tpc-c-datagen/src/tpcc_schema.h b/tools/tpc-c-datagen/src/tpcc_schema.h new file mode 100644 index 0000000..d17e5bb --- /dev/null +++ b/tools/tpc-c-datagen/src/tpcc_schema.h @@ -0,0 +1,40 @@ +/* + * Copyright 2018-2021 tsurugi project. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include + +namespace tpcc { + + // Add one byete to store the delimiter ('\0') + using VARCHAR10 = char[11]; + using VARCHAR16 = char[17]; + using VARCHAR20 = char[21]; + using VARCHAR24 = char[25]; + using VARCHAR50 = char[51]; + using VARCHAR500 = char[501]; + + using CHAR2 = char[3]; + using CHAR9 = char[10]; + using CHAR16 = char[17]; + using CHAR24 = char[25]; + using TIMESTAMP = char[26]; + + using DOUBLE = double; + using SMALLINT = uint32_t; // for the time being + using INTEGER = uint64_t; + +} // Namespace tpcc