diff --git a/.github/workflows/install-lint-test-on-mac.yml b/.github/workflows/install-lint-test-on-mac.yml index 515a447b9..be3f9f56a 100644 --- a/.github/workflows/install-lint-test-on-mac.yml +++ b/.github/workflows/install-lint-test-on-mac.yml @@ -15,10 +15,25 @@ jobs: strategy: matrix: - python-version: # github.com/actions/python-versions/releases + python-version: # github.com/actions/python-versions/releases - 3.12 # - 3.13 + services: + mysql: + image: mysql:9.1.0 + env: + MYSQL_USER: root + MYSQL_PASSWORD: password + MYSQL_DATABASE: test + ports: + - 3306:3306 + options: >- + --health-cmd="mysqladmin ping --silent" + --health-interval=10s + --health-timeout=5s + --health-retries=3 + steps: - name: Checkout Repo uses: actions/checkout@v4 # github.com/actions/checkout @@ -28,12 +43,22 @@ jobs: with: python-version: ${{ matrix.python-version }} + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + - name: Install Poetry run: make get-poetry - name: Install Package & Dependencies run: make install + - name: Wait for MySQL to be ready + run: | + until mysqladmin ping -h 127.0.0.1 --silent; do + echo "Waiting for MySQL..." + sleep 5 + done + - name: Lint Code run: make lint @@ -42,3 +67,6 @@ jobs: env: LEPTON_API_KEY: ${{ secrets.LEPTON_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + MYSQL_USER: root + MYSQL_PASSWORD: password + MYSQL_DATABASE: test diff --git a/.github/workflows/install-lint-test-on-ubuntu.yml b/.github/workflows/install-lint-test-on-ubuntu.yml index 880056400..250b1d2eb 100644 --- a/.github/workflows/install-lint-test-on-ubuntu.yml +++ b/.github/workflows/install-lint-test-on-ubuntu.yml @@ -19,6 +19,20 @@ jobs: - 3.12 # - 3.13 + services: + mysql: + image: mysql:latest + env: + MYSQL_ALLOW_EMPTY_PASSWORD: "yes" + MYSQL_DATABASE: test + ports: + - 3306:3306 + options: >- + --health-cmd="mysqladmin ping --silent" + --health-interval=10s + --health-timeout=5s + --health-retries=3 + steps: - name: Checkout Repo uses: actions/checkout@v4 # github.com/actions/checkout @@ -34,6 +48,22 @@ jobs: - name: Install Package & Dependencies run: make install + - name: Wait for MySQL to be ready + run: | + until mysqladmin ping -h 127.0.0.1 --silent; do + echo "Waiting for MySQL..." + sleep 5 + done + + - name: Install Alembic + run: pip install alembic + + - name: Run Migrations + run: | + python -m alembic upgrade head + env: + DATABASE_URL: mysql+pymysql://root@127.0.0.1/test + - name: Lint Code run: make lint @@ -42,3 +72,4 @@ jobs: env: LEPTON_API_KEY: ${{ secrets.LEPTON_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + DB_NAME: test diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 000000000..d515de166 --- /dev/null +++ b/alembic.ini @@ -0,0 +1,85 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +script_location = alembic + +# template used to generate migration files +# file_template = %%(rev)s_%%(slug)s + +# timezone to use when rendering the date +# within the migration file as well as the filename. 
+# string value is passed to dateutil.tz.gettz()
+# leave blank for localtime
+# timezone =
+
+# max length of characters to apply to the
+# "slug" field
+# truncate_slug_length = 40
+
+# set to 'true' to run the environment during
+# the 'revision' command, regardless of autogenerate
+# revision_environment = false
+
+# set to 'true' to allow .pyc and .pyo files without
+# a source .py file to be detected as revisions in the
+# versions/ directory
+# sourceless = false
+
+# version location specification; this defaults
+# to alembic/versions. When using multiple version
+# directories, initial revisions must be specified with --version-path
+# version_locations = %(here)s/bar %(here)s/bat alembic/versions
+
+# the output encoding used when revision files
+# are written from script.py.mako
+# output_encoding = utf-8
+
+sqlalchemy.url = mysql+pymysql://user:pass@localhost/dbname
+
+
+[post_write_hooks]
+# post_write_hooks defines scripts or Python functions that are run
+# on newly generated revision scripts. See the documentation for further
+# detail and examples
+
+# format using "black" - use the console_scripts runner, against the "black" entrypoint
+# hooks=black
+# black.type=console_scripts
+# black.entrypoint=black
+# black.options=-l 79
+
+# Logging configuration
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARN
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARN
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
diff --git a/chroma.sqlite3 b/chroma.sqlite3
new file mode 100644
index 000000000..8b32ac1c4
Binary files /dev/null and b/chroma.sqlite3 differ
diff --git a/examples/japanese-easy-demo/pyproject.toml b/examples/japanese-easy-demo/pyproject.toml
index f5dfe97ad..bd1860427 100644
--- a/examples/japanese-easy-demo/pyproject.toml
+++ b/examples/japanese-easy-demo/pyproject.toml
@@ -8,6 +8,7 @@ readme = "README.md"
 [tool.poetry.dependencies]
 python = ">=3.12,<3.13"
 openssa = "^0.24.3.12"
+python-dotenv = "^1.0.1"
 
 
 [build-system]
diff --git a/examples/use-rdb-resource/.env.template b/examples/use-rdb-resource/.env.template
new file mode 100644
index 000000000..09adbdcb7
--- /dev/null
+++ b/examples/use-rdb-resource/.env.template
@@ -0,0 +1,7 @@
+OPENAI_API_KEY=your_openai_api_key
+
+DB_USERNAME=your_username
+DB_PASSWORD=your_password
+DB_HOST=your_host
+DB_PORT=your_port
+DB_NAME=your_database_name
diff --git a/examples/use-rdb-resource/.gitignore b/examples/use-rdb-resource/.gitignore
new file mode 100644
index 000000000..ef2003889
--- /dev/null
+++ b/examples/use-rdb-resource/.gitignore
@@ -0,0 +1,4 @@
+db_config.yaml
+Chinook.sqlite
+chroma.sqlite3
+*.bin
diff --git a/examples/use-rdb-resource/README.md b/examples/use-rdb-resource/README.md
new file mode 100644
index 000000000..d8bdd515f
--- /dev/null
+++ b/examples/use-rdb-resource/README.md
@@ -0,0 +1,26 @@
+# Accessing database resources and answering questions with DANA
+
+## What is this example doing?
+
+- It uses the `DbResource` class to fetch data from a relational database (MySQL) with SQL generated by Vanna, and has DANA answer a question over that data (see the sketch below).
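+
+A minimal sketch of that flow (mirroring `main.py` in this example; the question text is only an illustration):
+
+```python
+from dotenv import load_dotenv
+
+from myvanna import generate_sql_from_prompt      # Vanna: natural-language question -> SQL
+from openssa.core.agent.dana import DANA
+from openssa.core.resource.db import DbResource   # runs that SQL against the MySQL database
+
+load_dotenv()  # DB_* settings and OPENAI_API_KEY are read from .env (see .env.template)
+
+QUESTION = "Can you list the products in order of sales volume from highest to lowest?"
+
+sql = generate_sql_from_prompt(QUESTION)           # 1. Vanna generates the SQL
+agent = DANA(resources=[DbResource(query=sql)])    # 2. DbResource runs the query on demand
+print(agent.solve(problem=QUESTION))               # 3. DANA answers from the fetched rows
+```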
+
+## Setting-up
+
+- What you need
+  - commands (if you are a Mac user, you can install these with Homebrew)
+    - mysql
+      - Also, create a database (or use an existing one) for this example.
+    - poetry
+  - API key
+    - Use your own OpenAI API key
+
+- Setup commands
+  - `cd examples/use-rdb-resource`
+  - `poetry install`
+  - `cp .env.template .env`
+    - Update the values in `.env` for your environment.
+  - `poetry run python make_example_table_data.py`
+    - If this command doesn't work, run `poetry env use 3.12` first.
+      - This switches the Python version so that dependency versions can be resolved.
+  - `poetry run python main.py`
+    - Runs the main script: DANA answers the question using `DbResource`, and the result is printed in the terminal.
diff --git a/examples/use-rdb-resource/main.py b/examples/use-rdb-resource/main.py
new file mode 100644
index 000000000..5d6096c8b
--- /dev/null
+++ b/examples/use-rdb-resource/main.py
@@ -0,0 +1,41 @@
+from dotenv import load_dotenv
+
+from myvanna import generate_sql_from_prompt
+
+# from openssa import DANA, DbResource
+from openssa.core.agent.dana import DANA  # , FileResource
+from openssa.core.resource.db import DbResource
+
+load_dotenv()
+
+
+def get_or_create_agent(query) -> DANA:
+    return DANA(
+        resources=[DbResource(query=query)]
+    )
+
+
+def solve(question, query) -> str:
+    agent = get_or_create_agent(query)
+    try:
+        return agent.solve(problem=question)
+    except ValueError as err:
+        return f'ERROR: {err}'
+    except RuntimeError as err:
+        return f'ERROR: {err}'
+
+
+if __name__ == '__main__':
+    QUESTION = (
+        "Can you list the products in order of sales volume from highest to lowest?"
+    )
+
+    query = generate_sql_from_prompt(QUESTION)
+    print(query)
+    answer = solve(QUESTION, query)
+
+    print('--------------------------------')
+    print(answer)
+    print('--------------------------------')
+    print(query)
+    print('--------------------------------')
diff --git a/examples/use-rdb-resource/make_example_table_data.py b/examples/use-rdb-resource/make_example_table_data.py
new file mode 100644
index 000000000..8fb6e7c97
--- /dev/null
+++ b/examples/use-rdb-resource/make_example_table_data.py
@@ -0,0 +1,103 @@
+import os
+import random
+import secrets
+
+from dotenv import load_dotenv
+from faker import Faker
+from sqlalchemy import Column, Integer, String, Date, inspect, create_engine
+from sqlalchemy.orm import sessionmaker, declarative_base
+
+from myvanna import train_vanna_for_sales_data
+
+load_dotenv()
+
+Base = declarative_base()
+
+
+class SalesData(Base):
+    __tablename__ = 'sales_data'
+    sale_id = Column(Integer, primary_key=True, autoincrement=True)
+    product_id = Column(Integer)
+    product_name = Column(String(255))
+    sale_date = Column(Date)
+    region = Column(String(255))
+
+
+class MySQLDatabase:
+    def __init__(self):
+        self.engine = self.create_engine()
+        self.Session = sessionmaker(bind=self.engine)
+
+    def create_engine(self):
+        username = os.getenv('DB_USERNAME')
+        password = os.getenv('DB_PASSWORD')
+        host = os.getenv('DB_HOST')
+        port = os.getenv('DB_PORT')
+        database = os.getenv('DB_NAME')
+        connection_string = f'mysql+pymysql://{username}:{password}@{host}:{port}/{database}'
+        return create_engine(connection_string)
+
+    def get_session(self):
+        return self.Session()
+
+    def create_tables(self):
+        Base.metadata.create_all(self.engine)
+
+    def drop_table(self, table_class):
+        inspector = inspect(self.engine)
+        if inspector.has_table(table_class.__tablename__):
+            table_class.__table__.drop(self.engine)
+
+
+fake = Faker()
+seed_value = 42
+random.seed(seed_value)
+Faker.seed(seed_value)
+ +products = [ + {"id": 101, "name": "Smartwatch", "price": 150.00}, + {"id": 102, "name": "Laptop", "price": 1200.00}, + {"id": 103, "name": "Smartphone", "price": 800.00}, + {"id": 104, "name": "Tablet", "price": 400.00}, + {"id": 105, "name": "Headphones", "price": 100.00} +] + +regions = ["North America", "Europe", "Asia", "South America", "Africa"] + + +def generate_sales_data(session, num_records): + sales_data_list = [] + for _ in range(num_records): + product = secrets.choice(products) + region = secrets.choice(regions) + sale_date = fake.date_between(start_date='-1y', end_date='today') + sales_data = SalesData( + product_id=product["id"], + product_name=product["name"], + sale_date=sale_date, + region=region + ) + sales_data_list.append(sales_data) + session.bulk_save_objects(sales_data_list) + session.commit() + + +if __name__ == "__main__": + db = MySQLDatabase() + + db.drop_table(SalesData) + db.create_tables() + + session = db.get_session() + + generate_sales_data(session, 20000) + + train_vanna_for_sales_data(""" + CREATE TABLE sales_data ( + sale_id INT PRIMARY KEY AUTO_INCREMENT, + product_id INT, + product_name VARCHAR(255), + sale_date DATE, + region VARCHAR(255) + ) + """) diff --git a/examples/use-rdb-resource/myvanna.py b/examples/use-rdb-resource/myvanna.py new file mode 100644 index 000000000..9d3b3ce7e --- /dev/null +++ b/examples/use-rdb-resource/myvanna.py @@ -0,0 +1,31 @@ +import os + +from dotenv import load_dotenv +from vanna.chromadb import ChromaDB_VectorStore +from vanna.openai import OpenAI_Chat + +load_dotenv() + +db_user = os.getenv('DB_USERNAME') +db_password = os.getenv('DB_PASSWORD') +db_host = os.getenv('DB_HOST') +db_port = int(os.getenv('DB_PORT')) +db_database = os.getenv('DB_NAME') +openai_api_key = os.getenv('OPENAI_API_KEY') + + +class MyVanna(ChromaDB_VectorStore, OpenAI_Chat): + def __init__(self, config=None): + ChromaDB_VectorStore.__init__(self, config=config) + OpenAI_Chat.__init__(self, config=config) + + +def train_vanna_for_sales_data(ddl): + vn_openai = MyVanna(config={'model': 'gpt-4o', 'api_key': openai_api_key}) + vn_openai.train(ddl=ddl) + + +def generate_sql_from_prompt(question) -> str: + vn_openai = MyVanna(config={'model': 'gpt-4o', 'api_key': openai_api_key}) + vn_openai.connect_to_mysql(host=db_host, dbname=db_database, user=db_user, password=db_password, port=db_port) + return vn_openai.generate_sql(question) diff --git a/examples/use-rdb-resource/pyproject.toml b/examples/use-rdb-resource/pyproject.toml new file mode 100644 index 000000000..58c170130 --- /dev/null +++ b/examples/use-rdb-resource/pyproject.toml @@ -0,0 +1,23 @@ +[tool.poetry] +name = "use-rdb-resource" +version = "0.1.0" +description = "" +authors = ["Your Name "] +readme = "README.md" + +[tool.poetry.dependencies] +python = ">=3.12,<3.13" +pymysql = "^1.1.1" +kaleido = "0.2.1" +vanna = "^0.7.3" +chromadb = "^0.5.11" +faker = "^30.1.0" +sqlalchemy = "^2.0.35" +openai = "^1.51.2" +openssa = "^0.24.10.10" +plotly = "^5.24.1" + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/openssa/__init__.py b/openssa/__init__.py index 9913ac767..ca6daf92d 100644 --- a/openssa/__init__.py +++ b/openssa/__init__.py @@ -21,6 +21,7 @@ from .core.reasoning.simple.simple_reasoner import SimpleReasoner from .core.resource.file import FileResource +from .core.resource.db import DbResource from .core.task.task import Task diff --git a/openssa/core/resource/db.py b/openssa/core/resource/db.py index 3af58cf6b..86f62b98a 100644 --- 
a/openssa/core/resource/db.py
+++ b/openssa/core/resource/db.py
@@ -2,15 +2,84 @@
 ==============================================
 [future work] Database Informational Resources
 ==============================================
-"""
+This module contains the `DbResource` class,
+which enables querying information from relational databases.
+"""
 
 from __future__ import annotations
 
 from .base import BaseResource
+from dataclasses import dataclass
+from typing import Any
+import os
+from sqlalchemy import create_engine, text
+from sqlalchemy.orm import sessionmaker
+from llama_index.core import SummaryIndex, Document
+
 from ._global import global_register
 
 
+class MySQLDatabase:
+    def __init__(self):
+        self.engine = self.create_engine()
+        self.Session = sessionmaker(bind=self.engine)
+        self.config = {
+            'host': os.getenv('DB_HOST'),
+            'database': os.getenv('DB_NAME')
+        }
+
+    def create_engine(self):
+        username = os.getenv('DB_USERNAME', 'root')
+        password = os.getenv('DB_PASSWORD', '')
+        host = os.getenv('DB_HOST', 'localhost')
+        port = os.getenv('DB_PORT', '3306')
+        database = os.getenv('DB_NAME')
+        connection_string = f'mysql+pymysql://{username}:{password}@{host}:{port}/{database}'
+        return create_engine(connection_string)
+
+    def get_session(self):
+        return self.Session()
+
+
 @global_register
+@dataclass
 class DbResource(BaseResource):
     """Database Informational Resource."""
 
+    query: str
+
+    def __post_init__(self):
+        """Post-initialize database resource."""
+        self.db = MySQLDatabase()
+
+    @property
+    def unique_name(self) -> str:
+        """Return globally-unique name of Resource."""
+        return f"DBResource_{self.db.config['host']}_{self.db.config['database']}"
+
+    @property
+    def name(self) -> str:
+        """Return potentially non-unique, but informationally helpful name of Resource."""
+        return f"Database at {self.db.config['host']}/{self.db.config['database']}"
+
+    def fetch_data(self) -> list[tuple[Any]]:
+        """Fetch data from the database using the provided query."""
+        session = self.db.get_session()
+        try:
+            result = session.execute(text(self.query))
+            return result.fetchall()
+        finally:
+            session.close()
+
+    def answer(self, question: str, n_words: int = 1000) -> str:
+        """Answer question from database-stored Informational Resource."""
+        data = self.fetch_data()
+        # Convert the rows fetched from the database into llama_index documents
+        documents = [Document(text=str(row[0]), metadata={'id': row[1]}) for row in data]
+        index = SummaryIndex.from_documents(documents)
+        query_engine = index.as_query_engine()
+        response = query_engine.query(question)
+        return str(response)
diff --git a/pyproject.toml b/pyproject.toml
index 4de7a7233..822130408 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -102,6 +102,7 @@ streamlit-mic-recorder = {version = ">=0.0.8", optional = true}
 
 langchainhub = ">=0.1"
 faiss-cpu = ">=1.8"
+pymysql = "^1.1.1"
 
 [tool.poetry.extras]
 contrib = [
diff --git a/tests/core/resource/test_db_resource.py b/tests/core/resource/test_db_resource.py
new file mode 100644
index 000000000..48d7e9c82
--- /dev/null
+++ b/tests/core/resource/test_db_resource.py
@@ -0,0 +1,27 @@
+from dotenv import load_dotenv
+from openssa.core.resource.db import DbResource
+
+load_dotenv()
+
+
+# TODO: Fix hallucination
+# Given Data: [(1, 'Laptop', 100000), (2, 'Smartphone', 60000), (3, 'Headphones', 8000), (4, 'Keyboard', 3000), (5, 'Mouse', 2000), (6, 'Monitor', 25000), (7, 'Tablet', 50000), (8, 'Smartwatch', 20000), (9, 'Camera', 45000), (10, 'Speaker', 15000)]
+# Answer: The item that is the most expensive from the given data is the Camera.
+# The answer should be: Laptop (100000).
+# How to fix it? Make the query more targeted via Vanna, or change how rows are turned into documents for llama_index? (One possible direction is sketched at the end of this diff.)
+
+def test_db_resource():
+    test_query = "SELECT * FROM items"
+    test_question = "Which item is the most expensive from given data?"
+
+    rdb1 = DbResource(query=test_query)
+    # print(f"unique name = {rdb1.name}")
+    # print(f"unique name = {rdb1.unique_name}")
+    # print(f"answer = {rdb1.answer(test_question)}")
+    # print(f"summary = {rdb1.get_summary()}")
+    _ = rdb1.answer(test_question)
+    # assert isinstance(answer, str)
diff --git a/tests/core/resource/test_webpage_resource.py b/tests/core/resource/test_webpage_resource.py
index 9ee7c4319..ffd8002b1 100644
--- a/tests/core/resource/test_webpage_resource.py
+++ b/tests/core/resource/test_webpage_resource.py
@@ -16,4 +16,3 @@ def test_webpage_resource():
     _ = webpage1.answer(test_question)
     _ = webpage1.get_summary()
     # assert isinstance(answer, str)
-    # assert isinstance(summary, str)
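
A note on the `# TODO: Fix hallucination` in `tests/core/resource/test_db_resource.py`: in `DbResource.answer()`, the document text for each row is `str(row[0])` (the id column) and the product name lands in metadata under the key `'id'`, while the price column never reaches the LLM at all, which is consistent with the wrong "Camera" answer. One possible direction, given here only as a hedged sketch and not as part of this change set (the helper name and the "column=value" serialization format are assumptions), is to feed whole rows into the index:

```python
import os

from llama_index.core import Document, SummaryIndex
from sqlalchemy import create_engine, text


def answer_over_full_rows(sql: str, question: str) -> str:
    """Run `sql`, serialize every column of every row, and answer `question` over that text."""
    url = (
        f"mysql+pymysql://{os.getenv('DB_USERNAME', 'root')}:{os.getenv('DB_PASSWORD', '')}"
        f"@{os.getenv('DB_HOST', 'localhost')}:{os.getenv('DB_PORT', '3306')}/{os.getenv('DB_NAME')}"
    )
    with create_engine(url).connect() as conn:
        result = conn.execute(text(sql))
        columns = list(result.keys())   # column names, so values keep their meaning
        rows = result.fetchall()
    # One document per row; every column is rendered as "name=value" so nothing is dropped
    documents = [
        Document(text=", ".join(f"{col}={val}" for col, val in zip(columns, row)))
        for row in rows
    ]
    index = SummaryIndex.from_documents(documents)
    return str(index.as_query_engine().query(question))
```

Whether this belongs inside `DbResource` itself or is better handled by asking Vanna for a more targeted query (for example, one with `ORDER BY price DESC LIMIT 1`) is exactly the open question the TODO raises.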