Skip to content

Commit

Permalink
Merge pull request #469 from softeerbootcamp-2nd/refactor/cache-enhance
Browse files Browse the repository at this point in the history
[FEAT] #225: DataFrame 캐싱을 통한 성능향상 시도
  • Loading branch information
tank3a authored Aug 27, 2023
2 parents e695375 + 66639c7 commit 49c95ff
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 19 deletions.
Binary file modified backend-recommend/__pycache__/recommend.cpython-311.pyc
Binary file not shown.
Binary file not shown.
45 changes: 26 additions & 19 deletions backend-recommend/recommend.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from mlxtend.frequent_patterns import association_rules
import pymysql
import time
import redisConnection
import os
from dotenv import load_dotenv
from flask import jsonify
Expand All @@ -13,7 +14,7 @@
def mysql_create_session():
    """Open a MySQL connection using env-var credentials.

    Reads host/user/password/db from the environment (loaded via dotenv
    elsewhere in this module) and returns a ``(connection, cursor)`` pair.
    The caller is responsible for closing the connection.
    """
    connection = pymysql.connect(
        host=os.getenv('host'),
        user=os.getenv('user'),
        password=os.getenv('password'),
        db=os.getenv('db'),
    )
    return connection, connection.cursor()

def recByApriori(body):
start = time.time()
Expand All @@ -32,26 +33,32 @@ def recByApriori(body):
input = set(input)
dataset = []

conn, cur = mysql_create_session()
try:
cur.execute('SELECT hm.history_id, sh.sold_count, sh.sold_options_id FROM SalesHistory sh INNER JOIN HistoryModelMapper hm ON sh.history_id = hm.history_id WHERE sh.car_id = %s AND hm.model_id IN (%s, %s, %s) GROUP BY hm.history_id HAVING COUNT(DISTINCT hm.model_id) = 3;', (carId, powerTrainId, bodyTypeId, operationId))
dbRow = cur.fetchall()
finally:
conn.close()

for j in range(len(dbRow)):
oneRow = dbRow[j][2]
if(oneRow == ''):
continue
options = oneRow.split(",")
for i in range(int(dbRow[j][1])):
dataset.append(options)
df = redisConnection.redis_getData()
if df is None:
conn, cur = mysql_create_session()
try:
cur.execute('SELECT hm.history_id, sh.sold_count, sh.sold_options_id FROM SalesHistory sh INNER JOIN HistoryModelMapper hm ON sh.history_id = hm.history_id WHERE sh.car_id = %s AND hm.model_id IN (%s, %s, %s) GROUP BY hm.history_id HAVING COUNT(DISTINCT hm.model_id) = 3;', (carId, powerTrainId, bodyTypeId, operationId))
dbRow = cur.fetchall()
finally:
conn.close()

for j in range(len(dbRow)):
oneRow = dbRow[j][2]
if(oneRow == ''):
continue
options = oneRow.split(",")
for i in range(int(dbRow[j][1])):
dataset.append(options)

start = time.time()
te = TransactionEncoder()
te_ary = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)
redisConnection.redis_setData(df)

start = time.time()
te = TransactionEncoder()
te_ary = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)
df = df.iloc[:100000]


frequent_itemsets = apriori(df, min_support=0.01, use_colnames=True)

result_itemsets = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.1)
Expand Down
22 changes: 22 additions & 0 deletions backend-recommend/redisConnection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import io
import redis
import pandas as pd
import os

# Module-level connection pool shared by redis_getData/redis_setData below.
# Host and password come from environment variables; db 0, default Redis port.
redis_pool = redis.ConnectionPool(host=os.getenv('redis_host'), port=6379, password=os.getenv('redis_password'), db=0)

def redis_getData():
    """Return the cached DataFrame stored under ``rec_dataframe``.

    Returns ``None`` on a cache miss; otherwise unpickles the stored
    bytes back into a pandas DataFrame.
    """
    with redis.StrictRedis(connection_pool=redis_pool) as conn:
        raw = conn.get("rec_dataframe")
    if raw is None:
        return None
    # NOTE(review): unpickling assumes only this service writes the key —
    # pickle.loads on attacker-controlled data would be unsafe.
    return pd.read_pickle(io.BytesIO(raw))

def redis_setData(df):
    """Pickle *df* and cache it in Redis under ``rec_dataframe``.

    The entry expires after one week so a stale recommendation
    DataFrame is eventually recomputed from the database.

    :param df: pandas DataFrame to cache.
    """
    buffer = io.BytesIO()
    df.to_pickle(buffer)
    with redis.StrictRedis(connection_pool=redis_pool) as conn:
        # getvalue() replaces the seek(0)/read() pair; ex= names the TTL
        # explicitly instead of relying on set()'s positional argument order.
        conn.set('rec_dataframe', buffer.getvalue(), ex=60 * 60 * 24 * 7)

0 comments on commit 49c95ff

Please sign in to comment.