Skip to content

Commit

Permalink
Download submissions
Browse files Browse the repository at this point in the history
  • Loading branch information
nbirillo committed Jul 12, 2023
1 parent 45413bc commit 38baea8
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 20 deletions.
91 changes: 71 additions & 20 deletions jba/src/jba/gathering/gather_submissions_info.py
Original file line number Diff line number Diff line change
@@ -1,63 +1,108 @@
import json

import argparse
import logging
import os
from typing import List

import pandas as pd
import requests
import sys
from dotenv import load_dotenv
from typing import List, Optional

from core.model.column_name import SubmissionColumns
from core.utils.df_utils import filter_df_by_single_value, read_df, write_df
from core.utils.file.file_utils import get_output_path
from core.utils.logging_utils import configure_logger
from jba.gathering.query_info_storage import QueryInfoStorage
from jba.models.edu_columns import EduColumnName

logger = logging.getLogger(__name__)


def _get_submissions_by_course_and_user_id(course_id: int, user_id: str) -> pd.DataFrame:
load_dotenv()
auth_secret = os.getenv('JBA_API_AUTH_SECRET')
base_end_point = os.getenv('BASE_END_POINT')
headers = {"Authorization": f"Bearer {auth_secret}"}

def _get_submissions_by_course_and_user_id(query_storage_info: QueryInfoStorage,
                                           course_id: int, user_id: str,
                                           to_gather_code: bool = False) -> pd.DataFrame:
    """
    Gather all submissions of one user in one course, page by page.

    Pages are requested until the response reports ``has_next`` as false.
    Gathering also stops early when a request does not return status 200
    (an error is logged) or when a page contains no submissions.

    :param query_storage_info: provides the base end point and the auth headers for requests.
    :param course_id: id of the course to gather submissions for.
    :param user_id: id of the user to gather submissions for.
    :param to_gather_code: if True, every submission row additionally gets
        a code snippets column filled by ``_get_solution_from_s3``.
    :return: concatenation of all gathered pages; an empty dataframe with
        the expected columns if no page was gathered.
    """
    has_next = True
    page = 0
    submissions_dfs = []
    logger.info(f'Start getting submissions for course {course_id}, user {user_id}')
    while has_next:
        endpoint = f'{query_storage_info.base_end_point}/admin/course/{course_id}/user/{user_id}/submissions/all?page={page}'
        response = requests.get(endpoint, headers=query_storage_info.get_auth_headers())
        if response.status_code != 200:
            logger.error(f'Can not gather submissions for course {course_id}, user {user_id}, and page {page}')
            break
        submissions = response.json()
        submissions_df = pd.DataFrame(submissions['submissions'])
        # A page without rows means there is nothing more to gather.
        if submissions_df.empty:
            break
        if to_gather_code:
            submissions_df[EduColumnName.CODE_SNIPPETS.value] = submissions_df.apply(
                lambda row: _get_solution_from_s3(query_storage_info, row), axis=1)
        submissions_dfs.append(submissions_df)
        logger.info(f'Page {page} was handled successfully')
        has_next = submissions['has_next']
        page += 1
    logger.info(f'Submissions for course {course_id}, user {user_id} were gathered successfully')
    if not submissions_dfs:
        # The names must be passed as ``columns``: passed positionally they
        # become data rows, and those rows would leak into the final result.
        return pd.DataFrame(columns=[
            EduColumnName.ID.value,
            EduColumnName.TASK_ID.value,
            EduColumnName.SOLUTION_AWS_KEY.value,
            SubmissionColumns.TIME.value,
            EduColumnName.FORMAT_VERSION.value,
            EduColumnName.UPDATE_VERSION.value,
            EduColumnName.STATUS.value,
            EduColumnName.CHECKER_OUTPUT.value,
            EduColumnName.TASK_TYPE.value,
            EduColumnName.USER_ID.value,
            EduColumnName.UUID.value,
            EduColumnName.VISIBILITY.value,
            EduColumnName.TASK_NAME.value,
            EduColumnName.CODE_SNIPPETS.value,
        ])
    return pd.concat(submissions_dfs)


def _get_solution_from_s3(query_storage_info: QueryInfoStorage, row: pd.Series) -> Optional[str]:
    """
    Download the solution files of a single submission.

    The solution key is read from the submission row and exchanged for
    a download link, then the content behind that link is fetched.

    :param query_storage_info: provides the base end point and the auth headers for requests.
    :param row: one submission row; must contain the solution AWS key column.
    :return: JSON string with a list of ``{"name": ..., "text": ...}`` objects,
        an empty string if the downloaded content is empty,
        or None if any of the two requests does not return status 200.
    """
    aws_key = row[EduColumnName.SOLUTION_AWS_KEY.value]
    endpoint = f'{query_storage_info.base_end_point}/solution?solutionKey={aws_key}'
    response = requests.get(endpoint, headers=query_storage_info.get_auth_headers())
    if response.status_code != 200:
        logger.error(f'Can not gather code for aws key: {aws_key}')
        return None
    # The first response body is the link to download the solution from.
    s3_link = response.content.decode('utf-8')
    solution_response = requests.get(s3_link)
    if solution_response.status_code != 200:
        logger.error(f'Can not gather code for s3 link: {s3_link}')
        return None
    # An empty body can not be parsed as JSON; no need to decode it to find that out.
    if not solution_response.content:
        return ''
    # Keep only the file name and its text for every file of the solution.
    solution = json.dumps([
        {key: solution_file[key] for key in ('name', 'text')}
        for solution_file in solution_response.json()
    ])
    logger.info(f'User solution is {solution}')
    return solution


def _get_submissions_by_course_id_and_users(course_id: int, user_ids: List[str]) -> pd.DataFrame:
def _get_submissions_by_course_id_and_users(query_storage_info: QueryInfoStorage, course_id: int, user_ids: List[str],
                                            to_gather_code: bool = False) -> pd.DataFrame:
    """
    Gather submissions of several users in one course.

    :param query_storage_info: provides the base end point and the auth headers for requests.
    :param course_id: id of the course to gather submissions for.
    :param user_ids: ids of the users to gather submissions for.
    :param to_gather_code: passed through to the per-user gathering.
    :return: concatenation of the per-user dataframes;
        an empty dataframe if ``user_ids`` is empty.
    """
    submission_dfs = []
    for user_id in user_ids:
        logger.info(f'------------START HANDLING USER {user_id}------------')
        submission_dfs.append(
            _get_submissions_by_course_and_user_id(query_storage_info, course_id, user_id, to_gather_code))
        logger.info(f'------------FINISH HANDLING USER {user_id}------------')
    # pd.concat raises ValueError on an empty list, so handle "no users" explicitly.
    if not submission_dfs:
        return pd.DataFrame()
    return pd.concat(submission_dfs)


def _get_submission_keys(course_data_df: pd.DataFrame) -> pd.DataFrame:
def _get_submissions(query_storage_info: QueryInfoStorage,
                     course_data_df: pd.DataFrame,
                     to_gather_code: bool = False) -> pd.DataFrame:
    """
    Gather submissions for every course and user listed in the course data.

    For each unique course id the unique user ids of that course are
    collected and their submissions are gathered.

    :param query_storage_info: provides the base end point and the auth headers for requests.
    :param course_data_df: course data; must contain the course id and user id columns.
    :param to_gather_code: passed through to the per-user gathering.
    :return: concatenation of the per-course dataframes;
        an empty dataframe if the course data contains no courses.
    """
    submissions_dfs = []
    course_ids = course_data_df[EduColumnName.COURSE_ID.value].unique()
    for course_id in course_ids:
        logger.info(f'------START HANDLING COURSE {course_id}------')
        user_ids = filter_df_by_single_value(course_data_df, EduColumnName.COURSE_ID.value, course_id)[
            EduColumnName.USER_ID.value].unique()
        submissions_by_course_id_and_users = \
            _get_submissions_by_course_id_and_users(query_storage_info, course_id, user_ids, to_gather_code)
        submissions_dfs.append(submissions_by_course_id_and_users)
        logger.info(f'------FINISH HANDLING COURSE {course_id}------')
    # pd.concat raises ValueError on an empty list, so handle "no courses" explicitly.
    if not submissions_dfs:
        return pd.DataFrame()
    return pd.concat(submissions_dfs)
Expand All @@ -68,6 +113,10 @@ def configure_parser(parser: argparse.ArgumentParser) -> None:
parser.add_argument('preprocessed_course_data_path', type=str,
help='Path to .csv file with preprocessed course data.')
parser.add_argument('--log-path', type=str, default=None, help='Path to directory for log.')
parser.add_argument(
'--gather-code', action='store_true',
help='Indicates if you need to download students code.',
)


if __name__ == '__main__':
Expand All @@ -78,4 +127,6 @@ def configure_parser(parser: argparse.ArgumentParser) -> None:
configure_logger(args.preprocessed_course_data_path, 'submissions_info', args.log_path)
output_path = get_output_path(args.preprocessed_course_data_path, '_submissions_info')
course_df = read_df(args.preprocessed_course_data_path)
write_df(_get_submission_keys(course_df), output_path)
query_storage_info = QueryInfoStorage()
submissions_df = _get_submissions(query_storage_info, course_df, to_gather_code=args.gather_code)
write_df(submissions_df, output_path)
25 changes: 25 additions & 0 deletions jba/src/jba/gathering/query_info_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import os
from dataclasses import dataclass
from dotenv import load_dotenv
from typing import Optional


@dataclass
class QueryInfoStorage:
    """
    Holds the settings required to query the API: the auth secret and the base end point.

    Every setting can be passed explicitly; a setting that is not passed
    is read from the environment (``JBA_API_AUTH_SECRET`` and ``BASE_END_POINT``).
    """
    # NOTE: both values are None if they are neither passed nor set in the environment.
    auth_secret: str
    base_end_point: str

    def __init__(self, auth_secret: Optional[str] = None, base_end_point: Optional[str] = None):
        """
        :param auth_secret: secret for the Authorization header; read from the environment if None.
        :param base_end_point: base url of the API; read from the environment if None.
        """
        # Load the .env file once, before any environment lookup below.
        load_dotenv()
        self.auth_secret = self._get_env_value(auth_secret, 'JBA_API_AUTH_SECRET')
        self.base_end_point = self._get_env_value(base_end_point, 'BASE_END_POINT')

    def get_auth_headers(self) -> dict:
        """Return the request headers with the bearer authorization."""
        return {"Authorization": f"Bearer {self.auth_secret}"}

    @classmethod
    def _get_env_value(cls, value: Optional[str], env_name: str) -> Optional[str]:
        """
        Return ``value`` if it is given, otherwise the environment variable ``env_name``.

        :return: the resolved value, or None if ``value`` is None and the variable is not set.
        """
        if value is None:
            return os.getenv(env_name)
        return value
8 changes: 8 additions & 0 deletions jba/src/jba/models/edu_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ class EduColumnName(Enum):
STATUS = 'status'
USER_ID = 'user_id'
TASK_TYPE = 'task_type'
CODE_SNIPPETS = 'code_snippets'
UUID = 'uuid'

TASK_ID = 'task_id'
TASK_GLOBAL_NUMBER = 'task_global_number'
Expand All @@ -26,6 +28,12 @@ class EduColumnName(Enum):
SECTION_NUMBER = 'section_number'
SECTIONS_AMOUNT = 'sections_amount'

SOLUTION_AWS_KEY = 'solution_aws_key'
FORMAT_VERSION = 'format_version'
UPDATE_VERSION = 'update_version'
CHECKER_OUTPUT = 'checker_output'
VISIBILITY = 'visibility'


@unique
class EduTaskStatus(Enum):
Expand Down

0 comments on commit 38baea8

Please sign in to comment.