Skip to content

Commit

Permalink
update scripts with all feedback (syntax, exceptions, etc.)
Browse files Browse the repository at this point in the history
  • Loading branch information
naishasinha committed Aug 9, 2024
1 parent f878763 commit 575abf7
Show file tree
Hide file tree
Showing 10 changed files with 158 additions and 210 deletions.
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ name = "pypi"

[packages]
flickrapi = "*"
GitPython = "*"
google-api-python-client = "*"
internetarchive = "*"
jupyterlab = ">=3.6.7"
Expand All @@ -18,7 +19,6 @@ requests = ">=2.31.0"
seaborn = "*"
urllib3 = ">=1.26.18"
wordcloud = "*"
gitpython = "*"

[dev-packages]
black = "*"
Expand Down
Binary file modified data/2024Q2/3-report/gcs_country_report.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified data/2024Q2/3-report/gcs_language_report.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified data/2024Q2/3-report/gcs_licensetype_report.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
10 changes: 6 additions & 4 deletions data/2024Q2/README.md
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@

# 2024Q2 Quantifying the Commons
<!-- GCS Start -->
## Data Source: Google Custom Search


<!-- Country Report Start -->
### Country Report
![Number of Google Webpages Licensed by Country](3-report/gcs_country_report.png)
Number of Google Webpages Licensed by Country

<!-- Country Report End -->
<!-- License Type Report Start -->
### License Type Report
![Number of Webpages Licensed by License Type](3-report/gcs_licensetype_report.png)
Number of Webpages Licensed by License Type

<!-- License Type Report End -->
<!-- Language Report Start -->
### Language Report
![Number of Google Webpages Licensed by Language](3-report/gcs_language_report.png)
Number of Google Webpages Licensed by Language
<!-- Language Report End -->
<!-- GCS End -->
Binary file modified data/2024Q3/3-report/gcs_licensetype_report.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
32 changes: 13 additions & 19 deletions scripts/1-fetch/gcs_fetched.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def set_up_data_file():
"LICENSE TYPE, No Priori, United States, Canada, "
"India, United Kingdom, Australia, Japan, "
"English, Spanish, French, Arabic, "
"Chinese (Simplified), Indonesian\r\n"
"Chinese (Simplified), Indonesian\n"
# "LICENSE TYPE,No Priori,Australia,Brazil,Canada,Egypt,"
# "Germany,India,Japan,Spain,"
# "United Kingdom,United States,Arabic,"
Expand Down Expand Up @@ -333,12 +333,8 @@ def record_results(results):

def main():

try:
# Fetch and merge changes
shared.fetch_and_merge(PATHS["repo"])
except shared.GitOperationError as e:
LOGGER.error(f"Fetch and merge failed: {e}")
sys.exit(e.exit_code)
# Fetch and merge changes
shared.fetch_and_merge(PATHS["repo"])

args = parse_arguments()
state = load_state()
Expand Down Expand Up @@ -378,24 +374,22 @@ def main():
state["total_records_retrieved"] = total_records_retrieved
save_state(state)

try:
# Add and commit changes
shared.add_and_commit(PATHS["repo"], "Fetched and updated new data")
except shared.GitOperationError as e:
LOGGER.error(f"Add and commit failed: {e}")
sys.exit(e.exit_code)
# Add and commit changes
shared.add_and_commit(PATHS["repo"], "Added and committed new reports")

try:
# Push changes
shared.push_changes(PATHS["repo"])
except shared.GitOperationError as e:
LOGGER.error(f"Push changes failed: {e}")
sys.exit(e.exit_code)
# Push changes
shared.push_changes(PATHS["repo"])


if __name__ == "__main__":
try:
main()
except shared.QuantifyingException as e:
if e.exit_code == 0:
LOGGER.info(e.message)
else:
LOGGER.error(e.message)
sys.exit(e.exit_code)
except SystemExit as e:
LOGGER.error(f"System exit with code: {e.code}")
sys.exit(e.code)
Expand Down
34 changes: 13 additions & 21 deletions scripts/2-process/gcs_processed.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,35 +158,27 @@


def main():
    """
    Placeholder entry point for Phase 2 (processing).

    No processing is implemented yet, so this always stops the script by
    raising an exception that carries exit code 0.

    Raises:
        shared.QuantifyingException: always, with the message
            "No current code for Phase 2" and exit code 0.
    """
    # The message is not logged here: the handler that catches the
    # exception reports it, so logging it as well would emit it twice.
    raise shared.QuantifyingException("No current code for Phase 2", 0)

    # TODO: once Phase 2 processing exists, bracket it with the git steps:
    # shared.fetch_and_merge(PATHS["repo"])
    # shared.add_and_commit(PATHS["repo"], "Fetched and updated new data")
    # shared.push_changes(PATHS["repo"])


if __name__ == "__main__":
    try:
        main()
    except shared.QuantifyingException as e:
        # Exit code 0 marks an expected early stop, so it is reported as
        # information rather than as an error.
        if e.exit_code == 0:
            LOGGER.info(e.message)
        else:
            LOGGER.error(e.message)
        # Exit with the same `exit_code` attribute tested above; `.code`
        # is the attribute of SystemExit, not of this exception.
        sys.exit(e.exit_code)
    except SystemExit as e:
        LOGGER.error(f"System exit with code: {e.code}")
        sys.exit(e.code)
Expand Down
176 changes: 21 additions & 155 deletions scripts/3-report/gcs_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,140 +67,6 @@ def load_data(args):
return data


def update_readme(image_path, description, section_title, args):
    """
    Update the README.md file with the generated images and descriptions.

    The README lives at PATHS["data"]/<args.quarter>/README.md. All Google
    Custom Search content sits between the "<!-- GCS Start -->" and
    "<!-- GCS End -->" markers; each report has its own
    "<!-- {section_title} Start/End -->" marker pair inside that section.
    An existing report block is replaced in place, otherwise a new block
    is inserted just before the GCS end marker.

    Args:
        image_path: path of the generated image; written to the README
            relative to the README's directory.
        description: text used both as the image alt text and as the
            caption line below the image.
        section_title: report heading, also used to build the report's
            marker comments.
        args: object whose ``quarter`` attribute names the quarter
            directory and the README title.
    """
    readme_path = os.path.join(PATHS["data"], args.quarter, "README.md")
    section_marker_start = "<!-- GCS Start -->"
    section_marker_end = "<!-- GCS End -->"
    data_source_title = "## Data Source: Google Custom Search"

    # Markers delimiting this particular report inside the GCS section
    specific_section_start = f"<!-- {section_title} Start -->"
    specific_section_end = f"<!-- {section_title} End -->"

    # Convert image path to a path relative to the README's directory
    rel_image_path = os.path.relpath(image_path, os.path.dirname(readme_path))

    if os.path.exists(readme_path):
        with open(readme_path, "r") as f:
            lines = f.readlines()
    else:
        lines = []

    # Locate the main GCS section (the last occurrence of each marker wins)
    section_start = None
    section_end = None
    for i, line in enumerate(lines):
        if section_marker_start in line:
            section_start = i
        if section_marker_end in line:
            section_end = i

    if section_start is None or section_end is None:
        # The main section is missing: append it. Terminate the current
        # last line first so the new title cannot be glued onto it.
        if lines and not lines[-1].endswith("\n"):
            lines[-1] += "\n"
        lines.extend(
            [
                f"# {args.quarter} Quantifying the Commons\n",
                f"{section_marker_start}\n",
                f"{data_source_title}\n\n",
                f"{section_marker_end}\n",
            ]
        )
        # Of the four entries just appended, the start marker is third
        # from the end and the end marker is last.
        section_start = len(lines) - 3
        section_end = len(lines) - 1

    # Locate this report's markers within the main section
    specific_start = None
    specific_end = None
    for i in range(section_start, section_end):
        if specific_section_start in lines[i]:
            specific_start = i
        if specific_section_end in lines[i]:
            specific_end = i

    # The report block is the same whether it replaces an old block or is
    # newly inserted
    new_content = [
        f"{specific_section_start}\n",
        f"### {section_title}\n",
        f"![{description}]({rel_image_path})\n",
        f"{description}\n",
        f"{specific_section_end}\n",
    ]

    if specific_start is not None and specific_end is not None:
        # Replace everything from the start marker through the end marker
        lines = (
            lines[:specific_start]
            + new_content
            + lines[specific_end + 1 :]  # noqa: E203
        )
    else:
        # No complete block for this report yet: insert it immediately
        # before the main section's end marker
        lines = lines[:section_end] + new_content + lines[section_end:]

    # Write back to the README.md file
    with open(readme_path, "w") as f:
        f.writelines(lines)

    LOGGER.info(f"Updated {readme_path} with new image and description.")

def visualize_by_country(data, args):
"""
Create a bar chart for the number of webpages licensed by country.
Expand Down Expand Up @@ -265,8 +131,10 @@ def visualize_by_country(data, args):

plt.show()

update_readme(
shared.update_readme(
PATHS,
image_path,
"Google Custom Search",
"Number of Google Webpages Licensed by Country",
"Country Report",
args,
Expand Down Expand Up @@ -332,8 +200,10 @@ def millions_formatter(x, pos):

plt.show()

update_readme(
shared.update_readme(
PATHS,
image_path,
"Google Custom Search",
"Number of Webpages Licensed by License Type",
"License Type Report",
args,
Expand Down Expand Up @@ -406,8 +276,10 @@ def visualize_by_language(data, args):

plt.show()

update_readme(
shared.update_readme(
PATHS,
image_path,
"Google Custom Search",
"Number of Google Webpages Licensed by Language",
"Language Report",
args,
Expand All @@ -418,12 +290,8 @@ def visualize_by_language(data, args):

def main():

try:
# Fetch and merge changes
shared.fetch_and_merge(PATHS["repo"])
except shared.GitOperationError as e:
LOGGER.error(f"Fetch and merge failed: {e}")
sys.exit(e.exit_code)
# Fetch and merge changes
shared.fetch_and_merge(PATHS["repo"])

args = parse_arguments()

Expand All @@ -438,24 +306,22 @@ def main():
visualize_by_license_type(data, args)
visualize_by_language(data, args)

try:
# Add and commit changes
shared.add_and_commit(PATHS["repo"], "Added and committed new reports")
except shared.GitOperationError as e:
LOGGER.error(f"Add and commit failed: {e}")
sys.exit(e.exit_code)
# Add and commit changes
shared.add_and_commit(PATHS["repo"], "Added and committed new reports")

try:
# Push changes
shared.push_changes(PATHS["repo"])
except shared.GitOperationError as e:
LOGGER.error(f"Push changes failed: {e}")
sys.exit(e.exit_code)
# Push changes
shared.push_changes(PATHS["repo"])


if __name__ == "__main__":
try:
main()
except shared.QuantifyingException as e:
if e.exit_code == 0:
LOGGER.info(e.message)
else:
LOGGER.error(e.message)
sys.exit(e.exit_code)
except SystemExit as e:
LOGGER.error(f"System exit with code: {e.code}")
sys.exit(e.code)
Expand Down
Loading

0 comments on commit 575abf7

Please sign in to comment.