Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gcs database #12817

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
2 changes: 1 addition & 1 deletion bin/cron.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import babis
import sentry_sdk
from apscheduler.schedulers.blocking import BlockingScheduler
from db_s3_utils import DATA_PATH
from db_utils import DATA_PATH
from sentry_sdk.integrations.logging import LoggingIntegration

# ROOT path of the project. A pathlib.Path object.
Expand Down
File renamed without changes.
22 changes: 18 additions & 4 deletions bin/run-db-download.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@

import os
import sys
from pathlib import Path

import requests
from db_s3_utils import (
from db_utils import (
DATA_PATH,
DB_FILE,
JSON_DATA_FILE_NAME,
Expand All @@ -18,13 +19,26 @@
set_db_data,
)

BUCKET_NAME = os.getenv("AWS_DB_S3_BUCKET", "bedrock-db-dev")
REGION_NAME = os.getenv("AWS_DB_REGION", "us-west-2")
# ROOT path of the project. A pathlib.Path object.
ROOT_PATH = Path(__file__).resolve().parents[1]
ROOT = str(ROOT_PATH)

# add bedrock to path
sys.path.append(ROOT)

# must import after adding bedrock to path
from bedrock.base.config_manager import config # noqa

BUCKET_NAME = config("AWS_DB_S3_BUCKET", default="bedrock-db-dev")
REGION_NAME = config("AWS_DB_REGION", default="us-west-2")
S3_BASE_URL = f"https://s3-{REGION_NAME}.amazonaws.com/{BUCKET_NAME}"
GCS_BASE_URL = f"https://storage.googleapis.com/{BUCKET_NAME}"
DOWNLOAD_FROM_GCS = config("DOWNLOAD_FROM_GCS", parser=bool, default="false")


def get_file_url(filename):
    """Return the public download URL for ``filename``.

    The URL is rooted at ``GCS_BASE_URL`` when ``DOWNLOAD_FROM_GCS`` is
    truthy and at ``S3_BASE_URL`` otherwise, so the storage backend can be
    switched through configuration alone.
    """
    # Pick the backend first; an unconditional S3 return ahead of this
    # selection would make the GCS branch unreachable.
    base_url = GCS_BASE_URL if DOWNLOAD_FROM_GCS else S3_BASE_URL
    return "/".join([base_url, filename])


def download_db_info():
Expand Down
40 changes: 37 additions & 3 deletions bin/run-db-upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@

import os
import sys
from pathlib import Path
from time import time

import boto3
from boto3.exceptions import Boto3Error
from db_s3_utils import (
from db_utils import (
DB_FILE,
JSON_DATA_FILE,
JSON_DATA_FILE_NAME,
Expand All @@ -19,10 +20,22 @@
get_prev_db_data,
set_db_data,
)
from google.cloud import storage

# ROOT path of the project. A pathlib.Path object.
ROOT_PATH = Path(__file__).resolve().parents[1]
ROOT = str(ROOT_PATH)

# add bedrock to path
sys.path.append(ROOT)

# must import after adding bedrock to path
from bedrock.base.config_manager import config # noqa

CACHE = {}
BUCKET_NAME = os.getenv("AWS_DB_S3_BUCKET", "bedrock-db-dev")
REGION_NAME = os.getenv("AWS_DB_S3_REGION", "us-west-2")
BUCKET_NAME = config("AWS_DB_S3_BUCKET", default="bedrock-db-dev")
REGION_NAME = config("AWS_DB_S3_REGION", default="us-west-2")
UPLOAD_TO_GCS = config("UPLOAD_TO_GCS", parser=bool, default="false")


# Requires setting some environment variables:
Expand All @@ -41,6 +54,15 @@ def s3_client():
return s3


def gcs_client():
    """Return the shared Google Cloud Storage client, creating it on first use.

    The client is memoized in the module-level ``CACHE`` dict under the
    ``"gcs_client"`` key, so repeated calls reuse the same instance.
    """
    cached = CACHE.get("gcs_client")
    if cached:
        return cached

    client = storage.Client()
    CACHE["gcs_client"] = client
    return client


def delete_s3_obj(filename):
s3 = s3_client()
s3.delete_object(Bucket=BUCKET_NAME, Key=filename)
Expand All @@ -63,6 +85,18 @@ def upload_db_data(db_data):
except Boto3Error:
return f"ERROR: Failed to upload the new database info file: {db_data}"

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want anything around the `if not s3` line above, so that if we remove the AWS credentials in the future this doesn't short-circuit and never make it this far? Or would changes around S3 come later?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My idea was to do a multi-step deploy:

  1. Get the code out
  2. Turn on GCS so it uploads to both GCS and S3, let that sit for a while
  3. Turn on download from GCS, let that sit
  4. Come back and remove the s3 references

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, sounds good to me.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bkochendorfer @pmac Can we get this rolled out to prod? Good to do ahead of the postgres/cms tango

if UPLOAD_TO_GCS:
gcs = gcs_client()
bucket = gcs.bucket(BUCKET_NAME)

# upload the database
db_file = bucket.blob(db_data["file_name"])
db_file.upload_from_filename(DB_FILE, predefined_acl="public-read")

# upload the json metadata
db_file_info = bucket.blob(JSON_DATA_FILE_NAME)
db_file_info.upload_from_filename(JSON_DATA_FILE, predefined_acl="public-read")

return 0


Expand Down
156 changes: 156 additions & 0 deletions requirements/dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,12 @@ braceexpand==0.1.7 \
--hash=sha256:91332d53de7828103dcae5773fb43bc34950b0c8160e35e0f44c4427a3b85014 \
--hash=sha256:e6e539bd20eaea53547472ff94f4fb5c3d3bf9d0a89388c4b56663aba765f705
# via -r requirements/dev.in
cachetools==5.3.0 \
--hash=sha256:13dfddc7b8df938c21a940dfa6557ce6e94a2f1cdfa58eb90c805721d58f2c14 \
--hash=sha256:429e1a1e845c008ea6c85aa35d4b98b65d6a9763eeef3e37e92728a12d1de9d4
# via
# -r requirements/prod.txt
# google-auth
certifi==2022.12.7 \
--hash=sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3 \
--hash=sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18
Expand Down Expand Up @@ -435,6 +441,115 @@ glean-parser==6.2.1 \
--hash=sha256:672689a97fe458d38f230b5f8d2e2f8e5c582ce98dd31e38363a6b437639d56c \
--hash=sha256:daa4b7b127a432118733f0ddc77c95bdd7a8b78ac9a4f3254f68b45a25ccb32e
# via -r requirements/prod.txt
google-api-core==2.11.0 \
--hash=sha256:4b9bb5d5a380a0befa0573b302651b8a9a89262c1730e37bf423cec511804c22 \
--hash=sha256:ce222e27b0de0d7bc63eb043b956996d6dccab14cc3b690aaea91c9cc99dc16e
# via
# -r requirements/prod.txt
# google-cloud-core
# google-cloud-storage
google-auth==2.16.1 \
--hash=sha256:5fd170986bce6bfd7bb5c845c4b8362edb1e0cba901e062196e83f8bb5d5d32c \
--hash=sha256:75d76ea857df65938e1f71dcbcd7d0cd48e3f80b34b8870ba229c9292081f7ef
# via
# -r requirements/prod.txt
# google-api-core
# google-cloud-core
# google-cloud-storage
google-cloud-core==2.3.2 \
--hash=sha256:8417acf6466be2fa85123441696c4badda48db314c607cf1e5d543fa8bdc22fe \
--hash=sha256:b9529ee7047fd8d4bf4a2182de619154240df17fbe60ead399078c1ae152af9a
# via
# -r requirements/prod.txt
# google-cloud-storage
google-cloud-storage==2.7.0 \
--hash=sha256:1ac2d58d2d693cb1341ebc48659a3527be778d9e2d8989697a2746025928ff17 \
--hash=sha256:f78a63525e72dd46406b255bbdf858a22c43d6bad8dc5bdeb7851a42967e95a1
# via -r requirements/prod.txt
google-crc32c==1.5.0 \
--hash=sha256:024894d9d3cfbc5943f8f230e23950cd4906b2fe004c72e29b209420a1e6b05a \
--hash=sha256:02c65b9817512edc6a4ae7c7e987fea799d2e0ee40c53ec573a692bee24de876 \
--hash=sha256:02ebb8bf46c13e36998aeaad1de9b48f4caf545e91d14041270d9dca767b780c \
--hash=sha256:07eb3c611ce363c51a933bf6bd7f8e3878a51d124acfc89452a75120bc436289 \
--hash=sha256:1034d91442ead5a95b5aaef90dbfaca8633b0247d1e41621d1e9f9db88c36298 \
--hash=sha256:116a7c3c616dd14a3de8c64a965828b197e5f2d121fedd2f8c5585c547e87b02 \
--hash=sha256:19e0a019d2c4dcc5e598cd4a4bc7b008546b0358bd322537c74ad47a5386884f \
--hash=sha256:1c7abdac90433b09bad6c43a43af253e688c9cfc1c86d332aed13f9a7c7f65e2 \
--hash=sha256:1e986b206dae4476f41bcec1faa057851f3889503a70e1bdb2378d406223994a \
--hash=sha256:272d3892a1e1a2dbc39cc5cde96834c236d5327e2122d3aaa19f6614531bb6eb \
--hash=sha256:278d2ed7c16cfc075c91378c4f47924c0625f5fc84b2d50d921b18b7975bd210 \
--hash=sha256:2ad40e31093a4af319dadf503b2467ccdc8f67c72e4bcba97f8c10cb078207b5 \
--hash=sha256:2e920d506ec85eb4ba50cd4228c2bec05642894d4c73c59b3a2fe20346bd00ee \
--hash=sha256:3359fc442a743e870f4588fcf5dcbc1bf929df1fad8fb9905cd94e5edb02e84c \
--hash=sha256:37933ec6e693e51a5b07505bd05de57eee12f3e8c32b07da7e73669398e6630a \
--hash=sha256:398af5e3ba9cf768787eef45c803ff9614cc3e22a5b2f7d7ae116df8b11e3314 \
--hash=sha256:3b747a674c20a67343cb61d43fdd9207ce5da6a99f629c6e2541aa0e89215bcd \
--hash=sha256:461665ff58895f508e2866824a47bdee72497b091c730071f2b7575d5762ab65 \
--hash=sha256:4c6fdd4fccbec90cc8a01fc00773fcd5fa28db683c116ee3cb35cd5da9ef6c37 \
--hash=sha256:5829b792bf5822fd0a6f6eb34c5f81dd074f01d570ed7f36aa101d6fc7a0a6e4 \
--hash=sha256:596d1f98fc70232fcb6590c439f43b350cb762fb5d61ce7b0e9db4539654cc13 \
--hash=sha256:5ae44e10a8e3407dbe138984f21e536583f2bba1be9491239f942c2464ac0894 \
--hash=sha256:635f5d4dd18758a1fbd1049a8e8d2fee4ffed124462d837d1a02a0e009c3ab31 \
--hash=sha256:64e52e2b3970bd891309c113b54cf0e4384762c934d5ae56e283f9a0afcd953e \
--hash=sha256:66741ef4ee08ea0b2cc3c86916ab66b6aef03768525627fd6a1b34968b4e3709 \
--hash=sha256:67b741654b851abafb7bc625b6d1cdd520a379074e64b6a128e3b688c3c04740 \
--hash=sha256:6ac08d24c1f16bd2bf5eca8eaf8304812f44af5cfe5062006ec676e7e1d50afc \
--hash=sha256:6f998db4e71b645350b9ac28a2167e6632c239963ca9da411523bb439c5c514d \
--hash=sha256:72218785ce41b9cfd2fc1d6a017dc1ff7acfc4c17d01053265c41a2c0cc39b8c \
--hash=sha256:74dea7751d98034887dbd821b7aae3e1d36eda111d6ca36c206c44478035709c \
--hash=sha256:759ce4851a4bb15ecabae28f4d2e18983c244eddd767f560165563bf9aefbc8d \
--hash=sha256:77e2fd3057c9d78e225fa0a2160f96b64a824de17840351b26825b0848022906 \
--hash=sha256:7c074fece789b5034b9b1404a1f8208fc2d4c6ce9decdd16e8220c5a793e6f61 \
--hash=sha256:7c42c70cd1d362284289c6273adda4c6af8039a8ae12dc451dcd61cdabb8ab57 \
--hash=sha256:7f57f14606cd1dd0f0de396e1e53824c371e9544a822648cd76c034d209b559c \
--hash=sha256:83c681c526a3439b5cf94f7420471705bbf96262f49a6fe546a6db5f687a3d4a \
--hash=sha256:8485b340a6a9e76c62a7dce3c98e5f102c9219f4cfbf896a00cf48caf078d438 \
--hash=sha256:84e6e8cd997930fc66d5bb4fde61e2b62ba19d62b7abd7a69920406f9ecca946 \
--hash=sha256:89284716bc6a5a415d4eaa11b1726d2d60a0cd12aadf5439828353662ede9dd7 \
--hash=sha256:8b87e1a59c38f275c0e3676fc2ab6d59eccecfd460be267ac360cc31f7bcde96 \
--hash=sha256:8f24ed114432de109aa9fd317278518a5af2d31ac2ea6b952b2f7782b43da091 \
--hash=sha256:98cb4d057f285bd80d8778ebc4fde6b4d509ac3f331758fb1528b733215443ae \
--hash=sha256:998679bf62b7fb599d2878aa3ed06b9ce688b8974893e7223c60db155f26bd8d \
--hash=sha256:9ba053c5f50430a3fcfd36f75aff9caeba0440b2d076afdb79a318d6ca245f88 \
--hash=sha256:9c99616c853bb585301df6de07ca2cadad344fd1ada6d62bb30aec05219c45d2 \
--hash=sha256:a1fd716e7a01f8e717490fbe2e431d2905ab8aa598b9b12f8d10abebb36b04dd \
--hash=sha256:a2355cba1f4ad8b6988a4ca3feed5bff33f6af2d7f134852cf279c2aebfde541 \
--hash=sha256:b1f8133c9a275df5613a451e73f36c2aea4fe13c5c8997e22cf355ebd7bd0728 \
--hash=sha256:b8667b48e7a7ef66afba2c81e1094ef526388d35b873966d8a9a447974ed9178 \
--hash=sha256:ba1eb1843304b1e5537e1fca632fa894d6f6deca8d6389636ee5b4797affb968 \
--hash=sha256:be82c3c8cfb15b30f36768797a640e800513793d6ae1724aaaafe5bf86f8f346 \
--hash=sha256:c02ec1c5856179f171e032a31d6f8bf84e5a75c45c33b2e20a3de353b266ebd8 \
--hash=sha256:c672d99a345849301784604bfeaeba4db0c7aae50b95be04dd651fd2a7310b93 \
--hash=sha256:c6c777a480337ac14f38564ac88ae82d4cd238bf293f0a22295b66eb89ffced7 \
--hash=sha256:cae0274952c079886567f3f4f685bcaf5708f0a23a5f5216fdab71f81a6c0273 \
--hash=sha256:cd67cf24a553339d5062eff51013780a00d6f97a39ca062781d06b3a73b15462 \
--hash=sha256:d3515f198eaa2f0ed49f8819d5732d70698c3fa37384146079b3799b97667a94 \
--hash=sha256:d5280312b9af0976231f9e317c20e4a61cd2f9629b7bfea6a693d1878a264ebd \
--hash=sha256:de06adc872bcd8c2a4e0dc51250e9e65ef2ca91be023b9d13ebd67c2ba552e1e \
--hash=sha256:e1674e4307fa3024fc897ca774e9c7562c957af85df55efe2988ed9056dc4e57 \
--hash=sha256:e2096eddb4e7c7bdae4bd69ad364e55e07b8316653234a56552d9c988bd2d61b \
--hash=sha256:e560628513ed34759456a416bf86b54b2476c59144a9138165c9a1575801d0d9 \
--hash=sha256:edfedb64740750e1a3b16152620220f51d58ff1b4abceb339ca92e934775c27a \
--hash=sha256:f13cae8cc389a440def0c8c52057f37359014ccbc9dc1f0827936bcd367c6100 \
--hash=sha256:f314013e7dcd5cf45ab1945d92e713eec788166262ae8deb2cfacd53def27325 \
--hash=sha256:f583edb943cf2e09c60441b910d6a20b4d9d626c75a36c8fcac01a6c96c01183 \
--hash=sha256:fd8536e902db7e365f49e7d9029283403974ccf29b13fc7028b97e2295b33556 \
--hash=sha256:fe70e325aa68fa4b5edf7d1a4b6f691eb04bbccac0ace68e34820d283b5f80d4
# via
# -r requirements/prod.txt
# google-resumable-media
google-resumable-media==2.4.1 \
--hash=sha256:15b8a2e75df42dc6502d1306db0bce2647ba6013f9cd03b6e17368c0886ee90a \
--hash=sha256:831e86fd78d302c1a034730a0c6e5369dd11d37bad73fa69ca8998460d5bae8d
# via
# -r requirements/prod.txt
# google-cloud-storage
googleapis-common-protos==1.58.0 \
--hash=sha256:c727251ec025947d545184ba17e3578840fc3a24a0516a020479edab660457df \
--hash=sha256:ca3befcd4580dab6ad49356b46bf165bb68ff4b32389f028f1abd7c10ab9519a
# via
# -r requirements/prod.txt
# google-api-core
greenlet==0.4.17 \
--hash=sha256:1023d7b43ca11264ab7052cb09f5635d4afdb43df55e0854498fc63070a0b206 \
--hash=sha256:124a3ae41215f71dc91d1a3d45cbf2f84e46b543e5d60b99ecc20e24b4c8f272 \
Expand Down Expand Up @@ -882,10 +997,41 @@ pluggy==1.0.0 \
# via
# pypom
# pytest
protobuf==4.22.0 \
--hash=sha256:1669cb7524221a8e2d9008d0842453dbefdd0fcdd64d67672f657244867635fb \
--hash=sha256:29288813aacaa302afa2381db1d6e0482165737b0afdf2811df5fa99185c457b \
--hash=sha256:47d31bdf58222dd296976aa1646c68c6ee80b96d22e0a3c336c9174e253fd35e \
--hash=sha256:652d8dfece122a24d98eebfef30e31e455d300efa41999d1182e015984ac5930 \
--hash=sha256:7c535d126e7dcc714105ab20b418c4fedbd28f8b8afc42b7350b1e317bbbcc71 \
--hash=sha256:86c3d20428b007537ba6792b475c0853bba7f66b1f60e610d913b77d94b486e4 \
--hash=sha256:a33a273d21852f911b8bda47f39f4383fe7c061eb1814db2c76c9875c89c2491 \
--hash=sha256:ab4d043865dd04e6b09386981fe8f80b39a1e46139fb4a3c206229d6b9f36ff6 \
--hash=sha256:b2fea9dc8e3c0f32c38124790ef16cba2ee0628fe2022a52e435e1117bfef9b1 \
--hash=sha256:c27f371f0159feb70e6ea52ed7e768b3f3a4c5676c1900a7e51a24740381650e \
--hash=sha256:c3325803095fb4c2a48649c321d2fbde59f8fbfcb9bfc7a86df27d112831c571 \
--hash=sha256:e474b63bab0a2ea32a7b26a4d8eec59e33e709321e5e16fb66e766b61b82a95e \
--hash=sha256:e894e9ae603e963f0842498c4cd5d39c6a60f0d7e4c103df50ee939564298658
# via
# -r requirements/prod.txt
# google-api-core
# googleapis-common-protos
py==1.11.0 \
--hash=sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719 \
--hash=sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378
# via -r requirements/dev.in
pyasn1==0.4.8 \
--hash=sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d \
--hash=sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba
# via
# -r requirements/prod.txt
# pyasn1-modules
# rsa
pyasn1-modules==0.2.8 \
--hash=sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e \
--hash=sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74
# via
# -r requirements/prod.txt
# google-auth
pycodestyle==2.10.0 \
--hash=sha256:347187bdb476329d98f695c213d7295a846d1152ff4fe9bacb8a9590b8ee7053 \
--hash=sha256:8a4eaf0d0495c7395bdab3589ac2db602797d76207242c17d470186815706610
Expand Down Expand Up @@ -1125,6 +1271,8 @@ requests==2.28.2 \
# bpython
# contentful
# django-mozilla-product-details
# google-api-core
# google-cloud-storage
# pygithub
# pytest-base-url
# pytest-selenium
Expand All @@ -1138,6 +1286,12 @@ rich-text-renderer==0.2.7 \
--hash=sha256:d34f1bfc1a2903e9b087d7550a01089b06867f0be21fa494fdee73b656c80bc7 \
--hash=sha256:e6fab8d1243dddd29bb65717ade63ca57f10561a3944fc49dac770c4d95ab71c
# via -r requirements/prod.txt
rsa==4.9 \
--hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \
--hash=sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21
# via
# -r requirements/prod.txt
# google-auth
s3transfer==0.6.0 \
--hash=sha256:06176b74f3a15f61f1b4f25a1fc29a4429040b7647133a463da8fa5bd28d5ecd \
--hash=sha256:2ed07d3866f523cc561bf4a00fc5535827981b117dd7876f036b0c1aca42c947
Expand Down Expand Up @@ -1169,6 +1323,8 @@ six==1.16.0 \
# blessed
# compare-locales
# dirsync
# fluent-runtime
# google-auth
# html5lib
# parsimonious
# python-dateutil
Expand Down
1 change: 1 addition & 0 deletions requirements/prod.in
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ everett==3.2.0
fluent.runtime==0.4.0
fluent.syntax==0.19.0
glean-parser==6.2.1 # Must match the required version in the Glean NPM package.
google-cloud-storage==2.7.0
greenlet==0.4.17 # Pinned for stability but subdep of Meinheld
gunicorn==19.7.1
honcho==1.1.0
Expand Down
Loading