# ,-. ;-. ,--. . . . , ,--. ,-. ,-. ,--. ,-. ,-. . . ,--. , ,-. #
# / \ | ) | |\ | | / | | ) ( ` | / / \ |\ | | | / #
# | | |-' |- | \| | / |- |-< `-. |- | | | | \| |- | | -. #
# \ / | | | | |/ | | \ . ) | \ \ / | | | | \ | #
# `-' ' `--' ' ' ' `--' ' ' `-' `--' `-' `-' ' ' ' ' `-' #
########################################################################################
# Airflow Settings
########################################################################################
# Some brand-based suggestions: #C52B9B (pink), #FFE033 (yellow)
AIRFLOW__WEBSERVER__NAVBAR_COLOR="#FFF"
# Disabled by default to make development easier
# (enabled on prod for security)
AIRFLOW__CORE__HIDE_SENSITIVE_VAR_CONN_FIELDS=False
# Use the following python code to generate a fernet key for production
# python -c "import base64, os; print(base64.urlsafe_b64encode(os.urandom(32)).decode())"
# AIRFLOW__CORE__FERNET_KEY=
# Webserver secret key (used for CSRF protection): https://airflow.apache.org/docs/apache-airflow/stable/configurations-ref.html#secret-key
AIRFLOW__WEBSERVER__SECRET_KEY=sample-secret-key=
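# If you need a real value, one option (a suggestion, not the project's prescribed
# method) is to generate a random key with Python's standard library:
# python -c "import secrets; print(secrets.token_urlsafe(32))"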
# Executor to use
AIRFLOW__CORE__EXECUTOR=LocalExecutor
# Environment this instance is being run in
AIRFLOW_VAR_ENVIRONMENT=local
########################################################################################
# API Keys
########################################################################################
# See: https://airflow.apache.org/docs/apache-airflow/stable/howto/variable.html#storing-variables-in-environment-variables
# Example:
# AIRFLOW_VAR_API_KEY_BROOKLYN_MUSEUM=apikey
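# For reference, a variable declared this way can be read in DAG code through
# Airflow's standard Variables API (the key is the suffix after AIRFLOW_VAR_,
# lowercased), e.g.:
# from airflow.models import Variable
# api_key = Variable.get("api_key_brooklyn_museum")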
########################################################################################
# Connection/Variable info
########################################################################################
# Airflow primary metadata database
# Change the following line in production to use the appropriate DB
AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@upstream_db:5432/airflow
# Remote logging connection ID
# Replace "access_key" and "secret+key" with the real values. Secret key must be URL-encoded
AIRFLOW_CONN_AWS_DEFAULT=aws://test_key:test_secret@?region_name=us-east-1&endpoint_url=http%3A%2F%2Fs3%3A5000
# Catalog DB connections. Change the following lines in production to use the appropriate DBs.
AIRFLOW_CONN_POSTGRES_OPENLEDGER_UPSTREAM=postgres://deploy:deploy@upstream_db:5432/openledger
AIRFLOW_CONN_POSTGRES_OPENLEDGER_TESTING=postgres://deploy:deploy@upstream_db:5432/openledger
AIRFLOW_CONN_POSTGRES_OPENLEDGER_API_STAGING=postgres://deploy:deploy@db:5432/openledger
TEST_CONN_ID=postgres_openledger_testing
# Elasticsearch connections. Change the following lines in production to use the appropriate URLs.
AIRFLOW_CONN_ELASTICSEARCH_HTTP_PRODUCTION=http://es:9200
AIRFLOW_CONN_ELASTICSEARCH_HTTP_STAGING=http://es:9200
# AWS CloudWatch connection. Uncomment and fill in the following line to allow toggling alarms during a Data Refresh.
# AIRFLOW_CONN_AWS_CLOUDWATCH=aws://<key>:<secret>@?region_name=us-east-1
# API DB connection. Change the following line in production to use the appropriate DB
AIRFLOW_CONN_POSTGRES_OPENLEDGER_API=postgres://deploy:deploy@db:5432/openledger
# Slack webhook connection info (note that these values are modified by the Docker entrypoint)
# A distinction is made here between "notifications" and "alerts", the former being
# useful updates and the latter being alarms or actionable errors.
AIRFLOW_CONN_SLACK_NOTIFICATIONS=https://slack
AIRFLOW_CONN_SLACK_ALERTS=https://slack
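# In production these are real incoming-webhook URLs. A hypothetical example of the
# usual Slack webhook shape (all identifiers below are placeholders):
# AIRFLOW_CONN_SLACK_NOTIFICATIONS=https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX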
S3_LOCAL_ENDPOINT=http://s3:5000
# Set to a non-default storage class supported by MinIO in local development. This
# works around MinIO's lack of support for all AWS storage classes, while still using
# a non-default value so that the expected behaviour can be verified (specifically,
# that the storage class is not the default "STANDARD").
# https://github.com/minio/minio/issues/5469
AIRFLOW_VAR_DEFAULT_S3_TSV_STORAGE_CLASS=REDUCED_REDUNDANCY
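# For illustration only: a storage class like this is typically passed to S3 uploads
# via boto3's StorageClass argument. A sketch (not the project's actual code):
# import boto3
# s3 = boto3.client("s3", endpoint_url="http://s3:5000")
# s3.put_object(Bucket="openverse-catalog", Key="example.tsv", Body=b"", StorageClass="REDUCED_REDUNDANCY")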
# Connection to the Ingestion Server, used for managing data refreshes. The default
# connects to your locally running ingestion server.
AIRFLOW_CONN_DATA_REFRESH=http://ingestion_server:8001
# Connection to the Staging Ingestion Server, used for managing indices on the staging
# Elasticsearch cluster. The default connects to your locally running ingestion
# server.
AIRFLOW_CONN_STAGING_DATA_REFRESH=http://ingestion_server:8001
# Connection to the sensitive terms list used for filtered index creation in the data refresh.
# For local development, the mock terms list is served by the catalog indexer worker. Change to the
# full production URL in order to use the production sensitive terms list.
AIRFLOW_CONN_SENSITIVE_TERMS="http://catalog_indexer_worker:8003/static/mock_sensitive_terms.txt"
# Django Admin URL. Change the following line to use the appropriate environment.
DJANGO_ADMIN_URL="https://localhost:8000/admin"
# GitHub - used for maintenance
AIRFLOW_VAR_GITHUB_API_KEY="not_set"
########################################################################################
# Other config
########################################################################################
# Version of the catalog docker image to use. Defaults to `latest` and is not used for
# local development (since the image is always built locally). See available tags at
# https://ghcr.io/wordpress/openverse-catalog
DOCKER_IMAGE_TAG=latest
# External port Airflow will be mapped to
AIRFLOW_PORT=9090
# Minutes to wait before processing a file that hasn't been modified
LOADER_FILE_AGE=1
# Contact email for any APIs
# AWS/S3 configuration - does not need to be changed for development
AWS_ACCESS_KEY=test_key
AWS_SECRET_KEY=test_secret
AWS_DEFAULT_REGION=us-east-1
# General bucket used for TSV->DB ingestion and logging
OPENVERSE_BUCKET=openverse-catalog
# Seconds to wait before poking for availability of the data refresh pool when running a data_refresh
# DAG. Used to shorten the time for testing purposes.
DATA_REFRESH_POKE_INTERVAL=5
# The pool to use for the data refresh tasks. Defaults to `data_refresh` in production but
# we override this locally to prevent having to frequently recreate it.
DATA_REFRESH_POOL=default_pool
# Number of retries if a DAG task fails to run
DEFAULT_RETRY_COUNT=2
# Whether to enable catchup for dated DAGs, allowing automatic backfill.
AIRFLOW_VAR_CATCHUP_ENABLED=false
# Number of records to expect in a healthy ES index. Used during the data refresh to verify that
# a new index is healthy before promoting.
ES_INDEX_READINESS_RECORD_COUNT=1000
# Throttle for elasticsearch reindexing. This is used by DAGs that directly connect
# to Elasticsearch; it is not used for DAGs which reindex via the ingestion server, such as
# the data refreshes.
AIRFLOW_VAR_ES_INDEX_THROTTLING_RATE=20000
# Warning in dependency, nothing we can do
# https://docs.sqlalchemy.org/en/20/errors.html#error-b8d9
SQLALCHEMY_SILENCE_UBER_WARNING=1
# Default size of batches for the data refresh altering step
AIRFLOW_VAR_DATA_REFRESH_ALTER_BATCH_SIZE=1000
# S3 prefix location for the Rekognition dataset
AIRFLOW_VAR_REKOGNITION_DATASET_PREFIX=image_analysis_labels.jsonl
# In-memory buffer size for processing Rekognition data
AIRFLOW_VAR_REKOGNITION_MEMORY_BUFFER_SIZE=25
# File buffer size for reading Rekognition data from S3
AIRFLOW_VAR_REKOGNITION_FILE_BUFFER_SIZE=16384
AIRFLOW_VAR_AIRFLOW_RDS_ARN=unset
AIRFLOW_VAR_AIRFLOW_RDS_SNAPSHOTS_TO_RETAIN=7
# Whether to toggle production CloudWatch alarms when running a data refresh DAG.
# Used to prevent requiring AWS credentials when running locally.
AIRFLOW_VAR_TOGGLE_CLOUDWATCH_ALARMS=false
# Whether to delete source data from Airflow and the DB once ingestion is complete.
# This is used to support data quality testing in SQL-only DAGs like iNaturalist.
AIRFLOW_VAR_SQL_RM_SOURCE_DATA_AFTER_INGESTION=false