Skip to content

Commit

Permalink
Merge branch 'Sefaria:master' into staging
Browse files Browse the repository at this point in the history
  • Loading branch information
Lungsangg authored Jan 29, 2024
2 parents 9ae31b7 + 0f2f64a commit 21fbc16
Show file tree
Hide file tree
Showing 14 changed files with 51 additions and 1,028 deletions.
4 changes: 1 addition & 3 deletions build/ci/production-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ nginx:
containerImage:
imageRegistry:
tag:
SEARCH_HOST: elasticsearch-data
SEARCH_HOST: elasticsearch-es-http.elasticsearch.svc
disableScraping: false
replicaCount: 2
resources:
Expand Down Expand Up @@ -179,8 +179,6 @@ cronJobs:
enabled: true
reindexElasticSearch:
enabled: true
SEARCH_HOST_ES6: "elasticsearch-data"
SEARCH_HOST_ES8: "elasticsearch-es-http.elasticsearch.svc"
topicsIndexing:
enabled: true
trello:
Expand Down
1 change: 0 additions & 1 deletion helm-chart/sefaria-project/templates/configmap/nginx.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ data:
# allow urls which aren't caught by regex above
location /api/search/ {
rewrite ^/(?:api/search)/(.*)$ /$1 break;
proxy_set_header Content-Type application/json; # es 6.0 requires this header
proxy_set_header Authorization "Basic ${ELASTIC_AUTH_HEADER}";
add_header 'Access-Control-Allow-Origin' '';
proxy_pass http://elasticsearch_upstream/;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,77 +0,0 @@
{{- if .Values.cronJobs.reindexElasticSearch.enabled }}
---
apiVersion: batch/v1
kind: CronJob
metadata:
  name: {{ .Values.deployEnv }}-reindex-elastic-search-es6
  labels:
    {{- include "sefaria.labels" . | nindent 4 }}
spec:
  # Weekly run: Sundays at 13:20 UTC.
  schedule: "20 13 * * 0"
  jobTemplate:
    spec:
      backoffLimit: 1
      template:
        spec:
          affinity:
            # Keep the reindex pod off nodes running mongo: the job is
            # memory-hungry (see resources below) and would contend with the DB.
            podAntiAffinity:
              requiredDuringSchedulingIgnoredDuringExecution:
                - labelSelector:
                    matchExpressions:
                      - key: app
                        operator: In
                        values:
                          - mongo
                  # FIX: was "kubernetes.io.hostname" (dots). The well-known node
                  # label is "kubernetes.io/hostname"; with the dotted form no node
                  # carries the topology key, so the anti-affinity term is broken.
                  topologyKey: kubernetes.io/hostname
          containers:
            - name: reindex-elastic-search-es6
              image: "{{ .Values.web.containerImage.imageRegistry }}:{{ .Values.web.containerImage.tag }}"
              resources:
                limits:
                  memory: 9Gi
                requests:
                  memory: 7Gi
              env:
                # ES6-specific search host; the ES8 job uses SEARCH_HOST_ES8.
                - name: SEARCH_HOST
                  value: "{{ .Values.cronJobs.reindexElasticSearch.SEARCH_HOST_ES6 }}"
                - name: REDIS_HOST
                  value: "redis-{{ .Values.deployEnv }}"
                - name: NODEJS_HOST
                  value: "node-{{ .Values.deployEnv }}-{{ .Release.Revision }}"
                - name: VARNISH_HOST
                  value: "varnish-{{ .Values.deployEnv }}-{{ .Release.Revision }}"
                # Slack webhook used by the cronjob script for failure reporting.
                - name: SLACK_URL
                  valueFrom:
                    secretKeyRef:
                      name: {{ template "sefaria.secrets.slackWebhook" . }}
                      key: slack-webhook
              envFrom:
                - secretRef:
                    name: {{ .Values.secrets.localSettings.ref }}
                    optional: true
                - configMapRef:
                    name: local-settings-{{ .Values.deployEnv }}
                - secretRef:
                    name: local-settings-secrets-{{ .Values.deployEnv }}
                    optional: true
              volumeMounts:
                - mountPath: /app/sefaria/local_settings.py
                  name: local-settings
                  subPath: local_settings.py
                  readOnly: true
              command: ["bash"]
              args: [
                "-c",
                "mkdir -p /log && touch /log/sefaria_book_errors.log && pip install numpy && /app/run /app/scripts/scheduled/reindex_elasticsearch_cronjob_ES6.py"
              ]
          restartPolicy: Never
          volumes:
            - name: local-settings
              configMap:
                name: local-settings-file-{{ .Values.deployEnv }}
                items:
                  - key: local_settings.py
                    path: local_settings.py
      successfulJobsHistoryLimit: 1
      failedJobsHistoryLimit: 2
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ spec:
memory: 7Gi
env:
- name: SEARCH_HOST
value: "{{ .Values.cronJobs.reindexElasticSearch.SEARCH_HOST_ES8 }}"
value: "{{ .Values.nginx.SEARCH_HOST }}"
- name: REDIS_HOST
value: "redis-{{ .Values.deployEnv }}"
- name: NODEJS_HOST
Expand Down Expand Up @@ -64,7 +64,7 @@ spec:
command: ["bash"]
args: [
"-c",
"mkdir -p /log && touch /log/sefaria_book_errors.log && pip install numpy elasticsearch==8.8.2 git+https://github.com/Sefaria/[email protected]#egg=elasticsearch-dsl && /app/run /app/scripts/scheduled/reindex_elasticsearch_cronjob.py"
"mkdir -p /log && touch /log/sefaria_book_errors.log && pip install numpy && /app/run /app/scripts/scheduled/reindex_elasticsearch_cronjob.py"
]
restartPolicy: Never
volumes:
Expand Down
2 changes: 1 addition & 1 deletion reader/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -4253,7 +4253,7 @@ def search_wrapper_api(request, es6_compat=False):
search_obj = get_query_obj(search_obj=search_obj, **j)
response = search_obj.execute()
if response.success():
response_json = getattr(response.to_dict(), 'body', response.to_dict())
response_json = response.to_dict().body
if es6_compat and isinstance(response_json['hits']['total'], dict):
response_json['hits']['total'] = response_json['hits']['total']['value']
return jsonResponse(response_json, callback=request.GET.get("callback", None))
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ django==1.11.*
djangorestframework @ https://github.com/encode/django-rest-framework/archive/3.11.1.tar.gz
djangorestframework_simplejwt==3.3.0
PyJWT==1.7.1 # pinned b/c current version 2.0.0 breaks simplejwt. waiting for 2.0.1
elasticsearch==7.9.1
elasticsearch_dsl==7.3.0
elasticsearch==8.8.2
git+https://github.com/Sefaria/[email protected]#egg=elasticsearch-dsl
geojson==2.5.0
geopy==2.3.0
gevent==20.12.0; sys_platform != 'darwin'
Expand Down
49 changes: 0 additions & 49 deletions scripts/scheduled/reindex_elasticsearch_cronjob_ES6.py
Original file line number Diff line number Diff line change
@@ -1,49 +0,0 @@
"""
This file is meant to be temporary while we are migrating to elasticsearch 8
"""
from datetime import datetime
import requests
import traceback
import os
import django
django.setup()
from sefaria.model import *
from sefaria.search_ES6 import index_all
from sefaria.local_settings import SEFARIA_BOT_API_KEY
from sefaria.pagesheetrank import update_pagesheetrank

"""
Source sheets added after last_sheet_timestamp will be missing from the index process. We want to manually index all
source sheets created after this. Depending on the database being used to index the timestamp will be different. If
running against a production database, last_sheet_timestamp will be the time this script began running. Otherwise, this
value will need to be set to the time at which the last mongo dump was created (assuming the database is using the most
up-to-date mongo dump).
"""
# last_sheet_timestamp = datetime.fromtimestamp(os.path.getmtime("/var/data/sefaria_public/dump/sefaria")).isoformat()
try:
last_sheet_timestamp = datetime.now().isoformat()
update_pagesheetrank()
index_all()
r = requests.post("https://www.sefaria.org/admin/index-sheets-by-timestamp", data={"timestamp": last_sheet_timestamp, "apikey": SEFARIA_BOT_API_KEY})
if "error" in r.text:
raise Exception("Error when calling admin/index-sheets-by-timestamp API: " + r.text)
else:
print("SUCCESS!", r.text)
except Exception as e:
tb_str = traceback.format_exc()
print("Caught exception")
post_object = {
"icon_emoji": ":facepalm:",
"username": "Reindex ElasticSearch",
"channel": "#engineering-discuss",
"attachments": [
{
"fallback": tb_str,
"color": "#a30200",
"pretext": "Cronjob Error",
"text": tb_str
}
]
}
requests.post(os.environ['SLACK_URL'], json=post_object)
raise e
2 changes: 1 addition & 1 deletion sefaria/helper/linker.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ class _FindRefsTextOptions:
@attr version_preferences_by_corpus: dict of dicts of the form { <corpus>: { <lang>: <vtitle> }}
"""

debug: bool = False
with_text: bool = False
debug: bool = False
max_segments: int = 0
version_preferences_by_corpus: dict = None

Expand Down
4 changes: 2 additions & 2 deletions sefaria/helper/tests/linker_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@ def test_find_refs_text(self, mock_is_hebrew: Mock):
assert find_refs_text.lang == 'en'

def test_find_refs_text_options(self):
find_refs_text_options = linker._FindRefsTextOptions(True, True, 10, {})
assert find_refs_text_options.debug
find_refs_text_options = linker._FindRefsTextOptions(True, False, 10, {})
assert not find_refs_text_options.debug
assert find_refs_text_options.with_text
assert find_refs_text_options.max_segments == 10
assert find_refs_text_options.version_preferences_by_corpus == {}
Expand Down
Loading

0 comments on commit 21fbc16

Please sign in to comment.