Skip to content

Commit

Permalink
Merge branch 'Sefaria:master' into production-master
Browse files Browse the repository at this point in the history
  • Loading branch information
Lungsangg authored Jan 16, 2024
2 parents 3f48ebc + ac69069 commit 2b22bd7
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,7 @@ spec:
command: ["bash"]
args: [
"-c",
"mkdir -p /log && touch /log/sefaria_book_errors.log && pip install numpy elasticsearch==8.8.2 git+https://github.com/Sefaria/[email protected]
.0#egg=elasticsearch-dsl && /app/run /app/scripts/scheduled/reindex_elasticsearch_cronjob.py"
"mkdir -p /log && touch /log/sefaria_book_errors.log && pip install numpy elasticsearch==8.8.2 git+https://github.com/Sefaria/[email protected]#egg=elasticsearch-dsl && /app/run /app/scripts/scheduled/reindex_elasticsearch_cronjob.py"
]
restartPolicy: Never
volumes:
Expand Down
2 changes: 1 addition & 1 deletion sefaria/model/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def remove_shared_term(self, term):
class Term(abst.AbstractMongoRecord, AbstractTitledObject):
"""
A Term is a shared title node. It can be referenced and used by many different Index nodes.
Examples: Noah, Perek HaChovel, Even HaEzer
Examples: Noah, HaChovel
Terms that use the same TermScheme can be ordered.
"""
collection = 'term'
Expand Down
91 changes: 79 additions & 12 deletions sefaria/model/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -997,7 +997,7 @@ def sub_content_with_ref(self, ref=None, value=None):

def sub_content(self, key_list=None, indx_list=None, value=None):
"""
Get's or sets values deep within the content of this version.
Gets or sets values deep within the content of this version.
This returns the result by reference, NOT by value.
http://stackoverflow.com/questions/27339165/slice-nested-list-at-variable-depth
:param key_list: The node keys to traverse to get to the content node
Expand Down Expand Up @@ -3945,8 +3945,8 @@ def all_context_refs(self, include_self = True, include_book = False):

def context_ref(self, level=1):
"""
:return: :class:`Ref` that is more general than this :class:`Ref`.
:param level: how many levels to 'zoom out' from the most specific possible :class:`Ref`
:return: :class:`Ref` that is more general than this :class:`Ref`.
::
Expand Down Expand Up @@ -5033,7 +5033,7 @@ def rebuild_toc(self, skip_toc_tree=False):
While building these ToC data structures, this function also builds the equivalent JSON structures
as an API optimization.
@param: skip_toc_tree boolean
:param skip_toc_tree: Boolean
"""
if not skip_toc_tree:
self._toc_tree = self.get_toc_tree(rebuild=True)
Expand Down Expand Up @@ -5127,7 +5127,7 @@ def get_topic_toc(self, rebuild=False):
def get_topic_toc_json(self, rebuild=False):
"""
Returns JSON representation of Topics ToC.
@param: rebuild boolean
:param rebuild: Boolean
"""
if rebuild or not self._topic_toc_json:
if not rebuild:
Expand All @@ -5141,9 +5141,9 @@ def get_topic_toc_json(self, rebuild=False):
def get_topic_toc_json_recursive(self, topic=None, explored=None, with_descriptions=False):
"""
Returns JSON representation of Topics ToC
@param: topic Topic
@param: explored Set
@param: with_descriptions boolean
:param topic: Topic
:param explored: Set
:param with_descriptions: Boolean
"""
from .topic import Topic, TopicSet, IntraTopicLinkSet
explored = explored or set()
Expand Down Expand Up @@ -5210,7 +5210,7 @@ def build_topic_toc_category_mapping(self) -> dict:
def get_topic_toc_category_mapping(self, rebuild=False) -> dict:
"""
Returns the category mapping as a dictionary for the topics ToC. Loads on Library startup.
@param: rebuild boolean
:param rebuild: Boolean
"""
if rebuild or not self._topic_toc_category_mapping:
if not rebuild:
Expand Down Expand Up @@ -5263,7 +5263,7 @@ def root_title_sorter(t):
def get_topic_link_type(self, link_type):
"""
Returns a TopicLinkType with a slug of link_type (parameter) if not already present
@param: link_type String
:param link_type: String
"""
from .topic import TopicLinkTypeSet
if not self._topic_link_types:
Expand All @@ -5276,7 +5276,7 @@ def get_topic_link_type(self, link_type):
def get_topic_data_source(self, data_source):
"""
Returns a TopicDataSource with the data_source (parameter) slug if not already present
@param: data_source String
:param data_source: String
"""
from .topic import TopicDataSourceSet
if not self._topic_data_sources:
Expand Down Expand Up @@ -5380,7 +5380,7 @@ def lexicon_auto_completer(self, lexicon):
is not present, it assumes the need to rebuild the lexicon_auto_completer and calls the build
function with appropriate logger warnings before returning the desired result
@param: lexicon String
:param lexicon: String
"""
try:
return self._lexicon_auto_completer[lexicon]
Expand Down Expand Up @@ -5640,6 +5640,12 @@ def get_term_dict(self, lang="en"):
return term_dict

def build_term_mappings(self):
"""
Build simple and full term mappings
A full term mapping has the term name as the key, and the term as the value.
A simple term mapping has the term name as the key, and a dictionary containing the English and Hebrew
primary titles for the terms as the value.
"""
self._simple_term_mapping = {}
self._full_term_mapping = {}
for term in TermSet():
Expand Down Expand Up @@ -5675,22 +5681,46 @@ def get_simple_term_mapping_json(self, rebuild=False):
return self._simple_term_mapping_json

def get_term(self, term_name):
    """
    Look up a term by name in the full term mapping.
    When the full term mapping is empty it is built first. A name that is not in the
    mapping is loaded directly through ``Term().load({"name": term_name})``.
    :param term_name: String
    :returns: the mapped Term, or whatever the direct load returns
    """
    if not self._full_term_mapping:
        self.build_term_mappings()
    # Read the attribute only after the (possible) build so we see the populated mapping.
    full_mapping = self._full_term_mapping
    if term_name in full_mapping:
        return full_mapping.get(term_name)
    return Term().load({"name": term_name})



def get_topic(self, slug):
    """
    Fetch the topic mapping entry stored under the given slug.
    As built by ``_build_topic_mapping``, an entry is a dictionary with the keys "en" and "he",
    holding the topic's English and Hebrew primary titles respectively.
    This is a plain subscript lookup: a slug missing from the mapping is not handled here.
    :param slug: String
    :returns: the entry for the given slug (Dictionary)
    """
    topic_entry = self._topic_mapping[slug]
    return topic_entry

def get_topic_mapping(self, rebuild=False):
    """
    Return the topic mapping, building it when needed.
    The stored mapping is returned as-is when it is non-empty and no rebuild was requested;
    otherwise the result of ``_build_topic_mapping()`` is returned.
    :param rebuild: Boolean (optional, default set to False)
    """
    existing = self._topic_mapping
    if existing and not rebuild:
        return existing
    return self._build_topic_mapping()

def _build_topic_mapping(self):
    """
    Build the topic mapping, store it on ``self._topic_mapping`` and return it.
    The mapping is a dictionary keyed by topic slug, with one entry per topic in ``TopicSet()``.
    Each value is another dictionary with the keys "en" and "he":
    "en" holds the topic's English primary title and "he" its Hebrew primary title.
    :returns: the complete slug -> {"en": ..., "he": ...} mapping (Dictionary)
    """
    from .topic import Topic, TopicSet
    slug_to_titles = {}
    for topic in TopicSet():
        slug_to_titles[topic.slug] = {
            "en": topic.get_primary_title("en"),
            "he": topic.get_primary_title("he"),
        }
    # Assign once, after the full mapping is built.
    self._topic_mapping = slug_to_titles
    return self._topic_mapping
Expand Down Expand Up @@ -5734,6 +5764,9 @@ def get_index_forest(self):
return root_nodes

def all_index_records(self):
    """
    Return a list with one index record per key of the English title map,
    each looked up in ``self._index_map``, in the key order of that title map.
    """
    # Snapshot the keys before iterating.
    english_keys = list(self._index_title_maps["en"].keys())
    records = []
    for title_key in english_keys:
        records.append(self._index_map[title_key])
    return records

def get_title_node_dict(self, lang="en"):
Expand Down Expand Up @@ -5774,9 +5807,9 @@ def citing_title_list(self, lang="en"):

def full_title_list(self, lang="en", with_terms=False):
"""
:return: list of strings of all possible titles
:param lang: "he" or "en"
:param with_terms: if True, includes shared titles ('terms')
:return: list of strings of all possible titles
"""
key = lang
key += "_terms" if with_terms else ""
Expand All @@ -5800,6 +5833,11 @@ def build_text_titles_json(self, lang="en"):
return title_list

def get_text_titles_json(self, lang="en", rebuild=False):
"""
Returns the json text title list
:param lang: String (optional, default set to 'en')
:param rebuild: Boolean (optional, default set to False)
"""
if rebuild or not self._full_title_list_jsons.get(lang):
if not rebuild:
self._full_title_list_jsons[lang] = scache.get_shared_cache_elem('books_'+lang+'_json')
Expand All @@ -5813,6 +5851,9 @@ def get_text_titles_json(self, lang="en", rebuild=False):
return self._full_title_list_jsons[lang]

def reset_text_titles_cache(self):
"""
Resets the text titles for all languages by clearing the existing titles from the cache.
"""
for lang in self.langs:
scache.delete_shared_cache_elem('books_' + lang)
scache.delete_shared_cache_elem('books_' + lang + '_json')
Expand Down Expand Up @@ -6058,6 +6099,20 @@ def get_multi_title_regex_string(self, titles, lang, for_js=False, anchored=Fals

# do we want to move this to the schema node? We'd still have to pass the title...
def get_regex_string(self, title, lang, for_js=False, anchored=False, capture_title=False, parentheses=False):
"""
Given a book title, this function returns a regex for a Ref.
This works for references not in Sefaria format (e.g. "See Genesis 2 3" as opposed to "Genesis 2:3"),
as well as for references in Sefaria format.
If the language is 'en', it calls the full_regex() function which returns the regex, whereas for 'he' we
limit the regex creation to content inside parentheses to limit false positives (e.g. the phrase שבת לא תעשה
could be caught by mistake as Shabbat 31)
:param title: String
:param lang: 'en' or 'he'
:param for_js: Boolean (default set to False, optional)
:param anchored: Boolean (default set to False, optional)
:param capture_title: Boolean (default set to False, optional)
:param parentheses: Boolean (default set to False, optional)
"""
node = self.get_schema_node(title, lang)
assert isinstance(node, JaggedArrayNode) # Assumes that node is a JaggedArrayNode

Expand Down Expand Up @@ -6210,6 +6265,12 @@ def repl(match):
return re.sub(fr"{dummy_char}+", repl, dummy_text)

def category_id_dict(self, toc=None, cat_head="", code_head=""):
"""Returns a dict of unique category ids based on the ToC, with the
values being the category IDs.
:param toc: ToC object (optional, default is None)
:param cat_head: String, (optional, default is "" - an empty string)
:param code_head: String, (optional, default is "" - an empty string)
"""
if toc is None:
if not self._category_id_dict:
self._category_id_dict = self.category_id_dict(self.get_toc())
Expand All @@ -6233,6 +6294,12 @@ def category_id_dict(self, toc=None, cat_head="", code_head=""):
return d

def simplify_toc(self, lang=None, toc_node=None, path=None):
"""
Simplifies the table of contents (ToC)
:param lang: 'en' or 'he', default is None (optional)
:param toc_node: ToC Node, default is None (optional)
:param path: Node Path, default is None (optional)
"""
is_root = toc_node is None and path is None
toc_node = toc_node if toc_node else self.get_toc()
path = path if path else []
Expand Down
2 changes: 1 addition & 1 deletion sefaria/tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def modify_bulk_text(user: int, version: model.Version, text_map: dict, vsource=
"""
user: user ID of user making modification
version: version object of text being modified
text_map: dict with segment ref keys and text values. Each key/value pair represents a segment that should be modified. Segments that don't have changes will be ignored.
text_map: dict with segment ref keys and text values. Each key/value pair represents a segment that should be modified. Segments that don't have changes will be ignored. The key should be the tref, and the value the text, ex: {'Mishnah Berakhot 1:1': 'Text of the Mishnah goes here'}
vsource: optional parameter to set the version source of the version. not sure why this is here. I copied it from modify_text.
"""
def populate_change_map(old_text, en_tref, he_tref, _):
Expand Down

0 comments on commit 2b22bd7

Please sign in to comment.