Merge branch 'Sefaria:master' into production-master

OpenPecha · Jan 16, 2024 · 2b22bd7 · 2b22bd7
2 parents 3f48ebc + ac69069
commit 2b22bd7
Show file tree

Hide file tree

Showing 4 changed files with 82 additions and 16 deletions.
diff --git a/helm-chart/sefaria-project/templates/cronjob/reindex-elasticsearch.yaml b/helm-chart/sefaria-project/templates/cronjob/reindex-elasticsearch.yaml
@@ -64,8 +64,7 @@ spec:
             command: ["bash"]
             args: [
               "-c",
-              "mkdir -p /log && touch /log/sefaria_book_errors.log && pip install numpy elasticsearch==8.8.2 git+https://github.com/Sefaria/elasticsearch-dsl-py@v8.0
-              .0#egg=elasticsearch-dsl && /app/run /app/scripts/scheduled/reindex_elasticsearch_cronjob.py"
+              "mkdir -p /log && touch /log/sefaria_book_errors.log && pip install numpy elasticsearch==8.8.2 git+https://github.com/Sefaria/elasticsearch-dsl-py@v8.0.0#egg=elasticsearch-dsl && /app/run /app/scripts/scheduled/reindex_elasticsearch_cronjob.py"
             ]
           restartPolicy: Never
           volumes:

diff --git a/sefaria/model/schema.py b/sefaria/model/schema.py
@@ -243,7 +243,7 @@ def remove_shared_term(self, term):
 class Term(abst.AbstractMongoRecord, AbstractTitledObject):
     """
     A Term is a shared title node.  It can be referenced and used by many different Index nodes.
-    Examples:  Noah, Perek HaChovel, Even HaEzer
+    Examples:  Noah, HaChovel
     Terms that use the same TermScheme can be ordered.
     """
     collection = 'term'

diff --git a/sefaria/model/text.py b/sefaria/model/text.py
@@ -997,7 +997,7 @@ def sub_content_with_ref(self, ref=None, value=None):
 
     def sub_content(self, key_list=None, indx_list=None, value=None):
         """
-        Get's or sets values deep within the content of this version.
+        Gets or sets values deep within the content of this version.
         This returns the result by reference, NOT by value.
         http://stackoverflow.com/questions/27339165/slice-nested-list-at-variable-depth
         :param key_list: The node keys to traverse to get to the content node
@@ -3945,8 +3945,8 @@ def all_context_refs(self, include_self = True, include_book = False):
 
     def context_ref(self, level=1):
         """
-        :return: :class:`Ref` that is more general than this :class:`Ref`.
         :param level: how many levels to 'zoom out' from the most specific possible :class:`Ref`
+        :return: :class:`Ref` that is more general than this :class:`Ref`.
 
         ::
 
@@ -5033,7 +5033,7 @@ def rebuild_toc(self, skip_toc_tree=False):
         While building these ToC data structures, this function also builds the equivalent JSON structures
         as an API optimization.
 
-        @param: skip_toc_tree boolean
+        :param skip_toc_tree: Boolean
         """
         if not skip_toc_tree:
             self._toc_tree = self.get_toc_tree(rebuild=True)
@@ -5127,7 +5127,7 @@ def get_topic_toc(self, rebuild=False):
     def get_topic_toc_json(self, rebuild=False):
         """
         Returns JSON representation of Topics ToC.
-        @param: rebuild boolean
+        :param rebuild: Boolean
         """
         if rebuild or not self._topic_toc_json:
             if not rebuild:
@@ -5141,9 +5141,9 @@ def get_topic_toc_json(self, rebuild=False):
     def get_topic_toc_json_recursive(self, topic=None, explored=None, with_descriptions=False):
         """
         Returns JSON representation of Topics ToC
-        @param: topic Topic
-        @param: explored Set
-        @param: with_descriptions boolean
+        :param topic: Topic
+        :param explored: Set
+        :param with_descriptions: Boolean
         """
         from .topic import Topic, TopicSet, IntraTopicLinkSet
         explored = explored or set()
@@ -5210,7 +5210,7 @@ def build_topic_toc_category_mapping(self) -> dict:
     def get_topic_toc_category_mapping(self, rebuild=False) -> dict:
         """
         Returns the category mapping as a dictionary for the topics ToC. Loads on Library startup.
-        @param: rebuild boolean
+        :param rebuild: Boolean
         """
         if rebuild or not self._topic_toc_category_mapping:
             if not rebuild:
@@ -5263,7 +5263,7 @@ def root_title_sorter(t):
     def get_topic_link_type(self, link_type):
         """
         Returns a TopicLinkType with a slug of link_type (parameter) if not already present
-        @param: link_type String
+        :param link_type: String
         """
         from .topic import TopicLinkTypeSet
         if not self._topic_link_types:
@@ -5276,7 +5276,7 @@ def get_topic_link_type(self, link_type):
     def get_topic_data_source(self, data_source):
         """
         Returns a TopicDataSource with the data_source (parameter) slug if not already present
-        @param: data_source String
+        :param data_source: String
         """
         from .topic import TopicDataSourceSet
         if not self._topic_data_sources:
@@ -5380,7 +5380,7 @@ def lexicon_auto_completer(self, lexicon):
         is not present, it assumes the need to rebuild the lexicon_auto_completer and calls the build
         function with appropriate logger warnings before returning the desired result
 
-        @param: lexicon String
+        :param lexicon: String
         """
         try:
             return self._lexicon_auto_completer[lexicon]
@@ -5640,6 +5640,12 @@ def get_term_dict(self, lang="en"):
         return term_dict
 
     def build_term_mappings(self):
+        """
+           Build simple and full term mappings
+           A full term mapping has the term name as the key, and the term as the value.
+           A simple term mapping has the term name as the key, and a dictionary containing the English and Hebrew
+           primary titles for the terms as the value.
+        """
         self._simple_term_mapping = {}
         self._full_term_mapping = {}
         for term in TermSet():
@@ -5675,22 +5681,46 @@ def get_simple_term_mapping_json(self, rebuild=False):
         return self._simple_term_mapping_json
 
     def get_term(self, term_name):
+        """
+        Returns the full term. If the full term mapping is not present, it is built first.
+        :param term_name: String
+        :returns: full Term (Mongo Record)
+        """
         if not self._full_term_mapping:
             self.build_term_mappings()
         return self._full_term_mapping.get(term_name) if term_name in self._full_term_mapping else Term().load({"name": term_name})
 
 
 
     def get_topic(self, slug):
+        """
+        Returns a dictionary containing the keys "en" and "he".
+        The "en" field has a value of the topic's English primary title, and the "he" field has a
+        value of the topic's Hebrew primary title.
+        :param slug: String
+        :returns: Dictionary with the primary titles ("en" and "he") of the topic with the given slug
+        """
         return self._topic_mapping[slug]
 
     def get_topic_mapping(self, rebuild=False):
+        """
+        Returns the topic mapping if it exists; if it does not exist, or if rebuild is True, rebuilds it and returns it.
+        :param rebuild: Boolean (optional, default set to False)
+        """
         tm = self._topic_mapping
         if not tm or rebuild:
             tm = self._build_topic_mapping()
         return tm
 
     def _build_topic_mapping(self):
+        """
+        Builds the topic mapping. The topic mapping is a dictionary with keys, where each key
+        is a slug of a topic.
+        That key contains the value of another dictionary, with the keys "en" and "he".
+        The "en" field has a value of the topic's English primary title, and the "he" field has a
+        value of the topic's Hebrew primary title.
+        :returns: Dictionary mapping each topic slug to its {"en": ..., "he": ...} primary titles
+        """
         from .topic import Topic, TopicSet
         self._topic_mapping = {t.slug: {"en": t.get_primary_title("en"), "he": t.get_primary_title("he")} for t in TopicSet()}
         return self._topic_mapping
@@ -5734,6 +5764,9 @@ def get_index_forest(self):
         return root_nodes
 
     def all_index_records(self):
+        """
+        Returns a list of all index records
+        """
         return [self._index_map[k] for k in list(self._index_title_maps["en"].keys())]
 
     def get_title_node_dict(self, lang="en"):
@@ -5774,9 +5807,9 @@ def citing_title_list(self, lang="en"):
 
     def full_title_list(self, lang="en", with_terms=False):
         """
-        :return: list of strings of all possible titles
         :param lang: "he" or "en"
         :param with_terms: if True, includes shared titles ('terms')
+        :return: list of strings of all possible titles
         """
         key = lang
         key += "_terms" if with_terms else ""
@@ -5800,6 +5833,11 @@ def build_text_titles_json(self, lang="en"):
         return title_list
 
     def get_text_titles_json(self, lang="en", rebuild=False):
+        """
+        Returns the json text title list
+        :param lang: String (optional, default set to 'en')
+        :param rebuild: Boolean (optional, default set to False)
+        """
         if rebuild or not self._full_title_list_jsons.get(lang):
             if not rebuild:
                 self._full_title_list_jsons[lang] = scache.get_shared_cache_elem('books_'+lang+'_json')
@@ -5813,6 +5851,9 @@ def get_text_titles_json(self, lang="en", rebuild=False):
         return self._full_title_list_jsons[lang]
 
     def reset_text_titles_cache(self):
+        """
+        Resets the text titles for all languages by clearing the existing titles from the cache.
+        """
         for lang in self.langs:
             scache.delete_shared_cache_elem('books_' + lang)
             scache.delete_shared_cache_elem('books_' + lang + '_json')
@@ -6058,6 +6099,20 @@ def get_multi_title_regex_string(self, titles, lang, for_js=False, anchored=Fals
 
     # do we want to move this to the schema node? We'd still have to pass the title...
     def get_regex_string(self, title, lang, for_js=False, anchored=False, capture_title=False, parentheses=False):
+        """
+        Given a book title, this function returns a regex for a Ref.
+        This works for references not in Sefaria format (e.g. "See Genesis 2 3" as opposed to "Genesis 2:3"),
+        as well as for references in Sefaria format.
+        If the language is 'en', it calls the full_regex() function which returns the regex, whereas for 'he' we
+        limit the regex creation to content inside parentheses to limit false positives (e.g. the phrase שבת לא תעשה
+        could be caught by mistake as Shabbat 31).
+        :param title: String
+        :param lang: 'en' or 'he'
+        :param for_js: Boolean (default set to False, optional)
+        :param anchored: Boolean (default set to False, optional)
+        :param capture_title: Boolean (default set to False, optional)
+        :param parentheses: Boolean (default set to False, optional)
+        """
         node = self.get_schema_node(title, lang)
         assert isinstance(node, JaggedArrayNode)  # Assumes that node is a JaggedArrayNode
 
@@ -6210,6 +6265,12 @@ def repl(match):
         return re.sub(fr"{dummy_char}+", repl, dummy_text)
 
     def category_id_dict(self, toc=None, cat_head="", code_head=""):
+        """Returns a dict of unique category ids based on the ToC, with the
+           values being the category IDs.
+            :param toc: ToC object (optional, default is None)
+            :param cat_head: String, (optional, default is "" - an empty string)
+            :param code_head: String, (optional, default is "" - an empty string)
+        """
         if toc is None:
             if not self._category_id_dict:
                 self._category_id_dict = self.category_id_dict(self.get_toc())
@@ -6233,6 +6294,12 @@ def category_id_dict(self, toc=None, cat_head="", code_head=""):
         return d
 
     def simplify_toc(self, lang=None, toc_node=None, path=None):
+        """
+        Simplifies the table of contents (ToC)
+        :param lang: 'en' or 'he', default is None (optional)
+        :param toc_node: ToC Node, default is None (optional)
+        :param path: Node Path, default is None (optional)
+        """
         is_root = toc_node is None and path is None
         toc_node = toc_node if toc_node else self.get_toc()
         path = path if path else []

diff --git a/sefaria/tracker.py b/sefaria/tracker.py
@@ -46,7 +46,7 @@ def modify_bulk_text(user: int, version: model.Version, text_map: dict, vsource=
     """
     user: user ID of user making modification
     version: version object of text being modified
-    text_map: dict with segment ref keys and text values. Each key/value pair represents a segment that should be modified. Segments that don't have changes will be ignored.
+    text_map: dict with segment ref keys and text values. Each key/value pair represents a segment that should be modified. Segments that don't have changes will be ignored. The key should be the tref and the value the text, e.g. {'Mishnah Berakhot 1:1': 'Text of the Mishnah goes here'}
     vsource: optional parameter to set the version source of the version. not sure why this is here. I copied it from modify_text.
     """
     def populate_change_map(old_text, en_tref, he_tref, _):