Implement CopyCohort (closes #25)

GrammarSoft · Aug 8, 2024 · 006e004 · 006e004
1 parent 1c8c32a
commit 006e004
Show file tree

Hide file tree

Showing 9 changed files with 297 additions and 18 deletions.
diff --git a/manual/rules.xml b/manual/rules.xml
@@ -57,6 +57,10 @@
   Cohort manipulation:
       ADDCOHORT &lt;cohort tags&gt; BEFORE|AFTER [WITHCHILD &lt;child_set&gt;|NOCHILD]
           &lt;target&gt; [contextual_tests] ;
+      COPYCOHORT &lt;added tags&gt; [EXCEPT &lt;removed tags&gt;] &lt;target&gt; [contextual_tests]
+          TO [BEFORE|AFTER] [WITHCHILD &lt;child_set&gt;|NOCHILD] &lt;contextual targets&gt; ;
+      COPYCOHORT &lt;added tags&gt; [EXCEPT &lt;removed tags&gt;] [BEFORE|AFTER] [WITHCHILD &lt;child_set&gt;|NOCHILD]
+          &lt;target&gt; [contextual_tests] FROM &lt;contextual targets&gt; ;
       REMCOHORT &lt;target&gt; [contextual_tests] ;
       SPLITCOHORT &lt;cohort recipe&gt; &lt;target&gt; [contextual_tests] ;
       MERGECOHORTS &lt;cohort recipe&gt; &lt;target&gt; [contextual_tests] WITH &lt;contextual targets&gt; ;
@@ -183,6 +187,37 @@
     </screen>
   </section>
 
+  <section id="copycohort">
+    <title>COPYCOHORT</title>
+    <indexterm>
+      <primary>COPYCOHORT</primary>
+    </indexterm>
+    <screen>
+      [wordform] COPYCOHORT &lt;added tags&gt; [EXCEPT &lt;removed tags&gt;] &lt;target&gt; [contextual_tests]
+          TO [BEFORE|AFTER] [WITHCHILD &lt;child_set&gt;|NOCHILD] &lt;contextual targets&gt; ;
+      [wordform] COPYCOHORT &lt;added tags&gt; [EXCEPT &lt;removed tags&gt;] [BEFORE|AFTER] [WITHCHILD &lt;child_set&gt;|NOCHILD]
+          &lt;target&gt; [contextual_tests] FROM &lt;contextual targets&gt; ;
+    </screen>
+    <para>
+      Copies the current cohort to before or after the contextual target (the TO form),
+      or copies a contextual target cohort to before or after the current cohort (the FROM form).
+      The added tags are not optional, but you can specify * to not actually add any tags.
+    </para>
+    <para>
+      The newly added cohort will be dependency-attached to the nearest cohort towards the target.
+      Relations are not currently copied.
+    </para>
+    <para>
+      WITHCHILD uses the children of the cohort you're targeting as edges so you can avoid creating cohorts
+      in the middle of another dependency group.
+      If you specify WITHCHILD, you need to provide a set that the children you want to include must match.
+      The (*) set will match all children.
+    </para>
+    <screen>
+      CopyCohort (copied) Except (snip) Before WithChild (*) (target) From (1* (from) - (copied)) ;
+    </screen>
+  </section>
+
   <section id="remcohort">
     <title>REMCOHORT</title>
     <indexterm>
@@ -232,7 +267,7 @@
         "&lt;$1&gt;"v "$1"v tags * tags c->2
         # inherit named relations with R:*, and inherit dependency parents with 2->p
         "&lt;$2&gt;"v "$2"v tags go here R:* 2->p
-        ) ("&lt;([^-]+)-([^-]+)&gt;" other tags) (1* (context)) ;
+        ) ("&lt;([^-]+)-([^-]+)&gt;"r other tags) (1* (context)) ;
     </screen>
   </section>
 

diff --git a/src/GrammarApplicator_runRules.cpp b/src/GrammarApplicator_runRules.cpp
@@ -2126,6 +2126,169 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
 
 				reset_cohorts_for_loop = true;
 			}
+			else if (rule->type == K_COPYCOHORT) {
+				Cohort* attach = nullptr;
+				Cohort* cohort = context_stack.back().target.cohort;
+				uint32_t c = cohort->local_number;
+				dep_deep_seen.clear();
+				tmpl_cntx.clear();
+				context_stack.back().attach_to.cohort = nullptr;
+				context_stack.back().attach_to.reading = nullptr;
+				context_stack.back().attach_to.subreading = nullptr;
+				if (runContextualTest(&current, c, rule->dep_target, &attach) && attach) {
+					profileRuleContext(true, rule, rule->dep_target);
+
+					if (get_attach_to().cohort) {
+						attach = get_attach_to().cohort;
+					}
+					context_target = attach;
+					bool good = true;
+					for (auto it : rule->dep_tests) {
+						context_stack.back().mark = attach;
+						dep_deep_seen.clear();
+						tmpl_cntx.clear();
+						bool test_good = (runContextualTest(attach->parent, attach->local_number, it) != nullptr);
+
+						profileRuleContext(test_good, rule, it);
+
+						if (!test_good) {
+							good = test_good;
+							break;
+						}
+					}
+
+					if (!good || cohort == attach || cohort->local_number == 0) {
+						return;
+					}
+
+					auto childset = rule->childset2;
+					if (rule->flags & RF_REVERSE) {
+						std::swap(cohort, attach);
+						childset = rule->childset1;
+					}
+
+					Cohort* cCohort = alloc_cohort(attach->parent);
+					cCohort->global_number = gWindow->cohort_counter++;
+					cCohort->wordform = cohort->wordform;
+					insert_if_exists(cCohort->possible_sets, grammar->sets_any);
+
+					auto theTags = ss_taglist.get();
+					getTagList(*rule->maplist, theTags);
+
+					for (auto& tter : *theTags) {
+						if (tter->type & T_VSTR) {
+							VARSTRINGIFY(tter);
+						}
+					}
+
+					auto excepts = ss_taglist.get();
+					if (rule->sublist) {
+						getTagList(*rule->sublist, excepts);
+						FILL_TAG_LIST_RAW(excepts);
+					}
+
+					std::vector<Reading*> rs;
+					for (auto r : cohort->readings) {
+						rs.clear();
+						for (; r; r = r->next) {
+							auto cReading = alloc_reading(cCohort);
+							++numReadings;
+							cReading->hit_by.push_back(rule->number);
+							cReading->noprint = false;
+							TagList mappings;
+							for (auto hash : r->tags_list) {
+								auto tter = grammar->single_tags[hash];
+								if (tter->type & T_MAPPING || tter->tag[0] == grammar->mapping_prefix) {
+									mappings.push_back(tter);
+								}
+								else {
+									hash = addTagToReading(*cReading, hash);
+								}
+								if (updateValidRules(rules, intersects, hash, *cReading)) {
+									iter_rules = intersects.find(rule->number);
+									iter_rules_end = intersects.end();
+								}
+							}
+							for (auto tter : *theTags) {
+								auto hash = tter->hash;
+								if (hash == grammar->tag_any) {
+									continue;
+								}
+								if (tter->type & T_MAPPING || tter->tag[0] == grammar->mapping_prefix) {
+									mappings.push_back(tter);
+								}
+								else {
+									hash = addTagToReading(*cReading, hash);
+								}
+								if (updateValidRules(rules, intersects, hash, *cReading)) {
+									iter_rules = intersects.find(rule->number);
+									iter_rules_end = intersects.end();
+								}
+							}
+							if (!mappings.empty()) {
+								splitMappings(mappings, *cCohort, *cReading);
+							}
+							rs.push_back(cReading);
+						}
+						auto rn = rs.front();
+						for (size_t j = 1; j < rs.size(); ++j) {
+							rn->next = rs[j];
+							rn = rn->next;
+						}
+						cCohort->appendReading(rs.front());
+					}
+
+					if (cCohort->readings.empty()) {
+						initEmptyCohort(*cCohort);
+						if (trace) {
+							auto r = cCohort->readings.front();
+							r->hit_by.push_back(rule->number);
+							r->noprint = false;
+						}
+					}
+
+					for (auto r : cCohort->readings) {
+						for (; r; r = r->next) {
+							for (auto tter : *excepts) {
+								delTagFromReading(*r, tter);
+							}
+						}
+					}
+
+					if (cohort->wread) {
+						cCohort->wread = alloc_reading(cCohort);
+						for (auto hash : cohort->wread->tags_list) {
+							hash = addTagToReading(*cCohort->wread, hash);
+							if (updateValidRules(rules, intersects, hash, *cCohort->wread)) {
+								iter_rules = intersects.find(rule->number);
+								iter_rules_end = intersects.end();
+							}
+						}
+					}
+
+					current.parent->cohort_map[cCohort->global_number] = cCohort;
+					current.parent->dep_window[cCohort->global_number] = cCohort;
+
+					CohortSet edges;
+					collect_subtree(edges, attach, childset);
+
+					if (rule->flags & RF_BEFORE) {
+						current.cohorts.insert(current.cohorts.begin() + edges.front()->local_number, cCohort);
+						current.all_cohorts.insert(std::find(current.all_cohorts.begin() + edges.front()->local_number, current.all_cohorts.end(), edges.front()), cCohort);
+						attachParentChild(*edges.front(), *cCohort);
+					}
+					else {
+						current.cohorts.insert(current.cohorts.begin() + edges.back()->local_number + 1, cCohort);
+						current.all_cohorts.insert(std::find(current.all_cohorts.begin() + edges.back()->local_number, current.all_cohorts.end(), edges.back()) + 1, cCohort);
+						attachParentChild(*edges.back(), *cCohort);
+					}
+
+					foreach(iter, current.cohorts) {
+						(*iter)->local_number = UI32(std::distance(current.cohorts.begin(), iter));
+					}
+					gWindow->rebuildCohortLinks();
+				}
+			}
 			else if (rule->type == K_SETPARENT || rule->type == K_SETCHILD || rule->type == K_ADDRELATION || rule->type == K_SETRELATION || rule->type == K_REMRELATION || rule->type == K_ADDRELATIONS || rule->type == K_SETRELATIONS || rule->type == K_REMRELATIONS) {
 				auto dep_target_cb = [&]() -> bool {
 					Cohort* target = context_stack.back().target.cohort;

diff --git a/src/GrammarWriter.cpp b/src/GrammarWriter.cpp
@@ -288,7 +288,7 @@ void GrammarWriter::printRule(std::ostream& to, const Rule& rule) {
 	u_fprintf(to, " ");
 
 	for (uint32_t i = 0; i < FLAGS_COUNT; i++) {
-		if (i == FL_BEFORE || i == FL_AFTER) {
+		if (i == FL_BEFORE || i == FL_AFTER || i == FL_WITHCHILD) {
 			continue;
 		}
 		if (rule.flags & (1ull << i)) {
@@ -302,7 +302,7 @@ void GrammarWriter::printRule(std::ostream& to, const Rule& rule) {
 	}
 
 	if (rule.flags & RF_WITHCHILD) {
-		u_fprintf(to, "%S ", grammar->sets_list[rule.childset1]->name.data());
+		u_fprintf(to, "WITHCHILD %S ", grammar->sets_list[rule.childset1]->name.data());
 	}
 
 	if (rule.type == K_SUBSTITUTE || rule.type == K_EXECUTE) {
@@ -313,21 +313,24 @@ void GrammarWriter::printRule(std::ostream& to, const Rule& rule) {
 		u_fprintf(to, "%S ", rule.maplist->name.data());
 	}
 
-	if (rule.sublist && (rule.type == K_ADDRELATIONS || rule.type == K_SETRELATIONS || rule.type == K_REMRELATIONS || rule.type == K_SETVARIABLE || rule.type == K_COPY)) {
-		if (rule.type == K_COPY) {
+	if (rule.sublist && (rule.type == K_ADDRELATIONS || rule.type == K_SETRELATIONS || rule.type == K_REMRELATIONS || rule.type == K_SETVARIABLE || rule.type == K_COPY || rule.type == K_COPYCOHORT)) {
+		if (rule.type == K_COPY || rule.type == K_COPYCOHORT) {
 			u_fprintf(to, "EXCEPT ");
 		}
 		u_fprintf(to, "%S ", rule.sublist->name.data());
 	}
 
-	if (rule.type == K_ADD || rule.type == K_MAP || rule.type == K_SUBSTITUTE || rule.type == K_COPY) {
+	if (rule.type == K_ADD || rule.type == K_MAP || rule.type == K_SUBSTITUTE || rule.type == K_COPY || rule.type == K_COPYCOHORT) {
 		if (rule.flags & RF_BEFORE) {
 			u_fprintf(to, "BEFORE ");
 		}
 		if (rule.flags & RF_AFTER) {
 			u_fprintf(to, "AFTER ");
 		}
 		if (rule.childset1) {
+			if (rule.type == K_COPYCOHORT) {
+				u_fprintf(to, "WITHCHILD ");
+			}
 			u_fprintf(to, "%S ", grammar->sets_list[rule.childset1]->name.data());
 		}
 	}
@@ -349,7 +352,7 @@ void GrammarWriter::printRule(std::ostream& to, const Rule& rule) {
 		u_fprintf(to, ") ");
 	}
 
-	if (rule.type == K_SETPARENT || rule.type == K_SETCHILD || rule.type == K_ADDRELATIONS || rule.type == K_ADDRELATION || rule.type == K_SETRELATIONS || rule.type == K_SETRELATION || rule.type == K_REMRELATIONS || rule.type == K_REMRELATION) {
+	if (rule.type == K_SETPARENT || rule.type == K_SETCHILD || rule.type == K_ADDRELATIONS || rule.type == K_ADDRELATION || rule.type == K_SETRELATIONS || rule.type == K_SETRELATION || rule.type == K_REMRELATIONS || rule.type == K_REMRELATION || rule.type == K_COPYCOHORT) {
 		u_fprintf(to, "TO ");
 	}
 	else if (rule.type == K_MOVE_AFTER) {

diff --git a/src/Strings.hpp b/src/Strings.hpp
@@ -95,6 +95,7 @@ enum KEYWORDS : uint32_t {
 	K_OSET,
 	K_CMDARGS,
 	K_CMDARGS_OVERRIDE,
+	K_COPYCOHORT,
 	KEYWORD_COUNT,
 };
 
@@ -257,6 +258,7 @@ constexpr UStringView keywords[KEYWORD_COUNT] = {
 	u"OSET",
 	u"CMDARGS",
 	u"CMDARGS-OVERRIDE",
+	u"COPYCOHORT",
 };
 
 constexpr UStringView stringbits[] = {