Skip to content

Commit

Permalink
Implement CopyCohort (closes #25)
Browse files Browse the repository at this point in the history
  • Loading branch information
TinoDidriksen committed Aug 8, 2024
1 parent 1c8c32a commit 006e004
Show file tree
Hide file tree
Showing 9 changed files with 297 additions and 18 deletions.
37 changes: 36 additions & 1 deletion manual/rules.xml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@
Cohort manipulation:
ADDCOHORT <cohort tags> BEFORE|AFTER [WITHCHILD <child_set>|NOCHILD]
<target> [contextual_tests] ;
COPYCOHORT <added tags> [EXCEPT <removed tags>] <target> [contextual_tests]
TO [BEFORE|AFTER] [WITHCHILD <child_set>|NOCHILD] <contextual targets> ;
COPYCOHORT <added tags> [EXCEPT <removed tags>] [BEFORE|AFTER] [WITHCHILD <child_set>|NOCHILD]
<target> [contextual_tests] FROM <contextual targets> ;
REMCOHORT <target> [contextual_tests] ;
SPLITCOHORT <cohort recipe> <target> [contextual_tests] ;
MERGECOHORTS <cohort recipe> <target> [contextual_tests] WITH <contextual targets> ;
Expand Down Expand Up @@ -183,6 +187,37 @@
</screen>
</section>

<section id="copycohort">
<title>COPYCOHORT</title>
<indexterm>
<primary>COPYCOHORT</primary>
</indexterm>
<screen>
[wordform] COPYCOHORT &lt;added tags&gt; [EXCEPT &lt;removed tags&gt;] &lt;target&gt; [contextual_tests]
TO [BEFORE|AFTER] [WITHCHILD &lt;child_set&gt;|NOCHILD] &lt;contextual targets&gt; ;
[wordform] COPYCOHORT &lt;added tags&gt; [EXCEPT &lt;removed tags&gt;] [BEFORE|AFTER] [WITHCHILD &lt;child_set&gt;|NOCHILD]
&lt;target&gt; [contextual_tests] FROM &lt;contextual targets&gt; ;
</screen>
<para>
Copies the current cohort to before or after the contextual target,
or copies a contextual target cohort to before or after the current cohort.
The added tags are not optional, but you can specify * to not actually add any tags.
</para>
<para>
The newly added cohort will be dependency-attached to the nearest cohort towards the target.
Relations are not currently copied.
</para>
<para>
WITHCHILD uses the children of the cohort you're targeting as edges so you can avoid creating cohorts
in the middle of another dependency group.
If you specify WITHCHILD, you must provide a set that the children you want to include must match.
The (*) set will match all children.
</para>
<screen>
CopyCohort (copied) Except (snip) Before WithChild (*) (target) From (1* (from) - (copied)) ;
</screen>
</section>

<section id="remcohort">
<title>REMCOHORT</title>
<indexterm>
Expand Down Expand Up @@ -232,7 +267,7 @@
"&lt;$1&gt;"v "$1"v tags * tags c->2
# inherit named relations with R:*, and inherit dependency parents with 2->p
"&lt;$2&gt;"v "$2"v tags go here R:* 2->p
) ("&lt;([^-]+)-([^-]+)&gt;" other tags) (1* (context)) ;
) ("&lt;([^-]+)-([^-]+)&gt;"r other tags) (1* (context)) ;
</screen>
</section>

Expand Down
163 changes: 163 additions & 0 deletions src/GrammarApplicator_runRules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2126,6 +2126,169 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const

reset_cohorts_for_loop = true;
}
else if (rule->type == K_COPYCOHORT) {
Cohort* attach = nullptr;
Cohort* cohort = context_stack.back().target.cohort;
uint32_t c = cohort->local_number;
dep_deep_seen.clear();
tmpl_cntx.clear();
context_stack.back().attach_to.cohort = nullptr;
context_stack.back().attach_to.reading = nullptr;
context_stack.back().attach_to.subreading = nullptr;
if (runContextualTest(&current, c, rule->dep_target, &attach) && attach) {
profileRuleContext(true, rule, rule->dep_target);

if (get_attach_to().cohort) {
attach = get_attach_to().cohort;
}
context_target = attach;
bool good = true;
for (auto it : rule->dep_tests) {
context_stack.back().mark = attach;
dep_deep_seen.clear();
tmpl_cntx.clear();
bool test_good = (runContextualTest(attach->parent, attach->local_number, it) != nullptr);

profileRuleContext(test_good, rule, it);

if (!test_good) {
good = test_good;
break;
}
}

if (!good || cohort == attach || cohort->local_number == 0) {
return;
}

auto childset = rule->childset2;
if (rule->flags & RF_REVERSE) {
std::swap(cohort, attach);
childset = rule->childset1;
}

Cohort* cCohort = alloc_cohort(attach->parent);
cCohort->global_number = gWindow->cohort_counter++;
cCohort->wordform = cohort->wordform;
insert_if_exists(cCohort->possible_sets, grammar->sets_any);

auto theTags = ss_taglist.get();
getTagList(*rule->maplist, theTags);

for (auto& tter : *theTags) {
if (tter->type & T_VSTR) {
VARSTRINGIFY(tter);
}
}

auto excepts = ss_taglist.get();
if (rule->sublist) {
getTagList(*rule->sublist, excepts);
FILL_TAG_LIST_RAW(excepts);
}

std::vector<Reading*> rs;
for (auto r : cohort->readings) {
rs.clear();
for (; r; r = r->next) {
auto cReading = alloc_reading(cCohort);
++numReadings;
cReading->hit_by.push_back(rule->number);
cReading->noprint = false;
TagList mappings;
for (auto hash : r->tags_list) {
auto tter = grammar->single_tags[hash];
if (tter->type & T_MAPPING || tter->tag[0] == grammar->mapping_prefix) {
mappings.push_back(tter);
}
else {
hash = addTagToReading(*cReading, hash);
}
if (updateValidRules(rules, intersects, hash, *cReading)) {
iter_rules = intersects.find(rule->number);
iter_rules_end = intersects.end();
}
}
for (auto tter : *theTags) {
auto hash = tter->hash;
if (hash == grammar->tag_any) {
continue;
}
if (tter->type & T_MAPPING || tter->tag[0] == grammar->mapping_prefix) {
mappings.push_back(tter);
}
else {
hash = addTagToReading(*cReading, hash);
}
if (updateValidRules(rules, intersects, hash, *cReading)) {
iter_rules = intersects.find(rule->number);
iter_rules_end = intersects.end();
}
}
if (!mappings.empty()) {
splitMappings(mappings, *cCohort, *cReading);
}
rs.push_back(cReading);
}
auto rn = rs.front();
for (size_t j = 1; j < rs.size(); ++j) {
rn->next = rs[j];
rn = rn->next;
}
cCohort->appendReading(rs.front());
}

if (cCohort->readings.empty()) {
initEmptyCohort(*cCohort);
if (trace) {
auto r = cCohort->readings.front();
r->hit_by.push_back(rule->number);
r->noprint = false;
}
}

for (auto r : cCohort->readings) {
for (; r; r = r->next) {
for (auto tter : *excepts) {
delTagFromReading(*r, tter);
}
}
}

if (cohort->wread) {
cCohort->wread = alloc_reading(cCohort);
for (auto hash : cohort->wread->tags_list) {
hash = addTagToReading(*cCohort->wread, hash);
if (updateValidRules(rules, intersects, hash, *cCohort->wread)) {
iter_rules = intersects.find(rule->number);
iter_rules_end = intersects.end();
}
}
}

current.parent->cohort_map[cCohort->global_number] = cCohort;
current.parent->dep_window[cCohort->global_number] = cCohort;

CohortSet edges;
collect_subtree(edges, attach, childset);

if (rule->flags & RF_BEFORE) {
current.cohorts.insert(current.cohorts.begin() + edges.front()->local_number, cCohort);
current.all_cohorts.insert(std::find(current.all_cohorts.begin() + edges.front()->local_number, current.all_cohorts.end(), edges.front()), cCohort);
attachParentChild(*edges.front(), *cCohort);
}
else {
current.cohorts.insert(current.cohorts.begin() + edges.back()->local_number + 1, cCohort);
current.all_cohorts.insert(std::find(current.all_cohorts.begin() + edges.back()->local_number, current.all_cohorts.end(), edges.back()) + 1, cCohort);
attachParentChild(*edges.back(), *cCohort);
}

foreach(iter, current.cohorts) {
(*iter)->local_number = UI32(std::distance(current.cohorts.begin(), iter));
}
gWindow->rebuildCohortLinks();
}
}
else if (rule->type == K_SETPARENT || rule->type == K_SETCHILD || rule->type == K_ADDRELATION || rule->type == K_SETRELATION || rule->type == K_REMRELATION || rule->type == K_ADDRELATIONS || rule->type == K_SETRELATIONS || rule->type == K_REMRELATIONS) {
auto dep_target_cb = [&]() -> bool {
Cohort* target = context_stack.back().target.cohort;
Expand Down
15 changes: 9 additions & 6 deletions src/GrammarWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ void GrammarWriter::printRule(std::ostream& to, const Rule& rule) {
u_fprintf(to, " ");

for (uint32_t i = 0; i < FLAGS_COUNT; i++) {
if (i == FL_BEFORE || i == FL_AFTER) {
if (i == FL_BEFORE || i == FL_AFTER || i == FL_WITHCHILD) {
continue;
}
if (rule.flags & (1ull << i)) {
Expand All @@ -302,7 +302,7 @@ void GrammarWriter::printRule(std::ostream& to, const Rule& rule) {
}

if (rule.flags & RF_WITHCHILD) {
u_fprintf(to, "%S ", grammar->sets_list[rule.childset1]->name.data());
u_fprintf(to, "WITHCHILD %S ", grammar->sets_list[rule.childset1]->name.data());
}

if (rule.type == K_SUBSTITUTE || rule.type == K_EXECUTE) {
Expand All @@ -313,21 +313,24 @@ void GrammarWriter::printRule(std::ostream& to, const Rule& rule) {
u_fprintf(to, "%S ", rule.maplist->name.data());
}

if (rule.sublist && (rule.type == K_ADDRELATIONS || rule.type == K_SETRELATIONS || rule.type == K_REMRELATIONS || rule.type == K_SETVARIABLE || rule.type == K_COPY)) {
if (rule.type == K_COPY) {
if (rule.sublist && (rule.type == K_ADDRELATIONS || rule.type == K_SETRELATIONS || rule.type == K_REMRELATIONS || rule.type == K_SETVARIABLE || rule.type == K_COPY || rule.type == K_COPYCOHORT)) {
if (rule.type == K_COPY || rule.type == K_COPYCOHORT) {
u_fprintf(to, "EXCEPT ");
}
u_fprintf(to, "%S ", rule.sublist->name.data());
}

if (rule.type == K_ADD || rule.type == K_MAP || rule.type == K_SUBSTITUTE || rule.type == K_COPY) {
if (rule.type == K_ADD || rule.type == K_MAP || rule.type == K_SUBSTITUTE || rule.type == K_COPY || rule.type == K_COPYCOHORT) {
if (rule.flags & RF_BEFORE) {
u_fprintf(to, "BEFORE ");
}
if (rule.flags & RF_AFTER) {
u_fprintf(to, "AFTER ");
}
if (rule.childset1) {
if (rule.type == K_COPYCOHORT) {
u_fprintf(to, "WITHCHILD ");
}
u_fprintf(to, "%S ", grammar->sets_list[rule.childset1]->name.data());
}
}
Expand All @@ -349,7 +352,7 @@ void GrammarWriter::printRule(std::ostream& to, const Rule& rule) {
u_fprintf(to, ") ");
}

if (rule.type == K_SETPARENT || rule.type == K_SETCHILD || rule.type == K_ADDRELATIONS || rule.type == K_ADDRELATION || rule.type == K_SETRELATIONS || rule.type == K_SETRELATION || rule.type == K_REMRELATIONS || rule.type == K_REMRELATION) {
if (rule.type == K_SETPARENT || rule.type == K_SETCHILD || rule.type == K_ADDRELATIONS || rule.type == K_ADDRELATION || rule.type == K_SETRELATIONS || rule.type == K_SETRELATION || rule.type == K_REMRELATIONS || rule.type == K_REMRELATION || rule.type == K_COPYCOHORT) {
u_fprintf(to, "TO ");
}
else if (rule.type == K_MOVE_AFTER) {
Expand Down
2 changes: 2 additions & 0 deletions src/Strings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ enum KEYWORDS : uint32_t {
K_OSET,
K_CMDARGS,
K_CMDARGS_OVERRIDE,
K_COPYCOHORT,
KEYWORD_COUNT,
};

Expand Down Expand Up @@ -257,6 +258,7 @@ constexpr UStringView keywords[KEYWORD_COUNT] = {
u"OSET",
u"CMDARGS",
u"CMDARGS-OVERRIDE",
u"COPYCOHORT",
};

constexpr UStringView stringbits[] = {
Expand Down
Loading

0 comments on commit 006e004

Please sign in to comment.