Skip to content

Commit

Permalink
Closes #1333: Integrate mining algorithm for RISIS RI
Browse files Browse the repository at this point in the history
Introducing a RISIS-related test case in the community mining integration test suite.

This squashed commit includes the following set of projects.sql madis script related changes:
* add risis generic mining and services
* risis to lower case
  • Loading branch information
marekhorst committed Feb 29, 2024
1 parent b0c55cf commit 68fdb7a
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,37 @@ select docid, conceptId, conceptLabel, stripchars(middle,'.)(,[]') as middle, pr
from (
setschema 'docid,prev,middle,next' select c1, textwindow2s(keywords(filterstopwords(c2)),7,1,3, '\bDARIAH') from pubs where c2 is not null
), grants where conceptLabel="DARIAH EU" and (not regexprmatches("edariah",lower(middle)) and not regexprmatches("riyadh",lower(context)) )
) group by docid;
) group by docid

union all

-- RISIS
select jdict('documentId', id, 'conceptId', 'risis', 'confidenceLevel', 0.8, 'textsnippet', prev||" <<< "||middle||" >>> "||next) from
(
select * from
(
-- cortext
select id, "CORTEXT" as ma, prev, middle, next from (setschema 'id,text,prev,middle,next' select id, text, textwindow2s(lower(text), 10,1,10, "(?:\b|\W)cortext(?:\b|\d)") from (setschema 'id,text' select c1,c2 from pubs)) where
regexprmatches("cortext\.net|cortext\.org|www\.cortext\.|risis|ifris|text analysis|text mining|software|platform|plateforme|cortext manager|analysis|mining|nltk|github\.com\/cortext\/|corpus|\blisis\b|\b\inrae\b", prev||" "||middle||" "||next)
or regexprmatches("\bRISIS\b|\bINRAE\b|CorTexT|\bLISIS\b",text)

-- gate
union all

select id, "GATE" as ma, prev, middle, next from (setschema 'id,prev,middle,next' select id, textwindow2s(text, 10,1,10, "\bGATE(?:\b|\d)|gatecloud|gate\.ac\.uk") from (setschema 'id,text' select c1,c2 from pubs))
where regexprmatches("text mining|gatecloud|gate\.ac\.uk|\buima\b|classifier|semantic|\bnlp\b|text engineering|natural language|language engineering|information extraction|text analytics|cunningham|text process|architecture text|maynard|tablan|bontcheva|gate framework|tokenizer|tokeniser|sheffield|text annotation|language processing|\bnltk\b|treetagger|\byatea\b", lower(prev||" "||middle||" "||next))


union all

select id, upper(regexpr("(orgreg|firmreg)",middle)) as ma, prev, middle, next from (setschema 'id,text,prev,middle,next' select id, text, textwindow2s(lower(text), 10,1,10, "\borgreg\b|\bfirmreg\b") from (setschema 'id,text' select c1,c2 from pubs))
where regexprmatches("\brisis\b", prev||" "||middle||" "||next) or regexprmatches("\bRISIS\b", text)



union all

select id, "risis" as ma, prev, middle, next from (setschema 'id,prev,middle,next' select id, textwindow2s(text, 10,1,10, "\bRISIS\b|\bRISIS1\b|\bRISIS2\b|\brisis\.eu\b") from (setschema 'id,text' select c1,c2 from pubs))
where (regexprmatches("recherche|patent|grant|support|acknowledge|innovation|research", prev||" "||middle||" "||next) and not regexprmatches("risis\.eu",lower(middle)) )
or regexprmatches("risis\.eu",lower(middle))
) group by id) ;
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,11 @@
{"name": "suggestedAcknowledgement", "value":""}
]
}
{
"id": "risis",
"label": "RISIS",
"params": []
}



Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@
{"text":"Acknowledgments Marc Ruff team was supported by grant from Instruct, part of the European Strategy Forum on Research Infrastructure (ESFRI) supported by national members subscription. ","id":"50|dedup_wf_001::ed8e5e23ebff00fd270be8e10259c252"}
{"text":"Liaison with the DARIAH research infrastructure was institutionalized at the start of the project by the arrangement for the University of Oxford to act as the official liaison partner","id":"50|belief______::b1d27bbd867754230abfb55238e1e627"}
{"text":"Acknowledgements This work was supported by ANR programs Investissements d'Avenir: MetaboHub, EMBRC-France, France Génomique, Institut Français de Bioinformatique.","id":"50|dedup_wf_001::f6e0a74009879de5c13103f1e4b99827"}

{"text":"We acknowledge support of this work by the project “INSPIRED” (MIS 5002550), which is implemented under the Action Reinforcement of the Research and Innovation Infrastructure","id":"50|pmid________::1d7ef02818bae06e15ab18bc6a986ca7"}
{"text":"We acknowledge support of this work by the project “INSPIRED” (MIS 5002550), which is implemented under the Action Reinforcement of the Research and Innovation Infrastructure","id":"50|pmid________::1d7ef02818bae06e15ab18bc6a986ca7"}
{"text":"Acknowledgements This work was supported by RISIS2 (Research Infrastructure for and Innovation Policy Studies)","id":"50|doi_________::38502b081c386672f71750c99160e836"}
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,9 @@
"confidenceLevel": 0.5,
"textsnippet": "acknowledge support work project INSPIRED MIS 5002550 implemented Action Reinforcement"
}

{
"documentId": "50|doi_________::38502b081c386672f71750c99160e836",
"conceptId": "risis",
"confidenceLevel": 0.8,
"textsnippet": "Acknowledgements This work was supported by <<< RISIS2 >>> (Research Infrastructure for and Innovation Policy Studies)"
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
{"key":"processing.referenceExtraction.community.references.byrootid.embrc", "type":"COUNTER", "value": "1"}
{"key":"processing.referenceExtraction.community.references.byrootid.eric", "type":"COUNTER", "value": "1"}
{"key":"processing.referenceExtraction.community.references.byrootid.fli", "type":"COUNTER", "value": "1"}
{"key":"processing.referenceExtraction.community.references.byrootid.inspired-ris", "type":"COUNTER", "value": "1"}
{"key":"processing.referenceExtraction.community.references.byrootid.inspired-ris", "type":"COUNTER", "value": "1"}
{"key":"processing.referenceExtraction.community.references.byrootid.risis", "type":"COUNTER", "value": "1"}

0 comments on commit 68fdb7a

Please sign in to comment.