Width-ai · andrebeu · Feb 14, 2024 · Feb 15, 2024 · Feb 15, 2024 · Feb 15, 2024
diff --git a/api-service/main.py b/api-service/main.py
@@ -38,11 +38,11 @@
     create_df_from_analysis_data,
     combine_all_usages,
 )
-from utils.dynamic_rule_checking import check_rule_modification
 from utils.rule_splitting import (
     split_rule_by_or_operands,
     split_broad_rule_with_instructions,
 )
+from utils.dynamic_rule_checking import validate_modified_rule
 
 
 logger = setup_logger(__name__)
@@ -233,7 +233,7 @@ def rule_rewriting(input_data: RuleInputData) -> JSONResponse:
             specific_actions=input_data.specific_actions,
         )
         response = response.replace("```xml\n", "").replace("\n```", "")
-        response, usages = check_rule_modification(response)
+        response, usages = validate_modified_rule(response)
         all_usages = [usage] + usages
         combined_usage = combine_all_usages(all_usages)
         return JSONResponse(content={"response": response, "usage": combined_usage})
@@ -286,7 +286,7 @@ async def bulk_rule_rewriting(csv_file: UploadFile = File(...)) -> JSONResponse:
                 modified_rule_text = modified_rule_text.replace("```xml\n", "").replace(
                     "\n```", ""
                 )
-                modified_rule_text, usages = check_rule_modification(modified_rule_text)
+                modified_rule_text, usages = validate_modified_rule(modified_rule_text)
                 responses.append(
                     {
                         "original_rule_id": rule_id,
@@ -322,15 +322,17 @@ def rule_rewriting(input_data: RuleInputData) -> JSONResponse:
                 target_or_index=input_data.target_or_index,
             )
         elif input_data.element_action == "toobroad":
+            logger.info("splitting rule that is toobroad")
             new_rules, usage = split_broad_rule_with_instructions(
                 input_data.original_rule_text,
                 input_data.specific_actions[0],
             )
         # validate rules and calculate total usage
         new_rules_verified = []
         all_usages = [usage]
+        logger.info("validating modified rules")
         for r in new_rules:
-            validated_rule, _usage = check_rule_modification(r)
+            validated_rule, _usage = validate_modified_rule(r)
             new_rules_verified.append(validated_rule)
             all_usages.extend(_usage)
         combined_usage = combine_all_usages(all_usages)
@@ -380,7 +382,7 @@ async def create_rule(input_data: CreateRuleInput) -> JSONResponse:
         )
         new_id = "".join(str(random.randint(0, 9)) for _ in range(40))
         response = response.replace("{new_rule_id}", f"BRIEFCATCH_{new_id}")
-        response, usages = check_rule_modification(response)
+        response, usages = validate_modified_rule(response)
         all_usages = [usage] + usages
         combined_usage = combine_all_usages(all_usages)
         return JSONResponse(content={"response": response, "usage": combined_usage})
@@ -443,7 +445,7 @@ async def bulk_rule_creation(csv_file: UploadFile = File(...)) -> JSONResponse:
             new_id = "".join(str(random.randint(0, 9)) for _ in range(40))
             response = response.replace("{new_rule_id}", f"BRIEFCATCH_{new_id}")
             new_rule_name = f"BRIEFCATCH_{record.get('category').upper()}_{record.get('rule_number')}"
-            new_rule_name, usages = check_rule_modification(new_rule_name)
+            new_rule_name, usages = validate_modified_rule(new_rule_name)
             responses.append(
                 {"rule_name": new_rule_name, "rule": response, "usage": usage}
             )

diff --git a/api-service/tmp-wip.ipynb b/api-service/tmp-wip.ipynb
@@ -0,0 +1,110 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "xml = \"\"\"\n",
+    "<rule id=\"BRIEFCATCH_72217380358443072298334619098248039878\" name=\"BRIEFCATCH_PUNCHINESS_921\">\n",
+    "    <pattern>\n",
+    "        <token inflected=\"yes\">call</token>\n",
+    "        <token>upon</token>\n",
+    "    </pattern>\n",
+    "    <message>Would shorter words add punch?|**Example** from Justice Gorsuch: “When **called on** to interpret a statute, this Court generally seeks to discern and apply the ordinary meaning of its terms at the time of their adoption.”|**Example** from Deanne Maynard: “The [order] merely confirms that it was not until later proceedings that he was **called on** to single out these waters.”</message>\n",
+    "    <suggestion><match no=\"1\"/> on</suggestion>\n",
+    "    <suggestion><match no=\"1\" postag=\"(V.*)\" postag_regexp=\"yes\" postag_replace=\"$1\">ask</match></suggestion>\n",
+    "    <short>{\"ruleGroup\":null,\"ruleGroupIdx\":0,\"isConsistency\":false,\"isStyle\":true,\"correctionCount\":2,\"priority\":\"2.84\",\"WORD\":true,\"OUTLOOK\":true}</short>\n",
+    "    <example correction=\"called on|asked\">She was <marker>called upon</marker> three times.</example>\n",
+    "</rule>\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/consult/miniconda3/envs/briefcatch/lib/python3.9/site-packages/pinecone/index.py:4: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from tqdm.autonotebook import tqdm\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "import openai\n",
+    "from dotenv import load_dotenv\n",
+    "\n",
+    "load_dotenv()\n",
+    "openai.api_key = os.getenv(\"OPENAI_API_KEY\")\n",
+    "from utils.example_tag_validation import (\n",
+    "    validate_examples,\n",
+    "    replace_examples,\n",
+    "    generate_corrected_examples,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2024-02-16 13:30:45,764 [INFO] resp='{\\n  \"thought\": \"The provided rule has a pattern that matches the phrase \\'call upon\\' with suggestions to replace it with \\'call on\\' or \\'ask\\'. The example given correctly demonstrates the pattern with \\'called upon\\' being marked and provides appropriate corrections (\\'called on\\' or \\'asked\\'). Therefore, the example tag is correct and does not need to be changed or rewritten. There are no antipatterns provided in this rule, so there\\'s no need to add or check for antipattern examples.\",\\n  \"examples\": [\\n    \"<example correction=\\\\\"called on|asked\\\\\">She was <marker>called upon</marker> three times.</example>\"\\n  ]\\n}'\n"
+     ]
+    }
+   ],
+   "source": [
+    "examples_list, usages = generate_corrected_examples(xml)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "validate_examples(xml)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "briefcatch",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.18"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/api-service/utils/dynamic_rule_checking.py b/api-service/utils/dynamic_rule_checking.py
@@ -11,13 +11,14 @@
     replace_all_instances_of_tag,
 )
 from utils.logger import setup_logger
+from utils.regexp_validation import post_process_xml
 from utils.rule_similarity import get_similar_template_rules
+from utils.example_tag_validation import validate_examples
 from utils.utils import (
     call_gpt_with_backoff,
     generate_simple_message,
     remove_thought_tags,
 )
-from regexp_validation import post_process_xml
 
 
 dynamic_logger = setup_logger(__name__)
@@ -153,6 +154,19 @@ def check_rule_modification(input_rule_xml: str) -> Tuple[str, List[Dict]]:
             response_model_rule_rewrite,
         )
         validated_rule_xml = new_rule_xml
-    # post process
-    validated_rule_xml = post_process_xml(validated_rule_xml)
+
     return validated_rule_xml, usages
+
+
+def validate_modified_rule(xml: str) -> Tuple[str, List[Dict]]:
+    """Run a modified rule through every post-processing stage, in order.
+
+    The stages are regexp post-processing (`post_process_xml`), example-tag
+    validation (`validate_examples`) and the catch-all checker
+    (`check_rule_modification`). Each stage receives the previous stage's
+    output.
+
+    Args:
+        xml: rule XML to validate.
+
+    Returns:
+        The rule XML produced by the last stage, and the usage records
+        reported by the example-tag stage followed by those reported by the
+        catch-all stage.
+    """
+    dynamic_logger.info("post processing: regexp validation")
+    regexp_checked = post_process_xml(xml)
+
+    dynamic_logger.info("post processing: validating example tags")
+    example_checked, example_usages = validate_examples(regexp_checked)
+
+    dynamic_logger.info("post processing: catch all checker")
+    fully_checked, checker_usages = check_rule_modification(example_checked)
+
+    return fully_checked, [*example_usages, *checker_usages]
diff --git a/api-service/utils/example_tag_validation.py b/api-service/utils/example_tag_validation.py
@@ -0,0 +1,138 @@
+import xml.etree.ElementTree as ET
+import json
+import re
+from typing import Dict, List, Tuple
+from utils.logger import setup_logger
+from utils.utils import call_gpt_with_backoff, generate_simple_message
+
+# Module-level logger, named after this module.
+logger = setup_logger(__name__)
+
+# System prompt used by generate_corrected_examples. It describes how <example>
+# tags must line up with a rule's <pattern> and <antipattern> tags, and asks for
+# a JSON reply with two fields: `thought` (free text) and `examples` (an array
+# holding every example tag, whether kept, added or rewritten).
+# NOTE: this text is sent to the model verbatim, so any edit changes behavior.
+SYSTEM_PROMPT = """# Task
+You are a system focused on making sure the XML rule <pattern> and <antipattern> tags match with the <example> tags. 
+
+# Pattern -> Example rules
+(1)The <example> tags that correspond to the <pattern> incorporate the suggestion as a `correction` field and surround the part of the sentence that matches the pattern with <marker>...</marker> tags.
+  (1.a) The <marker> tags **must** surround the full pattern of rule
+(2) There **must** be an example for the pattern
+
+# Antipattern -> Example rules
+(1) The <example> tags that correspond to the <antipattern> do not contain `correction` fields or <marker>...</marker> tags, they just have an example sentence that includes a match for the <antipattern>
+(2) A valid rule has only ONE <example> *per* <antipattern>. If a rule has three antipatterns, it needs three examples to be valid. The 1:1 ratio is crucial.
+
+
+Here are some examples of how <pattern> and <antipattern> tags match <example> tags:
+
+
+# Pattern Matching Examples
+Pattern:
+<pattern>
+    <token inflected="yes">ascertain<exception>ascertaining</exception></token>
+</pattern>
+<suggestion><match no="1" postag="(V.*)" postag_regexp="yes" postag_replace="$1">determine</match></suggestion>
+    <suggestion><match no="1" postag="(V.*)" postag_regexp="yes" postag_replace="$1">learn</match></suggestion>
+    <suggestion><match no="1" postag="(V.*)" postag_regexp="yes" postag_replace="$1">establish</match></suggestion>
+    <suggestion><match no="1" postag="(V.*)" postag_regexp="yes" postag_replace="$1">discover</match></suggestion>
+    <suggestion><match no="1" postag="(V.*)" postag_regexp="yes" postag_replace="$1">find</match> out</suggestion>
+    <suggestion><match no="1" postag="(V.*)" postag_regexp="yes" postag_replace="$1">figure</match> out</suggestion>
+    <suggestion><match no="1" postag="(V.*)" postag_regexp="yes" postag_replace="$1">decide</match></suggestion>
+    <suggestion><match no="1" postag="(V.*)" postag_regexp="yes" postag_replace="$1">arrive</match> at</suggestion>
+    <suggestion><match no="1" postag="(V.*)" postag_regexp="yes" postag_replace="$1">learn</match> of</suggestion>
+
+Matching example:
+<example correction="determined|learned|learnt|established|discovered|found out|figured out|decided|arrived at|learned of|learnt of">She <marker>ascertained</marker> the item's whereabouts.</example>
+
+
+# Antipattern Matching Examples
+--------
+Antipattern:
+<antipattern>
+    <token regexp="yes">can|could|shall|should</token>
+    <token>ascertain</token>
+</antipattern>
+
+Matching example:
+<example>We can ascertain their intent from the examples provided</example>
+--------
+Antipattern:
+<antipattern>
+    <token inflected="yes">ascertain<exception>ascertaining</exception></token>
+    <token>the</token>
+    <token>citizenship</token>
+</antipattern>
+
+Matching example:
+<example>To ascertain the citizenship.</example>
+--------
+
+
+Given the input rule, use the above instructions to validate the example tags. Specifically:
+(a) if an example tag is correct, DO NOT CHANGE IT;
+(b) write any examples that are missing;
+(c) rewrite examples that need to be corrected.
+
+Before making a decision, you should think through what example tags will be needed to be
+(a) kept
+(b) added
+(c) rewritten to ensure this is a valid rule.
+
+You should respond in the following JSON format with the following fields:
+1. `thought`: a string field where you show your thought process around which rules should be kept, added or rewritten;
+2. `examples`: an array field where each item is an example tag. Make sure that the `examples` array contains all example tags, whether they were kept unchanged, added or modified. 
+"""
+
+
+def generate_corrected_examples(xml: str, max_retry: int = 5):
+    """Ask the model for the complete, corrected list of `<example>` tags.
+
+    A message is built from `SYSTEM_PROMPT` and `xml` and sent to the model
+    with a JSON response format. The reply is expected to be a JSON object
+    with an `examples` field. The call is repeated until a usable reply has
+    been parsed or `max_retry` calls have been made.
+
+    Args:
+        xml: rule XML to check.
+        max_retry: maximum number of model calls; no call is made when it is
+            zero or negative.
+
+    Returns:
+        A tuple `(examples, usages)`. `examples` is the parsed `examples`
+        value, or None if no attempt produced one. `usages` holds one usage
+        record per model call made.
+    """
+    examples = None
+    usages = []
+
+    for _ in range(max_retry):
+        message = generate_simple_message(SYSTEM_PROMPT, xml)
+        resp, usage = call_gpt_with_backoff(
+            message,
+            response_format="json_object",
+            model="gpt-4-0125-preview",
+            temperature=0,
+            max_length=1000,
+        )
+        usages.append(usage)
+        logger.info(f"{resp=}")
+
+        try:
+            examples = json.loads(resp)["examples"]
+        except (json.JSONDecodeError, KeyError, TypeError):
+            # JSONDecodeError: the reply is not JSON.
+            # KeyError: valid JSON object, but no `examples` field.
+            # TypeError: top-level JSON value is not an object, or resp is not text.
+            # Each of these is an unusable reply, so retry instead of crashing.
+            logger.error("bad json, retrying...")
+        if examples is not None:
+            break
+    return examples, usages
+
+
+def replace_examples(xml, examples: List):
+    """Replace the rule's `<example>` tags with `examples`.
+
+    `examples` is treated as the complete set of example tags for the rule:
+    every `<example>` element that is a direct child of the root is removed,
+    then each item of `examples` is appended to the root in order. Leaving the
+    old tags in place would duplicate every example that was kept unchanged.
+
+    The rule is parsed and serialised once with ElementTree, so the returned
+    string uses ElementTree's formatting (with `<tag />` rewritten to
+    `<tag/>`), and XML comments in the input are not carried over by the
+    default parser.
+
+    Args:
+        xml: rule XML; must be well-formed.
+        examples: example tags as XML strings, one element per item.
+
+    Returns:
+        The updated rule XML. `xml` is returned untouched when `examples` is
+        empty or None.
+
+    Raises:
+        xml.etree.ElementTree.ParseError: `xml` or an item is not well-formed.
+        IndexError: an item contains no element.
+    """
+    if not examples:
+        return xml
+
+    root = ET.fromstring(xml)
+    # findall returns a list, so removing while looping over it is safe
+    for stale in root.findall("example"):
+        root.remove(stale)
+
+    for example in examples:
+        # wrap the fragment so it parses on its own; the trailing newline
+        # becomes the element's tail and keeps one example per line
+        wrapper = ET.fromstring(f"<root>{example}\n</root>")
+        root.append(wrapper[0])
+
+    # ElementTree writes empty elements as `<tag />`; the rules use `<tag/>`
+    return ET.tostring(root, encoding="unicode").replace(" />", "/>")
+
+
+def append_example(xml_string, example_xml_string):
+    """Append one example element as the last child of the rule's root.
+
+    Args:
+        xml_string: rule XML; must be well-formed.
+        example_xml_string: XML fragment whose first element is appended.
+
+    Returns:
+        The rule re-serialised by ElementTree with the new element at the end
+        of the root and `<tag />` rewritten to `<tag/>`.
+    """
+    rule = ET.fromstring(xml_string)
+    # The fragment is wrapped in a throwaway root so it parses on its own; the
+    # newline before the closing tag becomes the appended element's tail.
+    wrapper = ET.fromstring(f"<root>{example_xml_string}\n</root>")
+    rule.append(wrapper[0])
+    serialized = ET.tostring(rule, encoding="unicode")
+    # ElementTree writes empty elements as `<tag />`; drop the extra space
+    return serialized.replace(" />", "/>")
+
+
+def validate_examples(xml: str, max_retry: int = 5) -> Tuple[str, List[Dict]]:
+    """Check a rule's example tags with the model and apply what it returns.
+
+    Args:
+        xml: rule XML to check.
+        max_retry: forwarded to `generate_corrected_examples`.
+
+    Returns:
+        The rule XML (the input itself when no examples came back, otherwise
+        the result of `replace_examples`) and the usage records from
+        `generate_corrected_examples`.
+    """
+    examples, usages = generate_corrected_examples(xml, max_retry)
+    if not examples:
+        # None or empty: nothing to apply, hand the rule back as received
+        return xml, usages
+    logger.info("example tag replacements required")
+    return replace_examples(xml, examples), usages
diff --git a/api-service/utils/utils.py b/api-service/utils/utils.py
@@ -59,12 +59,17 @@ def call_gpt_with_backoff(
     model: str = "gpt-4",
     temperature: float = 0.7,
     max_length: int = 256,
+    **kwargs,
 ) -> Tuple[str, Dict]:
     """
     Generic function to call GPT4 with specified messages
     """
     return call_gpt(
-        model=model, messages=messages, temperature=temperature, max_length=max_length
+        model=model,
+        messages=messages,
+        temperature=temperature,
+        max_length=max_length,
+        **kwargs,
     )