CogStack · mart-r · Aug 12, 2024 · Aug 12, 2024 · Aug 12, 2024 · Aug 12, 2024
diff --git a/install_requires.txt b/install_requires.txt
@@ -19,6 +19,6 @@
 'xxhash>=3.0.0' # allow later versions, tested with 3.1.0
 'blis>=0.7.5,<1.0.0' # allow later versions, tested with 0.7.9, avoid 1.0.0 (depends on numpy 2)
 'click>=8.0.4' # allow later versions, tested with 8.1.3
-'pydantic>=1.10.0,<2.0' # for spacy compatibility; avoid 2.0 due to breaking changes
+'pydantic>=1.10.0,<3.0' # supports pydantic 1.10+ and 2.x; exclude the next major release (3.0)
 "humanfriendly~=10.0"  # for human readable file / RAM sizes
-"peft>=0.8.2"
+"peft>=0.8.2"
diff --git a/medcat/cat.py b/medcat/cat.py
@@ -42,6 +42,7 @@
 from medcat.stats.stats import get_stats
 from medcat.utils.filters import set_project_filters
 from medcat.utils.usage_monitoring import UsageMonitor
+from medcat.utils.pydantic_version import get_model_dump
 
 
 logger = logging.getLogger(__name__) # separate logger from the package-level one
@@ -585,7 +586,7 @@ def _print_stats(self,
 
     def _init_ckpts(self, is_resumed, checkpoint):
         if self.config.general.checkpoint.steps is not None or checkpoint is not None:
-            checkpoint_config = CheckpointConfig(**self.config.general.checkpoint.dict())
+            checkpoint_config = CheckpointConfig(**get_model_dump(self.config.general.checkpoint))
             checkpoint_manager = CheckpointManager('cat_train', checkpoint_config)
             if is_resumed:
                 # TODO: probably remove is_resumed mark and always resume if a checkpoint is provided,

diff --git a/medcat/config.py b/medcat/config.py
@@ -1,6 +1,5 @@
 from datetime import datetime
-from pydantic import BaseModel, Extra, ValidationError
-from pydantic.fields import ModelField
+from pydantic import BaseModel, ValidationError
 from typing import List, Set, Tuple, cast, Any, Callable, Dict, Optional, Union, Type, Literal
 from multiprocessing import cpu_count
 import logging
@@ -13,6 +12,7 @@
 from medcat.utils.matutils import intersect_nonempty_set
 from medcat.utils.config_utils import attempt_fix_weighted_average_function
 from medcat.utils.config_utils import weighted_average, is_old_type_config_dict
+from medcat.utils.pydantic_version import get_model_dump, get_model_fields
 from medcat.utils.saving.coding import CustomDelegatingEncoder, default_hook
 
 
@@ -125,7 +125,7 @@ def merge_config(self, config_dict: Dict) -> None:
                 attr = None # new attribute
             value = config_dict[key]
             if isinstance(value, BaseModel):
-                value = value.dict()
+                value = get_model_dump(value)
             if isinstance(attr, MixingConfig):
                 attr.merge_config(value)
             else:
@@ -177,7 +177,7 @@ def rebuild_re(self) -> None:
     def _calc_hash(self, hasher: Optional[Hasher] = None) -> Hasher:
         if hasher is None:
             hasher = Hasher()
-        for _, v in cast(BaseModel, self).dict().items():
+        for _, v in get_model_dump(cast(BaseModel, self)).items():
             if isinstance(v, MixingConfig):
                 v._calc_hash(hasher)
             else:
@@ -189,7 +189,7 @@ def get_hash(self, hasher: Optional[Hasher] = None):
         return hasher.hexdigest()
 
     def __str__(self) -> str:
-        return str(cast(BaseModel, self).dict())
+        return str(get_model_dump(cast(BaseModel, self)))
 
     @classmethod
     def load(cls, save_path: str) -> "MixingConfig":
@@ -238,15 +238,15 @@ def asdict(self) -> Dict[str, Any]:
         Returns:
             Dict[str, Any]: The dictionary associated with this config
         """
-        return cast(BaseModel, self).dict()
+        return get_model_dump(cast(BaseModel, self))
 
-    def fields(self) -> Dict[str, ModelField]:
+    def fields(self) -> dict:
         """Get the fields associated with this config.
 
         Returns:
-            Dict[str, ModelField]: The dictionary of the field names and fields
+            dict: A mapping from each field name to its field definition
         """
-        return cast(BaseModel, self).__fields__
+        return get_model_fields(cast(BaseModel, self))
 
 
 class VersionInfo(MixingConfig, BaseModel):
@@ -272,7 +272,7 @@ class VersionInfo(MixingConfig, BaseModel):
     """Which version of medcat was used to build the CDB"""
 
     class Config:
-        extra = Extra.allow
+        extra = 'allow'
         validate_assignment = True
 
 
@@ -290,7 +290,7 @@ class CDBMaker(MixingConfig, BaseModel):
     """Minimum number of letters required in a name to be accepted for a concept"""
 
     class Config:
-        extra = Extra.allow
+        extra = 'allow'
         validate_assignment = True
 
 
@@ -303,7 +303,7 @@ class AnnotationOutput(MixingConfig, BaseModel):
     include_text_in_output: bool = False
 
     class Config:
-        extra = Extra.allow
+        extra = 'allow'
         validate_assignment = True
 
 
@@ -317,7 +317,7 @@ class CheckPoint(MixingConfig, BaseModel):
     """When training the maximum checkpoints will be kept on the disk"""
 
     class Config:
-        extra = Extra.allow
+        extra = 'allow'
         validate_assignment = True
 
 
@@ -351,7 +351,7 @@ class General(MixingConfig, BaseModel):
                                        'entity_linker', 'sentencizer', 'entity_ruler', 'merge_noun_chunks',
                                        'merge_entities', 'merge_subtokens']
     checkpoint: CheckPoint = CheckPoint()
-    usage_monitor = UsageMonitor()
+    usage_monitor: UsageMonitor = UsageMonitor()
     """Checkpointing config"""
     log_level: int = logging.INFO
     """Logging config for everything | 'tagger' can be disabled, but will cause a drop in performance"""
@@ -392,7 +392,7 @@ class General(MixingConfig, BaseModel):
     reliable due to not taking into account all the details of the changes."""
 
     class Config:
-        extra = Extra.allow
+        extra = 'allow'
         validate_assignment = True
 
 
@@ -417,7 +417,7 @@ class Preprocessing(MixingConfig, BaseModel):
     """Documents longer  than this will be trimmed"""
 
     class Config:
-        extra = Extra.allow
+        extra = 'allow'
         validate_assignment = True
 
 
@@ -437,7 +437,7 @@ class Ner(MixingConfig, BaseModel):
     """Try reverse word order for short concepts (2 words max), e.g. heart disease -> disease heart"""
 
     class Config:
-        extra = Extra.allow
+        extra = 'allow'
         validate_assignment = True
 
 
@@ -572,7 +572,7 @@ class Linking(MixingConfig, BaseModel):
     """If true when the context of a concept is calculated (embedding) the words making that concept are not taken into accout"""
 
     class Config:
-        extra = Extra.allow
+        extra = 'allow'
         validate_assignment = True
 
 
@@ -593,7 +593,7 @@ class Config:
         # this if for word_skipper and punct_checker which would otherwise
         # not have a validator
         arbitrary_types_allowed = True
-        extra = Extra.allow
+        extra = 'allow'
         validate_assignment = True
 
     def __init__(self, *args, **kwargs):
@@ -611,7 +611,7 @@ def rebuild_re(self) -> None:
     # Override
     def get_hash(self):
         hasher = Hasher()
-        for k, v in self.dict().items():
+        for k, v in get_model_dump(self).items():
             if k in ['hash', ]:
                 # ignore hash
                 continue
@@ -667,4 +667,6 @@ def wrapper(*args, **kwargs):
 # we get a nicer exceptio
 _waf_advice = "You can use `cat.cdb.weighted_average_function` to access it directly"
 Linking.__getattribute__ = _wrapper(Linking.__getattribute__, Linking, _waf_advice, AttributeError)  # type: ignore
+if hasattr(Linking, '__getattr__'):
+    Linking.__getattr__ = _wrapper(Linking.__getattr__, Linking, _waf_advice, AttributeError)  # type: ignore
 Linking.__getitem__ = _wrapper(Linking.__getitem__, Linking, _waf_advice, KeyError)  # type: ignore
diff --git a/medcat/config_meta_cat.py b/medcat/config_meta_cat.py
@@ -1,5 +1,5 @@
 from typing import Dict, Any
-from medcat.config import MixingConfig, BaseModel, Optional, Extra
+from medcat.config import MixingConfig, BaseModel, Optional
 
 
 class General(MixingConfig, BaseModel):
@@ -57,7 +57,7 @@ class General(MixingConfig, BaseModel):
     Otherwise defaults to doc._.ents or doc.ents per the annotate_overlapping settings"""
 
     class Config:
-        extra = Extra.allow
+        extra = 'allow'
         validate_assignment = True
 
 
@@ -136,7 +136,7 @@ class Model(MixingConfig, BaseModel):
     """If set to True center positions will be ignored when calculating representation"""
 
     class Config:
-        extra = Extra.allow
+        extra = 'allow'
         validate_assignment = True
 
 
@@ -158,7 +158,7 @@ class Train(MixingConfig, BaseModel):
     """If set only this CUIs will be used for training"""
     auto_save_model: bool = True
     """Should do model be saved during training for best results"""
-    last_train_on: Optional[int] = None
+    last_train_on: Optional[float] = None
     """When was the last training run"""
     metric: Dict[str, str] = {'base': 'weighted avg', 'score': 'f1-score'}
     """What metric should be used for choosing the best model"""
@@ -173,7 +173,7 @@ class Train(MixingConfig, BaseModel):
     """Focal Loss hyperparameter - determines importance the loss gives to hard-to-classify examples"""
 
     class Config:
-        extra = Extra.allow
+        extra = 'allow'
         validate_assignment = True
 
 
@@ -184,5 +184,5 @@ class ConfigMetaCAT(MixingConfig, BaseModel):
     train: Train = Train()
 
     class Config:
-        extra = Extra.allow
+        extra = 'allow'
         validate_assignment = True
diff --git a/medcat/config_rel_cat.py b/medcat/config_rel_cat.py
@@ -1,6 +1,6 @@
 import logging
 from typing import Dict, Any, List
-from medcat.config import MixingConfig, BaseModel, Optional, Extra
+from medcat.config import MixingConfig, BaseModel, Optional
 
 
 class General(MixingConfig, BaseModel):
@@ -56,7 +56,7 @@ class Model(MixingConfig, BaseModel):
     """If set to True center positions will be ignored when calculating represenation"""
 
     class Config:
-        extra = Extra.allow
+        extra = 'allow'
         validate_assignment = True
 
 
@@ -83,7 +83,7 @@ class Train(MixingConfig, BaseModel):
     """Should the model be saved during training for best results"""
 
     class Config:
-        extra = Extra.allow
+        extra = 'allow'
         validate_assignment = True
 
 
@@ -94,5 +94,5 @@ class ConfigRelCAT(MixingConfig, BaseModel):
     train: Train = Train()
 
     class Config:
-        extra = Extra.allow
+        extra = 'allow'
         validate_assignment = True
diff --git a/medcat/config_transformers_ner.py b/medcat/config_transformers_ner.py
@@ -1,4 +1,4 @@
-from medcat.config import MixingConfig, BaseModel, Optional, Extra
+from medcat.config import MixingConfig, BaseModel, Optional
 
 
 class General(MixingConfig, BaseModel):
@@ -16,11 +16,11 @@ class General(MixingConfig, BaseModel):
     chunking_overlap_window: Optional[int] = 5
     """Size of the overlap window used for chunking"""
     test_size: float = 0.2
-    last_train_on: Optional[int] = None
+    last_train_on: Optional[float] = None
     verbose_metrics: bool = False
 
     class Config:
-        extra = Extra.allow
+        extra = 'allow'
         validate_assignment = True
 
 
@@ -29,5 +29,5 @@ class ConfigTransformersNER(MixingConfig, BaseModel):
     general: General = General()
 
     class Config:
-        extra = Extra.allow
+        extra = 'allow'
         validate_assignment = True
diff --git a/medcat/meta_cat.py b/medcat/meta_cat.py
@@ -114,8 +114,8 @@ def get_hash(self) -> str:
         """
         hasher = Hasher()
         # Set last_train_on if None
-        if self.config.train['last_train_on'] is None:
-            self.config.train['last_train_on'] = datetime.now().timestamp()
+        if self.config.train.last_train_on is None:
+            self.config.train.last_train_on = datetime.now().timestamp()
 
         hasher.update(self.config.get_hash())
         return hasher.hexdigest()
@@ -311,7 +311,7 @@ def train_raw(self, data_loaded: Dict, save_dir_path: Optional[str] = None, data
                 # Save everything now
                 self.save(save_dir_path=save_dir_path)
 
-        self.config.train['last_train_on'] = datetime.now().timestamp()
+        self.config.train.last_train_on = datetime.now().timestamp()
         return report
 
     def eval(self, json_path: str) -> Dict:

diff --git a/medcat/ner/transformers_ner.py b/medcat/ner/transformers_ner.py
@@ -97,8 +97,8 @@ def get_hash(self) -> str:
         """
         hasher = Hasher()
         # Set last_train_on if None
-        if self.config.general['last_train_on'] is None:
-            self.config.general['last_train_on'] = datetime.now().timestamp()
+        if self.config.general.last_train_on is None:
+            self.config.general.last_train_on = datetime.now().timestamp()
 
         hasher.update(self.config.get_hash())
         return hasher.hexdigest()
@@ -236,7 +236,7 @@ def train(self,
         trainer.train() # type: ignore
 
         # Save the training time
-        self.config.general['last_train_on'] = datetime.now().timestamp() # type: ignore
+        self.config.general.last_train_on = datetime.now().timestamp() # type: ignore
 
         # Save everything
         self.save(save_dir_path=os.path.join(self.training_arguments.output_dir, 'final_model'))

diff --git a/medcat/utils/decorators.py b/medcat/utils/decorators.py
@@ -7,16 +7,21 @@ def _format_version(ver: Tuple[int, int, int]) -> str:
     return ".".join(str(v) for v in ver)
 
 
-def deprecated(message: str, depr_version: Tuple[int, int, int], removal_version: Tuple[int, int, int]) -> Callable:
+def deprecated(message: str, depr_version: Tuple[int, int, int],
+               removal_version: Tuple[int, int, int],
+               allow_usage: bool = False) -> Callable:
     """Deprecate a method.
 
+    NOTE: The `allow_usage` argument is only read and used during test time.
+
     Args:
         message (str): The deprecation message.
         depr_version (Tuple[int, int, int]): The first version of MedCAT where this was deprecated.
         removal_version (Tuple[int, int, int]): The first version of MedCAT where this will be removed.
+        allow_usage (bool): Whether to allow usage during test time. Defaults to False.
 
     Returns:
-        Callable: _description_
+        Callable: The wrapped method.
     """
     def decorator(func: Callable) -> Callable:
         @functools.wraps(func)