From 269d7329cb5c7d1ff14abc8a2b2254aa1f6fe1d5 Mon Sep 17 00:00:00 2001 From: Michele Dolfi Date: Thu, 31 Oct 2024 17:57:43 +0100 Subject: [PATCH] notify users about the deprecated argument Signed-off-by: Michele Dolfi --- .../doc_chunk/python/src/doc_chunk_local_python.py | 1 + .../language/doc_chunk/python/src/doc_chunk_transform.py | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/transforms/language/doc_chunk/python/src/doc_chunk_local_python.py b/transforms/language/doc_chunk/python/src/doc_chunk_local_python.py index e0fdfa871..0c830ee98 100644 --- a/transforms/language/doc_chunk/python/src/doc_chunk_local_python.py +++ b/transforms/language/doc_chunk/python/src/doc_chunk_local_python.py @@ -38,6 +38,7 @@ "runtime_job_id": "job_id", "runtime_code_location": ParamsUtils.convert_to_ast(code_location), # doc_chunk params + # "doc_chunk_dl_min_chunk_len": 10, # for testing the usage of the deprecated argument # "doc_chunk_chunking_type": "li_markdown", "doc_chunk_chunking_type": "dl_json", # "doc_chunk_chunking_type": chunking_types.LI_TOKEN_TEXT, diff --git a/transforms/language/doc_chunk/python/src/doc_chunk_transform.py b/transforms/language/doc_chunk/python/src/doc_chunk_transform.py index da5540cba..e64a7c1d1 100644 --- a/transforms/language/doc_chunk/python/src/doc_chunk_transform.py +++ b/transforms/language/doc_chunk/python/src/doc_chunk_transform.py @@ -234,6 +234,11 @@ def add_input_params(self, parser: ArgumentParser) -> None: type=int, help="Number of tokens overlapping between chunks for the fixed-sized chunker.", ) + parser.add_argument( + f"--{cli_prefix}dl_min_chunk_len", + default=None, + help="Deprecated. 
This option is no longer considered.", + ) def apply_input_params(self, args: Namespace) -> bool: """ @@ -244,5 +249,7 @@ def apply_input_params(self, args: Namespace) -> bool: captured = CLIArgumentProvider.capture_parameters(args, cli_prefix, False) self.params = self.params | captured + if self.params.get("dl_min_chunk_len") is not None: + self.logger.warning("The `dl_min_chunk_len` option is deprecated and will be ignored. Please stop using it, it will not be accepted anymore in future versions.") self.logger.info(f"doc_chunk parameters are : {self.params}") return True