Merge pull request #805 from myhloli/dev

refactor(table): disable StructEqTable support and add TableMaster support
opendatalab · Oct 28, 2024 · 03dd0cb · 03dd0cb
2 parents 3879bf8 + 377b09c
commit 03dd0cb
Show file tree

Hide file tree

Showing 4 changed files with 18 additions and 6 deletions.
diff --git a/README.md b/README.md
@@ -45,7 +45,7 @@
   - Refactored the sorting module code to use [layoutreader](https://github.com/ppaanngggg/layoutreader) for reading order sorting, ensuring high accuracy in various layouts.
   - Refactored the paragraph concatenation module to achieve good results in cross-column, cross-page, cross-figure, and cross-table scenarios.
   - Refactored the list and table of contents recognition functions, significantly improving the accuracy of list blocks and table of contents blocks, as well as the parsing of corresponding text paragraphs.
-  - Refactored the matching logic for figures, tables, and descriptive text, greatly enhancing the accuracy of matching captions and footnotes to figures and tables, and reducing the loss rate of descriptive text to zero.
+  - Refactored the matching logic for figures, tables, and descriptive text, greatly enhancing the accuracy of matching captions and footnotes to figures and tables, and reducing the loss rate of descriptive text to near zero.
   - Added multi-language support for OCR, supporting detection and recognition of 84 languages.For the list of supported languages, see [OCR Language Support List](https://paddlepaddle.github.io/PaddleOCR/latest/en/ppocr/blog/multi_languages.html#5-support-languages-and-abbreviations).
   - Added memory recycling logic and other memory optimization measures, significantly reducing memory usage. The memory requirement for enabling all acceleration features except table acceleration (layout/formula/OCR) has been reduced from 16GB to 8GB, and the memory requirement for enabling all acceleration features has been reduced from 24GB to 10GB.
   - Optimized configuration file feature switches, adding an independent formula detection switch to significantly improve speed and parsing results when formula detection is not needed.

diff --git a/README_zh-CN.md b/README_zh-CN.md
@@ -46,7 +46,7 @@
   - 重构排序模块代码，使用 [layoutreader](https://github.com/ppaanngggg/layoutreader) 进行阅读顺序排序，确保在各种排版下都能实现极高准确率
   - 重构段落拼接模块，在跨栏、跨页、跨图、跨表情况下均能实现良好的段落拼接效果
   - 重构列表和目录识别功能，极大提升列表块和目录块识别的准确率及对应文本段落的解析效果
-  - 重构图、表与描述性文本的匹配逻辑，大幅提升 caption 和 footnote 与图表的匹配准确率，并将描述性文本的丢失率降至零
+  - 重构图、表与描述性文本的匹配逻辑，大幅提升 caption 和 footnote 与图表的匹配准确率，并将描述性文本的丢失率降至接近0
   - 增加 OCR 的多语言支持，支持 84 种语言的检测与识别，语言支持列表详见 [OCR 语言支持列表](https://paddlepaddle.github.io/PaddleOCR/latest/ppocr/blog/multi_languages.html#5)
   - 增加显存回收逻辑及其他显存优化措施，大幅降低显存使用需求。开启除表格加速外的全部加速功能(layout/公式/OCR)的显存需求从16GB降至8GB，开启全部加速功能的显存需求从24GB降至10GB
   - 优化配置文件的功能开关，增加独立的公式检测开关，无需公式检测时可大幅提升速度和解析效果

diff --git a/magic_pdf/model/pdf_extract_kit.py b/magic_pdf/model/pdf_extract_kit.py
@@ -38,19 +38,24 @@
 from magic_pdf.model.pek_sub_modules.layoutlmv3.model_init import Layoutlmv3_Predictor
 from magic_pdf.model.pek_sub_modules.post_process import latex_rm_whitespace
 from magic_pdf.model.pek_sub_modules.self_modify import ModifiedPaddleOCR
-from magic_pdf.model.pek_sub_modules.structeqtable.StructTableModel import StructTableModel
+# from magic_pdf.model.pek_sub_modules.structeqtable.StructTableModel import StructTableModel
 from magic_pdf.model.ppTableModel import ppTableModel
 
 
 def table_model_init(table_model_type, model_path, max_time, _device_='cpu'):
     if table_model_type == MODEL_NAME.STRUCT_EQTABLE:
-        table_model = StructTableModel(model_path, max_time=max_time, device=_device_)
-    else:
+        # table_model = StructTableModel(model_path, max_time=max_time, device=_device_)
+        logger.error("StructEqTable is under upgrade, the current version does not support it.")
+        exit(1)
+    elif table_model_type == MODEL_NAME.TABLE_MASTER:
         config = {
             "model_dir": model_path,
             "device": _device_
         }
         table_model = ppTableModel(config)
+    else:
+        logger.error("table model type not allow")
+        exit(1)
     return table_model
 
 

diff --git a/magic_pdf/model/pek_sub_modules/structeqtable/StructTableModel.py b/magic_pdf/model/pek_sub_modules/structeqtable/StructTableModel.py
@@ -1,5 +1,12 @@
-from struct_eqtable.model import StructTable
+from loguru import logger
+
+try:
+    from struct_eqtable.model import StructTable
+except ImportError:
+    logger.error("StructEqTable is under upgrade, the current version does not support it.")
 from pypandoc import convert_text
+
+
 class StructTableModel:
     def __init__(self, model_path, max_new_tokens=2048, max_time=400, device = 'cpu'):
         # init