From 82018658211df6aa2a3f6e891dd1a190b9b101ae Mon Sep 17 00:00:00 2001 From: Aris Katsikaridis Date: Wed, 23 Oct 2024 19:20:55 +0300 Subject: [PATCH] Strip BOM (Byte Order Mark) character from document text after reading --- openformats/formats/docx.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/openformats/formats/docx.py b/openformats/formats/docx.py index 9f20c465..faee4683 100644 --- a/openformats/formats/docx.py +++ b/openformats/formats/docx.py @@ -127,6 +127,10 @@ def get_document(self): with io.open(self.__document_path, 'r') as f: self.__document = f.read() + if self.__document.startswith("\ufeff"): + # Remove BOM + self.__document = self.__document.replace("\ufeff", "") + return self.__document def set_document(self, document):