-
Notifications
You must be signed in to change notification settings - Fork 0
/
trans_format.py
71 lines (63 loc) · 3.03 KB
/
trans_format.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
'''
Author: Victor-kawai [email protected]
Date: 2024-04-11 18:31:28
LastEditors: Victor-kawai [email protected]
LastEditTime: 2024-05-23 16:55:56
FilePath: \毕设\code\trans_format.py
Description: Promote known section labels (简称, 职源, 职掌, ...) in a Markdown/text file to `####` headings and save the result as `<name>转换后.md`.
'''
import re
# Canonical fields: 官品 (rank), 编制 (staffing), 简称 (short name),
# 职源 (origin of the office), 职掌 (duties).
# Source: preprocess2.py
#
# Maps a section label as it appears in the text to the canonical field(s)
# it covers. md_format_change2 in this file only reads the keys.
#
# NOTE: order matters when the keys are applied one after another: a label
# that is a prefix of a longer label must come AFTER the longer one,
# otherwise the shorter label is rewritten first and the longer one can no
# longer match (e.g. "职源与沿革" vs "职源与沿革、职掌、官品").
test_dict = {
    "简称与别名": ["简称"],
    "简称与别称": ["简称"],
    "官品、编制、简称与别名": ["官品", "编制", "简称"],
    "简称与追改": ["简称"],
    "简称与旧称": ["简称"],
    "职源与沿革、职掌、官品": ["职源", "职掌", "官品"],
    "职源与沿革": ["职源"],
    "职源与改革": ["职源"],
    "职掌与沿革": ["职掌", "职源"],
    "职源、职掌、编制": ["职源", "职掌", "编制"],
    "职掌、官品、编制": ["职掌", "官品", "编制"],
    "职源、职掌": ["职源", "职掌"],
    "职掌、品位": ["职掌", "官品"],
    "职掌": ["职掌"],
    "职能": ["职掌"],
    "编制与品位": ["编制", "官品"],
    "位遇": ["官品"],
    "序位": ["官品"],
    "地位": ["官品"],
    "品秩": ["官品"],
    "编制": ["编制"],
    "职源": ["职源"],
    "简称": ["简称"],
    "通称": ["简称"],
    "省称": ["简称"],
    "别名": ["简称"],
    "别称": ["简称"],
    "合称": ["简称"],
    "官品": ["官品"],
    "品位": ["官品"],
}
def md_format_change2(file_name, replace_dict):
    """Promote known section labels in a text file to Markdown ``####`` headings.

    The file is read as UTF-8. Wherever one of the labels in ``replace_dict``
    occurs immediately after a newline, that occurrence becomes a line of the
    form ``#### <label>`` followed by a newline; any text that followed the
    label on the same line is pushed onto the next line. The converted text
    is written as UTF-8 to ``<file_name without extension>转换后.md``; the
    input file itself is not modified.

    Args:
        file_name: Path of the text/Markdown file to convert.
        replace_dict: Mapping (or other iterable) whose keys are the labels
            to promote. Only the keys are used; values are ignored.

    Returns:
        None. The result is written to disk.

    Notes:
        * Labels are matched in a single pass, longest first, so a label
          that is a prefix of another (e.g. "职源与沿革" and
          "职源与沿革、职掌、官品") never shadows the longer one, and the
          newline inserted after a heading cannot trigger a second match.
        * Empty labels are skipped; they would match every newline.
        * A label on the very first line of the file has no preceding
          newline and is therefore left unchanged.
    """
    # Local import: the module header only imports ``re``.
    import os

    print("==== markdown格式转换开始 ====")
    with open(file_name, "r", encoding="utf-8") as src:
        data = src.read()

    # Regex alternation is ordered (first alternative that matches wins),
    # so sorting longest-first guarantees the longest label is chosen.
    labels = sorted((key for key in replace_dict if key), key=len, reverse=True)
    if labels:
        heading_re = re.compile(
            "\n(" + "|".join(re.escape(label) for label in labels) + ")"
        )
        data = heading_re.sub(
            lambda match: f"\n#### {match.group(1).strip()}\n", data
        )

    # splitext leaves extension-less names intact and ignores dots that
    # belong to directory components, unlike slicing at rfind(".").
    stem, _ext = os.path.splitext(file_name)
    with open(stem + "转换后.md", "w", encoding="utf-8") as dst:
        dst.write(data)
    print("==== markdown格式转换完成 ====")
if __name__ == "__main__":
    # Script entry point: convert the default source document using the
    # module-level label table.
    md_format_change2("宦官门文本.md", test_dict)