forked from EwingYangs/awesome-open-gpt
-
Notifications
You must be signed in to change notification settings - Fork 0
/
auto_translator.py
87 lines (73 loc) · 2.58 KB
/
auto_translator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import os
import re
import html2text
import markdown
import requests
import requests_cache
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from markdownify import markdownify as md
# Fetch the (English) description of the repository at the given URL
def get_description(url):
    """Return the description of the GitHub repository that ``url`` points to.

    Only plain repository links are looked up, i.e.
    ``http(s)://github.com/<owner>/<repo>`` with an optional ``www.`` prefix,
    ``.git`` suffix, trailing slash, query string or fragment. Every other
    link (relative links, anchors, other sites, deeper GitHub paths) is
    rejected before any network access, because it cannot be mapped onto the
    ``/repos/<owner>/<repo>`` API endpoint.

    Args:
        url: A link, typically the ``href`` of an anchor in the README table.

    Returns:
        The ``description`` field of the API response (which may itself be
        ``None``), or ``None`` when the URL is not a repository link, the
        request fails, the response status is not OK, or the body is not a
        JSON object.
    """
    if not isinstance(url, str):
        return None

    # Validate first: this keeps arbitrary README links from being requested
    # and lets the function fail fast without touching the network.
    repo = re.match(
        r"https?://(?:www\.)?github\.com/"
        r"([^/?#\s]+)/([^/?#\s]+?)(?:\.git)?/?(?:[?#].*)?$",
        url.strip(),
    )
    if repo is None:
        return None
    owner, name = repo.groups()
    api_url = "https://api.github.com/repos/{}/{}".format(owner, name)

    # Enable the response cache with a 6 hour (21600 s) expiry.
    requests_cache.install_cache(expire_after=21600)

    # NOTE(review): only the "http" scheme is mapped here while the API URL is
    # https, so this entry presumably does not apply to the request below --
    # confirm whether an "https" entry was intended.
    proxy = {
        "http": os.getenv("HTTP_PROXY")
    }

    # NOTE(review): the token is sent verbatim, so GITHUB_ACCESS_KEY presumably
    # has to contain the full header value (scheme included) -- confirm.
    access_token = os.getenv("GITHUB_ACCESS_KEY")
    headers = {'Authorization': access_token} if access_token else {}

    try:
        # A timeout stops one stalled connection from hanging the whole run.
        response = requests.get(api_url, proxies=proxy, headers=headers, timeout=30)
    except requests.RequestException:
        # Same "no description available" contract as a non-OK response.
        return None

    if not response.ok:
        return None

    try:
        payload = response.json()
    except ValueError:
        # Body was not valid JSON.
        return None

    if not isinstance(payload, dict):
        return None
    return payload.get("description")
def do_auto_update_star(readme_path="./README_en.md"):
    """Refresh the ``introduction`` column of every table in the README.

    The Markdown file is rendered to HTML, each table that has a header cell
    reading ``introduction`` is scanned, and for every data row containing a
    link the cell in that column is overwritten with the text returned by
    ``get_description`` for the row's first link. Rows for which no
    description is returned keep their current text. The resulting HTML is
    converted back with ``markdownify`` and written over the same file; note
    that the whole document goes through this round trip, not only the tables.

    Args:
        readme_path: Path of the Markdown file to update in place. Defaults to
            the file the script has always operated on.
    """
    # Read the Markdown source.
    with open(readme_path, 'r', encoding='utf-8') as f:
        content = f.read()

    html = markdown.markdown(content, extensions=['markdown.extensions.tables', 'markdown.extensions.toc'])
    soup = BeautifulSoup(html, "html.parser")

    for table in soup.find_all("table"):
        header_row = table.find('tr')
        if header_row is None:
            continue

        # Locate the column whose header is "introduction".
        intro_index = None
        for i, cell in enumerate(header_row.find_all('th')):
            if cell.get_text(strip=True) == 'introduction':
                intro_index = i
                break
        if intro_index is None:
            # Table has no such column; indexing with None would raise.
            continue

        # Every row after the header is treated as a data row.
        for row in table.find_all('tr')[1:]:
            # Read the href from the parsed tree rather than regex-matching
            # the serialized HTML, so extra attributes (e.g. title) and
            # entity-escaped characters in the URL are handled correctly.
            link = row.find('a', href=True)
            if link is None:
                continue

            # Look the description up once per row.
            description = get_description(link['href'])
            if not description:
                continue

            cells_td = row.find_all('td')
            if intro_index >= len(cells_td):
                # Row is shorter than the header; nothing to update.
                continue
            cells_td[intro_index].string = description

    # Convert the HTML back to Markdown and save it, using the same encoding
    # the file was read with.
    markdown_text = md(str(soup))
    with open(readme_path, "w", encoding='utf-8') as f:
        f.write(markdown_text)
if __name__ == "__main__":
    # Load the .env file first; get_description reads its proxy and access
    # token settings through os.getenv.
    load_dotenv()
    do_auto_update_star()