Skip to content

Commit

Permalink
Merge pull request #144 from NekoAria/2.0
Browse files Browse the repository at this point in the history
修正对标题的处理逻辑,修正 ` 标签处理` 的逻辑
  • Loading branch information
Quan authored Jun 23, 2021
2 parents 2d0f327 + 8d17281 commit d26ec74
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 7 deletions.
2 changes: 1 addition & 1 deletion .env
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
ENVIRONMENT=prod
VERSION='v2.2.5'
VERSION='v2.2.6'
12 changes: 6 additions & 6 deletions src/plugins/ELF_RSS2/RSS/rss_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ async def start(rss: rss_class.Rss) -> None:
title = item["title"]
if not config.blockquote:
title = re.sub(r" - 转发 .*", "", title)
title_msg = await handle_title(title=title)
if not rss.only_title:
# 先判断与正文相识度,避免标题正文一样,或者是标题为正文前N字等情况
try:
Expand All @@ -162,16 +163,17 @@ async def start(rss: rss_class.Rss) -> None:
)
# 标题正文相似度
if rss.only_pic or similarity.ratio() <= 0.6:
item_msg += await handle_title(title=title)
item_msg += title_msg
if rss.translation:
item_msg += await handle_translation(content=title)
# 处理正文
item_msg += await handle_summary(summary=summary, rss=rss)
except Exception as e:
logger.info(f"{rss.name} 没有正文内容! E: {e}")
item_msg += await handle_title(title=title)
if title_msg not in item_msg:
item_msg += title_msg
else:
item_msg += await handle_title(title=title)
item_msg += title_msg
if rss.translation:
item_msg += await handle_translation(content=title)

Expand Down Expand Up @@ -395,8 +397,6 @@ async def handle_title(title: str) -> str:

# 处理正文,图片放后面
async def handle_summary(summary: str, rss: rss_class.Rss) -> str:
# 去掉换行
# summary = re.sub('\n', '', summary)
# 处理 summary 使其 HTML标签统一,方便处理
try:
summary_html = Pq(summary)
Expand Down Expand Up @@ -725,7 +725,7 @@ async def handle_html_tag(html) -> str:

# <a> 标签处理
for a in new_html("a").items():
a_str = re.search(r"<a.+?</a>", html_unescape(str(a)))[0]
a_str = re.search(r"<a.+?</a>", html_unescape(str(a)), flags=re.DOTALL)[0]
if a.text() and str(a.text()) != a.attr("href"):
rss_str = rss_str.replace(a_str, f" {a.text()}: {a.attr('href')}\n")
else:
Expand Down

0 comments on commit d26ec74

Please sign in to comment.