
Commit 93e8d50
Merge pull request #272 from NekoAria/2.0
v2.5.4
Quan666 authored Mar 17, 2022
2 parents 5bf41a0 + f75d6d3
Showing 8 changed files with 71 additions and 67 deletions.
2 changes: 1 addition & 1 deletion .env
```diff
@@ -1,2 +1,2 @@
 ENVIRONMENT=prod
-VERSION='v2.5.3'
+VERSION='v2.5.4'
```
12 changes: 6 additions & 6 deletions pyproject.toml
```diff
@@ -1,9 +1,9 @@
 [tool.poetry]
 name = "ELF_RSS"
-version = "2.5.3"
-description = "ELF_RSS"
-authors = ["Quan666"]
-license = "GPL v3"
+version = "2.5.4"
+description = "QQ机器人 RSS订阅 插件,订阅源建议选择 RSSHub"
+authors = ["Quan666 <[email protected]>"]
+license = "GPL-3.0-only"
 readme = "README.md"
 packages = [
     { include = "ELF_RSS2", from = "src/plugins" },
@@ -38,5 +38,5 @@ nonebot-plugin-guild-patch = "^0.1.1"
 nb-cli = "^0.6.4"
 
 [build-system]
-requires = ["poetry>=0.12"]
-build-backend = "poetry.masonry.api"
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
```
23 changes: 0 additions & 23 deletions setup.py

This file was deleted.

2 changes: 1 addition & 1 deletion src/plugins/ELF_RSS2/RSS/routes/Parsing/handle_images.py
```diff
@@ -171,7 +171,7 @@ async def fuck_pixiv_cat(url: str) -> str:
 @retry(stop=(stop_after_attempt(5) | stop_after_delay(30)))
 async def download_image_detail(url: str, proxy: bool) -> Union[bytes, None]:
     async with httpx.AsyncClient(proxies=get_proxy(open_proxy=proxy)) as client:
-        referer = re.search("[hH][tT]{2}[pP][sS]?://[^/]+", url).group()
+        referer = f"{httpx.URL(url).scheme}://{httpx.URL(url).host}/"
         headers = {"referer": referer}
         try:
             pic = await client.get(url, headers=headers)
```
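Note: the referer is now derived with httpx's URL parser instead of a hand-rolled regex. A minimal sketch of the difference (the sample URL is hypothetical; the new form also gains a trailing slash):

```python
import re

import httpx

url = "https://i.pximg.net/img-original/img/0001.png"  # hypothetical sample

# Old: regex match on "scheme://host" at the start of the string; on a
# non-matching string, re.search returns None and .group() raises.
old_referer = re.search("[hH][tT]{2}[pP][sS]?://[^/]+", url).group()

# New: structured parsing; scheme and host come from a real URL parser.
parsed = httpx.URL(url)
new_referer = f"{parsed.scheme}://{parsed.host}/"

print(old_referer)  # https://i.pximg.net
print(new_referer)  # https://i.pximg.net/
```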
18 changes: 10 additions & 8 deletions src/plugins/ELF_RSS2/RSS/routes/Parsing/send_message.py
```diff
@@ -15,20 +15,20 @@ async def send_msg(rss: rss_class.Rss, msg: str, item: dict) -> bool:
     flag = False
     if not msg:
         return False
-    error_msg = f"消息发送失败,已达最大重试次数!\n链接:[{item['link']}]"
+    error_msg = f"消息发送失败,已达最大重试次数!\n链接:[{item.get('link')}]"
     if rss.user_id:
         friend_list = await get_bot_friend_list(bot)
         for user_id in rss.user_id:
             if int(user_id) not in friend_list:
                 logger.error(
-                    f"QQ号[{user_id}]不是Bot[{bot.self_id}]的好友 链接:[{item['link']}]"
+                    f"QQ号[{user_id}]不是Bot[{bot.self_id}]的好友 链接:[{item.get('link')}]"
                 )
                 continue
             try:
                 await bot.send_private_msg(user_id=int(user_id), message=str(msg))
                 flag = True
             except Exception as e:
-                logger.error(f"E: {repr(e)} 链接:[{item['link']}]")
+                logger.error(f"E: {repr(e)} 链接:[{item.get('link')}]")
                 if item.get("count") == 3:
                     await bot.send_private_msg(
                         user_id=int(user_id), message=f"{error_msg}\nE: {repr(e)}"
@@ -38,13 +38,15 @@ async def send_msg(rss: rss_class.Rss, msg: str, item: dict) -> bool:
         group_list = await get_bot_group_list(bot)
         for group_id in rss.group_id:
             if int(group_id) not in group_list:
-                logger.error(f"Bot[{bot.self_id}]未加入群组[{group_id}] 链接:[{item['link']}]")
+                logger.error(
+                    f"Bot[{bot.self_id}]未加入群组[{group_id}] 链接:[{item.get('link')}]"
+                )
                 continue
             try:
                 await bot.send_group_msg(group_id=int(group_id), message=str(msg))
                 flag = True
             except Exception as e:
-                logger.error(f"E: {repr(e)} 链接:[{item['link']}]")
+                logger.error(f"E: {repr(e)} 链接:[{item.get('link')}]")
                 if item.get("count") == 3:
                     await bot.send_group_msg(
                         group_id=int(group_id), message=f"E: {repr(e)}\n{error_msg}"
@@ -60,7 +62,7 @@ async def send_msg(rss: rss_class.Rss, msg: str, item: dict) -> bool:
                     "guild_name"
                 ]
                 logger.error(
-                    f"Bot[{bot.self_id}]未加入频道 {guild_name}[{guild_id}] 链接:[{item['link']}]"
+                    f"Bot[{bot.self_id}]未加入频道 {guild_name}[{guild_id}] 链接:[{item.get('link')}]"
                 )
                 continue
 
@@ -70,7 +72,7 @@ async def send_msg(rss: rss_class.Rss, msg: str, item: dict) -> bool:
                     "guild_name"
                 ]
                 logger.error(
-                    f"Bot[{bot.self_id}]未加入频道 {guild_name}[{guild_id}]的子频道[{channel_id}] 链接:[{item['link']}]"
+                    f"Bot[{bot.self_id}]未加入频道 {guild_name}[{guild_id}]的子频道[{channel_id}] 链接:[{item.get('link')}]"
                 )
                 continue
 
@@ -80,7 +82,7 @@ async def send_msg(rss: rss_class.Rss, msg: str, item: dict) -> bool:
                 )
                 flag = True
             except Exception as e:
-                logger.error(f"E: {repr(e)} 链接:[{item['link']}]")
+                logger.error(f"E: {repr(e)} 链接:[{item.get('link')}]")
                 if item.get("count") == 3:
                     await bot.send_guild_channel_msg(
                         message=f"E: {repr(e)}\n{error_msg}",
```
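Note: every `item['link']` becomes `item.get('link')`. The change is defensive: `auto_stop_and_notify_all` (added in rss_parsing.py below) calls `send_msg` with an empty `item`, and a bare subscript would raise KeyError inside the very logging path that reports errors. A minimal sketch:

```python
item: dict = {}  # e.g. the empty item passed by auto_stop_and_notify_all

# item["link"] would raise KeyError here; .get() returns None instead,
# so the log line degrades to "链接:[None]" rather than crashing.
print(f"链接:[{item.get('link')}]")  # -> 链接:[None]
```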
11 changes: 7 additions & 4 deletions src/plugins/ELF_RSS2/RSS/rss_class.py
```diff
@@ -2,6 +2,7 @@
 from pathlib import Path
 from typing import Union
 
+import httpx
 from nonebot.log import logger
 from tinydb import Query, TinyDB
 from tinydb.operations import set
@@ -30,18 +31,19 @@ def __init__(self):
         self.duplicate_filter_mode = []  # 去重模式
         self.max_image_number = 0  # 图片数量限制,防止消息太长刷屏
         self.content_to_remove = None  # 正文待移除内容,支持正则
+        self.error_count = 0  # 连续抓取失败的次数,超过 100 就停止更新
         self.stop = False  # 停止更新
 
     # 返回订阅链接
     def get_url(self, rsshub: str = config.rsshub) -> str:
-        if re.match("[hH][tT]{2}[pP][sS]?://", self.url, flags=0):
+        if httpx.URL(self.url).scheme in ["http", "https"]:
             return self.url
         else:
             # 先判断地址是否 / 开头
-            if re.match("/", self.url):
+            if self.url.startswith("/"):
                 return rsshub + self.url
-            else:
-                return rsshub + "/" + self.url
+
+            return rsshub + "/" + self.url
 
     # 读取记录
     @staticmethod
@@ -240,6 +242,7 @@ def __str__(self) -> str:
             f"{mode_msg}" if self.duplicate_filter_mode else "",
             f"图片数量限制:{self.max_image_number}" if self.max_image_number else "",
             f"正文待移除内容:{self.content_to_remove}" if self.content_to_remove else "",
+            f"连续抓取失败的次数:{self.error_count}" if self.error_count else "",
             f"停止更新:{self.stop}" if self.stop else "",
         ]
         return "\n".join([i for i in ret_list if i != ""])
```
66 changes: 43 additions & 23 deletions src/plugins/ELF_RSS2/RSS/rss_parsing.py
```diff
@@ -1,7 +1,6 @@
 # -*- coding: UTF-8 -*-
 
 import asyncio
-import re
 from pathlib import Path
 
 import feedparser
@@ -15,13 +14,14 @@
     stop_after_delay,
     wait_fixed,
 )
-from tinydb import TinyDB
+from tinydb import Query, TinyDB
 from tinydb.middlewares import CachingMiddleware
 from tinydb.storages import JSONStorage
 
-from ..config import DATA_PATH, config
+from ..config import DATA_PATH, JSON_PATH, config
 from ..RSS import my_trigger as tr
 from . import rss_class
-from .routes.Parsing import ParsingRss, get_proxy
+from .routes.Parsing import ParsingRss, get_proxy, send_msg
 from .routes.Parsing.cache_manage import cache_filter
 from .routes.Parsing.check_update import dict_hash
@@ -47,8 +47,10 @@ async def start(rss: rss_class.Rss) -> None:
     try:
         new_rss = await get_rss(rss)
     except RetryError:
-        cookies_str = "及 cookies " if rss.cookies else ""
-        logger.error(f"{rss.name}[{rss.get_url()}]抓取失败!已达最大重试次数!请检查订阅地址{cookies_str}!")
+        rss.error_count += 1
+        logger.warning(f"{rss.name} 抓取失败!已经尝试最多 6 次!")
+        if rss.error_count >= 100:
+            await auto_stop_and_notify_all(rss)
         return
     # 检查是否存在rss记录
     _file = DATA_PATH / (rss.name + ".json")
@@ -75,22 +77,41 @@ async def start(rss: rss_class.Rss) -> None:
     await pr.start(rss_name=rss.name, new_rss=new_rss)
 
 
+async def auto_stop_and_notify_all(rss: rss_class.Rss) -> None:
+    rss.stop = True
+    db = TinyDB(
+        JSON_PATH,
+        encoding="utf-8",
+        sort_keys=True,
+        indent=4,
+        ensure_ascii=False,
+    )
+    db.update(rss.__dict__, Query().name == str(rss.name))
+    await tr.delete_job(rss)
+    cookies_str = "及 cookies " if rss.cookies else ""
+    await send_msg(
+        rss=rss,
+        msg=f"{rss.name}[{rss.get_url()}]已经连续抓取失败超过 100 次!已自动停止更新!请检查订阅地址{cookies_str}!",
+        item={},
+    )
+
+
 async def raise_on_4xx_5xx(response: httpx.Response):
     response.raise_for_status()
 
 
 # 获取 RSS 并解析为 json ,失败重试
 @retry(wait=wait_fixed(1), stop=(stop_after_attempt(5) | stop_after_delay(30)))
 async def get_rss(rss: rss_class.Rss) -> dict:
-    proxies = get_proxy(rss.img_proxy)
+    rss_url = rss.get_url()
     # 对本机部署的 RSSHub 不使用代理
-    no_proxy = [
+    local_host = [
         "localhost",
         "127.0.0.1",
     ]
-    for i in no_proxy:
-        if i in rss.get_url():
-            proxies = None
+    proxies = (
+        get_proxy(rss.img_proxy) if httpx.URL(rss_url).host not in local_host else None
+    )
 
     # 判断是否使用cookies
     cookies = rss.cookies if rss.cookies else None
@@ -104,29 +125,28 @@ async def get_rss(rss: rss_class.Rss) -> dict:
         event_hooks={"response": [raise_on_4xx_5xx]},
     ) as client:
         try:
-            r = await client.get(rss.get_url())
+            r = await client.get(rss_url)
             # 解析为 JSON
             d = feedparser.parse(r.content)
         except Exception:
-            if (
-                not re.match("[hH][tT]{2}[pP][sS]?://", rss.url, flags=0)
-                and config.rsshub_backup
-            ):
-                logger.warning(f"[{rss.get_url()}]访问失败!将使用备用 RSSHub 地址!")
+            if not httpx.URL(rss.url).scheme and config.rsshub_backup:
+                logger.debug(f"[{rss_url}]访问失败!将使用备用 RSSHub 地址!")
                 for rsshub_url in list(config.rsshub_backup):
+                    rss_url = rss.get_url(rsshub=rsshub_url)
                     try:
-                        r = await client.get(rss.get_url(rsshub=rsshub_url))
+                        r = await client.get(rss_url)
                         d = feedparser.parse(r.content)
                     except Exception:
-                        logger.warning(
-                            f"[{rss.get_url(rsshub=rsshub_url)}]访问失败!将使用备用 RSSHub 地址!"
-                        )
+                        logger.debug(f"[{rss_url}]访问失败!将使用备用 RSSHub 地址!")
                         continue
                     if d.get("feed"):
-                        logger.info(f"[{rss.get_url(rsshub=rsshub_url)}]抓取成功!")
+                        logger.info(f"[{rss_url}]抓取成功!")
                         break
         finally:
             if not d or not d.get("feed"):
-                logger.warning(f"{rss.name} 抓取失败!将重试最多 5 次!")
+                logger.debug(f"{rss.name} 抓取失败!将重试最多 5 次!")
+                rss.error_count += 1
                 raise TryAgain
+    if d.get("feed") and rss.error_count > 0:
+        rss.error_count = 0
     return d
```
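Note: taken together, this file now implements a consecutive-failure circuit breaker: every exhausted retry cycle bumps `rss.error_count`, any successful fetch resets it, and at 100 the subscription is stopped, persisted, unscheduled, and all subscribers are notified. A minimal runnable sketch of just that control flow, with hypothetical stand-ins for the fetch and notify steps:

```python
import asyncio

MAX_CONSECUTIVE_FAILURES = 100  # the threshold hard-coded by this commit


class FeedState:
    """Hypothetical stand-in for rss_class.Rss, reduced to the new fields."""

    def __init__(self) -> None:
        self.error_count = 0
        self.stop = False


async def fetch(state: FeedState) -> dict:
    raise ConnectionError("feed unreachable")  # simulate one failed retry cycle


async def check_feed(state: FeedState) -> None:
    try:
        await fetch(state)
    except Exception:
        state.error_count += 1  # one exhausted retry cycle, one failure
        if state.error_count >= MAX_CONSECUTIVE_FAILURES:
            state.stop = True  # stand-in for auto_stop_and_notify_all
        return
    # Any success clears the streak, so only consecutive failures count.
    if state.error_count > 0:
        state.error_count = 0


async def main() -> None:
    state = FeedState()
    for _ in range(MAX_CONSECUTIVE_FAILURES):
        await check_feed(state)
    print(state.error_count, state.stop)  # 100 True


asyncio.run(main())
```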
4 changes: 3 additions & 1 deletion src/plugins/ELF_RSS2/change_dy.py
```diff
@@ -108,6 +108,8 @@ async def handle_change_list(
         "stop",
     ]:
         value_to_change = bool(int(value_to_change))  # type:ignore
+        if key_to_change == "stop" and not value_to_change and rss.error_count > 0:
+            rss.error_count = 0
     elif (
         key_to_change in ["downkey", "wkey", "blackkey", "bkey"]
         and len(value_to_change.strip()) == 0
@@ -227,7 +229,7 @@ async def handle_rss_change(
         await tr.add_job(rss)
     else:
         await tr.delete_job(rss)
-        logger.info(f"{rss.name} 已停止更新")
+        logger.info(f"{rss_name} 已停止更新")
     rss_msg = str(rss)
 
     # 隐私考虑,群组下不展示除当前群组外的群号和QQ
```
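Note: the first hunk closes the loop on the circuit breaker: an auto-stopped feed still carries its failure streak, so re-enabling it with `stop=0` also clears `error_count`; otherwise the very next failed fetch would trip the 100-failure breaker again. A minimal sketch (hypothetical objects):

```python
class Rss:
    """Hypothetical stand-in for the real rss_class.Rss."""

    error_count = 100
    stop = True


rss = Rss()
key_to_change, value_to_change = "stop", bool(int("0"))  # user sends stop=0

if key_to_change == "stop" and not value_to_change and rss.error_count > 0:
    rss.error_count = 0  # a re-enabled feed starts with a clean streak

setattr(rss, key_to_change, value_to_change)
print(rss.error_count, rss.stop)  # 0 False
```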
