
Commit 93e8d50
Merge pull request #272 from NekoAria/2.0
v2.5.4
Quan666 authored Mar 17, 2022
2 parents 5bf41a0 + f75d6d3
Showing 8 changed files with 71 additions and 67 deletions.
2 changes: 1 addition & 1 deletion .env
```diff
@@ -1,2 +1,2 @@
 ENVIRONMENT=prod
-VERSION='v2.5.3'
+VERSION='v2.5.4'
```
12 changes: 6 additions & 6 deletions pyproject.toml
```diff
@@ -1,9 +1,9 @@
 [tool.poetry]
 name = "ELF_RSS"
-version = "2.5.3"
-description = "ELF_RSS"
-authors = ["Quan666"]
-license = "GPL v3"
+version = "2.5.4"
+description = "QQ机器人 RSS订阅 插件,订阅源建议选择 RSSHub"
+authors = ["Quan666 <[email protected]>"]
+license = "GPL-3.0-only"
 readme = "README.md"
 packages = [
     { include = "ELF_RSS2", from = "src/plugins" },
@@ -38,5 +38,5 @@ nonebot-plugin-guild-patch = "^0.1.1"
 nb-cli = "^0.6.4"
 
 [build-system]
-requires = ["poetry>=0.12"]
-build-backend = "poetry.masonry.api"
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
```
23 changes: 0 additions & 23 deletions setup.py

This file was deleted.

2 changes: 1 addition & 1 deletion src/plugins/ELF_RSS2/RSS/routes/Parsing/handle_images.py
```diff
@@ -171,7 +171,7 @@ async def fuck_pixiv_cat(url: str) -> str:
 @retry(stop=(stop_after_attempt(5) | stop_after_delay(30)))
 async def download_image_detail(url: str, proxy: bool) -> Union[bytes, None]:
     async with httpx.AsyncClient(proxies=get_proxy(open_proxy=proxy)) as client:
-        referer = re.search("[hH][tT]{2}[pP][sS]?://[^/]+", url).group()
+        referer = f"{httpx.URL(url).scheme}://{httpx.URL(url).host}/"
         headers = {"referer": referer}
         try:
             pic = await client.get(url, headers=headers)
```
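Note: the referer is now derived with httpx's URL parser instead of a hand-rolled regex. A minimal sketch of the difference (the sample URL is hypothetical; the new form also gains a trailing slash):

```python
import re

import httpx

url = "https://i.pximg.net/img-original/img/0001.png"  # hypothetical sample

# Old: regex match on "scheme://host" at the start of the string; on a
# non-matching string, re.search returns None and .group() raises.
old_referer = re.search("[hH][tT]{2}[pP][sS]?://[^/]+", url).group()

# New: structured parsing; scheme and host come from a real URL parser.
parsed = httpx.URL(url)
new_referer = f"{parsed.scheme}://{parsed.host}/"

print(old_referer)  # https://i.pximg.net
print(new_referer)  # https://i.pximg.net/
```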
18 changes: 10 additions & 8 deletions src/plugins/ELF_RSS2/RSS/routes/Parsing/send_message.py
```diff
@@ -15,20 +15,20 @@ async def send_msg(rss: rss_class.Rss, msg: str, item: dict) -> bool:
     flag = False
     if not msg:
         return False
-    error_msg = f"消息发送失败,已达最大重试次数!\n链接:[{item['link']}]"
+    error_msg = f"消息发送失败,已达最大重试次数!\n链接:[{item.get('link')}]"
     if rss.user_id:
         friend_list = await get_bot_friend_list(bot)
         for user_id in rss.user_id:
             if int(user_id) not in friend_list:
                 logger.error(
-                    f"QQ号[{user_id}]不是Bot[{bot.self_id}]的好友 链接:[{item['link']}]"
+                    f"QQ号[{user_id}]不是Bot[{bot.self_id}]的好友 链接:[{item.get('link')}]"
                 )
                 continue
             try:
                 await bot.send_private_msg(user_id=int(user_id), message=str(msg))
                 flag = True
             except Exception as e:
-                logger.error(f"E: {repr(e)} 链接:[{item['link']}]")
+                logger.error(f"E: {repr(e)} 链接:[{item.get('link')}]")
                 if item.get("count") == 3:
                     await bot.send_private_msg(
                         user_id=int(user_id), message=f"{error_msg}\nE: {repr(e)}"
@@ -38,13 +38,15 @@ async def send_msg(rss: rss_class.Rss, msg: str, item: dict) -> bool:
         group_list = await get_bot_group_list(bot)
         for group_id in rss.group_id:
             if int(group_id) not in group_list:
-                logger.error(f"Bot[{bot.self_id}]未加入群组[{group_id}] 链接:[{item['link']}]")
+                logger.error(
+                    f"Bot[{bot.self_id}]未加入群组[{group_id}] 链接:[{item.get('link')}]"
+                )
                 continue
             try:
                 await bot.send_group_msg(group_id=int(group_id), message=str(msg))
                 flag = True
             except Exception as e:
-                logger.error(f"E: {repr(e)} 链接:[{item['link']}]")
+                logger.error(f"E: {repr(e)} 链接:[{item.get('link')}]")
                 if item.get("count") == 3:
                     await bot.send_group_msg(
                         group_id=int(group_id), message=f"E: {repr(e)}\n{error_msg}"
@@ -60,7 +62,7 @@ async def send_msg(rss: rss_class.Rss, msg: str, item: dict) -> bool:
                     "guild_name"
                 ]
                 logger.error(
-                    f"Bot[{bot.self_id}]未加入频道 {guild_name}[{guild_id}] 链接:[{item['link']}]"
+                    f"Bot[{bot.self_id}]未加入频道 {guild_name}[{guild_id}] 链接:[{item.get('link')}]"
                 )
                 continue
 
@@ -70,7 +72,7 @@ async def send_msg(rss: rss_class.Rss, msg: str, item: dict) -> bool:
                     "guild_name"
                 ]
                 logger.error(
-                    f"Bot[{bot.self_id}]未加入频道 {guild_name}[{guild_id}]的子频道[{channel_id}] 链接:[{item['link']}]"
+                    f"Bot[{bot.self_id}]未加入频道 {guild_name}[{guild_id}]的子频道[{channel_id}] 链接:[{item.get('link')}]"
                 )
                 continue
 
@@ -80,7 +82,7 @@ async def send_msg(rss: rss_class.Rss, msg: str, item: dict) -> bool:
                 )
                 flag = True
             except Exception as e:
-                logger.error(f"E: {repr(e)} 链接:[{item['link']}]")
+                logger.error(f"E: {repr(e)} 链接:[{item.get('link')}]")
                 if item.get("count") == 3:
                     await bot.send_guild_channel_msg(
                         message=f"E: {repr(e)}\n{error_msg}",
```
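Note: every `item['link']` becomes `item.get('link')`. The change is defensive: `auto_stop_and_notify_all` (added in rss_parsing.py below) calls `send_msg` with an empty `item`, and a bare subscript would raise KeyError inside the very logging path that reports errors. A minimal sketch:

```python
item: dict = {}  # e.g. the empty item passed by auto_stop_and_notify_all

# item["link"] would raise KeyError here; .get() returns None instead,
# so the log line degrades to "链接:[None]" rather than crashing.
print(f"链接:[{item.get('link')}]")  # -> 链接:[None]
```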
11 changes: 7 additions & 4 deletions src/plugins/ELF_RSS2/RSS/rss_class.py
```diff
@@ -2,6 +2,7 @@
 from pathlib import Path
 from typing import Union
 
+import httpx
 from nonebot.log import logger
 from tinydb import Query, TinyDB
 from tinydb.operations import set
@@ -30,18 +31,19 @@ def __init__(self):
         self.duplicate_filter_mode = []  # 去重模式
         self.max_image_number = 0  # 图片数量限制,防止消息太长刷屏
         self.content_to_remove = None  # 正文待移除内容,支持正则
+        self.error_count = 0  # 连续抓取失败的次数,超过 100 就停止更新
         self.stop = False  # 停止更新
 
     # 返回订阅链接
     def get_url(self, rsshub: str = config.rsshub) -> str:
-        if re.match("[hH][tT]{2}[pP][sS]?://", self.url, flags=0):
+        if httpx.URL(self.url).scheme in ["http", "https"]:
             return self.url
         else:
             # 先判断地址是否 / 开头
-            if re.match("/", self.url):
+            if self.url.startswith("/"):
                 return rsshub + self.url
-            else:
-                return rsshub + "/" + self.url
+
+            return rsshub + "/" + self.url
 
     # 读取记录
     @staticmethod
@@ -240,6 +242,7 @@ def __str__(self) -> str:
             f"{mode_msg}" if self.duplicate_filter_mode else "",
             f"图片数量限制:{self.max_image_number}" if self.max_image_number else "",
             f"正文待移除内容:{self.content_to_remove}" if self.content_to_remove else "",
+            f"连续抓取失败的次数:{self.error_count}" if self.error_count else "",
             f"停止更新:{self.stop}" if self.stop else "",
         ]
         return "\n".join([i for i in ret_list if i != ""])
```
66 changes: 43 additions & 23 deletions src/plugins/ELF_RSS2/RSS/rss_parsing.py
```diff
@@ -1,7 +1,6 @@
 # -*- coding: UTF-8 -*-
 
 import asyncio
-import re
 from pathlib import Path
 
 import feedparser
@@ -15,13 +14,14 @@
     stop_after_delay,
     wait_fixed,
 )
-from tinydb import TinyDB
+from tinydb import Query, TinyDB
 from tinydb.middlewares import CachingMiddleware
 from tinydb.storages import JSONStorage
 
-from ..config import DATA_PATH, config
+from ..config import DATA_PATH, JSON_PATH, config
 from ..RSS import my_trigger as tr
 from . import rss_class
-from .routes.Parsing import ParsingRss, get_proxy
+from .routes.Parsing import ParsingRss, get_proxy, send_msg
 from .routes.Parsing.cache_manage import cache_filter
 from .routes.Parsing.check_update import dict_hash
@@ -47,8 +47,10 @@ async def start(rss: rss_class.Rss) -> None:
     try:
         new_rss = await get_rss(rss)
     except RetryError:
-        cookies_str = "及 cookies " if rss.cookies else ""
-        logger.error(f"{rss.name}[{rss.get_url()}]抓取失败!已达最大重试次数!请检查订阅地址{cookies_str}!")
+        rss.error_count += 1
+        logger.warning(f"{rss.name} 抓取失败!已经尝试最多 6 次!")
+        if rss.error_count >= 100:
+            await auto_stop_and_notify_all(rss)
         return
     # 检查是否存在rss记录
     _file = DATA_PATH / (rss.name + ".json")
@@ -75,22 +77,41 @@ async def start(rss: rss_class.Rss) -> None:
     await pr.start(rss_name=rss.name, new_rss=new_rss)
 
 
+async def auto_stop_and_notify_all(rss: rss_class.Rss) -> None:
+    rss.stop = True
+    db = TinyDB(
+        JSON_PATH,
+        encoding="utf-8",
+        sort_keys=True,
+        indent=4,
+        ensure_ascii=False,
+    )
+    db.update(rss.__dict__, Query().name == str(rss.name))
+    await tr.delete_job(rss)
+    cookies_str = "及 cookies " if rss.cookies else ""
+    await send_msg(
+        rss=rss,
+        msg=f"{rss.name}[{rss.get_url()}]已经连续抓取失败超过 100 次!已自动停止更新!请检查订阅地址{cookies_str}!",
+        item={},
+    )
+
+
 async def raise_on_4xx_5xx(response: httpx.Response):
     response.raise_for_status()
 
 
 # 获取 RSS 并解析为 json ,失败重试
 @retry(wait=wait_fixed(1), stop=(stop_after_attempt(5) | stop_after_delay(30)))
 async def get_rss(rss: rss_class.Rss) -> dict:
-    proxies = get_proxy(rss.img_proxy)
+    rss_url = rss.get_url()
     # 对本机部署的 RSSHub 不使用代理
-    no_proxy = [
+    local_host = [
         "localhost",
         "127.0.0.1",
     ]
-    for i in no_proxy:
-        if i in rss.get_url():
-            proxies = None
+    proxies = (
+        get_proxy(rss.img_proxy) if httpx.URL(rss_url).host not in local_host else None
+    )
 
     # 判断是否使用cookies
     cookies = rss.cookies if rss.cookies else None
@@ -104,29 +125,28 @@ async def get_rss(rss: rss_class.Rss) -> dict:
         event_hooks={"response": [raise_on_4xx_5xx]},
     ) as client:
         try:
-            r = await client.get(rss.get_url())
+            r = await client.get(rss_url)
             # 解析为 JSON
             d = feedparser.parse(r.content)
         except Exception:
-            if (
-                not re.match("[hH][tT]{2}[pP][sS]?://", rss.url, flags=0)
-                and config.rsshub_backup
-            ):
-                logger.warning(f"[{rss.get_url()}]访问失败!将使用备用 RSSHub 地址!")
+            if not httpx.URL(rss.url).scheme and config.rsshub_backup:
+                logger.debug(f"[{rss_url}]访问失败!将使用备用 RSSHub 地址!")
                 for rsshub_url in list(config.rsshub_backup):
+                    rss_url = rss.get_url(rsshub=rsshub_url)
                     try:
-                        r = await client.get(rss.get_url(rsshub=rsshub_url))
+                        r = await client.get(rss_url)
                         d = feedparser.parse(r.content)
                     except Exception:
-                        logger.warning(
-                            f"[{rss.get_url(rsshub=rsshub_url)}]访问失败!将使用备用 RSSHub 地址!"
-                        )
+                        logger.debug(f"[{rss_url}]访问失败!将使用备用 RSSHub 地址!")
                         continue
                     if d.get("feed"):
-                        logger.info(f"[{rss.get_url(rsshub=rsshub_url)}]抓取成功!")
+                        logger.info(f"[{rss_url}]抓取成功!")
                         break
         finally:
             if not d or not d.get("feed"):
-                logger.warning(f"{rss.name} 抓取失败!将重试最多 5 次!")
+                logger.debug(f"{rss.name} 抓取失败!将重试最多 5 次!")
+                rss.error_count += 1
                 raise TryAgain
+    if d.get("feed") and rss.error_count > 0:
+        rss.error_count = 0
     return d
```
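Note: taken together, this file now implements a consecutive-failure circuit breaker: every exhausted retry cycle bumps `rss.error_count`, any successful fetch resets it, and at 100 the subscription is stopped, persisted, unscheduled, and all subscribers are notified. A minimal runnable sketch of just that control flow, with hypothetical stand-ins for the fetch and notify steps:

```python
import asyncio

MAX_CONSECUTIVE_FAILURES = 100  # the threshold hard-coded by this commit


class FeedState:
    """Hypothetical stand-in for rss_class.Rss, reduced to the new fields."""

    def __init__(self) -> None:
        self.error_count = 0
        self.stop = False


async def fetch(state: FeedState) -> dict:
    raise ConnectionError("feed unreachable")  # simulate one failed retry cycle


async def check_feed(state: FeedState) -> None:
    try:
        await fetch(state)
    except Exception:
        state.error_count += 1  # one exhausted retry cycle, one failure
        if state.error_count >= MAX_CONSECUTIVE_FAILURES:
            state.stop = True  # stand-in for auto_stop_and_notify_all
        return
    # Any success clears the streak, so only consecutive failures count.
    if state.error_count > 0:
        state.error_count = 0


async def main() -> None:
    state = FeedState()
    for _ in range(MAX_CONSECUTIVE_FAILURES):
        await check_feed(state)
    print(state.error_count, state.stop)  # 100 True


asyncio.run(main())
```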
4 changes: 3 additions & 1 deletion src/plugins/ELF_RSS2/change_dy.py
```diff
@@ -108,6 +108,8 @@ async def handle_change_list(
         "stop",
     ]:
         value_to_change = bool(int(value_to_change))  # type:ignore
+        if key_to_change == "stop" and not value_to_change and rss.error_count > 0:
+            rss.error_count = 0
     elif (
         key_to_change in ["downkey", "wkey", "blackkey", "bkey"]
         and len(value_to_change.strip()) == 0
@@ -227,7 +229,7 @@ async def handle_rss_change(
         await tr.add_job(rss)
     else:
         await tr.delete_job(rss)
-        logger.info(f"{rss.name} 已停止更新")
+        logger.info(f"{rss_name} 已停止更新")
     rss_msg = str(rss)
 
     # 隐私考虑,群组下不展示除当前群组外的群号和QQ
```
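Note: the first hunk closes the loop on the circuit breaker: an auto-stopped feed still carries its failure streak, so re-enabling it with `stop=0` also clears `error_count`; otherwise the very next failed fetch would trip the 100-failure breaker again. A minimal sketch (hypothetical objects):

```python
class Rss:
    """Hypothetical stand-in for the real rss_class.Rss."""

    error_count = 100
    stop = True


rss = Rss()
key_to_change, value_to_change = "stop", bool(int("0"))  # user sends stop=0

if key_to_change == "stop" and not value_to_change and rss.error_count > 0:
    rss.error_count = 0  # a re-enabled feed starts with a clean streak

setattr(rss, key_to_change, value_to_change)
print(rss.error_count, rss.stop)  # 0 False
```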
