Skip to content

Commit

Permalink
Merge pull request #331 from Quan666/pikpak-3
Browse files Browse the repository at this point in the history
✨ 支持 PikPak 磁力链接 离线下载、支持正则匹配正文中关键字作为保存目录
  • Loading branch information
Quan666 authored Jul 19, 2022
2 parents b04952f + 71d0118 commit 1fb7dab
Show file tree
Hide file tree
Showing 13 changed files with 294 additions and 32 deletions.
5 changes: 5 additions & 0 deletions .env.dev
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ BLOCKQUOTE=true # 是否显示转发的内容(主要是微博),默认打开
#DOWN_STATUS_MSG_GROUP=[] # 下载进度消息提示群组 示例 [12345678] 注意:最好是将该群设置为免打扰
#DOWN_STATUS_MSG_DATE=10 # 下载进度检查及提示间隔时间,秒,不建议小于 10s

# pikpak 相关设置
#PIKPAK_USERNAME="" # pikpak 用户名
#PIKPAK_PASSWORD="" # pikpak 密码
#PIKPAK_DOWNLOAD_PATH="" # pikpak 离线保存的目录, 默认是根目录,示例: ELF_RSS/Downloads ,目录不存在会自动创建, 不能/结尾

# MYELF博客地址 https://myelf.club
# 出现问题请在 GitHub 上提 issues
# 项目地址 https://github.com/Quan666/ELF_RSS
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
* 多平台支持
* 图片压缩后发送
* 种子下载并上传到群文件
* 离线下载到 PikPak 网盘(方便追番)
* 消息支持根据链接、标题、图片去重
* 可设置只发送限定数量的图片,防止刷屏
* 可设置从正文中要移除的指定内容,支持正则
Expand Down Expand Up @@ -57,9 +58,9 @@
* [x] 1. 订阅信息保护,不在群组中输出订阅QQ、群组
* [x] 2. 更为强大的检查更新时间设置
* [x] 3. RSS 源中 torrent 自动下载并上传至订阅群(适合番剧订阅)
* [ ] 4. 暂停检查订阅更新
* [ ] 5. 模糊匹配订阅名
* [ ] 6. 性能优化,全部替换为异步操作
* [x] 4. 暂停检查订阅更新
* [x] 5. 正则匹配订阅名
* [x] 6. 性能优化,尽可能替换为异步操作

## 感谢以下项目或服务

Expand Down
2 changes: 2 additions & 0 deletions docs/2.0 使用教程.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@
>| 图片数量限制 | -img_num | 正整数 | 只发送限定数量的图片,防止刷屏 |
>| 正文待移除内容 | -rm_list | 无空格字符串 / -1 | 从正文中要移除的指定内容,支持正则表达式<br />因为参数解析的缘故,格式必须如:`rm_list='a'``rm_list='a','b'`<br />该处理过程是在解析 html 标签后进行的<br />要将该参数设为空,使用 `rm_list='-1'` |
>| 停止更新 | -stop | 1 / 0 | 对订阅停止、恢复检查更新 |
>| PikPak 离线下载| -pikpak | 1 / 0 | 将磁力链接离线到 PikPak 网盘,方便追番 |
>| PikPak 离线下载路径匹配| -ppk | 无空格字符串 | 匹配正文中的关键字作为目录 |
>
> **注:**
>
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ nonebot-adapter-onebot = "^2.1.1"
nonebot-plugin-apscheduler = "^0.1.3"
nonebot-plugin-guild-patch = "^0.2.0"
nonebot2 = "^2.0.0b4"
pikpakapi = "^0.0.5"
Pillow = "^9.2.0"
pydantic = "^1.9.1"
pyquery = "^1.4.3"
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ nonebot-adapter-onebot==2.1.1
nonebot-plugin-apscheduler~=0.1.3
nonebot-plugin-guild-patch~=0.2.0
nonebot2==2.0.0b4
pikpakapi~=0.0.5
Pillow~=9.2.0
pydantic~=1.9.1
pyquery~=1.4.3
Expand Down
11 changes: 8 additions & 3 deletions src/plugins/ELF_RSS2/command/change_dy.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ def handle_property(value: str, property_list: List[Any]) -> List[Any]:
"mode": "duplicate_filter_mode",
"img_num": "max_image_number",
"stop": "stop",
"pikpak": "pikpak_offline",
"ppk": "pikpak_path_key",
}


Expand Down Expand Up @@ -103,12 +105,13 @@ def handle_change_list(
"upgroup",
"downopen",
"stop",
"pikpak",
}:
value_to_change = bool(int(value_to_change)) # type:ignore
if key_to_change == "stop" and not value_to_change and rss.error_count > 0:
rss.error_count = 0
elif (
key_to_change in {"downkey", "wkey", "blackkey", "bkey"}
key_to_change in {"downkey", "wkey", "blackkey", "bkey", "ppk"}
and len(value_to_change.strip()) == 0
):
value_to_change = None # type:ignore
Expand Down Expand Up @@ -140,10 +143,12 @@ def handle_change_list(
去重模式(-mode)
图片数量限制(-img_num): 只发送限定数量的图片,防止刷屏
正文移除内容(-rm_list): 从正文中移除指定内容,支持正则
停止更新-stop"
停止更新(-stop): 停止更新订阅
PikPak离线(-pikpak): 开启PikPak离线下载
PikPak离线路径匹配(-ppk): 匹配离线下载的文件夹,设置该值后生效
注:
1. 仅含有图片不同于仅图片,除了图片还会发送正文中的其他文本信息
2. proxy/tl/ot/op/ohp/downopen/upgroup/stop 值为 1/0
2. proxy/tl/ot/op/ohp/downopen/upgroup/stop/pikpak 值为 1/0
3. 去重模式分为按链接(link)、标题(title)、图片(image)判断,其中 image 模式生效对象限定为只带 1 张图片的消息。如果属性中带有 or 说明判断逻辑是任一匹配即去重,默认为全匹配
4. 白名单关键词支持正则表达式,匹配时推送消息及下载,设为空(wkey=)时不生效
5. 黑名单关键词同白名单相似,匹配时不推送,两者可以一起用
Expand Down
6 changes: 6 additions & 0 deletions src/plugins/ELF_RSS2/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ class Config:
down_status_msg_group: Optional[List[int]] = None
down_status_msg_date: int = 10

pikpak_username: Optional[str] = None # pikpak 用户名
pikpak_password: Optional[str] = None # pikpak 密码
pikpak_download_path: str = (
"" # pikpak 离线保存的目录, 默认是根目录,示例: ELF_RSS/Downloads ,目录不存在会自动创建, 不能/结尾
)


config = ELFConfig(**get_driver().config.dict())
logger.debug(f"RSS Config loaded: {config!r}")
28 changes: 21 additions & 7 deletions src/plugins/ELF_RSS2/parsing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import re
import sqlite3
from email.utils import parsedate_to_datetime
from typing import Any, Dict
from typing import Any, Dict, List

import arrow
import emoji
Expand All @@ -19,7 +19,7 @@
write_item,
)
from .check_update import check_update
from .download_torrent import down_torrent
from .download_torrent import down_torrent, pikpak_offline
from .handle_html_tag import handle_bbcode, handle_html_tag
from .handle_images import handle_img
from .handle_translation import handle_translation
Expand Down Expand Up @@ -287,7 +287,7 @@ async def handle_torrent(
tmp: str,
tmp_state: Dict[str, Any],
) -> str:
res = ""
res: List[str] = []
if not rss.is_open_upload_group:
rss.group_id = []
if rss.down_torrent:
Expand All @@ -297,12 +297,26 @@ async def handle_torrent(
rss=rss, item=item, proxy=get_proxy(rss.img_proxy)
)
if hash_list and hash_list[0] is not None:
res = "\n磁力:\n" + "\n".join(
[f"magnet:?xt=urn:btih:{h}" for h in hash_list]
)
res.append("\n磁力:")
res.extend([f"magnet:?xt=urn:btih:{h}" for h in hash_list])
except Exception:
logger.exception("下载种子时出错")
return res
if rss.pikpak_offline:
try:
result = await pikpak_offline(
rss=rss, item=item, proxy=get_proxy(rss.img_proxy)
)
if result:
res.append("\nPikPak 离线成功")
res.extend(
[
f"{r.get('name')}\n{r.get('file_size')} - {r.get('path')}"
for r in result
]
)
except Exception:
logger.exception("PikPak 离线时出错")
return "\n".join(res)


# 处理日期
Expand Down
68 changes: 67 additions & 1 deletion src/plugins/ELF_RSS2/parsing/download_torrent.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,23 @@
import re
from typing import Any, Dict, List, Optional

import aiohttp
from nonebot.log import logger

from ..config import config
from ..parsing.utils import get_summary
from ..pikpak_offline import pikpak_offline_download
from ..qbittorrent_download import start_down
from ..rss_class import Rss
from ..utils import convert_size, get_torrent_b16_hash, send_msg


# 创建下载种子任务
async def down_torrent(
rss: Rss, item: Dict[str, Any], proxy: Optional[str]
) -> List[str]:
"""
创建下载种子任务
"""
hash_list = []
for tmp in item["links"]:
if (
Expand All @@ -23,3 +33,59 @@ async def down_torrent(
)
)
return hash_list


async def pikpak_offline(
    rss: Rss, item: Dict[str, Any], proxy: Optional[str]
) -> List[Dict[str, Any]]:
    """
    Create PikPak offline-download tasks for every torrent link in a feed item.

    Saves into ``config.pikpak_download_path/rss.name``, or — when
    ``rss.pikpak_path_key`` matches the item summary — into a sub-directory
    named after the first regex match (filesystem-unsafe characters replaced
    with "_").

    rss: the subscription this item belongs to (supplies the name, the path
         regex, and the users/groups to notify on failure)
    item: one parsed feed entry; its "links" list is scanned for torrents
    proxy: optional HTTP proxy URL used when fetching .torrent files

    Returns a list of {"name", "file_size", "path"} dicts, one per task
    successfully submitted; failed links are logged/notified and skipped.
    """
    download_infos: List[Dict[str, Any]] = []
    for tmp in item["links"]:
        # Only handle links that look like torrents (by MIME type or URL).
        if (
            tmp["type"] == "application/x-bittorrent"
            or tmp["href"].find(".torrent") > 0
        ):
            url = tmp["href"]
            # Non-magnet links: fetch the .torrent payload and build a magnet
            # URI from its info-hash, since the offline task is fed a magnet.
            if not re.search(r"magnet:\?xt=urn:btih:", tmp["href"]):
                async with aiohttp.ClientSession(
                    timeout=aiohttp.ClientTimeout(total=100)
                ) as session:
                    try:
                        resp = await session.get(tmp["href"], proxy=proxy)
                        content = await resp.read()
                        url = f"magnet:?xt=urn:btih:{get_torrent_b16_hash(content)}"
                    except Exception as e:
                        # Best-effort: report this link's failure and continue
                        # with the remaining links instead of aborting.
                        msg = f"{rss.name} 下载种子失败: {e}"
                        logger.error(msg)
                        await send_msg(
                            msg=msg, user_ids=rss.user_id, group_ids=rss.group_id
                        )
                        continue
            try:
                # Default target directory: <base path>/<subscription name>.
                path = f"{config.pikpak_download_path}/{rss.name}"
                summary = get_summary(item)
                # Optional override: use the first regex match from the item
                # summary as the directory name (rss.pikpak_path_key is a regex).
                if rss.pikpak_path_key and (
                    result := re.findall(rss.pikpak_path_key, summary)
                ):
                    path = (
                        config.pikpak_download_path
                        + "/"
                        + re.sub(r'[?*:"<>\\/|]', "_", result[0])
                    )
                logger.info(f"Offline download {url} to {path}")
                info = await pikpak_offline_download(url=url, path=path)
                download_infos.append(
                    {
                        "name": info["task"]["name"],
                        "file_size": convert_size(int(info["task"]["file_size"])),
                        "path": path,
                    }
                )
            except Exception as e:
                msg = f"{rss.name} PikPak 离线下载失败: {e}"
                logger.error(msg)
                await send_msg(msg=msg, user_ids=rss.user_id, group_ids=rss.group_id)
    return download_infos
114 changes: 114 additions & 0 deletions src/plugins/ELF_RSS2/pikpak_offline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
from typing import Any, Dict, List, Optional

from nonebot.log import logger
from pikpakapi.async_api import PikPakApiAsync
from pikpakapi.PikpakException import PikpakAccessTokenExpireException, PikpakException

from .config import config

# Module-level PikPak API client shared by every helper below. Credentials
# come from the plugin config and may be None until the user sets them in
# .env — authentication happens lazily via login()/refresh_access_token().
pikpak_client = PikPakApiAsync(
    username=config.pikpak_username,
    password=config.pikpak_password,
)


async def refresh_access_token() -> None:
    """Refresh the PikPak access token; fall back to a full login on failure."""
    try:
        await pikpak_client.refresh_access_token()
    except (PikpakAccessTokenExpireException, PikpakException) as err:
        # Refresh token rejected/expired — re-authenticate from scratch.
        logger.warning(f"refresh_access_token {err}")
        await pikpak_client.login()


async def login() -> None:
    """Log in to PikPak unless an access token is already cached."""
    if pikpak_client.access_token:
        return
    await pikpak_client.login()


async def path_to_id(
    path: Optional[str] = None, create: bool = False
) -> List[Dict[str, Any]]:
    """
    Resolve a slash-separated path to PikPak folder ids.

    path: str like "/1/2/3"; empty/None yields [] (drive root)
    create: create any missing folder along the way when True

    Returns a list of {"id", "name"} dicts, one per resolved segment in
    order. The list may be shorter than the path when a segment is missing
    and create is False.
    """
    if not path:
        return []
    paths = [p.strip() for p in path.split("/") if len(p) > 0]
    path_ids: List[Dict[str, Any]] = []
    count = 0
    next_page_token = None
    parent_id = None
    while count < len(paths):
        data = await pikpak_client.file_list(
            parent_id=parent_id, next_page_token=next_page_token
        )
        if _id := next(
            (
                f.get("id")
                for f in data.get("files", [])
                if f.get("kind", "") == "drive#folder" and f.get("name") == paths[count]
            ),
            "",
        ):
            path_ids.append(
                {
                    "id": _id,
                    "name": paths[count],
                }
            )
            count += 1
            parent_id = _id
            # Bug fix: restart pagination for the child folder we just
            # descended into; carrying the previous parent's page token over
            # would list the wrong page (or miss folders entirely).
            next_page_token = None
        elif data.get("next_page_token"):
            # Folder not on this page — fetch the next page of the same parent.
            next_page_token = data.get("next_page_token")
        elif create:
            data = await pikpak_client.create_folder(
                name=paths[count], parent_id=parent_id
            )
            _id = data.get("file").get("id")
            path_ids.append(
                {
                    "id": _id,
                    "name": paths[count],
                }
            )
            count += 1
            parent_id = _id
            # Same reset as above after creating and descending.
            next_page_token = None
        else:
            # Segment missing and creation disabled: return what was resolved.
            break
    return path_ids


async def pikpak_offline_download(
    url: str,
    path: Optional[str] = None,
    parent_id: Optional[str] = None,
    name: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Submit an offline-download task to PikPak.

    url: magnet (or other) link to download
    path: slash-separated target folder, created if missing; None means
          the drive root
    parent_id: explicit target folder id; takes precedence over ``path``
    name: optional file-name override passed through to the API

    Returns the raw API response dict (contains a "task" entry).
    Raises Exception when the task cannot be submitted.
    """
    await login()
    try:
        if not parent_id:
            path_ids = await path_to_id(path, create=True)
            if path_ids and len(path_ids) > 0:
                parent_id = path_ids[-1].get("id")
        return await pikpak_client.offline_download(url, parent_id=parent_id, name=name)
    except (PikpakAccessTokenExpireException, PikpakException) as e:
        # Token presumably expired: refresh (or re-login) and retry via
        # recursion. NOTE(review): a persistently failing API call would
        # recurse indefinitely — confirm these exceptions are transient.
        logger.warning(e)
        await refresh_access_token()
        return await pikpak_offline_download(
            url=url, path=path, parent_id=parent_id, name=name
        )
    except Exception as e:
        # Wrap any other failure so callers see a uniform error message.
        msg = f"PikPak Offline Download Error: {e}"
        logger.error(msg)
        raise Exception(msg) from e
Loading

0 comments on commit 1fb7dab

Please sign in to comment.