From e965330c066044a6b6a4f849caf3de74c6388ed2 Mon Sep 17 00:00:00 2001 From: Zehina Date: Sun, 20 Oct 2024 20:27:49 +0100 Subject: [PATCH] add ability to use foreign proxy addresses --- webtoon_downloader/cmd/cli.py | 7 +++++++ webtoon_downloader/core/webtoon/client.py | 6 ++++-- webtoon_downloader/core/webtoon/downloaders/chapter.py | 1 + webtoon_downloader/core/webtoon/downloaders/comic.py | 8 +++++--- webtoon_downloader/core/webtoon/downloaders/options.py | 5 ++++- 5 files changed, 21 insertions(+), 6 deletions(-) diff --git a/webtoon_downloader/cmd/cli.py b/webtoon_downloader/cmd/cli.py index 63a5b0e..9eed0da 100644 --- a/webtoon_downloader/cmd/cli.py +++ b/webtoon_downloader/cmd/cli.py @@ -136,6 +136,11 @@ def validate_concurrent_count(ctx: Any, param: Any, value: int | None) -> int | callback=validate_concurrent_count, help="Number of workers for concurrent image downloads. This value is shared between all concurrent chapter downloads.", ) +@click.option( + "--proxy", + type=str, + help="proxy address to use for making requests. e.g. 
http://127.0.0.1:7890", +) @click.option("--debug", type=bool, is_flag=True, help="Enable debug mode") def cli( ctx: click.Context, @@ -151,6 +156,7 @@ def cli( save_as: StorageType, concurrent_chapters: int, concurrent_pages: int, + proxy: str, debug: bool, ) -> None: log, console = logger.setup( @@ -197,6 +203,7 @@ def cli( on_webtoon_fetched=progress_manager.on_webtoon_fetched, concurrent_chapters=concurrent_chapters, concurrent_pages=concurrent_pages, + proxy=proxy, ) loop = asyncio.get_event_loop() diff --git a/webtoon_downloader/core/webtoon/client.py b/webtoon_downloader/core/webtoon/client.py index 07c818d..24ffb83 100644 --- a/webtoon_downloader/core/webtoon/client.py +++ b/webtoon_downloader/core/webtoon/client.py @@ -46,7 +46,7 @@ def _generate_headers() -> dict[str, str]: } -def new() -> httpx.AsyncClient: +def new(proxy: str | None = None) -> httpx.AsyncClient: """ Creates and returns an asynchronous HTTP client configured for scrapping webtoon website. @@ -59,10 +59,11 @@ def new() -> httpx.AsyncClient: http2=True, headers=_generate_headers(), follow_redirects=True, + proxy=proxy, ) -def new_image_client() -> httpx.AsyncClient: +def new_image_client(proxy: str | None = None) -> httpx.AsyncClient: """ Creates and returns an asynchronous HTTP client configured for downloading webtoon images. 
@@ -77,4 +78,5 @@ def new_image_client() -> httpx.AsyncClient: "referer": "https://www.webtoons.com/", **_generate_headers(), }, + proxy=proxy, ) diff --git a/webtoon_downloader/core/webtoon/downloaders/chapter.py b/webtoon_downloader/core/webtoon/downloaders/chapter.py index 5e824cf..d7570c9 100644 --- a/webtoon_downloader/core/webtoon/downloaders/chapter.py +++ b/webtoon_downloader/core/webtoon/downloaders/chapter.py @@ -79,6 +79,7 @@ async def _run( await self._report_progress(chapter_info, "Start") resp = await self.client.get(chapter_info.viewer_url) + print('Fetched: "%s" from chapter "%s" => %s', chapter_info.viewer_url, chapter_info.title, resp.status_code) extractor = WebtoonViewerPageExtractor(resp.text) img_urls = extractor.get_img_urls() await self._report_progress(chapter_info, "ChapterInfoFetched", extractor) diff --git a/webtoon_downloader/core/webtoon/downloaders/comic.py b/webtoon_downloader/core/webtoon/downloaders/comic.py index b6a312e..af4d4d3 100644 --- a/webtoon_downloader/core/webtoon/downloaders/comic.py +++ b/webtoon_downloader/core/webtoon/downloaders/comic.py @@ -46,6 +46,7 @@ class WebtoonDownloader: directory : The directory where the downloaded chapters will be stored. exporter : Optional data exporter for exporting series details. on_webtoon_fetched : Optional callback executed after fetching Webtoon information. + proxy : Optional proxy address for making requests. """ url: str @@ -57,6 +58,7 @@ class WebtoonDownloader: directory: str | PathLike[str] | None = None exporter: DataExporter | None = None on_webtoon_fetched: OnWebtoonFetchCallback | None = None + proxy: str | None = None _directory: Path = field(init=False) @@ -77,7 +79,7 @@ async def run(self) -> list[DownloadResult]: Returns: A list containing download results for each chapter. 
""" - async with webtoon.client.new() as client: + async with webtoon.client.new(self.proxy) as client: chapter_list = await self._get_chapters(client) resp = await client.get(self.url) extractor = WebtoonMainPageExtractor(resp.text) @@ -182,14 +184,14 @@ async def download_webtoon(opts: WebtoonDownloadOptions) -> list[DownloadResult] else NonSeparateFileNameGenerator() ) image_downloader = ImageDownloader( - client=webtoon.client.new_image_client(), + client=webtoon.client.new_image_client(opts.proxy), transformers=[AioImageFormatTransformer(opts.image_format)], concurent_downloads_limit=opts.concurrent_pages, ) exporter = DataExporter(opts.exporter_format) if opts.export_metadata else None chapter_downloader = ChapterDownloader( - client=webtoon.client.new(), + client=webtoon.client.new(opts.proxy), exporter=exporter, progress_callback=opts.chapter_progress_callback, image_downloader=image_downloader, diff --git a/webtoon_downloader/core/webtoon/downloaders/options.py b/webtoon_downloader/core/webtoon/downloaders/options.py index 6b1c5bf..2b23f07 100644 --- a/webtoon_downloader/core/webtoon/downloaders/options.py +++ b/webtoon_downloader/core/webtoon/downloaders/options.py @@ -39,7 +39,8 @@ class WebtoonDownloadOptions: chapter_progress_callback : Callback function for chapter download progress. on_webtoon_fetched : function invoked after fetching Webtoon information. concurrent_chapters : The number of chapters to download concurrently. - concurrent_pages : The number of images to download concurrently. + concurrent_pages : The number of images to download concurrently. + proxy : proxy address to use for making requests. """ url: str @@ -61,3 +62,5 @@ class WebtoonDownloadOptions: concurrent_chapters: int = DEFAULT_CONCURENT_CHAPTER_DOWNLOADS concurrent_pages: int = DEFAULT_CONCURENT_IMAGE_DOWNLOADS + + proxy: str | None = None