Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ability to set number of workers to fix rate limiting issue #61

Merged
merged 4 commits into from
Oct 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 55 additions & 13 deletions webtoon_downloader/cmd/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,24 @@

from webtoon_downloader import logger
from webtoon_downloader.cmd.exceptions import (
LatestWithStartOrEndError,
SeparateOptionWithNonImageSaveAsError,
CLIInvalidConcurrentCountError,
CLIInvalidStartAndEndRangeError,
CLILatestWithStartOrEndError,
CLISeparateOptionWithNonImageSaveAsError,
handle_deprecated_options,
)
from webtoon_downloader.cmd.progress import ChapterProgressManager, init_progress
from webtoon_downloader.core.exceptions import WebtoonDownloadError
from webtoon_downloader.core.webtoon.downloaders import comic
from webtoon_downloader.core.webtoon.downloaders.options import StorageType, WebtoonDownloadOptions
from webtoon_downloader.core.webtoon.downloaders.options import (
DEFAULT_CONCURENT_CHAPTER_DOWNLOADS,
DEFAULT_CONCURENT_IMAGE_DOWNLOADS,
StorageType,
WebtoonDownloadOptions,
)
from webtoon_downloader.core.webtoon.exporter import DataExporterFormat
from webtoon_downloader.transformers.image import ImageFormat

log, console = logger.setup()
help_config = click.RichHelpConfiguration(
show_metavars_column=False,
append_metavars_help=True,
Expand All @@ -33,6 +40,13 @@ class GracefulExit(SystemExit):
code = 1


def validate_concurrent_count(ctx: Any, param: Any, value: int | None) -> int | None:
    """
    Click option callback that rejects non-positive worker counts.

    Args:
        ctx: The Click context (unused).
        param: The Click parameter being validated (unused).
        value: The worker count to validate, or None.

    Returns:
        The value unchanged when it is None or strictly positive.

    Raises:
        CLIInvalidConcurrentCountError: If the value is zero or negative.
    """
    if value is None:
        return None

    if value <= 0:
        raise CLIInvalidConcurrentCountError(value)

    return value


@click.command()
@click.version_option()
@click.pass_context
Expand All @@ -44,12 +58,7 @@ class GracefulExit(SystemExit):
type=int,
help="Start chapter",
)
@click.option(
"--end",
"-e",
type=int,
help="End chapter",
)
@click.option("--end", "-e", type=int, help="End chapter")
@click.option(
"--latest",
"-l",
Expand Down Expand Up @@ -113,6 +122,21 @@ class GracefulExit(SystemExit):
hidden=True,
help="[Deprecated] Use --export-metadata instead",
)
@click.option(
"--concurrent-chapters",
type=int,
default=DEFAULT_CONCURENT_CHAPTER_DOWNLOADS,
callback=validate_concurrent_count,
help="Number of workers for concurrent chapter downloads",
)
@click.option(
"--concurrent-pages",
type=int,
default=DEFAULT_CONCURENT_IMAGE_DOWNLOADS,
callback=validate_concurrent_count,
help="Number of workers for concurrent image downloads. This value is shared between all concurrent chapter downloads.",
)
@click.option("--debug", type=bool, is_flag=True, help="Enable debug mode")
def cli(
ctx: click.Context,
url: str,
Expand All @@ -125,17 +149,28 @@ def cli(
export_metadata: bool,
export_format: DataExporterFormat,
save_as: StorageType,
concurrent_chapters: int,
concurrent_pages: int,
debug: bool,
) -> None:
log, console = logger.setup(
log_filename="webtoon_downloader.log" if debug else None,
enable_traceback=debug,
enable_console_logging=debug,
)

loop = asyncio.get_event_loop()
if not url:
console.print(
'[red]A Webtoon URL of the form [green]"https://www.webtoons.com/.../list?title_no=??"[/] of is required.'
)
ctx.exit(1)
if latest and (start or end):
raise LatestWithStartOrEndError(ctx)
raise CLILatestWithStartOrEndError(ctx)
if separate and (save_as != "images"):
raise SeparateOptionWithNonImageSaveAsError(ctx)
raise CLISeparateOptionWithNonImageSaveAsError(ctx)
if start is not None and end is not None and start > end:
raise CLIInvalidStartAndEndRangeError(ctx)

progress = init_progress(console)
series_download_task = progress.add_task(
Expand All @@ -160,6 +195,8 @@ def cli(
save_as=save_as,
chapter_progress_callback=progress_manager.advance_progress,
on_webtoon_fetched=progress_manager.on_webtoon_fetched,
concurrent_chapters=concurrent_chapters,
concurrent_pages=concurrent_pages,
)

loop = asyncio.get_event_loop()
Expand All @@ -181,11 +218,16 @@ def _raise_graceful_exit(*_: Any) -> None:
signal.signal(signal.SIGINT, _raise_graceful_exit)
signal.signal(signal.SIGTERM, _raise_graceful_exit)
with contextlib.suppress(GracefulExit):
loop.run_until_complete(main_task)
try:
loop.run_until_complete(main_task)
except WebtoonDownloadError as exc:
console.print(f"[red][bold]Download error:[/bold] {exc}[/]")
log.exception("Download error")


def run() -> None:
    """CLI entrypoint; falls back to ``--help`` when no arguments were given."""
    # sys.argv[0] is the program name, so fewer than two entries means no arguments.
    no_arguments_given = len(sys.argv) < 2
    if no_arguments_given:
        sys.argv.append("--help")

    cli()  # pylint: disable=no-value-for-parameter
33 changes: 28 additions & 5 deletions webtoon_downloader/cmd/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,20 @@
import rich_click as click


class LatestWithStartOrEndError(click.UsageError):
class CLIInvalidStartAndEndRangeError(click.UsageError):
    """
    Usage error raised when the start chapter is greater than the end chapter.

    Args:
        ctx: The Click context associated with the error, if any.
    """

    def __init__(self, ctx: click.Context | None = None) -> None:
        super().__init__("Start chapter cannot be greater than end chapter.", ctx)


class CLILatestWithStartOrEndError(click.UsageError):
"""
This error is raised when the user attempts to use --latest in conjunction
with either --start or --end options, which is not allowed due to their
Expand All @@ -20,7 +33,7 @@ def __init__(self, ctx: click.Context | None = None) -> None:
super().__init__(message, ctx)


class SeparateOptionWithNonImageSaveAsError(click.UsageError):
class CLISeparateOptionWithNonImageSaveAsError(click.UsageError):
"""
This error is raised when the user attempts to use --separate with a save-as
option other than 'images'. The --separate option is only compatible with
Expand All @@ -35,7 +48,7 @@ def __init__(self, ctx: click.Context | None = None) -> None:
super().__init__(message, ctx)


class DeprecatedOptionError(click.UsageError):
class CLIDeprecatedOptionError(click.UsageError):
"""
Custom error for handling deprecated options in the CLI.
"""
Expand All @@ -45,9 +58,19 @@ def __init__(self, deprecated_option: str, use_instead_option: str):
super().__init__(message)


class CLIInvalidConcurrentCountError(click.BadParameter):
    """
    Bad-parameter error raised for an invalid concurrent worker count in the CLI.

    Args:
        value: The rejected value; it is echoed back in the error message.
    """

    def __init__(self, value: Any):
        super().__init__(f"Invalid value for concurrent workers {value}.")


def handle_deprecated_options(_: click.Context, param: click.Parameter, value: Any) -> None:
    """
    Click callback that rejects deprecated options and names their replacement.

    Args:
        _: The Click context (unused).
        param: The parameter being processed; only ``export_texts`` and ``dest`` are handled.
        value: The parsed value of the parameter.

    Raises:
        CLIDeprecatedOptionError: If ``--export-texts`` is set or ``--dest`` is given a value.
    """
    if param.name == "export_texts" and value:
        raise CLIDeprecatedOptionError(deprecated_option="--export-texts", use_instead_option="--export-metadata")
    elif param.name == "dest" and value is not None:
        raise CLIDeprecatedOptionError(deprecated_option="--dest", use_instead_option="--out")
13 changes: 12 additions & 1 deletion webtoon_downloader/core/downloaders/image.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

import asyncio
import logging
from dataclasses import dataclass, field
from typing import Awaitable, Callable

Expand All @@ -9,6 +11,8 @@
from webtoon_downloader.storage import AioWriter
from webtoon_downloader.transformers.base import AioImageTransformer

log = logging.getLogger(__name__)

ImageProgressCallback = Callable[[int], Awaitable[None]]
"""
Progress callback called for each image download.
Expand All @@ -32,9 +36,15 @@ class ImageDownloadResult:
@dataclass
class ImageDownloader:
client: httpx.AsyncClient
concurent_downloads_limit: int
transformers: list[AioImageTransformer] = field(default_factory=list)
progress_callback: ImageProgressCallback | None = None

_semaphore: asyncio.Semaphore = field(init=False)

def __post_init__(self) -> None:
    """Create the semaphore that ``run`` acquires around each image download."""
    limit = self.concurent_downloads_limit
    self._semaphore = asyncio.Semaphore(limit)

async def run(self, url: str, target: str, storage: AioWriter) -> ImageDownloadResult:
"""
Initiates the downloading of an image from a specified URL.
Expand All @@ -50,7 +60,8 @@ async def run(self, url: str, target: str, storage: AioWriter) -> ImageDownloadR
ImageDownloadError: If an error occurs during the download process.
"""
try:
return await self._download_image(self.client, url, target, storage)
async with self._semaphore:
return await self._download_image(self.client, url, target, storage)
except Exception as exc:
raise ImageDownloadError(url=url, cause=exc) from exc

Expand Down
57 changes: 56 additions & 1 deletion webtoon_downloader/core/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,20 @@ class DownloadError(Exception):
def __str__(self) -> str:
    """
    Build the human-readable description of the download failure.

    An explicit ``message`` takes precedence. Otherwise the text is derived
    from ``url`` and, when available, from ``cause``.
    """
    if self.message:
        return self.message

    if self.cause:
        cause_msg = str(self.cause)
        if cause_msg:
            return f"Failed to download from {self.url} => {cause_msg}"

        # Some exceptions stringify to "", so name the exception type instead.
        return f"Failed to download from {self.url} due to: {self.cause.__class__.__name__}"

    return f"Failed to download from {self.url}"


@dataclass
class WebtoonDownloadError(DownloadError):
    """Raised when downloading a Webtoon fails; declares nothing beyond ``DownloadError``."""


@dataclass
Expand All @@ -31,21 +44,63 @@ class ChapterDownloadError(DownloadError):
chapter_info: ChapterInfo | None = None


@dataclass
class WebtoonGetError(Exception):
    """
    Exception raised due to a fetch error when retrieving Webtoon information.

    Attributes:
        series_url: URL the Webtoon information was requested from.
        status_code: Status code reported for the failed fetch.
    """

    series_url: str
    status_code: int

    def __str__(self) -> str:
        description = f"Failed to fetch Webtoon information from {self.series_url}. Status code: {self.status_code}"
        return description


@dataclass
class FetchError(Exception):
    """
    Exception raised due to a fetch error.

    Attributes:
        msg: Optional human-readable description of the failure.
    """

    msg: str | None = None

    def __str__(self) -> str:
        # A message passed by keyword never reaches ``Exception.args``, so the
        # default ``__str__`` would render it as an empty string.
        if self.msg:
            return self.msg

        return super().__str__()


@dataclass
class ChapterURLFetchError(FetchError):
    """Exception raised due to a fetch error when retrieving the chapter URL."""

    def __str__(self) -> str:
        # Prefer the caller-supplied message; otherwise use the generic text.
        return self.msg or "Failed to fetch chapter URL"


@dataclass
class ChapterTitleFetchError(FetchError):
    """Exception raised due to a fetch error when retrieving the chapter title."""

    def __str__(self) -> str:
        # Prefer the caller-supplied message; otherwise use the generic text.
        return self.msg or "Failed to fetch chapter title"


@dataclass
class ChapterDataEpisodeNumberFetchError(FetchError):
    """Exception raised due to a fetch error when retrieving the data episode number."""

    def __str__(self) -> str:
        # Prefer the caller-supplied message; otherwise use the generic text.
        return self.msg or "Failed to fetch data episode number"


@dataclass
class SeriesTitleFetchError(FetchError):
    """Exception raised due to a fetch error when retrieving the series title."""

    def __str__(self) -> str:
        # Prefer the caller-supplied message; otherwise use the generic text.
        return self.msg or "Failed to fetch series title"
24 changes: 17 additions & 7 deletions webtoon_downloader/core/webtoon/downloaders/chapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import asyncio
import logging
from dataclasses import dataclass
from dataclasses import dataclass, field
from os import PathLike
from pathlib import Path

Expand All @@ -27,20 +27,27 @@ class ChapterDownloader:
Downloads chapters from a Webtoon.

Attributes:
client : HTTP client for making web requests.
image_downloader : Downloader for Webtoon images.
file_name_generator : Generator for file names based on chapter and page details.
exporter : Optional data exporter for exporting chapter details.
progress_callback : Optional callback for reporting chapter download progress.
client : HTTP client for making web requests.
image_downloader : Downloader for Webtoon images.
file_name_generator : Generator for file names based on chapter and page details.
concurrent_downloads_limit : The number of chapters to download concurrently.
exporter : Optional data exporter for exporting chapter details.
progress_callback : Optional callback for reporting chapter download progress.
"""

client: httpx.AsyncClient
image_downloader: ImageDownloader
file_name_generator: FileNameGenerator
concurrent_downloads_limit: int

exporter: DataExporter | None = None
progress_callback: ChapterProgressCallback | None = None

_semaphore: asyncio.Semaphore = field(init=False)

def __post_init__(self) -> None:
    """Create the semaphore that ``run`` holds while a chapter is being downloaded."""
    limit = self.concurrent_downloads_limit
    self._semaphore = asyncio.Semaphore(limit)

async def run(
self, chapter_info: ChapterInfo, directory: str | PathLike[str], storage: AioWriter
) -> list[DownloadResult]:
Expand All @@ -59,7 +66,8 @@ async def run(
ChapterDownloadError in case of error downloading the chapter.
"""
try:
return await self._run(chapter_info, directory, storage)
async with self._semaphore:
return await self._run(chapter_info, directory, storage)
except Exception as exc:
raise ChapterDownloadError(chapter_info.viewer_url, exc, chapter_info=chapter_info) from exc

Expand Down Expand Up @@ -104,7 +112,9 @@ def _create_task(self, chapter_info: ChapterInfo, url: str, name: str, storage:
"""

async def _task() -> ImageDownloadResult:
log.debug('Downloading: "%s" from "%s" from chapter "%s"', name, url, chapter_info.viewer_url)
res = await self.image_downloader.run(url, name, storage)
log.debug('Finished downloading: "%s" from "%s" from chapter "%s"', name, url, chapter_info.viewer_url)
await self._report_progress(chapter_info, "PageCompleted")
return res

Expand Down
Loading
Loading