Skip to content

Commit

Permalink
add DB_OR_PATH interface
Browse files Browse the repository at this point in the history
  • Loading branch information
chapmanjacobd committed Oct 15, 2024
1 parent db5e478 commit 45a1f47
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 65 deletions.
25 changes: 18 additions & 7 deletions xklb/mediafiles/process_media.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
from xklb.mediadb import db_history
from xklb.mediafiles import process_ffmpeg, process_image
from xklb.utils import (
arg_utils,
arggroups,
argparse_utils,
consts,
devices,
file_utils,
iterables,
nums,
printing,
Expand Down Expand Up @@ -46,7 +48,7 @@ def parse_args() -> argparse.Namespace:
arggroups.process_ffmpeg(parser)
arggroups.debug(parser)

arggroups.database(parser)
arggroups.database_or_paths(parser)
args = parser.parse_intermixed_args()
arggroups.args_post(args, parser)

Expand All @@ -56,6 +58,20 @@ def parse_args() -> argparse.Namespace:
return args


def collect_media(args):
    """Build the list of media records to process.

    When ``args.database`` is set, the history tables are created via
    ``db_history.create`` and rows are read from ``args.db`` using the media
    query, falling back to the filesystem query if SQLite raises an
    ``OperationalError``. Otherwise records are generated from the parsed
    arguments with ``arg_utils.gen_d``.

    Returns:
        A list of media records.
    """
    if not args.database:
        # Records lacking a "size" key are passed through file_utils.get_filesize.
        # NOTE(review): presumably that helper returns the record with its
        # size filled in -- confirm against file_utils.
        return [
            record if "size" in record else file_utils.get_filesize(record)
            for record in arg_utils.gen_d(args)
        ]

    db_history.create(args)
    try:
        return list(args.db.query(*sqlgroups.media_sql(args)))
    except sqlite3.OperationalError:
        # The media query could not run against this database; use the
        # filesystem query instead.
        return list(args.db.query(*sqlgroups.fs_sql(args, args.limit)))


def check_shrink(args, m) -> list:
m["ext"] = os.path.splitext(m["path"])[1].lower().lstrip(".")
filetype = (m.get("type") or "").lower()
Expand Down Expand Up @@ -135,12 +151,7 @@ def check_shrink(args, m) -> list:

def process_media() -> None:
args = parse_args()
db_history.create(args)

try:
media = list(args.db.query(*sqlgroups.media_sql(args)))
except sqlite3.OperationalError:
media = list(args.db.query(*sqlgroups.fs_sql(args, args.limit)))
media = collect_media(args)

media = iterables.conform(check_shrink(args, m) for m in media)
media = sorted(media, key=lambda d: d["savings"] / d["processing_time"], reverse=True)
Expand Down
48 changes: 6 additions & 42 deletions xklb/utils/arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,7 @@ def gen_paths(args):
if args.paths is None:
processes.exit_error("No paths passed in")

if args.from_file:
for path in args.paths:
with open(path, "r") as f:
for line in f:
line = line.rstrip("\n")
if line.strip():
if args.from_json:
json_data = json.loads(line)
if isinstance(json_data, list):
yield from (d["path"] for d in json_data)
elif isinstance(json_data, dict):
yield json_data["path"]
else:
raise TypeError
else:
yield line
elif args.from_json:
if args.from_json:
for path in args.paths:
json_data = json.loads(path)
if isinstance(json_data, list):
Expand All @@ -36,40 +20,20 @@ def gen_paths(args):
else:
raise TypeError
else:
is_large = len(args.paths) > 1000
for path in args.paths:
if path.strip():
if is_large:
yield path
p = Path(path)
if p.is_dir():
yield from file_utils.rglob(str(p), args.ext or None, getattr(args, "exclude", None))[0]
else:
p = Path(path)
if p.is_dir():
yield from file_utils.rglob(str(p), args.ext or None, getattr(args, "exclude", None))[0]
else:
yield path
yield path


def gen_d(args):
if args.paths is None:
processes.exit_error("No data passed in")

if args.from_file:
for path in args.paths:
with open(path, "r") as f:
for line in f:
line = line.rstrip("\n")
if line.strip():
if args.from_json:
json_data = json.loads(line)
if isinstance(json_data, list):
yield from json_data
elif isinstance(json_data, dict):
yield json_data
else:
raise TypeError
else:
yield {"path": line}
elif args.from_json:
if args.from_json:
for path in args.paths:
json_data = json.loads(path)
if isinstance(json_data, list):
Expand Down
29 changes: 22 additions & 7 deletions xklb/utils/arggroups.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,14 +187,8 @@ def database(parent_parser):

def paths_or_stdin(parent_parser, required=True, destination=False):
parser = parent_parser.add_argument_group("Paths")
parser.add_argument(
"--from-file",
"--from-text",
"--file",
action="store_true",
help="Read paths from line-delimited file(s)",
)
parser.add_argument("--from-json", "--json", action="store_true", help="Read JSON or JSONL from stdin")

if destination:
parser.add_argument("paths", nargs="+", action=argparse_utils.ArgparseArgsOrStdin)
else:
Expand All @@ -206,6 +200,27 @@ def paths_or_stdin(parent_parser, required=True, destination=False):
)


def database_or_paths(parent_parser, required=True, destination=False):
    """Register arguments for commands that accept a database or a list of paths.

    Adds a "Database" group with the ``--db`` override, registers the
    soft-delete and delete capability arguments on ``parent_parser``, then adds
    a "Paths" group holding ``--from-json`` and the positional ``paths``
    argument, which is handled by ``argparse_utils.ArgparseDBOrPaths``.

    Args:
        parent_parser: Parser that receives the argument groups.
        required: When true (and ``destination`` is false) the positional
            defaults to ``argparse_utils.STDIN_DASH``; otherwise to ``None``.
        destination: When true at least one positional value is required
            (``nargs="+"``) and no default is supplied.
    """
    db_group = parent_parser.add_argument_group("Database")
    db_group.add_argument("--db", "-db", help="Positional argument override")
    capability_soft_delete(parent_parser)
    capability_delete(parent_parser)

    paths_group = parent_parser.add_argument_group("Paths")
    paths_group.add_argument("--from-json", "--json", action="store_true", help="Read JSON or JSONL from stdin")

    # Options shared by both forms of the positional argument.
    positional = {"action": argparse_utils.ArgparseDBOrPaths, "metavar": "DB_OR_PATH"}
    if destination:
        positional["nargs"] = "+"
    else:
        positional["nargs"] = "*"
        positional["default"] = argparse_utils.STDIN_DASH if required else None
    paths_group.add_argument("paths", **positional)


def sql_fs(parent_parser):
parse_fs = parent_parser.add_argument_group("FileSystemDB SQL")
parse_fs.add_argument(
Expand Down
29 changes: 29 additions & 0 deletions xklb/utils/argparse_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,35 @@ def __call__(self, parser, namespace, values, option_string=None):
setattr(namespace, self.dest, lines)


def is_sqlite(path):
    """Return True if *path* is a readable file that starts with the SQLite 3 header.

    The first 16 bytes of the file are compared against the magic string
    ``b"SQLite format 3\\000"``.

    Args:
        path: Filesystem path to inspect.

    Returns:
        ``True`` when the header matches; ``False`` when it does not or when
        the path cannot be opened or read.
    """
    try:
        with open(path, "rb") as f:
            header = f.read(16)
    except (OSError, ValueError):
        # OSError: missing file, directory, permission denied, name too long.
        # ValueError: open() rejects paths containing an embedded NUL byte.
        return False
    return header == b"SQLite format 3\000"


class ArgparseDBOrPaths(argparse.Action):
    """Argparse action that accepts either one SQLite database or a list of paths.

    After the action runs, ``namespace.database`` holds the database path (or
    ``None``) and the action's own destination holds the list of paths (or
    ``None``).
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Classify *values* as stdin input, a single database, or plain paths."""
        db_path = None
        if values == STDIN_DASH:
            # The stdin sentinel was given (or defaulted): read one entry per line.
            print(f"{parser.prog}: Reading from stdin...", file=sys.stderr)
            raw_lines = sys.stdin.readlines()
            only_blank = len(raw_lines) == 1 and raw_lines[0].strip() == ""
            if not raw_lines or only_blank:
                entries = None
            else:
                entries = [line.strip() for line in raw_lines]
        elif values is not None and len(values) == 1 and is_sqlite(values[0]):
            # A lone argument that is a SQLite file is treated as the database.
            db_path = values[0]
            entries = None
        else:
            entries = values

        namespace.database = db_path
        setattr(namespace, self.dest, entries)


def type_to_str(t):
type_dict = {
int: "Integer",
Expand Down
9 changes: 0 additions & 9 deletions xklb/utils/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,15 +314,6 @@ def move_files_bash(file_list):
print(rf"PARALLEL_SHELL=sh parallel --colsep '\t' -a {temp.name} -j 20 {move_sh_path}")


def is_sqlite(path):
    """Report whether the file at *path* begins with the SQLite 3 magic header.

    Returns ``False`` if the file cannot be opened or read (``OSError``).
    """
    magic = b"SQLite format 3\000"
    try:
        with open(path, "rb") as handle:
            # The header string is exactly 16 bytes long.
            return handle.read(16) == magic
    except OSError:
        return False


def get_file_encoding(path):
import chardet

Expand Down

0 comments on commit 45a1f47

Please sign in to comment.