Merge pull request #8 from MusicalNinjaRandInt/working

v0.2.1
MusicalNinjaDad · Dec 12, 2023 · f15b4d4 · f15b4d4
2 parents 3ffd1e5 + 412695e
commit f15b4d4
Show file tree

Hide file tree

Showing 12 changed files with 144 additions and 41 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # CHANGELOG - Link Duplicates
 
+## v0.2.1
+
+- improved output while running to give some kind of info on progress
+
 ## v0.2.0
 
 - added info on storage usage and savings

diff --git a/__version__ b/__version__
@@ -1 +1 @@
-0.2.0
+0.2.1
diff --git a/duplicates.code-workspace b/duplicates.code-workspace
@@ -4,5 +4,9 @@
 			"path": "."
 		}
 	],
-	"settings": {}
+	"settings": {
+		"black-formatter.args": [
+			"-S"
+		]
+	}
 }
diff --git a/duplicates/__init__.py b/duplicates/__init__.py
@@ -1,2 +1,4 @@
+LOGROOT = 'dupes'
+
 from .dupes import *
 from .bufferediofile import *
diff --git a/duplicates/cli.py b/duplicates/cli.py
@@ -1,9 +1,12 @@
+import logging
 import os
 from pathlib import Path
+import sys
 from click import argument, command, confirm, option
 
-from . import DuplicateFiles
+from . import DuplicateFiles, LOGROOT
 
+_logger = logging.getLogger(LOGROOT)
 
 @command()
 @argument('rootdir')
@@ -12,17 +15,17 @@
 @option('--list', '_list', is_flag=True)
 @option('--short', is_flag=True)
 def dupes(rootdir, link, approved, _list, short):
-    roodir = Path(rootdir)
-    duplicatefiles = DuplicateFiles.frompath(roodir)
-
-    sets = len(duplicatefiles.duplicates)
-    totalfiles = len([file for group in duplicatefiles.duplicates for file in group])
-    print(f'{sets} sets of duplicates found, totalling {totalfiles} files')
+    _logger.setLevel(logging.INFO)
+    consoleoutput = logging.StreamHandler()
+    consoleoutput.setLevel(logging.INFO)
+    consoleoutput.setStream(sys.stdout)
+    outputformat = logging.Formatter('%(message)s')
+    consoleoutput.setFormatter(outputformat)
+    _logger.addHandler(consoleoutput)
+
+    rootdir = Path(rootdir)
+    duplicatefiles = DuplicateFiles.frompath(rootdir)
 
-    totalsize = sum(file.stat.st_size for group in duplicatefiles.duplicates for file in group)
-    futuresize = sum(next(iter(group)).stat.st_size for group in duplicatefiles.duplicates)
-    print(f'current usage: {totalsize}, potential usage: {futuresize}, saving: {totalsize-futuresize}')
-
     if short:
         print(duplicatefiles.printout(ignoresamenames=True))
     elif _list:

diff --git a/duplicates/dupes.py b/duplicates/dupes.py
@@ -1,25 +1,42 @@
 from collections import defaultdict
 from contextlib import ExitStack
+import logging
 import os
 from pathlib import Path
 from typing import Any, Callable, Iterable
 from uuid import uuid1
 
 from .bufferediofile import BufferedIOFile, IsASymlinkError
+from . import LOGROOT
 
 class DuplicateFiles:
 
     @classmethod
     def frompath(cls, rootpath: Path):
+        _logger = logging.getLogger(f'{LOGROOT}.frompath')
+        _logger.info(f'Initiating search of {rootpath}')
+
         samesizefiles = _filesofsamesize(rootpath)
+        _logger.info(f'Found {len(samesizefiles)} groups of same-sized files')
+
         inoindex = _indexbyino(file for samesizeset in samesizefiles for file in samesizeset)
+        allfiles = {file for fileset in inoindex.values() for file in fileset}
         uniqueinos = frozenset(next(iter(files)) for files in inoindex.values())
+        _logger.info(f'Identified {len(allfiles)-len(uniqueinos)} pre-existing hard links')
+        _logger.info(f'Will now begin comparing file contents, this may take some time')
+
         dupes = set()
         for fileset in samesizefiles:
             nohardlinks = fileset.intersection(uniqueinos)
             with ExitStack() as stack:
                 _ = [stack.enter_context(file.open()) for file in nohardlinks]
                 dupes |= comparefilecontents({frozenset(nohardlinks)})
+        alldupes = {file for fileset in dupes for file in fileset}
+        totalsize = sum(file.stat.st_size for file in alldupes)
+        futuresize = sum(next(iter(group)).stat.st_size for group in dupes)
+        _logger.info(f'Identified {len(dupes)} sets of duplicate files, totalling {len(alldupes)} files')
+        _logger.info(f'Current usage: {totalsize}, future usage: {futuresize}, saving: {totalsize-futuresize}')
+
         return DuplicateFiles(duplicates=dupes, inoindex=inoindex)
 
     def __init__(self, duplicates: set[frozenset[BufferedIOFile]], inoindex: dict[int: frozenset[Path]]) -> None:

diff --git a/test/__init__.py b/test/__init__.py
@@ -1,2 +1,26 @@
 from .. import *
-from pytest import mark, raises, skip
+from pytest import mark, raises, skip
+
+@contextmanager
+def skipon(exceptiontype: Exception, check: callable = lambda x: True, reason: str = ''):
+    """Skip test on Exception of type exceptiontype.
+    Optionally run additional validation of exception before skipping.
+
+    Arguments:
+    - exceptiontype: the type of exception to skip
+    - check (optional): a `callable` which is passed the caught exception and returns a single `bool` value.
+    The test will only be skipped if this check returns `True`
+    - reason (optional): reason to be passed to `skip` and included in the test logs
+
+    Example:
+    ```
+    with skipon(OSError, lambda e: e.winerror == 1314, 'SymLinks not available on Windows without DevMode enabled'):
+        ...
+    ```
+    will skip the test if Windows raises an OSError because of missing permissions to create a symlink
+    """
+
+    try:
+        yield
+    except exceptiontype as e:
+        if check(e): skip(reason=reason)
diff --git a/test/majorver/test_BufferedIOFile.py b/test/majorver/test_BufferedIOFile.py
@@ -45,10 +45,8 @@ def test_equal_relativepathsgiven():
 def test_equal_pathsresolved(copiedtestfiles):
     fileA = copiedtestfiles.paths['fileA'][0]
     symlink = copiedtestfiles.root / Path('linktoA.txt')
-    try:
+    with skipon(OSError, lambda e: e.winerror == 1314, 'SymLinks not available on Windows without DevMode enabled'):
         symlink.symlink_to(fileA)
-    except OSError as e:
-        if e.winerror == 1314: skip(reason='SymLinks not available on Windows without DevMode enabled')
     assert fileA != symlink, 'Something when wrong in the test setup'
     assert fileA == symlink.resolve(), 'Something when wrong in the test setup'
     fileA = BufferedIOFile(fileA)
@@ -58,21 +56,17 @@ def test_equal_pathsresolved(copiedtestfiles):
 def test_symlink_raiseserror(copiedtestfiles):
     fileA = copiedtestfiles.paths['fileA'][0]
     symlink = copiedtestfiles.root / Path('linktoA.txt')
-    try:
+    with skipon(OSError, lambda e: e.winerror == 1314, 'SymLinks not available on Windows without DevMode enabled'):
         symlink.symlink_to(fileA)
-    except OSError as e:
-        if e.winerror == 1314: skip(reason='SymLinks not available on Windows without DevMode enabled')
     with raises(IsASymlinkError):
         symlink = BufferedIOFile(symlink)
 
 @mark.copyfiles(('fileA',1))
 def test_followsymlinks_notimplemented(copiedtestfiles):
     fileA = copiedtestfiles.paths['fileA'][0]
     symlink = copiedtestfiles.root / Path('linktoA.txt')
-    try:
+    with skipon(OSError, lambda e: e.winerror == 1314, 'SymLinks not available on Windows without DevMode enabled'):
         symlink.symlink_to(fileA)
-    except OSError as e:
-        if e.winerror == 1314: skip(reason='SymLinks not available on Windows without DevMode enabled')
     with raises(NotImplementedError):
         symlink = BufferedIOFile(symlink, follow_symlinks=True)
 

diff --git a/test/majorver/test_cli.py b/test/majorver/test_cli.py
@@ -15,12 +15,19 @@ def test_link(copiedtestfiles):
     completed = run(command, capture_output=True)
 
     output = [
-        '2 sets of duplicates found, totalling 5 files',
-        'current usage: 101, potential usage: 39, saving: 62',
+        f'Initiating search of {copiedtestfiles.root}',
+        f'Found 2 groups of same-sized files',
+        f'Identified 0 pre-existing hard links',
+        f'Will now begin comparing file contents, this may take some time',
+        f'Identified 2 sets of duplicate files, totalling 5 files',
+        f'Current usage: 101, future usage: 39, saving: 62',
         f'Linking files in {copiedtestfiles.root} ...'
     ]
 
-    assert [s.strip() for s in completed.stdout.decode().strip().split('\n')] == output
+    stdout = [s.strip() for s in completed.stdout.decode().strip().split('\n')]
+    assert (
+        stdout == output
+    ), f'\nOutput: {stdout}\nExpected: {output}'
 
     fileAino = copiedtestfiles.paths['fileA'][0].stat().st_ino
     fileBino = copiedtestfiles.paths['fileB'][0].stat().st_ino
@@ -44,11 +51,18 @@ def test_nolink(copiedtestfiles):
     completed = run(command, capture_output=True)
 
     output = [
-        '2 sets of duplicates found, totalling 5 files',
-        'current usage: 101, potential usage: 39, saving: 62'
+        f'Initiating search of {copiedtestfiles.root}',
+        f'Found 2 groups of same-sized files',
+        f'Identified 0 pre-existing hard links',
+        f'Will now begin comparing file contents, this may take some time',
+        f'Identified 2 sets of duplicate files, totalling 5 files',
+        f'Current usage: 101, future usage: 39, saving: 62'
     ]
 
-    assert [s.strip() for s in completed.stdout.decode().strip().split('\n')] == output
+    stdout = [s.strip() for s in completed.stdout.decode().strip().split('\n')]
+    assert (
+        stdout == output
+    ), f'\nOutput: {stdout}\nExpected: {output}'
 
     newinos = {file.stat().st_ino for copies in copiedtestfiles.paths.values() for file in copies}
 

diff --git a/test/majorver/test_dupes_identification.py b/test/majorver/test_dupes_identification.py
@@ -84,9 +84,7 @@ def test_multiplezerosizefiles(copiedtestfiles):
 def test_instantiate_dropsymlinks(copiedtestfiles):
     fileA = copiedtestfiles.paths['fileA'][0]
     symlink = copiedtestfiles.root / Path('linktoA.txt')
-    try:
+    with skipon(OSError, lambda e: e.winerror == 1314, 'SymLinks not available on Windows without DevMode enabled'):
         symlink.symlink_to(fileA)
-    except OSError as e:
-        if e.winerror == 1314: skip(reason='SymLinks not available on Windows without DevMode enabled')
     duplicatefiles = DuplicateFiles.frompath(copiedtestfiles.root)
     assert duplicatefiles.duplicates == {frozenset(path for path in copiedtestfiles.paths['fileA'])}, f'Following files identified as duplicates: {duplicatefiles.duplicates}'
diff --git a/test/majorver/test_logging.py b/test/majorver/test_logging.py
@@ -0,0 +1,24 @@
+from . import *
+import logging
+
+
+@mark.copyfiles(('fileA', 2), ('fileB', 1), ('fileA2', 1))
+@mark.linkfiles(('fileA', 1))
+def test_instantiatefrompath(copiedtestfiles, caplog):
+    caplog.set_level(logging.INFO, logger='dupes')
+    _ = DuplicateFiles.frompath(copiedtestfiles.root)
+    logs = [record for record in caplog.records if record.name.startswith(LOGROOT)]
+    logmessages = [record.message for record in logs]
+
+    expectedmessages = [
+        f'Initiating search of {copiedtestfiles.root}',
+        f'Found 1 groups of same-sized files',
+        f'Identified 1 pre-existing hard links',
+        f'Will now begin comparing file contents, this may take some time',
+        f'Identified 1 sets of duplicate files, totalling 2 files',
+        f'Current usage: 32, future usage: 16, saving: 16'
+    ]
+
+    assert (
+        logmessages == expectedmessages
+    ), f'\nReceived log: {logmessages}\nExpected: {expectedmessages}\nMissing: {set(expectedmessages).difference(logmessages)}\nExtra: {set(logmessages).difference(expectedmessages)}'
diff --git a/test/minorver/test_cli.py b/test/minorver/test_cli.py
@@ -14,12 +14,19 @@ def test_link(copiedtestfiles):
     result = clirunner.invoke(dupes, command)
 
     output = [
-        '2 sets of duplicates found, totalling 5 files',
-        'current usage: 101, potential usage: 39, saving: 62',
+        f'Initiating search of {copiedtestfiles.root}',
+        f'Found 2 groups of same-sized files',
+        f'Identified 0 pre-existing hard links',
+        f'Will now begin comparing file contents, this may take some time',
+        f'Identified 2 sets of duplicate files, totalling 5 files',
+        f'Current usage: 101, future usage: 39, saving: 62',
         f'Linking files in {copiedtestfiles.root} ...'
     ]
 
-    assert [s.strip() for s in result.output.strip().split('\n')] == output
+    stdout = [s.strip() for s in result.output.strip().split('\n')]
+    assert (
+        stdout == output
+    ), f'\nOutput: {stdout}\nExpected: {output}'
 
     fileAino = copiedtestfiles.paths['fileA'][0].stat().st_ino
     fileBino = copiedtestfiles.paths['fileB'][0].stat().st_ino
@@ -40,8 +47,12 @@ def test_linkapproved(copiedtestfiles):
     result = clirunner.invoke(dupes, command, input='y')
 
     output = [
-        '2 sets of duplicates found, totalling 5 files',
-        'current usage: 101, potential usage: 39, saving: 62',
+        f'Initiating search of {copiedtestfiles.root}',
+        f'Found 2 groups of same-sized files',
+        f'Identified 0 pre-existing hard links',
+        f'Will now begin comparing file contents, this may take some time',
+        f'Identified 2 sets of duplicate files, totalling 5 files',
+        f'Current usage: 101, future usage: 39, saving: 62',
         'Link files? [y/N]: y',
         f'Linking files in {copiedtestfiles.root} ...'
     ]
@@ -70,8 +81,12 @@ def test_link_abort(copiedtestfiles):
     result = clirunner.invoke(dupes, command, input='n')
 
     output = [
-        '2 sets of duplicates found, totalling 5 files',
-        'current usage: 101, potential usage: 39, saving: 62',
+        f'Initiating search of {copiedtestfiles.root}',
+        f'Found 2 groups of same-sized files',
+        f'Identified 0 pre-existing hard links',
+        f'Will now begin comparing file contents, this may take some time',
+        f'Identified 2 sets of duplicate files, totalling 5 files',
+        f'Current usage: 101, future usage: 39, saving: 62',
         'Link files? [y/N]: n',
         'Aborted!'
     ]
@@ -97,8 +112,12 @@ def test_nolink(copiedtestfiles):
     result = clirunner.invoke(dupes, command)
 
     output = [
-        '2 sets of duplicates found, totalling 5 files',
-        'current usage: 101, potential usage: 39, saving: 62'
+        f'Initiating search of {copiedtestfiles.root}',
+        f'Found 2 groups of same-sized files',
+        f'Identified 0 pre-existing hard links',
+        f'Will now begin comparing file contents, this may take some time',
+        f'Identified 2 sets of duplicate files, totalling 5 files',
+        f'Current usage: 101, future usage: 39, saving: 62'
     ]
 
     assert [s.strip() for s in result.output.strip().split('\n')] == output