Skip to content

Commit

Permalink
start to find duplicates
Browse files Browse the repository at this point in the history
  • Loading branch information
MusicalNinjaDad committed Nov 15, 2023
1 parent 1f71b50 commit 29c6673
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 18 deletions.
4 changes: 2 additions & 2 deletions duplicates/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .dupes import listfiles
from .dupes import listfiles, finddupes

__ALL__ = ['listfiles']
# Names exported by ``from duplicates import *``. The variable has to be the
# lowercase ``__all__``; any other spelling (e.g. ``__ALL__``) is an ordinary
# module attribute that the import machinery ignores.
__all__ = ['listfiles', 'finddupes']
11 changes: 9 additions & 2 deletions duplicates/dupes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from pathlib import Path

def listfiles(in_path: Path) -> dict:
def listfiles(in_path: Path) -> dict[int, set]:
filedict = dict()
for root, dirs, files in in_path.walk():
for file in files:
Expand All @@ -10,4 +10,11 @@ def listfiles(in_path: Path) -> dict:
filedict[size].add(filepath)
else:
filedict[size] = {filepath}
return filedict
return filedict

def finddupes(filesbysize: dict[int, set]) -> set[frozenset]:
    """Return the groups of files that share their size with another file.

    Args:
        filesbysize: Mapping of a file size to the set of file paths that
            have that size.

    Returns:
        A set holding one frozenset of paths for every size that maps to
        more than one path. Sizes with a single path are left out, so an
        empty mapping yields an empty set.

    Note:
        Groups are formed from the size buckets alone; file contents are
        not compared in this function.
    """
    # Each group is a frozenset (not a set) because it is stored inside
    # another set and therefore has to be hashable.
    return {
        frozenset(files)
        for files in filesbysize.values()
        if len(files) > 1
    }
13 changes: 13 additions & 0 deletions test/majorver/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,22 @@
from pytest import fixture
from .. import *
from pathlib import Path

dir1 = Path('test/majorver/data/dir1')
dir2 = Path('test/majorver/data/dir2')

@fixture
def testfiles(tmp_path) -> Path:
    """Populate pytest's ``tmp_path`` from the ``dir1`` and ``dir2`` sample dirs.

    Returns the same ``tmp_path`` so tests can build paths relative to it.
    """
    # NOTE(review): ``copy`` is presumably the ``_copy`` helper attached to
    # Path elsewhere in this module -- confirm.
    for sample_dir in (dir1, dir2):
        sample_dir.copy(tmp_path)
    return tmp_path

@fixture
def duplicatedir1(testfiles) -> Path:
    """Add a second copy of ``dir1`` under ``alt`` in the ``testfiles`` tree.

    Returns the root path supplied by the ``testfiles`` fixture.
    """
    alt_dir = testfiles / 'alt'
    dir1.copy(alt_dir)
    return testfiles

def _copy(self: Path, target: Path) -> None:
from shutil import copytree
if self.is_dir():
Expand Down
8 changes: 8 additions & 0 deletions test/majorver/test_filecompare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from pytest import fixture
from . import listfiles, finddupes, Path, duplicatedir1, testfiles

def test_fileisduplicate(duplicatedir1):
    """FileA.txt in ``dir1`` and in ``alt/dir1`` is reported as the only duplicate group."""
    root = duplicatedir1
    expected_group = frozenset((
        root / 'dir1' / 'FileA.txt',
        root / 'alt' / 'dir1' / 'FileA.txt',
    ))
    found = finddupes(listfiles(root))
    assert found == {expected_group}
15 changes: 1 addition & 14 deletions test/majorver/test_filedetails.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,6 @@
from pytest import fixture
from recurtools import flatten

from . import listfiles, Path, dir1, dir2

@fixture
def testfiles(tmp_path):
dir1.copy(tmp_path)
dir2.copy(tmp_path)
return tmp_path

@fixture
def duplicatedir1(testfiles):
tmp_path = testfiles
dir1.copy(testfiles / 'alt')
return tmp_path
from . import listfiles, Path, testfiles, duplicatedir1

def test_fileslisted(testfiles):
filesdict = listfiles(testfiles)
Expand Down

0 comments on commit 29c6673

Please sign in to comment.