1
1
import os
2
2
import hashlib
3
+ import json # Import for generating reports
3
4
4
5
def get_file_hash (filepath ):
5
6
"""Return the MD5 hash of a file."""
@@ -9,13 +10,16 @@ def get_file_hash(filepath):
9
10
hasher .update (buf )
10
11
return hasher .hexdigest ()
11
12
12
- def find_duplicates (directory , min_size = 0 ):
13
- """Find duplicate files in a directory."""
13
+ def find_duplicates (directory , min_size = 0 , file_extensions = None ):
14
+ """Find duplicate files in a directory, with optional file type filtering ."""
14
15
hashes = {}
15
16
duplicates = {}
16
17
17
18
for dirpath , dirnames , filenames in os .walk (directory ):
18
19
for filename in filenames :
20
+ if file_extensions and not filename .lower ().endswith (tuple (file_extensions )):
21
+ continue # Skip files that don't match the extensions
22
+
19
23
filepath = os .path .join (dirpath , filename )
20
24
if os .path .getsize (filepath ) >= min_size :
21
25
file_hash = get_file_hash (filepath )
@@ -29,11 +33,20 @@ def find_duplicates(directory, min_size=0):
29
33
30
34
return {k : v for k , v in duplicates .items () if len (v ) > 1 }
31
35
36
def generate_report(duplicates, report_path):
    """Write the duplicate-file mapping to ``report_path`` as JSON.

    Args:
        duplicates: JSON-serializable mapping to dump. It is written
            unchanged, pretty-printed with a four-space indent.
        report_path: Destination file path. The file is opened in write
            mode, so an existing file at that path is overwritten.

    Raises:
        OSError: If ``report_path`` cannot be opened for writing.
        TypeError: If ``duplicates`` holds a value ``json`` cannot serialize.
    """
    # Pin the encoding so the report does not depend on the platform locale.
    with open(report_path, "w", encoding="utf-8") as report_file:
        json.dump(duplicates, report_file, indent=4)
    # Announce only after the file has been flushed and closed.
    print(f"Report generated: {report_path}")
41
+
32
42
def main ():
33
43
directory = input ("Enter the directory to scan for duplicates: " )
34
44
min_size = int (input ("Enter the minimum file size to consider (in bytes, default is 0): " ) or "0" )
35
45
36
- duplicates = find_duplicates (directory , min_size )
46
+ file_type_input = input ("Enter the file extensions to check (comma-separated, e.g. .jpg,.png), or press Enter to check all: " )
47
+ file_extensions = [ext .strip ().lower () for ext in file_type_input .split ("," )] if file_type_input else None
48
+
49
+ duplicates = find_duplicates (directory , min_size , file_extensions )
37
50
38
51
if not duplicates :
39
52
print ("No duplicates found." )
@@ -45,7 +58,7 @@ def main():
45
58
print (path )
46
59
print ("------" )
47
60
48
- action = input ("\n Choose an action: (D)elete, (M)ove, (N)o action: " ).lower ()
61
+ action = input ("\n Choose an action: (D)elete, (M)ove, (R)eport, ( N)o action: " ).lower ()
49
62
50
63
if action == "d" :
51
64
for _ , paths in duplicates .items ():
@@ -64,6 +77,10 @@ def main():
64
77
os .rename (path , target_path )
65
78
print (f"Moved { path } to { target_path } " )
66
79
80
+ elif action == "r" :
81
+ report_path = input ("Enter the path to save the report (e.g., duplicates_report.json): " )
82
+ generate_report (duplicates , report_path )
83
+
67
84
else :
68
85
print ("No action taken." )
69
86
0 commit comments