Skip to content

Commit 4d28b62

Browse files
feat: Add file type filtering and report generation features to duplicate finder (DhanushNehru#327)
1 parent 66161ee commit 4d28b62

File tree

2 files changed

+35
-5
lines changed

2 files changed

+35
-5
lines changed

Duplicate Finder/Readme.md

+14-1
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,17 @@ Always backup your data before using scripts that modify files. The author is no
3333

3434

3535
<!-- Updated README links and corrected typos -->
36-
<!-- Updated README links and corrected typos -->
36+
<!-- Updated README links and corrected typos -->
37+
38+
39+
# KEY MODIFICATIONS
40+
41+
File Type Filtering:
42+
43+
Added an input prompt to specify file extensions for filtering.
44+
Modified the `find_duplicates` function to consider only files with the specified extensions.
45+
46+
Generate Report:
47+
48+
Added a new `generate_report` function that creates a JSON report of duplicate files.
49+
Added an option for the user to generate a report instead of deleting or moving files.

Duplicate Finder/duplicate-finder.py

+21-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
22
import hashlib
3+
import json # Import for generating reports
34

45
def get_file_hash(filepath):
56
"""Return the MD5 hash of a file."""
@@ -9,13 +10,16 @@ def get_file_hash(filepath):
910
hasher.update(buf)
1011
return hasher.hexdigest()
1112

12-
def find_duplicates(directory, min_size=0):
13-
"""Find duplicate files in a directory."""
13+
def find_duplicates(directory, min_size=0, file_extensions=None):
14+
"""Find duplicate files in a directory, with optional file type filtering."""
1415
hashes = {}
1516
duplicates = {}
1617

1718
for dirpath, dirnames, filenames in os.walk(directory):
1819
for filename in filenames:
20+
if file_extensions and not filename.lower().endswith(tuple(file_extensions)):
21+
continue # Skip files that don't match the extensions
22+
1923
filepath = os.path.join(dirpath, filename)
2024
if os.path.getsize(filepath) >= min_size:
2125
file_hash = get_file_hash(filepath)
@@ -29,11 +33,20 @@ def find_duplicates(directory, min_size=0):
2933

3034
return {k: v for k, v in duplicates.items() if len(v) > 1}
3135

36+
def generate_report(duplicates, report_path):
    """Save the duplicates mapping as a JSON document and announce where it went.

    Args:
        duplicates: JSON-serialisable mapping to write out (the caller passes
            the result of the duplicate scan).
        report_path: Path of the file to create or overwrite.
    """
    with open(report_path, 'w') as out:
        # Serialise with 4-space indentation, then write the text in one go.
        out.write(json.dumps(duplicates, indent=4))
    print(f"Report generated: {report_path}")
41+
3242
def main():
3343
directory = input("Enter the directory to scan for duplicates: ")
3444
min_size = int(input("Enter the minimum file size to consider (in bytes, default is 0): ") or "0")
3545

36-
duplicates = find_duplicates(directory, min_size)
46+
file_type_input = input("Enter the file extensions to check (comma-separated, e.g. .jpg,.png), or press Enter to check all: ")
47+
file_extensions = [ext.strip().lower() for ext in file_type_input.split(",")] if file_type_input else None
48+
49+
duplicates = find_duplicates(directory, min_size, file_extensions)
3750

3851
if not duplicates:
3952
print("No duplicates found.")
@@ -45,7 +58,7 @@ def main():
4558
print(path)
4659
print("------")
4760

48-
action = input("\nChoose an action: (D)elete, (M)ove, (N)o action: ").lower()
61+
action = input("\nChoose an action: (D)elete, (M)ove, (R)eport, (N)o action: ").lower()
4962

5063
if action == "d":
5164
for _, paths in duplicates.items():
@@ -64,6 +77,10 @@ def main():
6477
os.rename(path, target_path)
6578
print(f"Moved {path} to {target_path}")
6679

80+
elif action == "r":
81+
report_path = input("Enter the path to save the report (e.g., duplicates_report.json): ")
82+
generate_report(duplicates, report_path)
83+
6784
else:
6885
print("No action taken.")
6986

0 commit comments

Comments
 (0)