-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathmd5-duplication-remover.py
executable file
·44 lines (36 loc) · 1.27 KB
/
md5-duplication-remover.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/usr/bin/env python
# Remove duplicate files in the specified folder
# (duplicates are files with an equal md5sum).
import logging
import os
import sys
import hashlib
def _md5_of_file(path_to_file, chunk_size=65536):
    """Return the hex MD5 digest of the raw bytes stored at *path_to_file*."""
    digest = hashlib.md5()
    # Binary mode: no decoding and no newline translation, so the digest
    # reflects the exact on-disk content and works for non-text files too.
    with open(path_to_file, "rb") as binary_file:
        # Read in fixed-size chunks so large files are not loaded whole.
        for chunk in iter(lambda: binary_file.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def main(source_folder):
    """Delete files in *source_folder* whose content duplicates another file.

    Every regular file directly inside *source_folder* (no recursion) is
    hashed with MD5 over its raw bytes.  For each group of files sharing a
    digest, the file whose name sorts first is kept and the others are
    removed with ``os.remove``.  Entries that are not regular files
    (e.g. sub-directories) are skipped.

    Args:
        source_folder: path of the directory to de-duplicate.

    Raises:
        OSError: if the folder cannot be listed, or a file cannot be read
            or removed.
    """
    files_by_hash = dict()
    # Sorted so that the surviving copy is deterministic rather than
    # dependent on the arbitrary order returned by os.listdir().
    for each_file in sorted(os.listdir(source_folder)):
        path_to_file = os.path.join(source_folder, each_file)
        if not os.path.isfile(path_to_file):
            # open() on a directory would raise; only files can be duplicates.
            continue
        file_hash = _md5_of_file(path_to_file)
        files_by_hash.setdefault(file_hash, []).append(path_to_file)

    for same_content_files in files_by_hash.values():
        kept_file = same_content_files[0]
        for each_redundant_file in same_content_files[1:]:
            logging.info("removing %s (duplicate of %s)",
                         each_redundant_file, kept_file)
            os.remove(each_redundant_file)
if __name__ == "__main__":
    # Command-line entry point: expects the folder to clean as the first
    # argument.  Exit code 1 = argument missing, 2 = argument is not a folder.
    # NOTE(review): app_description is not referenced in the visible code.
    app_description = """ find duplication of context of files into folder
( calculate md5sum for all files )
remove duplication
"""
    logging.basicConfig(format='%(asctime)s:%(levelname)s:%(message)s',
                        level=logging.INFO)
    # sys.argv[0] is always the script name, so a missing folder argument
    # means fewer than two entries (the list is never empty).
    if len(sys.argv) < 2:
        print("folder with files should be specified")
        sys.exit(1)
    folder = sys.argv[1]
    if not os.path.isdir(folder):
        print("specified path is not a folder:"+folder)
        sys.exit(2)
    main(folder)