Skip to content

Commit ad935df

Browse files
authored
Merge pull request TheAlgorithms#179 from Shivams334/master
Added new code
2 parents 5c10a29 + fe7b86c commit ad935df

File tree

2 files changed

+202
-0
lines changed

2 files changed

+202
-0
lines changed

sorts/countingsort.py

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Python program for counting sort
2+
3+
# This is the main function that sorts the characters of the given string arr[]
4+
# into alphabetical order
5+
def countSort(arr):
    """Sort the characters of ``arr`` using counting sort.

    Args:
        arr: A string, or any iterable of single-character strings.

    Returns:
        A new list holding the characters of ``arr`` in ascending
        code-point order.  An empty input yields an empty list.
    """
    chars = list(arr)
    if not chars:
        return []

    codes = [ord(ch) for ch in chars]

    # One counter per code point.  Keep at least the classic 256 slots, but
    # grow the table when the input contains characters above U+00FF so they
    # do not index past the end.
    count = [0] * max(256, max(codes) + 1)
    for code in codes:
        count[code] += 1

    # Turn the counts into prefix sums: count[c] is then the number of
    # characters <= c, i.e. one past the last output slot reserved for c.
    # Start at 1 because slot 0 has no predecessor (count[-1] would wrap
    # around to the last counter and corrupt every position).
    for code in range(1, len(count)):
        count[code] += count[code - 1]

    # The output needs exactly one slot per input character.
    output = [""] * len(chars)

    # Walk the input backwards so equal characters keep their relative
    # order (the usual stable formulation of counting sort).
    for position in range(len(chars) - 1, -1, -1):
        code = codes[position]
        count[code] -= 1
        output[count[code]] = chars[position]

    return output
37+
38+
# Demo run: sort a sample string and print the characters joined back together
arr = "thisisthestring"
ans = countSort(arr)
print("Sorted string array is %s" % ("".join(ans),))

sorts/external-sort.py

+161
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
#!/usr/bin/env python
2+
3+
#
4+
# Sort large text files in a minimum amount of memory
5+
#
6+
import os
7+
import sys
8+
import argparse
9+
10+
class FileSplitter(object):
    """Split a text file into individually sorted block files.

    Each block is read from the source file, sorted in memory and written
    to its own file; the names of the files written so far are tracked so
    they can be merged and later removed.
    """

    # Name pattern of the block files; '{0}' is replaced by the block number.
    # The pattern has no directory part, so blocks are created relative to
    # the current working directory.
    BLOCK_FILENAME_FORMAT = 'block_{0}.dat'

    def __init__(self, filename):
        """Remember the file to split; no I/O happens here."""
        self.filename = filename
        self.block_filenames = []

    def write_block(self, data, block_number):
        """Write ``data`` to the block file for ``block_number`` and record its name."""
        filename = self.BLOCK_FILENAME_FORMAT.format(block_number)
        with open(filename, 'w') as block_file:
            block_file.write(data)
        self.block_filenames.append(filename)

    def get_block_filenames(self):
        """Return the list of block file names written so far."""
        return self.block_filenames

    def split(self, block_size, sort_key=None):
        """Read the source file in chunks, sort each chunk and write it out.

        Args:
            block_size: Size hint passed to ``readlines``; each block holds
                roughly this much line data.
            sort_key: Optional key function applied to each line (including
                its trailing newline) when sorting a block.
        """
        block_number = 0

        with open(self.filename, 'r') as source:
            while True:
                lines = source.readlines(block_size)
                if not lines:
                    break

                # Only the very last line of the file can lack a newline.
                # Terminate it so that sorting cannot glue it onto the line
                # that ends up following it.
                if not lines[-1].endswith('\n'):
                    lines[-1] += '\n'

                # key=None is the same as a plain sort().
                lines.sort(key=sort_key)

                self.write_block(''.join(lines), block_number)
                block_number += 1

    def cleanup(self):
        """Delete every block file recorded by ``write_block``."""
        # An explicit loop is required: map() is lazy on Python 3, so using
        # it only for its side effects would delete nothing.
        for block_filename in self.block_filenames:
            os.remove(block_filename)
47+
48+
49+
class NWayMerge(object):
    """Merge strategy that picks the smallest of the current candidate lines."""

    def select(self, choices):
        """Return the index of the smallest value in ``choices``.

        Args:
            choices: Either a mapping of buffer index -> line (the indices
                may be sparse) or a plain sequence of lines.

        Returns:
            The key/position of the minimum value, or -1 when ``choices``
            is empty.  On ties the lowest index wins.
        """
        if hasattr(choices, 'keys'):
            # Mapping: visit the keys that are actually present, in
            # ascending order so tie-breaking is deterministic.
            indices = sorted(choices)
        else:
            indices = range(len(choices))

        min_index = -1
        min_str = None

        for index in indices:
            candidate = choices[index]
            if min_str is None or candidate < min_str:
                # Remember the value as well as the index; otherwise every
                # candidate would "win" and the last one would be returned.
                min_index = index
                min_str = candidate

        return min_index
59+
60+
61+
class FilesArray(object):
    """Keep one pending line per input file while the files are merged.

    ``files`` is indexed by 0..len(files)-1.  ``buffers`` holds the line
    currently waiting for each index (None when it has been consumed) and
    ``empty`` collects the indices whose file returned '' from readline().
    """

    def __init__(self, files):
        self.files = files
        self.empty = set()
        self.num_buffers = len(files)
        # Every slot starts out as None, i.e. "needs a line".
        self.buffers = dict.fromkeys(range(self.num_buffers))

    def get_dict(self):
        """Return {index: pending line} for every index not marked empty."""
        pending = {}
        for slot in range(self.num_buffers):
            if slot not in self.empty:
                pending[slot] = self.buffers[slot]
        return pending

    def refresh(self):
        """Fill every consumed slot with the next line of its file.

        Returns:
            False once every index has been marked empty, True otherwise.
        """
        for slot in range(self.num_buffers):
            # Skip slots that still hold a line or whose file is finished.
            if self.buffers[slot] is not None or slot in self.empty:
                continue

            line = self.files[slot].readline()
            self.buffers[slot] = line

            # readline() returning '' means the file has no more lines.
            if line == '':
                self.empty.add(slot)

        return len(self.empty) != self.num_buffers

    def unshift(self, index):
        """Hand out the pending line of ``index`` and mark the slot as consumed."""
        value, self.buffers[index] = self.buffers[index], None
        return value
89+
90+
91+
class FileMerger(object):
    """Merge several sorted files into one output file.

    The choice of which pending line to emit next is delegated to
    ``merge_strategy``, an object with a ``select(choices)`` method.
    """

    def __init__(self, merge_strategy):
        self.merge_strategy = merge_strategy

    def merge(self, filenames, outfilename, buffer_size):
        """Merge ``filenames`` into ``outfilename``.

        Args:
            filenames: Names of the input files to merge.
            outfilename: Name of the file to write; it is overwritten.
            buffer_size: Buffering argument passed to ``open`` for the
                output file and for every input file.
        """
        handles = {}
        outfile = open(outfilename, 'w', buffer_size)
        try:
            handles = self.get_file_handles(filenames, buffer_size)
            buffers = FilesArray(handles)

            while buffers.refresh():
                min_index = self.merge_strategy.select(buffers.get_dict())
                outfile.write(buffers.unshift(min_index))
        finally:
            # Close everything even when merging fails: the output must be
            # flushed to disk and the inputs must be released before the
            # caller deletes them.
            outfile.close()
            for handle in handles.values():
                handle.close()

    def get_file_handles(self, filenames, buffer_size):
        """Open every file for reading and return {position: file object}.

        The caller owns the returned handles and must close them.  If one
        of the files cannot be opened, the handles opened so far are closed
        before the error is re-raised.
        """
        files = {}

        try:
            for position, name in enumerate(filenames):
                files[position] = open(name, 'r', buffer_size)
        except Exception:
            for handle in files.values():
                handle.close()
            raise

        return files
110+
111+
112+
113+
class ExternalSort(object):
    """Sort a text file by splitting it into sorted blocks and merging them."""

    def __init__(self, block_size):
        # Approximate amount of line data, in bytes, sorted in memory at once.
        self.block_size = block_size

    def sort(self, filename, sort_key=None):
        """Sort ``filename`` and write the result to ``filename + '.out'``.

        Args:
            filename: Name of the text file to sort.
            sort_key: Optional key function used when sorting each block.
        """
        num_blocks = self.get_number_blocks(filename, self.block_size)
        splitter = FileSplitter(filename)

        try:
            splitter.split(self.block_size, sort_key)

            merger = FileMerger(NWayMerge())
            # Share the memory budget between the inputs and the output.
            # open() requires an integer, hence floor division; clamp to 1
            # because a buffering value of 0 is rejected for text files on
            # Python 3.
            buffer_size = max(1, int(self.block_size // (num_blocks + 1)))
            merger.merge(splitter.get_block_filenames(), filename + '.out', buffer_size)
        finally:
            # Remove the temporary block files even if the merge failed.
            splitter.cleanup()

    def get_number_blocks(self, filename, block_size):
        """Return how many blocks of ``block_size`` bytes ``filename`` is
        estimated to need: the file size floor-divided by ``block_size``,
        plus one, as an integer."""
        return int(os.stat(filename).st_size // block_size) + 1
130+
131+
132+
def parse_memory(string):
    """Convert a memory size such as '512', '64k', '100M' or '2G' to bytes.

    A trailing k, m or g (either case) multiplies the leading integer by
    1024, 1024**2 or 1024**3; without such a suffix the whole string is
    parsed as an integer.
    """
    multipliers = {
        'k': 1024,
        'm': 1024 * 1024,
        'g': 1024 * 1024 * 1024,
    }

    suffix = string[-1].lower()
    if suffix not in multipliers:
        return int(string)

    return int(string[:-1]) * multipliers[suffix]
141+
142+
143+
144+
def main():
    """Command-line entry point: sort the named file within the memory limit
    given by -m/--mem."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-m', '--mem',
        default='100M',
        help='amount of memory to use for sorting',
    )
    arg_parser.add_argument(
        'filename',
        nargs=1,
        metavar='<filename>',
        help='name of file to sort',
    )
    options = arg_parser.parse_args()

    memory_limit = parse_memory(options.mem)
    # nargs=1 yields a one-element list, so unwrap the single file name.
    target = options.filename[0]
    ExternalSort(memory_limit).sort(target)


if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)