Skip to content

Commit ae76601

Browse files
committed
strings: add levenshtein distance metric
1 parent 0e76ee9 commit ae76601

File tree

1 file changed

+78
-0
lines changed

1 file changed

+78
-0
lines changed

strings/levenshtein-distance.py

+78
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
"""
This is a Python implementation of the Levenshtein distance.
The Levenshtein distance is a string metric for measuring the
difference between two sequences.

To run the doctests, use the following command:
python -m doctest -v levenshtein-distance.py
or
python3 -m doctest -v levenshtein-distance.py

For manual testing, run:
python levenshtein-distance.py
"""
14+
15+
16+
def levenshtein_distance(first_word, second_word):
    """Return the Levenshtein distance between two words.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn one word into the other.
    It is computed row by row, keeping only the previous and the current
    row of the dynamic-programming table, so the running time is
    proportional to ``len(first_word) * len(second_word)``.

    :param first_word: the first word to measure the difference.
    :param second_word: the second word to measure the difference.
    :return: the levenshtein distance between the two words.

    Examples:
    >>> levenshtein_distance("planet", "planetary")
    3
    >>> levenshtein_distance("", "test")
    4
    >>> levenshtein_distance("book", "back")
    2
    >>> levenshtein_distance("book", "book")
    0
    >>> levenshtein_distance("test", "")
    4
    >>> levenshtein_distance("", "")
    0
    >>> levenshtein_distance("orchestration", "container")
    10
    """
    # The longer word should come first, so each row is only as wide as
    # the shorter word. The distance is symmetric, so swapping is safe.
    if len(first_word) < len(second_word):
        return levenshtein_distance(second_word, first_word)

    # Against an empty word every character has to be removed.
    if not second_word:
        return len(first_word)

    # Row for the empty prefix of first_word: turning "" into
    # second_word[:j] takes exactly j edits. A real list keeps both rows
    # the same type on Python 2 and Python 3.
    previous_row = list(range(len(second_word) + 1))

    for i, c1 in enumerate(first_word):
        # First cell: turning first_word[:i + 1] into "" takes i + 1 edits.
        current_row = [i + 1]

        for j, c2 in enumerate(second_word):
            # Candidate costs coming from the three neighbouring cells.
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            # The bool adds 0 when the characters match, 1 otherwise.
            substitutions = previous_row[j] + (c1 != c2)

            # Keep the cheapest way to reach this cell.
            current_row.append(min(insertions, deletions, substitutions))

        # The finished row becomes the reference for the next character.
        previous_row = current_row

    # The last cell of the last row is the distance between the full words.
    return previous_row[-1]
65+
66+
67+
if __name__ == '__main__':
    # Manual test driver: read two words from standard input and print
    # their Levenshtein distance.

    # ``raw_input`` only exists on Python 2; on Python 3 the bare name
    # raises NameError and ``input`` is used in its place.
    try:
        raw_input  # Python 2
    except NameError:
        raw_input = input  # Python 3

    # Surrounding whitespace is stripped so it does not count as edits.
    first_word = raw_input('Enter the first word:\n').strip()
    second_word = raw_input('Enter the second word:\n').strip()

    result = levenshtein_distance(first_word, second_word)
    print('Levenshtein distance between {} and {} is {}'.format(
        first_word, second_word, result))

0 commit comments

Comments
 (0)