"""
https://cp-algorithms.com/string/z-function.html

Z-function or Z algorithm

Efficient algorithm for finding pattern occurrences in a string

Time Complexity: O(n) - where n is the length of the string
"""


def z_function(input_str: str) -> list[int]:
    """
    Compute the Z-array of ``input_str``.

    For every index ``i`` the result holds the length of the longest
    substring that starts at ``i`` and is equal to the prefix of
    ``input_str`` of the same length.

    e.g. for the string 'abab' the value at index 2 (0-based) is 2,
    because 'ab' starting at index 2 equals the prefix 'ab'.

    The value of the first element is always 0, and an empty string
    yields an empty list.

    :param input_str: string to analyse
    :return: list with one integer per character of ``input_str``

    >>> z_function("abracadabra")
    [0, 0, 0, 1, 0, 1, 0, 4, 0, 0, 1]
    >>> z_function("aaaa")
    [0, 3, 2, 1]
    >>> z_function("zxxzxxz")
    [0, 0, 0, 4, 0, 0, 1]
    >>> z_function("abab")
    [0, 0, 2, 0]
    >>> z_function("")
    []
    """
    length = len(input_str)
    z_result = [0] * length

    # [left_pointer, right_pointer] is the segment with the largest right end
    # found so far that is known to match a prefix of input_str
    left_pointer, right_pointer = 0, 0

    for i in range(1, length):
        match_len = 0

        # case when current index is inside the interval: reuse the value
        # already computed for the mirrored prefix position, capped at the
        # part of the interval that is still ahead of i
        if i <= right_pointer:
            match_len = min(right_pointer - i + 1, z_result[i - left_pointer])

        # extend the match character by character while we stay inside the
        # string and the next characters keep agreeing with the prefix
        while (
            i + match_len < length
            and input_str[match_len] == input_str[i + match_len]
        ):
            match_len += 1

        z_result[i] = match_len

        # if the match for this index reaches further right than the
        # current interval, it becomes the new interval
        if i + match_len - 1 > right_pointer:
            left_pointer, right_pointer = i, i + match_len - 1

    return z_result


def go_next(i: int, z_result: list[int], s: str) -> bool:
    """
    Check if we have to move forward to the next characters or not

    The match that starts at index ``i`` currently has length
    ``z_result[i]``. It can grow by one character when position
    ``i + z_result[i]`` is still inside ``s`` and holds the same character
    as the prefix position ``z_result[i]``.

    :param i: index in ``s`` whose match is being extended
    :param z_result: match lengths computed so far, indexed like ``s``
    :param s: string being analysed
    :return: True if the match at ``i`` can be extended, False otherwise

    >>> go_next(1, [0, 0], "aa")
    True
    >>> go_next(1, [0, 1], "aa")
    False
    >>> go_next(1, [0, 0], "ab")
    False
    """
    matched = z_result[i]
    # the bounds check comes first so the indexing on the right never
    # reads past the end of s
    return i + matched < len(s) and s[matched] == s[i + matched]


def find_pattern(pattern: str, input_str: str) -> int:
    """
    Example of using z-function for pattern occurrence
    Given function returns the number of times 'pattern'
    appears in 'input_str' as a substring (overlapping
    occurrences are counted separately)

    With an empty 'pattern' every position of 'input_str' is counted,
    so the result is len(input_str).

    :param pattern: string to look for
    :param input_str: string to search in
    :return: number of positions in 'input_str' where 'pattern' starts

    >>> find_pattern("abr", "abracadabra")
    2
    >>> find_pattern("a", "aaaa")
    4
    >>> find_pattern("xz", "zxxzxxz")
    2
    >>> find_pattern("aa", "aa")
    1
    >>> find_pattern("aa", "aaa")
    2
    """
    pattern_length = len(pattern)

    # concatenate 'pattern' and 'input_str' and call z_function
    # with concatenated string
    z_result = z_function(pattern + input_str)

    # Only positions at or after len(pattern) start inside 'input_str'.
    # Positions inside the pattern part must be skipped: a periodic pattern
    # such as "aa" matches itself at an offset and, without a separator,
    # that match spills into 'input_str' and would be counted as an
    # occurrence that does not exist.
    #
    # if value is greater than or equal to the length of the pattern string
    # that means this index is starting position of substring
    # which is equal to pattern string
    return sum(1 for val in z_result[pattern_length:] if val >= pattern_length)


# Run the doctests embedded in this module when it is executed as a script.
if __name__ == "__main__":
    import doctest

    doctest.testmod()