# Frequency Finder

# Frequency taken from http://en.wikipedia.org/wiki/Letter_frequency
# Maps each uppercase letter to its relative frequency in English text.
# The values sum to roughly 100, i.e. they are percentages.
englishLetterFreq = {
    'E': 12.70, 'T': 9.06, 'A': 8.17, 'O': 7.51, 'I': 6.97,
    'N': 6.75, 'S': 6.33, 'H': 6.09, 'R': 5.99, 'D': 4.25,
    'L': 4.03, 'C': 2.78, 'U': 2.76, 'M': 2.41, 'W': 2.36,
    'F': 2.23, 'G': 2.02, 'Y': 1.97, 'P': 1.93, 'B': 1.29,
    'V': 0.98, 'K': 0.77, 'J': 0.15, 'X': 0.15, 'Q': 0.10,
    'Z': 0.07,
}

# The 26 letters ordered from most to least frequent, following the table
# above (J and X share the same frequency; J is listed first).
ETAOIN = 'ETAOINSHRDLCUMWFGYPBVKJXQZ'

# The 26 uppercase letters in alphabetical order.
LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

def getLetterCount(message):
    """Count how often each letter of LETTERS occurs in *message*.

    The message is upper-cased before counting, so the count is
    case-insensitive. Characters that are not in LETTERS (digits, spaces,
    punctuation, ...) are ignored.

    Returns a dict with one key per letter in LETTERS (in LETTERS order);
    letters that never occur map to 0.
    """
    # Build the zeroed table from LETTERS instead of repeating the alphabet
    # as a hand-typed literal, so the two can never drift apart.
    letterCount = dict.fromkeys(LETTERS, 0)
    for letter in message.upper():
        if letter in letterCount:
            letterCount[letter] += 1

    return letterCount

def getItemAtIndexZero(x):
    """Return the first element of the indexable *x* (i.e. ``x[0]``).

    Intended as a sort ``key`` function for ordering pairs by their first
    item.
    """
    return x[0]

def getFrequencyOrder(message):
    """Return the letters of LETTERS ordered by how often they occur in *message*.

    The result is a string containing every letter of LETTERS exactly once,
    most frequent first. Letters that occur equally often are placed in
    reverse ETAOIN order (the letter that appears later in ETAOIN comes
    first).
    """
    letterToFreq = getLetterCount(message)

    # Group the letters by their count: {count: [letters with that count]}.
    freqToLetter = {}
    for letter in LETTERS:
        freqToLetter.setdefault(letterToFreq[letter], []).append(letter)

    # Walk the counts from highest to lowest. Within one count, break the
    # tie by position in ETAOIN, reversed.
    freqOrder = []
    for freq in sorted(freqToLetter, reverse=True):
        freqOrder.extend(sorted(freqToLetter[freq], key=ETAOIN.find, reverse=True))

    return ''.join(freqOrder)

def englishFreqMatchScore(message):
    """Return how well the letter frequencies of *message* match English.

    One point is scored for each of the six most common English letters
    (the first six of ETAOIN) that is also among the six most frequent
    letters of the message, and one point for each of the six least common
    English letters (the last six of ETAOIN) that is also among the six
    least frequent letters of the message. The score is therefore an
    integer from 0 to 12.

    >>> englishFreqMatchScore('Hello World')
    1
    """
    freqOrder = getFrequencyOrder(message)

    # Slice once instead of on every loop iteration.
    mostFrequent = freqOrder[:6]
    leastFrequent = freqOrder[-6:]

    matchScore = sum(1 for commonLetter in ETAOIN[:6] if commonLetter in mostFrequent)
    matchScore += sum(1 for uncommonLetter in ETAOIN[-6:] if uncommonLetter in leastFrequent)

    return matchScore

# When run as a script, execute the doctests embedded in this module.
if __name__ == '__main__':
    import doctest

    doctest.testmod()