TheAlgorithms-Python/other/frequency_finder.py

121 lines
2.3 KiB
Python
Raw Normal View History

2016-08-18 20:59:10 +08:00
# Frequency Finder
# Letter frequencies taken from http://en.wikipedia.org/wiki/Letter_frequency
2019-10-05 13:14:13 +08:00
# Relative frequency of every letter in English text. The values add up to
# roughly 100 (they are percentages) and the entries are listed from the most
# frequent letter down to the least frequent one.
englishLetterFreq = {
    "E": 12.70, "T": 9.06, "A": 8.17, "O": 7.51, "I": 6.97, "N": 6.75,
    "S": 6.33, "H": 6.09, "R": 5.99, "D": 4.25, "L": 4.03, "C": 2.78,
    "U": 2.76, "M": 2.41, "W": 2.36, "F": 2.23, "G": 2.02, "Y": 1.97,
    "P": 1.93, "B": 1.29, "V": 0.98, "K": 0.77, "J": 0.15, "X": 0.15,
    "Q": 0.10, "Z": 0.07,
}

# The alphabet ordered from most to least frequent letter; this is the same
# order as the keys of englishLetterFreq.
ETAOIN = "ETAOINSHRDLCUMWFGYPBVKJXQZ"

# The alphabet in plain alphabetical order (upper case only).
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
2016-08-18 20:59:10 +08:00
def getLetterCount(message):
    """
    Count how many times each letter of the alphabet occurs in ``message``.

    The message is upper-cased before counting, so the tally is
    case-insensitive. Characters that are not found in LETTERS (digits,
    spaces, punctuation, ...) are skipped.

    Returns a dict with one entry per letter of LETTERS, in alphabetical
    order; letters that never occur map to 0.

    >>> getLetterCount("Hello World")["L"]
    3
    >>> getLetterCount("Hello World")["Z"]
    0
    """
    # Start every letter at zero so absent letters are still reported.
    tally = dict.fromkeys(LETTERS, 0)
    for char in message.upper():
        if char not in LETTERS:
            continue
        tally[char] += 1
    return tally
2019-10-05 13:14:13 +08:00
2016-08-18 20:59:10 +08:00
def getItemAtIndexZero(x):
    """
    Return the element stored at index 0 of ``x``.

    Used as a sort key so that (count, letters) pairs are ordered by count.

    >>> getItemAtIndexZero((3, "abc"))
    3
    """
    first = x[0]
    return first
2019-10-05 13:14:13 +08:00
2016-08-18 20:59:10 +08:00
def getFrequencyOrder(message):
    """
    Return the 26 letters ordered from most to least frequent in ``message``.

    Letters that occur equally often are placed in reverse ETAOIN order,
    i.e. within a tie the letter that appears later in ETAOIN comes first.

    >>> getFrequencyOrder("Hello World")
    'LOWDRHEZQXJKVBPYGFMUCSNIAT'
    """
    counts = getLetterCount(message)

    # Bucket the letters by the number of times they occur in the message.
    letters_by_count = {}
    for letter in LETTERS:
        letters_by_count.setdefault(counts[letter], []).append(letter)

    # Visit the buckets from the highest count to the lowest; the letters of
    # each bucket are arranged in reverse ETAOIN order before being joined.
    ordered_groups = []
    for count in sorted(letters_by_count, reverse=True):
        tied = sorted(letters_by_count[count], key=ETAOIN.find, reverse=True)
        ordered_groups.append("".join(tied))

    return "".join(ordered_groups)
2016-08-18 20:59:10 +08:00
def englishFreqMatchScore(message):
    """
    Score how closely the letter frequencies of ``message`` resemble English.

    One point is given for each of the first six letters of ETAOIN that is
    among the six most frequent letters of the message, and one point for
    each of the last six letters of ETAOIN that is among the six least
    frequent letters of the message. The result is an int from 0 to 12.

    >>> englishFreqMatchScore('Hello World')
    1
    """
    order = getFrequencyOrder(message)
    most_frequent, least_frequent = order[:6], order[-6:]

    common_hits = sum(1 for letter in ETAOIN[:6] if letter in most_frequent)
    rare_hits = sum(1 for letter in ETAOIN[-6:] if letter in least_frequent)
    return common_hits + rare_hits
2019-10-05 13:14:13 +08:00
if __name__ == "__main__":
    # When executed as a script, run the doctests embedded in this module.
    import doctest

    doctest.testmod()