2016-08-18 20:59:10 +08:00
|
|
|
# Frequency Finder
|
|
|
|
|
|
|
|
# English letter frequencies (in percent), taken from http://en.wikipedia.org/wiki/Letter_frequency
|
2019-10-05 13:14:13 +08:00
|
|
|
# Relative frequency of each letter in English text, in percent.
# Entries are listed from the most common letter to the least common one,
# i.e. in the same order as the ETAOIN string below.
englishLetterFreq = {
    "E": 12.70, "T": 9.06, "A": 8.17, "O": 7.51, "I": 6.97, "N": 6.75,
    "S": 6.33, "H": 6.09, "R": 5.99, "D": 4.25, "L": 4.03, "C": 2.78,
    "U": 2.76, "M": 2.41, "W": 2.36, "F": 2.23, "G": 2.02, "Y": 1.97,
    "P": 1.93, "B": 1.29, "V": 0.98, "K": 0.77, "J": 0.15, "X": 0.15,
    "Q": 0.10, "Z": 0.07,
}

# The alphabet ordered from most to least frequent letter in English.
ETAOIN = "ETAOINSHRDLCUMWFGYPBVKJXQZ"

# The alphabet in ordinary A-Z order; the only symbols that get counted.
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
|
|
|
|
2016-08-18 20:59:10 +08:00
|
|
|
|
|
|
|
def getLetterCount(message):
    """
    Count how often each letter of LETTERS occurs in ``message``.

    The message is upper-cased before counting, so the tally is
    case-insensitive. Characters that are not in LETTERS are ignored.

    Returns a dict with one key per letter in LETTERS (in A-Z order);
    letters that never occur map to 0.
    """
    tally = dict.fromkeys(LETTERS, 0)
    for symbol in message.upper():
        if symbol not in LETTERS:
            continue
        tally[symbol] += 1
    return tally
|
|
|
|
|
2019-10-05 13:14:13 +08:00
|
|
|
|
2016-08-18 20:59:10 +08:00
|
|
|
def getItemAtIndexZero(x):
    """Return the element at index 0 of ``x`` (used as a sort key for pairs)."""
    head = x[0]
    return head
|
|
|
|
|
2019-10-05 13:14:13 +08:00
|
|
|
|
2016-08-18 20:59:10 +08:00
|
|
|
def getFrequencyOrder(message):
    """
    Return the letters of LETTERS ordered by how often they occur in
    ``message``, most frequent first.

    Letters that occur equally often are placed in reverse ETAOIN order
    relative to each other. The result is a single string.
    """
    counts = getLetterCount(message)

    # Bucket the letters by their occurrence count.
    lettersByCount = {}
    for symbol in LETTERS:
        lettersByCount.setdefault(counts[symbol], []).append(symbol)

    # Inside each bucket, break ties by reverse ETAOIN position and collapse
    # the bucket into one string.
    groupByCount = {
        count: "".join(sorted(symbols, key=ETAOIN.find, reverse=True))
        for count, symbols in lettersByCount.items()
    }

    # Highest count first; every count is a distinct dict key, so no ties here.
    ranked = sorted(groupByCount.items(), key=getItemAtIndexZero, reverse=True)
    return "".join(group for _, group in ranked)
|
|
|
|
|
2016-08-18 20:59:10 +08:00
|
|
|
|
|
|
|
def englishFreqMatchScore(message):
    """
    Score how closely the letter frequencies of ``message`` resemble English.

    One point is given for each of the first six letters of ETAOIN that is
    among the six most frequent letters of the message, and one point for
    each of the last six letters of ETAOIN that is among the six least
    frequent letters of the message. The score is an int from 0 to 12.

    >>> englishFreqMatchScore('Hello World')
    1
    """
    order = getFrequencyOrder(message)
    mostFrequent, leastFrequent = order[:6], order[-6:]

    commonHits = sum(1 for symbol in ETAOIN[:6] if symbol in mostFrequent)
    rareHits = sum(1 for symbol in ETAOIN[-6:] if symbol in leastFrequent)
    return commonHits + rareHits
|
|
|
|
|
2019-10-05 13:14:13 +08:00
|
|
|
|
|
|
|
if __name__ == "__main__":
    # When run as a script, execute the doctests embedded in this module.
    from doctest import testmod

    testmod()
|