mirror of
https://hub.njuu.cf/TheAlgorithms/Python.git
synced 2023-10-11 13:06:12 +08:00
Add missing type annotations for strings directory (#5817)
* Type annotations for `strings/autocomplete_using_trie.py`
* Update autocomplete_using_trie.py
* Update detecting_english_programmatically.py
* Update detecting_english_programmatically.py
* Update frequency_finder.py
* Update frequency_finder.py
* Update frequency_finder.py
* Update word_occurrence.py
* Update frequency_finder.py
* Update z_function.py
* Update z_function.py
* Update frequency_finder.py
This commit is contained in:
parent
bbb88bb5c2
commit
e95ecfaf27
@ -1,11 +1,13 @@
|
||||
from __future__ import annotations
|
||||
|
||||
END = "#"
|
||||
|
||||
|
||||
class Trie:
|
||||
def __init__(self):
|
||||
self._trie = {}
|
||||
def __init__(self) -> None:
|
||||
self._trie: dict = {}
|
||||
|
||||
def insert_word(self, text):
|
||||
def insert_word(self, text: str) -> None:
|
||||
trie = self._trie
|
||||
for char in text:
|
||||
if char not in trie:
|
||||
@ -13,7 +15,7 @@ class Trie:
|
||||
trie = trie[char]
|
||||
trie[END] = True
|
||||
|
||||
def find_word(self, prefix):
|
||||
def find_word(self, prefix: str) -> tuple | list:
|
||||
trie = self._trie
|
||||
for char in prefix:
|
||||
if char in trie:
|
||||
@ -22,7 +24,7 @@ class Trie:
|
||||
return []
|
||||
return self._elements(trie)
|
||||
|
||||
def _elements(self, d):
|
||||
def _elements(self, d: dict) -> tuple:
|
||||
result = []
|
||||
for c, v in d.items():
|
||||
if c == END:
|
||||
@ -39,26 +41,28 @@ for word in words:
|
||||
trie.insert_word(word)
|
||||
|
||||
|
||||
def autocomplete_using_trie(s):
|
||||
def autocomplete_using_trie(string: str) -> tuple:
|
||||
"""
|
||||
>>> trie = Trie()
|
||||
>>> for word in words:
|
||||
... trie.insert_word(word)
|
||||
...
|
||||
>>> matches = autocomplete_using_trie("de")
|
||||
|
||||
"detergent " in matches
|
||||
>>> "detergent " in matches
|
||||
True
|
||||
"dog " in matches
|
||||
>>> "dog " in matches
|
||||
False
|
||||
"""
|
||||
suffixes = trie.find_word(s)
|
||||
return tuple(s + w for w in suffixes)
|
||||
suffixes = trie.find_word(string)
|
||||
return tuple(string + word for word in suffixes)
|
||||
|
||||
|
||||
def main():
|
||||
def main() -> None:
|
||||
print(autocomplete_using_trie("de"))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import doctest
|
||||
|
||||
doctest.testmod()
|
||||
main()
|
||||
|
@ -4,55 +4,56 @@ UPPERLETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
LETTERS_AND_SPACE = UPPERLETTERS + UPPERLETTERS.lower() + " \t\n"
|
||||
|
||||
|
||||
def loadDictionary():
|
||||
def load_dictionary() -> dict[str, None]:
|
||||
path = os.path.split(os.path.realpath(__file__))
|
||||
englishWords = {}
|
||||
with open(path[0] + "/dictionary.txt") as dictionaryFile:
|
||||
for word in dictionaryFile.read().split("\n"):
|
||||
englishWords[word] = None
|
||||
return englishWords
|
||||
english_words: dict[str, None] = {}
|
||||
with open(path[0] + "/dictionary.txt") as dictionary_file:
|
||||
for word in dictionary_file.read().split("\n"):
|
||||
english_words[word] = None
|
||||
return english_words
|
||||
|
||||
|
||||
ENGLISH_WORDS = loadDictionary()
|
||||
ENGLISH_WORDS = load_dictionary()
|
||||
|
||||
|
||||
def getEnglishCount(message):
|
||||
def get_english_count(message: str) -> float:
|
||||
message = message.upper()
|
||||
message = removeNonLetters(message)
|
||||
possibleWords = message.split()
|
||||
message = remove_non_letters(message)
|
||||
possible_words = message.split()
|
||||
|
||||
if possibleWords == []:
|
||||
if possible_words == []:
|
||||
return 0.0
|
||||
|
||||
matches = 0
|
||||
for word in possibleWords:
|
||||
for word in possible_words:
|
||||
if word in ENGLISH_WORDS:
|
||||
matches += 1
|
||||
|
||||
return float(matches) / len(possibleWords)
|
||||
return float(matches) / len(possible_words)
|
||||
|
||||
|
||||
def removeNonLetters(message):
|
||||
lettersOnly = []
|
||||
def remove_non_letters(message: str) -> str:
|
||||
letters_only = []
|
||||
for symbol in message:
|
||||
if symbol in LETTERS_AND_SPACE:
|
||||
lettersOnly.append(symbol)
|
||||
return "".join(lettersOnly)
|
||||
letters_only.append(symbol)
|
||||
return "".join(letters_only)
|
||||
|
||||
|
||||
def isEnglish(message, wordPercentage=20, letterPercentage=85):
|
||||
def is_english(
|
||||
message: str, word_percentage: int = 20, letter_percentage: int = 85
|
||||
) -> bool:
|
||||
"""
|
||||
>>> isEnglish('Hello World')
|
||||
>>> is_english('Hello World')
|
||||
True
|
||||
|
||||
>>> isEnglish('llold HorWd')
|
||||
>>> is_english('llold HorWd')
|
||||
False
|
||||
"""
|
||||
wordsMatch = getEnglishCount(message) * 100 >= wordPercentage
|
||||
numLetters = len(removeNonLetters(message))
|
||||
messageLettersPercentage = (float(numLetters) / len(message)) * 100
|
||||
lettersMatch = messageLettersPercentage >= letterPercentage
|
||||
return wordsMatch and lettersMatch
|
||||
words_match = get_english_count(message) * 100 >= word_percentage
|
||||
num_letters = len(remove_non_letters(message))
|
||||
message_letters_percentage = (float(num_letters) / len(message)) * 100
|
||||
letters_match = message_letters_percentage >= letter_percentage
|
||||
return words_match and letters_match
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
@ -1,7 +1,9 @@
|
||||
# Frequency Finder
|
||||
|
||||
import string
|
||||
|
||||
# frequency taken from http://en.wikipedia.org/wiki/Letter_frequency
|
||||
englishLetterFreq = {
|
||||
english_letter_freq = {
|
||||
"E": 12.70,
|
||||
"T": 9.06,
|
||||
"A": 8.17,
|
||||
@ -33,85 +35,57 @@ ETAOIN = "ETAOINSHRDLCUMWFGYPBVKJXQZ"
|
||||
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
|
||||
|
||||
def getLetterCount(message):
|
||||
letterCount = {
|
||||
"A": 0,
|
||||
"B": 0,
|
||||
"C": 0,
|
||||
"D": 0,
|
||||
"E": 0,
|
||||
"F": 0,
|
||||
"G": 0,
|
||||
"H": 0,
|
||||
"I": 0,
|
||||
"J": 0,
|
||||
"K": 0,
|
||||
"L": 0,
|
||||
"M": 0,
|
||||
"N": 0,
|
||||
"O": 0,
|
||||
"P": 0,
|
||||
"Q": 0,
|
||||
"R": 0,
|
||||
"S": 0,
|
||||
"T": 0,
|
||||
"U": 0,
|
||||
"V": 0,
|
||||
"W": 0,
|
||||
"X": 0,
|
||||
"Y": 0,
|
||||
"Z": 0,
|
||||
}
|
||||
def get_letter_count(message: str) -> dict[str, int]:
|
||||
letter_count = {letter: 0 for letter in string.ascii_uppercase}
|
||||
for letter in message.upper():
|
||||
if letter in LETTERS:
|
||||
letterCount[letter] += 1
|
||||
letter_count[letter] += 1
|
||||
|
||||
return letterCount
|
||||
return letter_count
|
||||
|
||||
|
||||
def getItemAtIndexZero(x):
|
||||
def get_item_at_index_zero(x: tuple) -> str:
|
||||
return x[0]
|
||||
|
||||
|
||||
def getFrequencyOrder(message):
|
||||
letterToFreq = getLetterCount(message)
|
||||
freqToLetter = {}
|
||||
def get_frequency_order(message: str) -> str:
|
||||
letter_to_freq = get_letter_count(message)
|
||||
freq_to_letter: dict[int, list[str]] = {
|
||||
freq: [] for letter, freq in letter_to_freq.items()
|
||||
}
|
||||
for letter in LETTERS:
|
||||
if letterToFreq[letter] not in freqToLetter:
|
||||
freqToLetter[letterToFreq[letter]] = [letter]
|
||||
else:
|
||||
freqToLetter[letterToFreq[letter]].append(letter)
|
||||
freq_to_letter[letter_to_freq[letter]].append(letter)
|
||||
|
||||
for freq in freqToLetter:
|
||||
freqToLetter[freq].sort(key=ETAOIN.find, reverse=True)
|
||||
freqToLetter[freq] = "".join(freqToLetter[freq])
|
||||
freq_to_letter_str: dict[int, str] = {}
|
||||
|
||||
freqPairs = list(freqToLetter.items())
|
||||
freqPairs.sort(key=getItemAtIndexZero, reverse=True)
|
||||
for freq in freq_to_letter:
|
||||
freq_to_letter[freq].sort(key=ETAOIN.find, reverse=True)
|
||||
freq_to_letter_str[freq] = "".join(freq_to_letter[freq])
|
||||
|
||||
freqOrder = []
|
||||
for freqPair in freqPairs:
|
||||
freqOrder.append(freqPair[1])
|
||||
freq_pairs = list(freq_to_letter_str.items())
|
||||
freq_pairs.sort(key=get_item_at_index_zero, reverse=True)
|
||||
|
||||
return "".join(freqOrder)
|
||||
freq_order: list[str] = [freq_pair[1] for freq_pair in freq_pairs]
|
||||
|
||||
return "".join(freq_order)
|
||||
|
||||
|
||||
def englishFreqMatchScore(message):
|
||||
def english_freq_match_score(message: str) -> int:
|
||||
"""
|
||||
>>> englishFreqMatchScore('Hello World')
|
||||
>>> english_freq_match_score('Hello World')
|
||||
1
|
||||
"""
|
||||
freqOrder = getFrequencyOrder(message)
|
||||
matchScore = 0
|
||||
for commonLetter in ETAOIN[:6]:
|
||||
if commonLetter in freqOrder[:6]:
|
||||
matchScore += 1
|
||||
freq_order = get_frequency_order(message)
|
||||
match_score = 0
|
||||
for common_letter in ETAOIN[:6]:
|
||||
if common_letter in freq_order[:6]:
|
||||
match_score += 1
|
||||
|
||||
for uncommonLetter in ETAOIN[-6:]:
|
||||
if uncommonLetter in freqOrder[-6:]:
|
||||
matchScore += 1
|
||||
for uncommon_letter in ETAOIN[-6:]:
|
||||
if uncommon_letter in freq_order[-6:]:
|
||||
match_score += 1
|
||||
|
||||
return matchScore
|
||||
return match_score
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
@ -1,6 +1,7 @@
|
||||
# Created by sarathkaul on 17/11/19
|
||||
# Modified by Arkadip Bhattacharya(@darkmatter18) on 20/04/2020
|
||||
from collections import defaultdict
|
||||
from typing import DefaultDict
|
||||
|
||||
|
||||
def word_occurence(sentence: str) -> dict:
|
||||
@ -14,7 +15,7 @@ def word_occurence(sentence: str) -> dict:
|
||||
>>> dict(word_occurence("Two spaces"))
|
||||
{'Two': 1, 'spaces': 1}
|
||||
"""
|
||||
occurrence: dict = defaultdict(int)
|
||||
occurrence: DefaultDict[str, int] = defaultdict(int)
|
||||
# Creating a dictionary containing count of each word
|
||||
for word in sentence.split():
|
||||
occurrence[word] += 1
|
||||
|
@ -10,7 +10,7 @@ Time Complexity: O(n) - where n is the length of the string
|
||||
"""
|
||||
|
||||
|
||||
def z_function(input_str: str) -> list:
|
||||
def z_function(input_str: str) -> list[int]:
|
||||
"""
|
||||
For the given string this function computes value for each index,
|
||||
which represents the maximal length substring starting from the index
|
||||
@ -27,7 +27,7 @@ def z_function(input_str: str) -> list:
|
||||
>>> z_function("zxxzxxz")
|
||||
[0, 0, 0, 4, 0, 0, 1]
|
||||
"""
|
||||
z_result = [0] * len(input_str)
|
||||
z_result = [0 for i in range(len(input_str))]
|
||||
|
||||
# initialize interval's left pointer and right pointer
|
||||
left_pointer, right_pointer = 0, 0
|
||||
@ -49,7 +49,7 @@ def z_function(input_str: str) -> list:
|
||||
return z_result
|
||||
|
||||
|
||||
def go_next(i, z_result, s):
|
||||
def go_next(i: int, z_result: list[int], s: str) -> bool:
|
||||
"""
|
||||
Check if we have to move forward to the next characters or not
|
||||
"""
|
||||
|
Loading…
Reference in New Issue
Block a user