From e95ecfaf27c545391bdb7a2d1d8948943a40f828 Mon Sep 17 00:00:00 2001 From: Rohan R Bharadwaj Date: Fri, 13 May 2022 11:25:53 +0530 Subject: [PATCH] Add missing type annotations for `strings` directory (#5817) * Type annotations for `strings/autocomplete_using_trie.py` * Update autocomplete_using_trie.py * Update detecting_english_programmatically.py * Update detecting_english_programmatically.py * Update frequency_finder.py * Update frequency_finder.py * Update frequency_finder.py * Update word_occurrence.py * Update frequency_finder.py * Update z_function.py * Update z_function.py * Update frequency_finder.py --- strings/autocomplete_using_trie.py | 28 +++--- strings/detecting_english_programmatically.py | 53 ++++++----- strings/frequency_finder.py | 94 +++++++------------ strings/word_occurrence.py | 3 +- strings/z_function.py | 6 +- 5 files changed, 82 insertions(+), 102 deletions(-) diff --git a/strings/autocomplete_using_trie.py b/strings/autocomplete_using_trie.py index 8aa0dc223..758260292 100644 --- a/strings/autocomplete_using_trie.py +++ b/strings/autocomplete_using_trie.py @@ -1,11 +1,13 @@ +from __future__ import annotations + END = "#" class Trie: - def __init__(self): - self._trie = {} + def __init__(self) -> None: + self._trie: dict = {} - def insert_word(self, text): + def insert_word(self, text: str) -> None: trie = self._trie for char in text: if char not in trie: @@ -13,7 +15,7 @@ class Trie: trie = trie[char] trie[END] = True - def find_word(self, prefix): + def find_word(self, prefix: str) -> tuple | list: trie = self._trie for char in prefix: if char in trie: @@ -22,7 +24,7 @@ class Trie: return [] return self._elements(trie) - def _elements(self, d): + def _elements(self, d: dict) -> tuple: result = [] for c, v in d.items(): if c == END: @@ -39,26 +41,28 @@ for word in words: trie.insert_word(word) -def autocomplete_using_trie(s): +def autocomplete_using_trie(string: str) -> tuple: """ >>> trie = Trie() >>> for word in words: ... trie.insert_word(word) ... >>> matches = autocomplete_using_trie("de") - - "detergent " in matches + >>> "detergent " in matches True - "dog " in matches + >>> "dog " in matches False """ - suffixes = trie.find_word(s) - return tuple(s + w for w in suffixes) + suffixes = trie.find_word(string) + return tuple(string + word for word in suffixes) -def main(): +def main() -> None: print(autocomplete_using_trie("de")) if __name__ == "__main__": + import doctest + + doctest.testmod() main() diff --git a/strings/detecting_english_programmatically.py b/strings/detecting_english_programmatically.py index 44fb71918..aa18db210 100644 --- a/strings/detecting_english_programmatically.py +++ b/strings/detecting_english_programmatically.py @@ -4,55 +4,56 @@ UPPERLETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" LETTERS_AND_SPACE = UPPERLETTERS + UPPERLETTERS.lower() + " \t\n" -def loadDictionary(): +def load_dictionary() -> dict[str, None]: path = os.path.split(os.path.realpath(__file__)) - englishWords = {} - with open(path[0] + "/dictionary.txt") as dictionaryFile: - for word in dictionaryFile.read().split("\n"): - englishWords[word] = None - return englishWords + english_words: dict[str, None] = {} + with open(path[0] + "/dictionary.txt") as dictionary_file: + for word in dictionary_file.read().split("\n"): + english_words[word] = None + return english_words -ENGLISH_WORDS = loadDictionary() +ENGLISH_WORDS = load_dictionary() -def getEnglishCount(message): +def get_english_count(message: str) -> float: message = message.upper() - message = removeNonLetters(message) - possibleWords = message.split() + message = remove_non_letters(message) + possible_words = message.split() - if possibleWords == []: + if possible_words == []: return 0.0 matches = 0 - for word in possibleWords: + for word in possible_words: if word in ENGLISH_WORDS: matches += 1 - return float(matches) / len(possibleWords) + return float(matches) / len(possible_words) -def removeNonLetters(message): - lettersOnly = [] +def remove_non_letters(message: str) -> str: + letters_only = [] for symbol in message: if symbol in LETTERS_AND_SPACE: - lettersOnly.append(symbol) - return "".join(lettersOnly) + letters_only.append(symbol) + return "".join(letters_only) -def isEnglish(message, wordPercentage=20, letterPercentage=85): +def is_english( + message: str, word_percentage: int = 20, letter_percentage: int = 85 +) -> bool: """ - >>> isEnglish('Hello World') + >>> is_english('Hello World') True - - >>> isEnglish('llold HorWd') + >>> is_english('llold HorWd') False """ - wordsMatch = getEnglishCount(message) * 100 >= wordPercentage - numLetters = len(removeNonLetters(message)) - messageLettersPercentage = (float(numLetters) / len(message)) * 100 - lettersMatch = messageLettersPercentage >= letterPercentage - return wordsMatch and lettersMatch + words_match = get_english_count(message) * 100 >= word_percentage + num_letters = len(remove_non_letters(message)) + message_letters_percentage = (float(num_letters) / len(message)) * 100 + letters_match = message_letters_percentage >= letter_percentage + return words_match and letters_match if __name__ == "__main__": diff --git a/strings/frequency_finder.py b/strings/frequency_finder.py index 48760a9de..7024be17b 100644 --- a/strings/frequency_finder.py +++ b/strings/frequency_finder.py @@ -1,7 +1,9 @@ # Frequency Finder +import string + # frequency taken from http://en.wikipedia.org/wiki/Letter_frequency -englishLetterFreq = { +english_letter_freq = { "E": 12.70, "T": 9.06, "A": 8.17, @@ -33,85 +35,57 @@ ETAOIN = "ETAOINSHRDLCUMWFGYPBVKJXQZ" LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" -def getLetterCount(message): - letterCount = { - "A": 0, - "B": 0, - "C": 0, - "D": 0, - "E": 0, - "F": 0, - "G": 0, - "H": 0, - "I": 0, - "J": 0, - "K": 0, - "L": 0, - "M": 0, - "N": 0, - "O": 0, - "P": 0, - "Q": 0, - "R": 0, - "S": 0, - "T": 0, - "U": 0, - "V": 0, - "W": 0, - "X": 0, - "Y": 0, - "Z": 0, - } +def get_letter_count(message: str) -> dict[str, int]: + letter_count = {letter: 0 for letter in string.ascii_uppercase} for letter in message.upper(): if letter in LETTERS: - letterCount[letter] += 1 + letter_count[letter] += 1 - return letterCount + return letter_count -def getItemAtIndexZero(x): +def get_item_at_index_zero(x: tuple) -> str: return x[0] -def getFrequencyOrder(message): - letterToFreq = getLetterCount(message) - freqToLetter = {} +def get_frequency_order(message: str) -> str: + letter_to_freq = get_letter_count(message) + freq_to_letter: dict[int, list[str]] = { + freq: [] for letter, freq in letter_to_freq.items() + } for letter in LETTERS: - if letterToFreq[letter] not in freqToLetter: - freqToLetter[letterToFreq[letter]] = [letter] - else: - freqToLetter[letterToFreq[letter]].append(letter) + freq_to_letter[letter_to_freq[letter]].append(letter) - for freq in freqToLetter: - freqToLetter[freq].sort(key=ETAOIN.find, reverse=True) - freqToLetter[freq] = "".join(freqToLetter[freq]) + freq_to_letter_str: dict[int, str] = {} - freqPairs = list(freqToLetter.items()) - freqPairs.sort(key=getItemAtIndexZero, reverse=True) + for freq in freq_to_letter: + freq_to_letter[freq].sort(key=ETAOIN.find, reverse=True) + freq_to_letter_str[freq] = "".join(freq_to_letter[freq]) - freqOrder = [] - for freqPair in freqPairs: - freqOrder.append(freqPair[1]) + freq_pairs = list(freq_to_letter_str.items()) + freq_pairs.sort(key=get_item_at_index_zero, reverse=True) - return "".join(freqOrder) + freq_order: list[str] = [freq_pair[1] for freq_pair in freq_pairs] + + return "".join(freq_order) -def englishFreqMatchScore(message): +def english_freq_match_score(message: str) -> int: """ - >>> englishFreqMatchScore('Hello World') + >>> english_freq_match_score('Hello World') 1 """ - freqOrder = getFrequencyOrder(message) - matchScore = 0 - for commonLetter in ETAOIN[:6]: - if commonLetter in freqOrder[:6]: - matchScore += 1 + freq_order = get_frequency_order(message) + match_score = 0 + for common_letter in ETAOIN[:6]: + if common_letter in freq_order[:6]: + match_score += 1 - for uncommonLetter in ETAOIN[-6:]: - if uncommonLetter in freqOrder[-6:]: - matchScore += 1 + for uncommon_letter in ETAOIN[-6:]: + if uncommon_letter in freq_order[-6:]: + match_score += 1 - return matchScore + return match_score if __name__ == "__main__": diff --git a/strings/word_occurrence.py b/strings/word_occurrence.py index 4acfa41ad..4e0b3ff34 100644 --- a/strings/word_occurrence.py +++ b/strings/word_occurrence.py @@ -1,6 +1,7 @@ # Created by sarathkaul on 17/11/19 # Modified by Arkadip Bhattacharya(@darkmatter18) on 20/04/2020 from collections import defaultdict +from typing import DefaultDict def word_occurence(sentence: str) -> dict: @@ -14,7 +15,7 @@ def word_occurence(sentence: str) -> dict: >>> dict(word_occurence("Two spaces")) {'Two': 1, 'spaces': 1} """ - occurrence: dict = defaultdict(int) + occurrence: DefaultDict[str, int] = defaultdict(int) # Creating a dictionary containing count of each word for word in sentence.split(): occurrence[word] += 1 diff --git a/strings/z_function.py b/strings/z_function.py index d8d823a37..e77ba8dab 100644 --- a/strings/z_function.py +++ b/strings/z_function.py @@ -10,7 +10,7 @@ Time Complexity: O(n) - where n is the length of the string """ -def z_function(input_str: str) -> list: +def z_function(input_str: str) -> list[int]: """ For the given string this function computes value for each index, which represents the maximal length substring starting from the index @@ -27,7 +27,7 @@ def z_function(input_str: str) -> list: >>> z_function("zxxzxxz") [0, 0, 0, 4, 0, 0, 1] """ - z_result = [0] * len(input_str) + z_result = [0 for i in range(len(input_str))] # initialize interval's left pointer and right pointer left_pointer, right_pointer = 0, 0 @@ -49,7 +49,7 @@ def z_function(input_str: str) -> list: return z_result -def go_next(i, z_result, s): +def go_next(i: int, z_result: list[int], s: str) -> bool: """ Check if we have to move forward to the next characters or not """