From e95ecfaf27c545391bdb7a2d1d8948943a40f828 Mon Sep 17 00:00:00 2001
From: Rohan R Bharadwaj <rohanrbharadwaj@gmail.com>
Date: Fri, 13 May 2022 11:25:53 +0530
Subject: [PATCH] Add missing type annotations for `strings` directory (#5817)

* Type annotations for `strings/autocomplete_using_trie.py`

* Update autocomplete_using_trie.py

* Update detecting_english_programmatically.py

* Update detecting_english_programmatically.py

* Update frequency_finder.py

* Update frequency_finder.py

* Update frequency_finder.py

* Update word_occurrence.py

* Update frequency_finder.py

* Update z_function.py

* Update z_function.py

* Update frequency_finder.py
---
 strings/autocomplete_using_trie.py            | 28 +++---
 strings/detecting_english_programmatically.py | 53 ++++++-----
 strings/frequency_finder.py                   | 94 +++++++------------
 strings/word_occurrence.py                    |  3 +-
 strings/z_function.py                         |  6 +-
 5 files changed, 82 insertions(+), 102 deletions(-)

diff --git a/strings/autocomplete_using_trie.py b/strings/autocomplete_using_trie.py
index 8aa0dc223..758260292 100644
--- a/strings/autocomplete_using_trie.py
+++ b/strings/autocomplete_using_trie.py
@@ -1,11 +1,13 @@
+from __future__ import annotations
+
 END = "#"
 
 
 class Trie:
-    def __init__(self):
-        self._trie = {}
+    def __init__(self) -> None:
+        self._trie: dict = {}
 
-    def insert_word(self, text):
+    def insert_word(self, text: str) -> None:
         trie = self._trie
         for char in text:
             if char not in trie:
@@ -13,7 +15,7 @@ class Trie:
             trie = trie[char]
         trie[END] = True
 
-    def find_word(self, prefix):
+    def find_word(self, prefix: str) -> tuple | list:
         trie = self._trie
         for char in prefix:
             if char in trie:
@@ -22,7 +24,7 @@ class Trie:
                 return []
         return self._elements(trie)
 
-    def _elements(self, d):
+    def _elements(self, d: dict) -> tuple:
         result = []
         for c, v in d.items():
             if c == END:
@@ -39,26 +41,28 @@ for word in words:
     trie.insert_word(word)
 
 
-def autocomplete_using_trie(s):
+def autocomplete_using_trie(string: str) -> tuple:
     """
     >>> trie = Trie()
     >>> for word in words:
     ...     trie.insert_word(word)
     ...
     >>> matches = autocomplete_using_trie("de")
-
-    "detergent " in matches
+    >>> "detergent " in matches
     True
-    "dog " in matches
+    >>> "dog " in matches
     False
     """
-    suffixes = trie.find_word(s)
-    return tuple(s + w for w in suffixes)
+    suffixes = trie.find_word(string)
+    return tuple(string + word for word in suffixes)
 
 
-def main():
+def main() -> None:
     print(autocomplete_using_trie("de"))
 
 
 if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()
     main()
diff --git a/strings/detecting_english_programmatically.py b/strings/detecting_english_programmatically.py
index 44fb71918..aa18db210 100644
--- a/strings/detecting_english_programmatically.py
+++ b/strings/detecting_english_programmatically.py
@@ -4,55 +4,56 @@ UPPERLETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 LETTERS_AND_SPACE = UPPERLETTERS + UPPERLETTERS.lower() + " \t\n"
 
 
-def loadDictionary():
+def load_dictionary() -> dict[str, None]:
     path = os.path.split(os.path.realpath(__file__))
-    englishWords = {}
-    with open(path[0] + "/dictionary.txt") as dictionaryFile:
-        for word in dictionaryFile.read().split("\n"):
-            englishWords[word] = None
-    return englishWords
+    english_words: dict[str, None] = {}
+    with open(path[0] + "/dictionary.txt") as dictionary_file:
+        for word in dictionary_file.read().split("\n"):
+            english_words[word] = None
+    return english_words
 
 
-ENGLISH_WORDS = loadDictionary()
+ENGLISH_WORDS = load_dictionary()
 
 
-def getEnglishCount(message):
+def get_english_count(message: str) -> float:
     message = message.upper()
-    message = removeNonLetters(message)
-    possibleWords = message.split()
+    message = remove_non_letters(message)
+    possible_words = message.split()
 
-    if possibleWords == []:
+    if not possible_words:
         return 0.0
 
     matches = 0
-    for word in possibleWords:
+    for word in possible_words:
         if word in ENGLISH_WORDS:
             matches += 1
 
-    return float(matches) / len(possibleWords)
+    return float(matches) / len(possible_words)
 
 
-def removeNonLetters(message):
-    lettersOnly = []
+def remove_non_letters(message: str) -> str:
+    letters_only = []
     for symbol in message:
         if symbol in LETTERS_AND_SPACE:
-            lettersOnly.append(symbol)
-    return "".join(lettersOnly)
+            letters_only.append(symbol)
+    return "".join(letters_only)
 
 
-def isEnglish(message, wordPercentage=20, letterPercentage=85):
+def is_english(
+    message: str, word_percentage: int = 20, letter_percentage: int = 85
+) -> bool:
     """
-    >>> isEnglish('Hello World')
+    >>> is_english('Hello World')
     True
-
-    >>> isEnglish('llold HorWd')
+    >>> is_english('llold HorWd')
     False
     """
-    wordsMatch = getEnglishCount(message) * 100 >= wordPercentage
-    numLetters = len(removeNonLetters(message))
-    messageLettersPercentage = (float(numLetters) / len(message)) * 100
-    lettersMatch = messageLettersPercentage >= letterPercentage
-    return wordsMatch and lettersMatch
+    words_match = get_english_count(message) * 100 >= word_percentage
+    num_letters = len(remove_non_letters(message))
+    message_letters_percentage = num_letters / len(message) * 100 if message else 0.0
+    letters_match = message_letters_percentage >= letter_percentage
+    return words_match and letters_match
 
 
 if __name__ == "__main__":
diff --git a/strings/frequency_finder.py b/strings/frequency_finder.py
index 48760a9de..7024be17b 100644
--- a/strings/frequency_finder.py
+++ b/strings/frequency_finder.py
@@ -1,7 +1,9 @@
 # Frequency Finder
 
+import string
+
 # frequency taken from http://en.wikipedia.org/wiki/Letter_frequency
-englishLetterFreq = {
+english_letter_freq = {
     "E": 12.70,
     "T": 9.06,
     "A": 8.17,
@@ -33,85 +35,57 @@ ETAOIN = "ETAOINSHRDLCUMWFGYPBVKJXQZ"
 LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 
 
-def getLetterCount(message):
-    letterCount = {
-        "A": 0,
-        "B": 0,
-        "C": 0,
-        "D": 0,
-        "E": 0,
-        "F": 0,
-        "G": 0,
-        "H": 0,
-        "I": 0,
-        "J": 0,
-        "K": 0,
-        "L": 0,
-        "M": 0,
-        "N": 0,
-        "O": 0,
-        "P": 0,
-        "Q": 0,
-        "R": 0,
-        "S": 0,
-        "T": 0,
-        "U": 0,
-        "V": 0,
-        "W": 0,
-        "X": 0,
-        "Y": 0,
-        "Z": 0,
-    }
+def get_letter_count(message: str) -> dict[str, int]:
+    letter_count = {letter: 0 for letter in string.ascii_uppercase}
     for letter in message.upper():
         if letter in LETTERS:
-            letterCount[letter] += 1
+            letter_count[letter] += 1
 
-    return letterCount
+    return letter_count
 
 
-def getItemAtIndexZero(x):
+def get_item_at_index_zero(x: tuple[int, str]) -> int:
     return x[0]
 
 
-def getFrequencyOrder(message):
-    letterToFreq = getLetterCount(message)
-    freqToLetter = {}
+def get_frequency_order(message: str) -> str:
+    letter_to_freq = get_letter_count(message)
+    freq_to_letter: dict[int, list[str]] = {
+        freq: [] for freq in letter_to_freq.values()
+    }
     for letter in LETTERS:
-        if letterToFreq[letter] not in freqToLetter:
-            freqToLetter[letterToFreq[letter]] = [letter]
-        else:
-            freqToLetter[letterToFreq[letter]].append(letter)
+        freq_to_letter[letter_to_freq[letter]].append(letter)
 
-    for freq in freqToLetter:
-        freqToLetter[freq].sort(key=ETAOIN.find, reverse=True)
-        freqToLetter[freq] = "".join(freqToLetter[freq])
+    freq_to_letter_str: dict[int, str] = {}
 
-    freqPairs = list(freqToLetter.items())
-    freqPairs.sort(key=getItemAtIndexZero, reverse=True)
+    for freq in freq_to_letter:
+        freq_to_letter[freq].sort(key=ETAOIN.find, reverse=True)
+        freq_to_letter_str[freq] = "".join(freq_to_letter[freq])
 
-    freqOrder = []
-    for freqPair in freqPairs:
-        freqOrder.append(freqPair[1])
+    freq_pairs = list(freq_to_letter_str.items())
+    freq_pairs.sort(key=get_item_at_index_zero, reverse=True)
 
-    return "".join(freqOrder)
+    freq_order: list[str] = [freq_pair[1] for freq_pair in freq_pairs]
+
+    return "".join(freq_order)
 
 
-def englishFreqMatchScore(message):
+def english_freq_match_score(message: str) -> int:
     """
-    >>> englishFreqMatchScore('Hello World')
+    >>> english_freq_match_score('Hello World')
     1
     """
-    freqOrder = getFrequencyOrder(message)
-    matchScore = 0
-    for commonLetter in ETAOIN[:6]:
-        if commonLetter in freqOrder[:6]:
-            matchScore += 1
+    freq_order = get_frequency_order(message)
+    match_score = 0
+    for common_letter in ETAOIN[:6]:
+        if common_letter in freq_order[:6]:
+            match_score += 1
 
-    for uncommonLetter in ETAOIN[-6:]:
-        if uncommonLetter in freqOrder[-6:]:
-            matchScore += 1
+    for uncommon_letter in ETAOIN[-6:]:
+        if uncommon_letter in freq_order[-6:]:
+            match_score += 1
 
-    return matchScore
+    return match_score
 
 
 if __name__ == "__main__":
diff --git a/strings/word_occurrence.py b/strings/word_occurrence.py
index 4acfa41ad..4e0b3ff34 100644
--- a/strings/word_occurrence.py
+++ b/strings/word_occurrence.py
@@ -1,6 +1,7 @@
 # Created by sarathkaul on 17/11/19
 # Modified by Arkadip Bhattacharya(@darkmatter18) on 20/04/2020
 from collections import defaultdict
+from typing import DefaultDict
 
 
 def word_occurence(sentence: str) -> dict:
@@ -14,7 +15,7 @@ def word_occurence(sentence: str) -> dict:
     >>> dict(word_occurence("Two  spaces"))
     {'Two': 1, 'spaces': 1}
     """
-    occurrence: dict = defaultdict(int)
+    occurrence: DefaultDict[str, int] = defaultdict(int)
     # Creating a dictionary containing count of each word
     for word in sentence.split():
         occurrence[word] += 1
diff --git a/strings/z_function.py b/strings/z_function.py
index d8d823a37..e77ba8dab 100644
--- a/strings/z_function.py
+++ b/strings/z_function.py
@@ -10,7 +10,7 @@ Time Complexity: O(n) - where n is the length of the string
 """
 
 
-def z_function(input_str: str) -> list:
+def z_function(input_str: str) -> list[int]:
     """
     For the given string this function computes value for each index,
     which represents the maximal length substring starting from the index
@@ -27,7 +27,7 @@ def z_function(input_str: str) -> list:
     >>> z_function("zxxzxxz")
     [0, 0, 0, 4, 0, 0, 1]
     """
-    z_result = [0] * len(input_str)
+    z_result = [0] * len(input_str)
 
     # initialize interval's left pointer and right pointer
     left_pointer, right_pointer = 0, 0
@@ -49,7 +49,7 @@ def z_function(input_str: str) -> list:
     return z_result
 
 
-def go_next(i, z_result, s):
+def go_next(i: int, z_result: list[int], s: str) -> bool:
     """
     Check if we have to move forward to the next characters or not
     """