Initial

2023-10-11 13:06:12 +08:00 · 2016-08-02 21:03:29 +05:30 · 2016-08-02 21:03:29 +05:30 · 052bcb1f52
commit 052bcb1f52
parent ccf062a388
2 changed files with 45374 additions and 0 deletions
--- a/english/Dictionary.txt
+++ b/english/Dictionary.txt
--- a/english/detecting_english_programmatically.py
+++ b/english/detecting_english_programmatically.py
@ -0,0 +1,41 @@
+# The 26 uppercase ASCII letters.
+UPPERLETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+# Every character removeNonLetters() keeps: upper- and lowercase letters
+# plus the whitespace characters space, tab and newline.
+LETTERS_AND_SPACE = UPPERLETTERS + UPPERLETTERS.lower() + ' \t\n'
+
+def loadDictionary():
+    dictionaryFile = open('Dictionary.txt')
+    englishWords = {}
+    for word in dictionaryFile.read().split('\n'):
+        englishWords[word] = None
+    dictionaryFile.close()
+    return englishWords
+
+# Built once when the module is imported (this reads 'Dictionary.txt');
+# getEnglishCount() tests word membership against it.
+ENGLISH_WORDS = loadDictionary()
+
+def getEnglishCount(message):
+    message = message.upper()
+    message = removeNonLetters(message)
+    possibleWords = message.split()
+
+    if possibleWords == []:
+        return 0.0
+
+    matches = 0
+    for word in possibleWords:
+        if word in ENGLISH_WORDS:
+            matches += 1
+
+    return float(matches) / len(possibleWords)
+
+def removeNonLetters(message):
+    lettersOnly = []
+    for symbol in message:
+        if symbol in LETTERS_AND_SPACE:
+            lettersOnly.append(symbol)
+    return ''.join(lettersOnly)
+
+def isEnglish(message, wordPercentage = 20, letterPercentage = 85):
+    wordsMatch = getEnglishCount(message) * 100 >= wordPercentage
+    numLetters = len(removeNonLetters(message))
+    messageLettersPercentage = (float(numLetters) / len(message)) * 100
+    lettersMatch = messageLettersPercentage >= letterPercentage
+    return wordsMatch and lettersMatch