# Provenance: this module was recovered from a whitespace-collapsed git patch.
#   Commit:  4b7ecb6a8134379481dd3d5035cb99a627930462
#   Author:  Caeden Perelli-Harris
#   Date:    Mon, 14 Aug 2023 09:28:52 +0100
#   Subject: Create is valid email address algorithm (#8907)
# The patch created strings/is_valid_email_address.py (the code below) and added
# the line "* [Is Valid Email Address](strings/is_valid_email_address.py)" to
# DIRECTORY.md, between "Is Srilankan Phone Number" and "Jaro Winkler".
"""
Implements an is valid email address algorithm

@ https://en.wikipedia.org/wiki/Email_address
"""

import string

# (address, expected result) pairs used by the doctest and by the demo below.
email_tests: tuple[tuple[str, bool], ...] = (
    ("simple@example.com", True),
    ("very.common@example.com", True),
    ("disposable.style.email.with+symbol@example.com", True),
    ("other-email-with-hyphen@and.subdomains.example.com", True),
    ("fully-qualified-domain@example.com", True),
    ("user.name+tag+sorting@example.com", True),
    ("x@example.com", True),
    ("example-indeed@strange-example.com", True),
    ("test/test@test.com", True),
    (
        "123456789012345678901234567890123456789012345678901234567890123@example.com",
        True,
    ),
    ("admin@mailserver1", True),
    ("example@s.example", True),
    ("Abc.example.com", False),
    ("A@b@c@example.com", False),
    ("abc@example..com", False),
    ("a(c)d,e:f;gi[j\\k]l@example.com", False),
    (
        "12345678901234567890123456789012345678901234567890123456789012345@example.com",
        False,
    ),
    ("i.like.underscores@but_its_not_allowed_in_this_part", False),
    ("", False),
    # Regression cases for defects fixed in is_valid_email_address
    ("trailing-hyphen@example-", False),
    ("paren(thesis)@example.com", False),
    ("@example.com", False),
    ("missing-domain@", False),
)

# The maximum number of octets that the local part and the domain part can have.
# Only ASCII characters are accepted by the validator, and each ASCII character
# occupies exactly one octet, so string length can stand in for octet count.
MAX_LOCAL_PART_OCTETS = 64
MAX_DOMAIN_OCTETS = 255

# Allowed character sets, built once instead of once per character examined.
_LOCAL_PART_CHARS = frozenset(
    string.ascii_letters + string.digits + ".!#$%&'*+-/=?^_`{|}~"
)
_DOMAIN_CHARS = frozenset(string.ascii_letters + string.digits + ".-")


def _has_misplaced_dot(part: str) -> bool:
    """Return True if *part* starts or ends with "." or contains ".."."""
    return part.startswith(".") or part.endswith(".") or ".." in part


def is_valid_email_address(email: str) -> bool:
    """
    Returns True if the passed email address is valid.

    The local part of the email precedes the singular @ symbol, for example
    "john.smith". The domain follows the @ symbol and is stricter than the
    local part.

    Global email checks:
     1. There can only be one @ symbol in the email address. Technically if the
        @ symbol is quoted in the local-part, then it is valid, however this
        implementation ignores "" for now.
        (See https://en.wikipedia.org/wiki/Email_address#:~:text=If%20quoted,)
        Both the local-part and the domain must also be non-empty.
     2. The local-part and the domain are limited to a certain number of octets.
        Every character accepted by checks 3 and 5 is ASCII and therefore one
        octet long, so checking the length of each string is sufficient.

    Checks for the local-part:
     3. The local-part may contain: upper and lowercase latin letters, digits 0 to 9,
        and the printable characters !#$%&'*+-/=?^_`{|}~
        (parentheses are not part of that set and are rejected).
     4. The local-part may also contain a "." in any place that is not the first or
        last character, and may not have more than one "." consecutively.

    Checks for the domain:
     5. The domain may contain: upper and lowercase latin letters and digits 0 to 9
     6. Hyphen "-", provided that it is not the first or last character
     7. The domain may also contain a "." in any place that is not the first or
        last character, and may not have more than one "." consecutively.

    >>> for email, valid in email_tests:
    ...     assert is_valid_email_address(email) == valid
    >>> is_valid_email_address("simple@example.com")
    True
    >>> is_valid_email_address("user@example-")
    False
    >>> is_valid_email_address("@example.com")
    False
    """

    # (1.) Make sure that there is only one @ symbol in the email address
    if email.count("@") != 1:
        return False

    local_part, _, domain = email.partition("@")
    # (1.) An address needs something on both sides of the @ symbol
    if not local_part or not domain:
        return False

    # (2.) Check octet length of the local part and domain
    if len(local_part) > MAX_LOCAL_PART_OCTETS or len(domain) > MAX_DOMAIN_OCTETS:
        return False

    # (3.) Validate the characters in the local-part
    if not _LOCAL_PART_CHARS.issuperset(local_part):
        return False

    # (4.) Validate the placement of "." characters in the local-part
    if _has_misplaced_dot(local_part):
        return False

    # (5.) Validate the characters in the domain
    if not _DOMAIN_CHARS.issuperset(domain):
        return False

    # (6.) Validate the placement of "-" characters: neither first nor last
    if domain.startswith("-") or domain.endswith("-"):
        return False

    # (7.) Validate the placement of "." characters
    return not _has_misplaced_dot(domain)


if __name__ == "__main__":
    import doctest

    doctest.testmod()

    for email, valid in email_tests:
        is_valid = is_valid_email_address(email)
        assert is_valid == valid, f"{email} is {is_valid}"
        print(f"Email address {email} is {'not ' if not is_valid else ''}valid")