mirror of
https://hub.njuu.cf/TheAlgorithms/Python.git
synced 2023-10-11 13:06:12 +08:00
4d0c830d2c
* ci(pre-commit): Add ``flake8-builtins`` additional dependency to ``pre-commit`` (#7104) * refactor: Fix ``flake8-builtins`` (#7104) * fix(lru_cache): Fix naming conventions in docstrings (#7104) * ci(pre-commit): Order additional dependencies alphabetically (#7104) * fix(lfu_cache): Correct function name in docstring (#7104) * Update strings/snake_case_to_camel_pascal_case.py Co-authored-by: Christian Clauss <cclauss@me.com> * Update data_structures/stacks/next_greater_element.py Co-authored-by: Christian Clauss <cclauss@me.com> * Update digital_image_processing/index_calculation.py Co-authored-by: Christian Clauss <cclauss@me.com> * Update graphs/prim.py Co-authored-by: Christian Clauss <cclauss@me.com> * Update hashes/djb2.py Co-authored-by: Christian Clauss <cclauss@me.com> * refactor: Rename `_builtin` to `builtin_` ( #7104) * fix: Rename all instances (#7104) * refactor: Update variable names (#7104) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * ci: Create ``tox.ini`` and ignore ``A003`` (#7123) * revert: Remove function name changes (#7104) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Rename tox.ini to .flake8 * Update data_structures/heap/heap.py Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com> * refactor: Rename `next_` to `next_item` (#7104) * ci(pre-commit): Add `flake8` plugin `flake8-bugbear` (#7127) * refactor: Follow `flake8-bugbear` plugin (#7127) * fix: Correct `knapsack` code (#7127) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Co-authored-by: Christian Clauss <cclauss@me.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com>
177 lines
6.8 KiB
Python
177 lines
6.8 KiB
Python
"""
|
||
https://en.wikipedia.org/wiki/Burrows%E2%80%93Wheeler_transform

The Burrows–Wheeler transform (BWT, also called block-sorting compression)
rearranges a character string into runs of similar characters. This is useful
for compression, since it tends to be easy to compress a string that has runs
of repeated characters by techniques such as move-to-front transform and
run-length encoding. More importantly, the transformation is reversible,
without needing to store any additional data except the position of the first
original character. The BWT is thus a "free" method of improving the efficiency
of text compression algorithms, costing only some extra computation.
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
from typing import TypedDict
|
||
|
||
|
||
class BWTTransformDict(TypedDict):
    """Shape of the dictionary returned by ``bwt_transform``."""

    # String made of the last character of every sorted rotation.
    bwt_string: str
    # 0-based position of the original string in the sorted rotations list.
    idx_original_string: int
|
||
|
||
|
||
def all_rotations(s: str) -> list[str]:
    """
    Build every left rotation of ``s``, starting with ``s`` itself.

    :param s: The string that will be rotated len(s) times.
    :return: A list with the rotations (empty when ``s`` is empty).
    :raises TypeError: If s is not an instance of str.
    Examples:

    >>> all_rotations("^BANANA|") # doctest: +NORMALIZE_WHITESPACE
    ['^BANANA|', 'BANANA|^', 'ANANA|^B', 'NANA|^BA', 'ANA|^BAN', 'NA|^BANA',
    'A|^BANAN', '|^BANANA']
    >>> all_rotations("a_asa_da_casa") # doctest: +NORMALIZE_WHITESPACE
    ['a_asa_da_casa', '_asa_da_casaa', 'asa_da_casaa_', 'sa_da_casaa_a',
    'a_da_casaa_as', '_da_casaa_asa', 'da_casaa_asa_', 'a_casaa_asa_d',
    '_casaa_asa_da', 'casaa_asa_da_', 'asaa_asa_da_c', 'saa_asa_da_ca',
    'aa_asa_da_cas']
    >>> all_rotations("panamabanana") # doctest: +NORMALIZE_WHITESPACE
    ['panamabanana', 'anamabananap', 'namabananapa', 'amabananapan',
    'mabananapana', 'abananapanam', 'bananapanama', 'ananapanamab',
    'nanapanamaba', 'anapanamaban', 'napanamabana', 'apanamabanan']
    >>> all_rotations(5)
    Traceback (most recent call last):
        ...
    TypeError: The parameter s type must be str.
    """
    if not isinstance(s, str):
        raise TypeError("The parameter s type must be str.")

    # Every rotation of s is a window of width len(s) over s + s.
    width = len(s)
    doubled = s + s
    return [doubled[start : start + width] for start in range(width)]
|
||
|
||
|
||
def bwt_transform(s: str) -> BWTTransformDict:
    """
    Compute the Burrows-Wheeler transform of ``s``.

    :param s: The string that will be used at bwt algorithm
    :return: the string composed of the last char of each row of the ordered
        rotations and the index of the original string at ordered rotations list
    :raises TypeError: If the s parameter type is not str
    :raises ValueError: If the s parameter is empty
    Examples:

    >>> bwt_transform("^BANANA")
    {'bwt_string': 'BNN^AAA', 'idx_original_string': 6}
    >>> bwt_transform("a_asa_da_casa")
    {'bwt_string': 'aaaadss_c__aa', 'idx_original_string': 3}
    >>> bwt_transform("panamabanana")
    {'bwt_string': 'mnpbnnaaaaaa', 'idx_original_string': 11}
    >>> bwt_transform(4)
    Traceback (most recent call last):
        ...
    TypeError: The parameter s type must be str.
    >>> bwt_transform('')
    Traceback (most recent call last):
        ...
    ValueError: The parameter s must not be empty.
    """
    if not isinstance(s, str):
        raise TypeError("The parameter s type must be str.")
    if not s:
        raise ValueError("The parameter s must not be empty.")

    # Rotations in lexicographic order form the rows of the BWT matrix.
    ordered_rotations = sorted(all_rotations(s))
    # The transform is the last column of that matrix, read top to bottom.
    last_column = "".join(rotation[-1] for rotation in ordered_rotations)
    transformed: BWTTransformDict = {
        "bwt_string": last_column,
        "idx_original_string": ordered_rotations.index(s),
    }
    return transformed
|
||
|
||
|
||
def reverse_bwt(bwt_string: str, idx_original_string: int) -> str:
    """
    Rebuild the original string from its Burrows-Wheeler transform.

    :param bwt_string: The string returned from bwt algorithm execution
    :param idx_original_string: A 0-based index of the string that was used to
        generate bwt_string at ordered rotations list
    :return: The string used to generate bwt_string when bwt was executed
    :raises TypeError: If the bwt_string parameter type is not str
    :raises ValueError: If the bwt_string parameter is empty
    :raises TypeError: If the idx_original_string type is not int or if not
        possible to cast it to int
    :raises ValueError: If the idx_original_string value is lower than 0 or
        greater than len(bwt_string) - 1

    >>> reverse_bwt("BNN^AAA", 6)
    '^BANANA'
    >>> reverse_bwt("aaaadss_c__aa", 3)
    'a_asa_da_casa'
    >>> reverse_bwt("mnpbnnaaaaaa", 11)
    'panamabanana'
    >>> reverse_bwt(4, 11)
    Traceback (most recent call last):
        ...
    TypeError: The parameter bwt_string type must be str.
    >>> reverse_bwt("", 11)
    Traceback (most recent call last):
        ...
    ValueError: The parameter bwt_string must not be empty.
    >>> reverse_bwt("mnpbnnaaaaaa", "asd") # doctest: +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
        ...
    TypeError: The parameter idx_original_string type must be int or passive
    of cast to int.
    >>> reverse_bwt("mnpbnnaaaaaa", None) # doctest: +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
        ...
    TypeError: The parameter idx_original_string type must be int or passive
    of cast to int.
    >>> reverse_bwt("mnpbnnaaaaaa", -1)
    Traceback (most recent call last):
        ...
    ValueError: The parameter idx_original_string must not be lower than 0.
    >>> reverse_bwt("mnpbnnaaaaaa", 12) # doctest: +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
        ...
    ValueError: The parameter idx_original_string must be lower than
    len(bwt_string).
    >>> reverse_bwt("mnpbnnaaaaaa", 11.0)
    'panamabanana'
    >>> reverse_bwt("mnpbnnaaaaaa", 11.4)
    'panamabanana'
    """
    if not isinstance(bwt_string, str):
        raise TypeError("The parameter bwt_string type must be str.")
    if not bwt_string:
        raise ValueError("The parameter bwt_string must not be empty.")
    try:
        idx_original_string = int(idx_original_string)
    except (TypeError, ValueError) as err:
        # int() raises ValueError for unparsable strings but TypeError for
        # objects such as None or lists; report both with the documented message.
        raise TypeError(
            "The parameter idx_original_string type must be int or passive"
            " of cast to int."
        ) from err
    if idx_original_string < 0:
        raise ValueError("The parameter idx_original_string must not be lower than 0.")
    if idx_original_string >= len(bwt_string):
        raise ValueError(
            "The parameter idx_original_string must be lower than len(bwt_string)."
        )

    # Rebuild the sorted rotation table one column at a time: prepend the BWT
    # column to every partial row, then re-sort. After len(bwt_string) passes
    # each row is a complete rotation.
    ordered_rotations = [""] * len(bwt_string)
    for _ in range(len(bwt_string)):
        ordered_rotations = sorted(
            char + rotation for char, rotation in zip(bwt_string, ordered_rotations)
        )
    return ordered_rotations[idx_original_string]
|
||
|
||
|
||
if __name__ == "__main__":
    # Interactive demo: transform the user's string, then invert the transform.
    s = input("Provide a string that I will generate its BWT transform: ").strip()
    result = bwt_transform(s)
    transform_report = (
        f"Burrows Wheeler transform for string '{s}' results "
        f"in '{result['bwt_string']}'"
    )
    print(transform_report)

    # The result keys match reverse_bwt's parameter names, so unpack directly.
    original_string = reverse_bwt(**result)
    reverse_report = (
        f"Reversing Burrows Wheeler transform for entry '{result['bwt_string']}' "
        f"we get original string '{original_string}'"
    )
    print(reverse_report)
|