2017-11-01 15:04:55 +08:00
|
|
|
"""
|
2020-06-16 16:09:19 +08:00
|
|
|
LCS Problem Statement: Given two sequences, find the length of the longest subsequence
|
|
|
|
present in both of them. A subsequence is a sequence that appears in the same relative
|
|
|
|
order, but not necessarily contiguous.
|
2017-11-01 15:04:55 +08:00
|
|
|
Example: "abc" and "abg" are subsequences of "abcdefgh".
|
|
|
|
"""
|
2017-11-25 17:23:50 +08:00
|
|
|
|
|
|
|
|
2019-08-19 15:40:36 +08:00
|
|
|
def longest_common_subsequence(x: str, y: str):
    """
    Find the longest common subsequence (LCS) of two strings.

    Returns both the length of the LCS and one subsequence of that length.
    A table is filled so that ``table[i][j]`` holds the LCS length of
    ``x[:i]`` and ``y[:j]``; the table is then walked backwards from
    ``table[m][n]`` to recover the characters of the subsequence.

    Parameters
    ----------
    x: str, one of the strings
    y: str, the other string

    Returns
    -------
    length: int, the length of the longest subsequence. Also equal to len(seq)
    seq: str, the subsequence found

    Raises
    ------
    AssertionError: if x or y is None (only while assertions are enabled)

    >>> longest_common_subsequence("programming", "gaming")
    (6, 'gaming')
    >>> longest_common_subsequence("physics", "smartphone")
    (2, 'ph')
    >>> longest_common_subsequence("computer", "food")
    (1, 'o')
    >>> longest_common_subsequence("", "abc")
    (0, '')
    """
    assert x is not None
    assert y is not None

    m = len(x)
    n = len(y)

    # table[i][j] = LCS length of x[:i] and y[:j]; row 0 and column 0 stay 0
    # because an empty prefix has nothing in common with anything.
    table = [[0] * (n + 1) for _ in range(m + 1)]

    for i in range(1, m + 1):
        for j in range(1, n + 1):
            match = 1 if x[i - 1] == y[j - 1] else 0
            table[i][j] = max(
                table[i - 1][j],
                table[i][j - 1],
                table[i - 1][j - 1] + match,
            )

    # Walk back from the bottom-right corner. Characters are discovered last
    # to first, so they are appended to a list and reversed once at the end
    # instead of being prepended to a string on every step.
    picked = []
    i, j = m, n
    while i > 0 and j > 0:
        match = 1 if x[i - 1] == y[j - 1] else 0

        if table[i][j] == table[i - 1][j - 1] + match:
            # The diagonal move explains this cell; it contributes a
            # character only when the two current characters are equal.
            if match == 1:
                picked.append(x[i - 1])
            i -= 1
            j -= 1
        elif table[i][j] == table[i - 1][j]:
            i -= 1
        else:
            j -= 1

    picked.reverse()
    return table[m][n], "".join(picked)
|
2017-11-01 15:04:55 +08:00
|
|
|
|
2019-08-19 15:40:36 +08:00
|
|
|
|
2019-10-05 13:14:13 +08:00
|
|
|
if __name__ == "__main__":
    import doctest

    # Small demonstration with a known answer.
    a = "AGGTAB"
    b = "GXTXAYB"
    expected_ln = 4
    expected_subseq = "GTAB"

    ln, subseq = longest_common_subsequence(a, b)
    print("len =", ln, ", sub-sequence =", subseq)

    # Sanity-check the demo result against the known answer.
    assert (ln, subseq) == (expected_ln, expected_subseq), (
        f"expected {(expected_ln, expected_subseq)}, got {(ln, subseq)}"
    )

    # Run the examples embedded in the docstrings of this module.
    doctest.testmod()
|