2019-09-01 14:07:31 +08:00
|
|
|
"""
Sum of digits sequence
Problem 551

Let a(0), a(1), ... be an integer sequence defined by:
    a(0) = 1
    for n >= 1, a(n) is the sum of the digits of all preceding terms

The sequence starts with 1, 1, 2, 4, 8, ...
You are given a(10^6) = 31054319.

Find a(10^15)
"""
|
|
|
|
|
2021-10-12 00:33:44 +08:00
|
|
|
|
2022-10-16 01:29:42 +08:00
|
|
|
# Levels k at which a term is split as b * 10^k + c. next_term recurses from
# the largest level down to ks[0]; at ks[0] terms are computed one at a time.
ks = range(2, 20 + 1)
# base[k] == 10**k for every k from 0 up to the largest level in ks.
base = [10**k for k in range(ks[-1] + 1)]
# memo[digitsum(b)][c] -> cached jumps (diff, dn, k), kept sorted by dn
# (the number of terms skipped).
memo: dict[int, dict[int, list[tuple[int, int, int]]]] = {}
|
2019-09-01 14:07:31 +08:00
|
|
|
|
|
|
|
|
|
|
|
def next_term(a_i, k, i, n):
    """
    Calculates and updates a_i in-place to either the n-th term or the
    smallest term for which c >= 10^k when the terms are written in the form:
        a(i) = b * 10^k + c

    For any a(i), if digitsum(b) and c have the same value, the difference
    between subsequent terms will be the same until c >= 10^k. This difference
    is cached to greatly speed up the computation.

    Arguments:
    a_i -- array of digits starting from the one's place that represent
           the i-th term in the sequence
    k -- k when terms are written in the form a(i) = b*10^k + c.
         Terms are calculated until c >= 10^k or the n-th term is reached.
    i -- position along the sequence
    n -- term to calculate up to if k is large enough

    Return: a tuple of difference between ending term and starting term, and
    the number of terms calculated. ex. if starting term is a_1=1, and
    ending term is a_10=62, then (61, 9) is returned.
    """
    # ds_b - digitsum(b), i.e. the digits at index k and above
    ds_b = sum(a_i[j] for j in range(k, len(a_i)))
    # c - value of the digits below index k
    c = sum(a_i[j] * base[j] for j in range(min(len(a_i), k)))

    diff, dn = 0, 0
    max_dn = n - i

    sub_memo = memo.get(ds_b)

    if sub_memo is not None:
        jumps = sub_memo.get(c)

        if jumps:
            # find and make the largest jump without going over;
            # jumps are sorted by terms skipped, so scan from the back
            max_jump = -1
            for _k in range(len(jumps) - 1, -1, -1):
                if jumps[_k][2] <= k and jumps[_k][1] <= max_dn:
                    max_jump = _k
                    break

            if max_jump >= 0:
                diff, dn, _ = jumps[max_jump]
                # since the difference between jumps is cached, add c
                new_c = diff + c
                for j in range(min(k, len(a_i))):
                    new_c, a_i[j] = divmod(new_c, 10)
                if new_c > 0:
                    # whatever is left over carries into the higher digits
                    add(a_i, k, new_c)

        else:
            sub_memo[c] = []
    else:
        sub_memo = {c: []}
        memo[ds_b] = sub_memo

    if dn >= max_dn or c + diff >= base[k]:
        return diff, dn

    if k > ks[0]:
        while True:
            # keep doing smaller jumps
            _diff, terms_jumped = next_term(a_i, k - 1, i + dn, n)
            diff += _diff
            dn += terms_jumped

            if dn >= max_dn or c + diff >= base[k]:
                break
    else:
        # would be too small a jump, just compute sequential terms instead
        _diff, terms_jumped = compute(a_i, k, i + dn, n)
        diff += _diff
        dn += terms_jumped

    jumps = sub_memo[c]

    # keep jumps sorted by # of terms skipped
    j = 0
    while j < len(jumps):
        if jumps[j][1] > dn:
            break
        j += 1

    # cache the jump for this value digitsum(b) and c
    sub_memo[c].insert(j, (diff, dn, k))
    return diff, dn
|
|
|
|
|
|
|
|
|
|
|
|
def compute(a_i, k, i, n):
    """
    same as next_term(a_i, k, i, n) but computes terms without memoizing results.

    Terms are generated one at a time, updating a_i in-place, until either the
    n-th term is reached or the low part c (digits below index k) overflows
    past 10^k.

    Arguments:
    a_i -- array of digits starting from the one's place that represent
           the i-th term in the sequence; padded with zeros up to k digits
    k -- k when terms are written in the form a(i) = b*10^k + c
    i -- position along the sequence
    n -- term to calculate up to

    Return: a tuple of difference between ending term and starting term, and
    the number of terms calculated; (0, 0) when i >= n.
    """
    if i >= n:
        # already at (or past) the target term: no terms were calculated
        return 0, 0
    if k > len(a_i):
        a_i.extend([0] * (k - len(a_i)))

    # note: a_i -> b * 10^k + c
    #       ds_b -> digitsum(b)
    #       ds_c -> digitsum(c)
    start_i = i
    ds_b = sum(a_i[k:])
    ds_c = sum(a_i[:k])
    diff = 0

    while i < n:
        i += 1
        # the next term is the current term plus its own digit sum
        addend = ds_c + ds_b
        diff += addend
        ds_c = 0
        for j in range(k):
            addend, a_i[j] = divmod(a_i[j] + addend, 10)
            ds_c += a_i[j]

        if addend > 0:
            # c overflowed past 10^k; stop so the carry can be moved into b
            break

    if addend > 0:
        add(a_i, k, addend)
    return diff, i - start_i
|
|
|
|
|
|
|
|
|
|
|
|
def add(digits, k, addend):
    """
    adds addend to digit array given in digits
    starting at index k

    Arguments:
    digits -- array of digits starting from the one's place; updated in-place
    k -- index of the digit that the one's place of addend lines up with
    addend -- non-negative integer to add; may have more than one digit

    Carries are propagated upward from index k and the array grows as needed.
    If k is past the end of digits, no zero padding is inserted: the digits of
    addend are appended directly after the current last digit.
    """
    for j in range(k, len(digits)):
        # divmod yields the complete carry, so nothing else may be added to it
        addend, digits[j] = divmod(digits[j] + addend, 10)
        if addend == 0:
            break

    # whatever is still left extends the number with new leading digits
    while addend > 0:
        addend, digit = divmod(addend, 10)
        digits.append(digit)
|
|
|
|
|
|
|
|
|
2022-01-31 03:29:54 +08:00
|
|
|
def solution(n: int = 10**15) -> int:
    """
    returns n-th term of sequence

    Raises ValueError if n is negative.

    >>> solution(0)
    1

    >>> solution(10)
    62

    >>> solution(10**6)
    31054319

    >>> solution(10**15)
    73597483551591773
    """
    if n < 0:
        raise ValueError("n must be a non-negative integer")

    # start from a(1) = 1, stored as digits from the one's place
    digits = [1]
    i = 1
    dn = 0
    # "<" rather than "!=" so that n = 0 (nothing to jump over) terminates
    while dn < n - i:
        _, terms_jumped = next_term(digits, ks[-1], i + dn, n)
        dn += terms_jumped

    return sum(digit * 10**j for j, digit in enumerate(digits))
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: solve for the default n and print the result.
if __name__ == "__main__":
    print(f"{solution() = }")
|