2017-10-10 05:26:27 +08:00
|
|
|
"""
|
2019-10-30 23:10:30 +08:00
|
|
|
A Python implementation of the quick select algorithm, which is efficient for
|
|
|
|
calculating the value that would appear in the index of a list if it would be
|
|
|
|
sorted, even if it is not already sorted
|
2017-10-10 05:26:27 +08:00
|
|
|
https://en.wikipedia.org/wiki/Quickselect
|
|
|
|
"""
|
2019-10-30 23:10:30 +08:00
|
|
|
import random
|
2019-10-05 13:14:13 +08:00
|
|
|
|
|
|
|
|
2019-10-30 23:10:30 +08:00
|
|
|
def _partition(data: list, pivot) -> tuple:
    """
    Three-way partition the data into smaller, equal and greater lists,
    in relation to the pivot.

    The input is traversed once and is not modified; elements keep their
    original relative order inside each returned list.

    :param data: The data to be partitioned (a list)
    :param pivot: The value to partition the data on
    :return: Three lists: smaller, equal and greater
    """
    less, equal, greater = [], [], []
    for element in data:
        if element < pivot:
            less.append(element)
        elif element > pivot:
            greater.append(element)
        else:
            # Neither strictly below nor strictly above the pivot.
            equal.append(element)
    return less, equal, greater
|
2019-10-05 13:14:13 +08:00
|
|
|
|
|
|
|
|
2019-10-30 23:10:30 +08:00
|
|
|
def quick_select(items: list, index: int):
    """
    Return the value that would sit at position ``index`` if ``items`` were
    sorted in ascending order, without fully sorting it.

    Use ``index = len(items) // 2`` when trying to find the median.

    :param items: The values to select from; the list itself is not modified
    :param index: Zero-based position in the sorted order
    :return: The selected value, or None when ``index`` is out of range
        (negative, or not smaller than ``len(items)``)

    >>> quick_select([2, 4, 5, 7, 899, 54, 32], 5)
    54
    >>> quick_select([2, 4, 5, 7, 899, 54, 32], 1)
    4
    >>> quick_select([5, 4, 3, 2], 2)
    4
    >>> quick_select([3, 5, 7, 10, 2, 12], 3)
    7
    >>> quick_select([1, 2], 5) is None
    True
    """
    # invalid input
    if index >= len(items) or index < 0:
        return None

    # Narrow the candidates in a loop instead of recursing, so an unlucky run
    # of pivots on a long list cannot exhaust the interpreter's call stack.
    candidates = items
    while True:
        pivot = random.choice(candidates)
        smaller, equal, larger = _partition(candidates, pivot)
        first_equal = len(smaller)
        first_larger = first_equal + len(equal)

        if index < first_equal:
            # must be in smaller
            candidates = smaller
        elif index < first_larger:
            # index falls on the pivot (or one of its duplicates)
            return pivot
        else:
            # must be in larger; re-base index onto that sub-list
            candidates = larger
            index -= first_larger
|