2017-10-10 05:26:27 +08:00
|
|
|
import random
|
2018-10-18 05:28:57 +08:00
|
|
|
|
2017-10-10 05:26:27 +08:00
|
|
|
"""
|
|
|
|
A Python implementation of the quickselect algorithm, which efficiently finds the value that would appear at a given index of a list if the list were sorted, without having to sort the whole list first
|
|
|
|
https://en.wikipedia.org/wiki/Quickselect
|
|
|
|
"""
|
2019-10-05 13:14:13 +08:00
|
|
|
|
|
|
|
|
2017-10-10 05:26:27 +08:00
|
|
|
def _partition(data, pivot):
|
|
|
|
"""
|
|
|
|
Three way partition the data into smaller, equal and greater lists,
|
|
|
|
in relationship to the pivot
|
|
|
|
:param data: The data to be sorted (a list)
|
|
|
|
:param pivot: The value to partition the data on
|
|
|
|
:return: Three list: smaller, equal and greater
|
|
|
|
"""
|
|
|
|
less, equal, greater = [], [], []
|
|
|
|
for element in data:
|
Interpolation search - fix endless loop bug, divide 0 bug and update description (#793)
* fix endless loop bug, divide 0 bug and update description
fix an endless bug, for example, if collection = [10,30,40,45,50,66,77,93], item = 67.
fix divide 0 bug, when right=left it is not OK to point = left + ((item - sorted_collection[left]) * (right - left)) // (sorted_collection[right] - sorted_collection[left])
update 'sorted' to 'ascending sorted' in description to avoid confusion
* delete swap files
* delete 'address' and add input validation
2019-05-18 10:59:12 +08:00
|
|
|
if element < pivot:
|
2017-10-10 05:26:27 +08:00
|
|
|
less.append(element)
|
Interpolation search - fix endless loop bug, divide 0 bug and update description (#793)
* fix endless loop bug, divide 0 bug and update description
fix an endless bug, for example, if collection = [10,30,40,45,50,66,77,93], item = 67.
fix divide 0 bug, when right=left it is not OK to point = left + ((item - sorted_collection[left]) * (right - left)) // (sorted_collection[right] - sorted_collection[left])
update 'sorted' to 'ascending sorted' in description to avoid confusion
* delete swap files
* delete 'address' and add input validation
2019-05-18 10:59:12 +08:00
|
|
|
elif element > pivot:
|
2017-10-10 05:26:27 +08:00
|
|
|
greater.append(element)
|
|
|
|
else:
|
|
|
|
equal.append(element)
|
|
|
|
return less, equal, greater
|
2019-10-05 13:14:13 +08:00
|
|
|
|
|
|
|
|
2018-10-18 05:28:57 +08:00
|
|
|
def quickSelect(list, k):
    """
    Return the element that would sit at index ``k`` if ``list`` were sorted.

    Use ``k = len(list) // 2`` to obtain the median (the index that value
    would have once the list is sorted).

    :param list: The sequence to select from; it is only read, never sorted
        or modified in place
    :param k: Zero-based index into the sorted order
    :return: The selected element, or ``None`` when ``k`` is out of range
        (``k < 0`` or ``k >= len(list)``), which includes any empty input
    """
    # The parameter name shadows the builtin but is part of the public
    # signature, so it is kept and aliased to a neutral local name.
    items = list

    # invalid input
    if k < 0 or k >= len(items):
        return None

    # Invariant: 0 <= k < len(items), so a pivot can always be drawn and the
    # answer is always inside the partition that is kept.
    while True:
        pivot = random.choice(items)
        smaller, equal, larger = _partition(items, pivot)
        m = len(smaller)
        count = len(equal)

        if k < m:
            # must be in smaller
            items = smaller
        elif k < m + count:
            # k falls inside the run of elements equal to the pivot
            return pivot
        else:
            # must be in larger; re-base k past the discarded elements
            k -= m + count
            items = larger
|