mirror of
https://hub.njuu.cf/TheAlgorithms/Python.git
synced 2023-10-11 13:06:12 +08:00
Adding doctests into LDA algorithm (#1621)
* Adding doctests into <gaussian_distribution> function
* Adding doctests into <y_generator> function
* Adding doctests into <calculate_mean> function
* Adding doctests into <calculate_probabilities> function
* Adding doctests into <calculate_variance> function
* Adding doctests into <predict_y_values> function
* Adding doctests into <accuracy> function
* fixup! Format Python code with psf/black push
* Update convex_hull.py
* Update convex_hull.py
This commit is contained in:
parent
26b0803319
commit
43905efe29
@ -1,5 +1,3 @@
|
||||
from numbers import Number
|
||||
|
||||
"""
|
||||
The convex hull problem is the problem of finding all the vertices of the convex polygon, P, of
|
||||
a set of points in a plane such that all the points are either on the vertices of P or
|
||||
@ -40,22 +38,11 @@ class Point:
|
||||
>>> Point("pi", "e")
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: x and y must be both numeric types but got <class 'str'>, <class 'str'> instead
|
||||
ValueError: could not convert string to float: 'pi'
|
||||
"""
|
||||
|
||||
def __init__(self, x, y):
|
||||
if not (isinstance(x, Number) and isinstance(y, Number)):
|
||||
try:
|
||||
x, y = float(x), float(y)
|
||||
except ValueError as e:
|
||||
e.args = (
|
||||
"x and y must be both numeric types "
|
||||
f"but got {type(x)}, {type(y)} instead"
|
||||
)
|
||||
raise
|
||||
|
||||
self.x = x
|
||||
self.y = y
|
||||
self.x, self.y = float(x), float(y)
|
||||
|
||||
def __eq__(self, other):
|
||||
return self.x == other.x and self.y == other.y
|
||||
@ -112,13 +99,7 @@ def _construct_points(list_of_tuples):
|
||||
Examples
|
||||
-------
|
||||
>>> _construct_points([[1, 1], [2, -1], [0.3, 4]])
|
||||
[(1, 1), (2, -1), (0.3, 4)]
|
||||
>>> _construct_points(([1, 1], [2, -1], [0.3, 4]))
|
||||
[(1, 1), (2, -1), (0.3, 4)]
|
||||
>>> _construct_points([(1, 1), (2, -1), (0.3, 4)])
|
||||
[(1, 1), (2, -1), (0.3, 4)]
|
||||
>>> _construct_points([[1, 1], (2, -1), [0.3, 4]])
|
||||
[(1, 1), (2, -1), (0.3, 4)]
|
||||
[(1.0, 1.0), (2.0, -1.0), (0.3, 4.0)]
|
||||
>>> _construct_points([1, 2])
|
||||
Ignoring deformed point 1. All points must have at least 2 coordinates.
|
||||
Ignoring deformed point 2. All points must have at least 2 coordinates.
|
||||
@ -168,11 +149,11 @@ def _validate_input(points):
|
||||
Examples
|
||||
-------
|
||||
>>> _validate_input([[1, 2]])
|
||||
[(1, 2)]
|
||||
[(1.0, 2.0)]
|
||||
>>> _validate_input([(1, 2)])
|
||||
[(1, 2)]
|
||||
[(1.0, 2.0)]
|
||||
>>> _validate_input([Point(2, 1), Point(-1, 2)])
|
||||
[(2, 1), (-1, 2)]
|
||||
[(2.0, 1.0), (-1.0, 2.0)]
|
||||
>>> _validate_input([])
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
@ -200,9 +181,9 @@ def _validate_input(points):
|
||||
)
|
||||
elif not hasattr(points, "__iter__"):
|
||||
raise ValueError(
|
||||
"Expecting an iterable object " f"but got an non-iterable type {points}"
|
||||
f"Expecting an iterable object but got an non-iterable type {points}"
|
||||
)
|
||||
except TypeError as e:
|
||||
except TypeError:
|
||||
print("Expecting an iterable of type Point, list or tuple.")
|
||||
raise
|
||||
|
||||
@ -233,11 +214,11 @@ def _det(a, b, c):
|
||||
Examples
|
||||
----------
|
||||
>>> _det(Point(1, 1), Point(1, 2), Point(1, 5))
|
||||
0
|
||||
0.0
|
||||
>>> _det(Point(0, 0), Point(10, 0), Point(0, 10))
|
||||
100
|
||||
100.0
|
||||
>>> _det(Point(0, 0), Point(10, 0), Point(0, -10))
|
||||
-100
|
||||
-100.0
|
||||
"""
|
||||
|
||||
det = (a.x * b.y + b.x * c.y + c.x * a.y) - (a.y * b.x + b.y * c.x + c.y * a.x)
|
||||
@ -271,13 +252,13 @@ def convex_hull_bf(points):
|
||||
Examples
|
||||
---------
|
||||
>>> convex_hull_bf([[0, 0], [1, 0], [10, 1]])
|
||||
[(0, 0), (1, 0), (10, 1)]
|
||||
[(0.0, 0.0), (1.0, 0.0), (10.0, 1.0)]
|
||||
>>> convex_hull_bf([[0, 0], [1, 0], [10, 0]])
|
||||
[(0, 0), (10, 0)]
|
||||
[(0.0, 0.0), (10.0, 0.0)]
|
||||
>>> convex_hull_bf([[-1, 1],[-1, -1], [0, 0], [0.5, 0.5], [1, -1], [1, 1], [-0.75, 1]])
|
||||
[(-1, -1), (-1, 1), (1, -1), (1, 1)]
|
||||
[(-1.0, -1.0), (-1.0, 1.0), (1.0, -1.0), (1.0, 1.0)]
|
||||
>>> convex_hull_bf([(0, 3), (2, 2), (1, 1), (2, 1), (3, 0), (0, 0), (3, 3), (2, -1), (2, -4), (1, -3)])
|
||||
[(0, 0), (0, 3), (1, -3), (2, -4), (3, 0), (3, 3)]
|
||||
[(0.0, 0.0), (0.0, 3.0), (1.0, -3.0), (2.0, -4.0), (3.0, 0.0), (3.0, 3.0)]
|
||||
"""
|
||||
|
||||
points = sorted(_validate_input(points))
|
||||
@ -336,13 +317,13 @@ def convex_hull_recursive(points):
|
||||
Examples
|
||||
---------
|
||||
>>> convex_hull_recursive([[0, 0], [1, 0], [10, 1]])
|
||||
[(0, 0), (1, 0), (10, 1)]
|
||||
[(0.0, 0.0), (1.0, 0.0), (10.0, 1.0)]
|
||||
>>> convex_hull_recursive([[0, 0], [1, 0], [10, 0]])
|
||||
[(0, 0), (10, 0)]
|
||||
[(0.0, 0.0), (10.0, 0.0)]
|
||||
>>> convex_hull_recursive([[-1, 1],[-1, -1], [0, 0], [0.5, 0.5], [1, -1], [1, 1], [-0.75, 1]])
|
||||
[(-1, -1), (-1, 1), (1, -1), (1, 1)]
|
||||
[(-1.0, -1.0), (-1.0, 1.0), (1.0, -1.0), (1.0, 1.0)]
|
||||
>>> convex_hull_recursive([(0, 3), (2, 2), (1, 1), (2, 1), (3, 0), (0, 0), (3, 3), (2, -1), (2, -4), (1, -3)])
|
||||
[(0, 0), (0, 3), (1, -3), (2, -4), (3, 0), (3, 3)]
|
||||
[(0.0, 0.0), (0.0, 3.0), (1.0, -3.0), (2.0, -4.0), (3.0, 0.0), (3.0, 3.0)]
|
||||
|
||||
"""
|
||||
points = sorted(_validate_input(points))
|
||||
|
@ -45,6 +45,7 @@
|
||||
from math import log
|
||||
from os import name, system
|
||||
from random import gauss
|
||||
from random import seed
|
||||
|
||||
|
||||
# Make a training dataset drawn from a gaussian distribution
|
||||
@ -56,7 +57,15 @@ def gaussian_distribution(mean: float, std_dev: float, instance_count: int) -> l
|
||||
:param instance_count: instance number of class
|
||||
:return: a list containing generated values based-on given mean, std_dev and
|
||||
instance_count
|
||||
|
||||
>>> gaussian_distribution(5.0, 1.0, 20) # doctest: +NORMALIZE_WHITESPACE
|
||||
[6.288184753155463, 6.4494456086997705, 5.066335808938262, 4.235456349028368,
|
||||
3.9078267848958586, 5.031334516831717, 3.977896829989127, 3.56317055489747,
|
||||
5.199311976483754, 5.133374604658605, 5.546468300338232, 4.086029056264687,
|
||||
5.005005283626573, 4.935258239627312, 3.494170998739258, 5.537997178661033,
|
||||
5.320711100998849, 7.3891120432406865, 5.202969177309964, 4.855297691835079]
|
||||
"""
|
||||
seed(1)
|
||||
return [gauss(mean, std_dev) for _ in range(instance_count)]
|
||||
|
||||
|
||||
@ -67,6 +76,14 @@ def y_generator(class_count: int, instance_count: list) -> list:
|
||||
:param class_count: Number of classes(data groupings) in dataset
|
||||
:param instance_count: number of instances in class
|
||||
:return: corresponding values for data groupings in dataset
|
||||
|
||||
>>> y_generator(1, [10])
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
|
||||
>>> y_generator(2, [5, 10])
|
||||
[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
|
||||
>>> y_generator(4, [10, 5, 15, 20]) # doctest: +NORMALIZE_WHITESPACE
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]
|
||||
"""
|
||||
|
||||
return [k for k in range(class_count) for _ in range(instance_count[k])]
|
||||
@ -79,6 +96,10 @@ def calculate_mean(instance_count: int, items: list) -> float:
|
||||
:param instance_count: Number of instances in class
|
||||
:param items: items that related to specific class(data grouping)
|
||||
:return: calculated actual mean of considered class
|
||||
|
||||
>>> items = gaussian_distribution(5.0, 1.0, 20)
|
||||
>>> calculate_mean(len(items), items)
|
||||
5.011267842911003
|
||||
"""
|
||||
# the sum of all items divided by number of instances
|
||||
return sum(items) / instance_count
|
||||
@ -91,6 +112,11 @@ def calculate_probabilities(instance_count: int, total_count: int) -> float:
|
||||
:param instance_count: number of instances in class
|
||||
:param total_count: the number of all instances
|
||||
:return: value of probability for considered class
|
||||
|
||||
>>> calculate_probabilities(20, 60)
|
||||
0.3333333333333333
|
||||
>>> calculate_probabilities(30, 100)
|
||||
0.3
|
||||
"""
|
||||
# number of instances in specific class divided by number of all instances
|
||||
return instance_count / total_count
|
||||
@ -104,6 +130,12 @@ def calculate_variance(items: list, means: list, total_count: int) -> float:
|
||||
:param means: a list containing real mean values of each class
|
||||
:param total_count: the number of all instances
|
||||
:return: calculated variance for considered dataset
|
||||
|
||||
>>> items = gaussian_distribution(5.0, 1.0, 20)
|
||||
>>> means = [5.011267842911003]
|
||||
>>> total_count = 20
|
||||
>>> calculate_variance([items], means, total_count)
|
||||
0.9618530973487491
|
||||
"""
|
||||
squared_diff = [] # An empty list to store all squared differences
|
||||
# iterate over number of elements in items
|
||||
@ -129,6 +161,36 @@ def predict_y_values(
|
||||
:param variance: calculated value of variance by calculate_variance function
|
||||
:param probabilities: a list containing all probabilities of classes
|
||||
:return: a list containing predicted Y values
|
||||
|
||||
>>> x_items = [[6.288184753155463, 6.4494456086997705, 5.066335808938262,
|
||||
... 4.235456349028368, 3.9078267848958586, 5.031334516831717,
|
||||
... 3.977896829989127, 3.56317055489747, 5.199311976483754,
|
||||
... 5.133374604658605, 5.546468300338232, 4.086029056264687,
|
||||
... 5.005005283626573, 4.935258239627312, 3.494170998739258,
|
||||
... 5.537997178661033, 5.320711100998849, 7.3891120432406865,
|
||||
... 5.202969177309964, 4.855297691835079], [11.288184753155463,
|
||||
... 11.44944560869977, 10.066335808938263, 9.235456349028368,
|
||||
... 8.907826784895859, 10.031334516831716, 8.977896829989128,
|
||||
... 8.56317055489747, 10.199311976483754, 10.133374604658606,
|
||||
... 10.546468300338232, 9.086029056264687, 10.005005283626572,
|
||||
... 9.935258239627313, 8.494170998739259, 10.537997178661033,
|
||||
... 10.320711100998848, 12.389112043240686, 10.202969177309964,
|
||||
... 9.85529769183508], [16.288184753155463, 16.449445608699772,
|
||||
... 15.066335808938263, 14.235456349028368, 13.907826784895859,
|
||||
... 15.031334516831716, 13.977896829989128, 13.56317055489747,
|
||||
... 15.199311976483754, 15.133374604658606, 15.546468300338232,
|
||||
... 14.086029056264687, 15.005005283626572, 14.935258239627313,
|
||||
... 13.494170998739259, 15.537997178661033, 15.320711100998848,
|
||||
... 17.389112043240686, 15.202969177309964, 14.85529769183508]]
|
||||
|
||||
>>> means = [5.011267842911003, 10.011267842911003, 15.011267842911002]
|
||||
>>> variance = 0.9618530973487494
|
||||
>>> probabilities = [0.3333333333333333, 0.3333333333333333, 0.3333333333333333]
|
||||
>>> predict_y_values(x_items, means, variance, probabilities) # doctest: +NORMALIZE_WHITESPACE
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2]
|
||||
|
||||
"""
|
||||
# An empty list to store generated discriminant values of all items in dataset for
|
||||
# each class
|
||||
@ -148,7 +210,7 @@ def predict_y_values(
|
||||
)
|
||||
# appending discriminant values of each item to 'results' list
|
||||
results.append(temp)
|
||||
print("Generated Discriminants: \n", results)
|
||||
|
||||
return [l.index(max(l)) for l in results]
|
||||
|
||||
|
||||
@ -161,6 +223,20 @@ def accuracy(actual_y: list, predicted_y: list) -> float:
|
||||
:param predicted_y: a list containing predicted Y values generated by
|
||||
'predict_y_values' function
|
||||
:return: percentage of accuracy
|
||||
|
||||
>>> actual_y = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
|
||||
... 1, 1 ,1 ,1 ,1 ,1 ,1]
|
||||
>>> predicted_y = [0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0,
|
||||
... 0, 0, 1, 1, 1, 0, 1, 1, 1]
|
||||
>>> accuracy(actual_y, predicted_y)
|
||||
50.0
|
||||
|
||||
>>> actual_y = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
|
||||
... 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
|
||||
>>> predicted_y = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
|
||||
... 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
|
||||
>>> accuracy(actual_y, predicted_y)
|
||||
100.0
|
||||
"""
|
||||
# iterate over one element of each list at a time (zip mode)
|
||||
# prediction is correct if actual Y value equals to predicted Y value
|
||||
|
Loading…
Reference in New Issue
Block a user