Local Weighted Learning (#5615)
* Local Weighted Learning Added * Delete LWL directory * Local Weighted Learning Added * local weighted learning added * Delete LWL directory * Delete local_weighted_learning.py * rephrased code added * local weight learning updated * local weight learning updated * Updated dir * updated codespell * import modification * Doctests added * doctests updated * lcl updated * doctests updated * doctest values updated
# Locally Weighted Linear Regression

It is a non-parametric ML algorithm that does not learn a fixed set of parameters the way **linear regression** does. \
So what is *linear regression*? \
**Linear regression** is a supervised learning algorithm used for computing linear relationships between input (X) and output (Y).
### Terminology Involved

number_of_features (i) = number of features involved. \
number_of_training_examples (m) = number of training examples. \
output_sequence (y) = output sequence. \
$\theta^T x$ = predicted point. \
J($\theta$) = cost function of point.
The steps involved in ordinary linear regression are:

Training phase: compute $\theta$ to minimize the cost \
J($\theta$) = $\sum_{i=1}^m (\theta^T x^i - y^i)^2$

Predict output: for a given query point x, \
return $\theta^T x$

<img src="https://miro.medium.com/max/700/1*FZsLp8yTULf77qrp0Qd91g.png" alt="Linear Regression">
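As a rough illustration, here is a minimal NumPy sketch of the training and prediction steps above using the normal equation (the numbers are made up and are not part of the repository code):

```python
import numpy as np

# toy data: the first column of x is the bias term, y is roughly linear in x
x = np.array([[1.0, 1.0], [1.0, 2.0], [1.0, 3.0]])
y = np.array([[1.1], [1.9], [3.2]])

# training phase: theta that minimizes J(theta), via the normal equation
theta = np.linalg.inv(x.T @ x) @ x.T @ y

# predict output for a query point x
x_query = np.array([1.0, 2.5])
print(x_query @ theta)  # predicted y at x = 2.5
```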
This training phase works when the data points are linear, but that raises another question: can we predict a non-linear relationship between x and y, as shown below?

<img src="https://miro.medium.com/max/700/1*DHYvJg55uN-Kj8jHaxDKvQ.png" alt="Non-linear Data">
<br />
<br />
This is where a non-parametric algorithm comes in: it does not compute predictions from a fixed set of parameters. Instead, the parameters $\theta$ are computed individually for each query point/data point x.
<br />
<br />
While computing $\theta$, a higher "preference" is given to points in the vicinity of x than to points farther from x.
Cost function J($\theta$) = $\sum_{i=1}^m w^i (\theta^T x^i - y^i)^2$

$w^i$ is the non-negative weight associated with training point $x^i$. \
$w^i$ is large for $x^i$'s lying closer to the query point x. \
$w^i$ is small for $x^i$'s lying farther from the query point x.
A typical weight can be computed using

$w^i$ = $\exp\left(-\frac{(x^i-x)(x^i-x)^T}{2\tau^2}\right)$

where $\tau$ is the bandwidth parameter that controls how quickly $w^i$ falls off with the distance of $x^i$ from x.
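For instance, for a training point at distance 1.0 from x, $\tau$ = 0.5 gives $w \approx \exp(-2) \approx 0.135$, while $\tau$ = 2.0 gives $w \approx \exp(-0.125) \approx 0.88$, so a larger bandwidth spreads influence over more distant points.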
Let's look at an example:

Suppose we had a query point x = 5.0 and training points $x^1$ = 4.9 and $x^2$ = 3.0; then we can calculate the weights as

$w^i$ = $\exp$(-$\frac{(x^i-x)(x^i-x)^T}{2\tau^2}$) with $\tau$ = 0.5

$w^1$ = $\exp$(-$\frac{(4.9-5)^2}{2(0.5)^2}$) = 0.9802

$w^2$ = $\exp$(-$\frac{(3-5)^2}{2(0.5)^2}$) = 0.000335

So, J($\theta$) = 0.9802 * ($\theta^T x^1$ - $y^1$)$^2$ + 0.000335 * ($\theta^T x^2$ - $y^2$)$^2$

From this we can conclude that the weight falls exponentially as the distance between x and $x^i$ increases, and so does the contribution of the prediction error for $x^i$ to the cost.
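These two weights can be reproduced with a couple of lines of NumPy (a quick sketch, not part of the repository code):

```python
import numpy as np

tau = 0.5
x_query = 5.0
x_train = np.array([4.9, 3.0])

# w^i = exp(-(x^i - x)^2 / (2 * tau^2))
weights = np.exp(-((x_train - x_query) ** 2) / (2 * tau**2))
print(weights)  # approximately [9.802e-01, 3.355e-04]
```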
The steps involved in LWL are: \
Compute $\theta$ to minimize the cost \
J($\theta$) = $\sum_{i=1}^m w^i (\theta^T x^i - y^i)^2$ \
Predict output: for a given query point x, \
return $\theta^T x$

<img src="https://miro.medium.com/max/700/1*H3QS05Q1GJtY-tiBL00iug.png" alt="LWL">
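For reference, minimizing this weighted cost has a closed-form solution, which is what the implementation below computes: with X the matrix of training inputs, y the outputs, and W the diagonal matrix of weights $w^i$,

$\theta$ = $(X^T W X)^{-1} X^T W y$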
# Required imports to run this file
import matplotlib.pyplot as plt
import numpy as np
# weighted matrix
def weighted_matrix(point: np.mat, training_data_x: np.mat, bandwidth: float) -> np.mat:
    """
    Calculate the weight of every point in the training data around a given
    query point. It takes the query point, the training data and the bandwidth tau.
    Tau is not a fixed value; it can be varied depending on the output.
    bandwidth (tau)  --> controls how quickly the weights fall off
    training_data_x  --> the training data
    point            --> the x where we want to make the prediction

    >>> weighted_matrix(np.array([1., 1.]),np.mat([[16.99, 10.34], [21.01,23.68],
    ... [24.59,25.69]]), 0.6)
    matrix([[1.43807972e-207, 0.00000000e+000, 0.00000000e+000],
            [0.00000000e+000, 0.00000000e+000, 0.00000000e+000],
            [0.00000000e+000, 0.00000000e+000, 0.00000000e+000]])
    """
    # m is the number of training samples
    m, n = np.shape(training_data_x)
    # initializing weights as an identity matrix
    weights = np.mat(np.eye(m))
    # calculating weights for all training examples [x(i)'s]
    for j in range(m):
        diff = point - training_data_x[j]
        # Gaussian kernel: the weight decays with squared distance from the query point
        weights[j, j] = np.exp(diff * diff.T / (-2.0 * bandwidth ** 2))
    return weights
def local_weight(
    point: np.mat, training_data_x: np.mat, training_data_y: np.mat, bandwidth: float
) -> np.mat:
    """
    Calculate the local weights at a given prediction point using the
    weighted_matrix function on the training data.
    Return the regression coefficients for that point.

    >>> local_weight(np.array([1., 1.]),np.mat([[16.99, 10.34], [21.01,23.68],
    ... [24.59,25.69]]),np.mat([[1.01, 1.66, 3.5]]), 0.6)
    matrix([[0.00873174],
            [0.08272556]])
    """
    weight = weighted_matrix(point, training_data_x, bandwidth)
    # weighted normal equation: (X^T W X)^-1 X^T W y
    w = (training_data_x.T * (weight * training_data_x)).I * (
        training_data_x.T * weight * training_data_y.T
    )

    return w
def local_weight_regression(
    training_data_x: np.mat, training_data_y: np.mat, bandwidth: float
) -> np.ndarray:
    """
    Calculate a locally weighted prediction for every point in the training data.

    >>> local_weight_regression(np.mat([[16.99, 10.34], [21.01,23.68],
    ... [24.59,25.69]]),np.mat([[1.01, 1.66, 3.5]]), 0.6)
    array([1.07173261, 1.65970737, 3.50160179])
    """
    m, n = np.shape(training_data_x)
    ypred = np.zeros(m)

    # fit a separate set of parameters theta for each point and predict with it
    for i, item in enumerate(training_data_x):
        ypred[i] = item * local_weight(
            item, training_data_x, training_data_y, bandwidth
        )

    return ypred
def load_data(dataset_name: str, cola_name: str, colb_name: str) -> tuple:
    """
    Load a dataset from seaborn and split it into x and y points.

    >>> pass  # this function has no doctest
    """
    import seaborn as sns

    data = sns.load_dataset(dataset_name)
    col_a = np.array(data[cola_name])  # total_bill
    col_b = np.array(data[colb_name])  # tip

    mcol_a = np.mat(col_a)
    mcol_b = np.mat(col_b)

    m = np.shape(mcol_b)[1]
    one = np.ones((1, m), dtype=int)

    # horizontal stacking: prepend a column of ones (bias term) to the feature column
    training_data_x = np.hstack((one.T, mcol_a.T))

    return training_data_x, mcol_b, col_a, col_b
def get_preds(training_data_x: np.mat, mcol_b: np.mat, tau: float) -> np.ndarray:
    """
    Get predictions with minimum error for each training point.

    >>> get_preds(np.mat([[16.99, 10.34], [21.01,23.68],
    ... [24.59,25.69]]),np.mat([[1.01, 1.66, 3.5]]), 0.6)
    array([1.07173261, 1.65970737, 3.50160179])
    """
    ypred = local_weight_regression(training_data_x, mcol_b, tau)
    return ypred
def plot_preds(
    training_data_x: np.mat,
    predictions: np.ndarray,
    col_x: np.ndarray,
    col_y: np.ndarray,
    cola_name: str,
    colb_name: str,
) -> None:
    """
    Plot the predictions and display the graph.

    >>> pass  # this function has no doctest
    """
    # sort the x values so the fitted curve is drawn from left to right
    xsort = training_data_x.copy()
    xsort.sort(axis=0)
    plt.scatter(col_x, col_y, color="blue")
    plt.plot(
        xsort[:, 1],
        predictions[training_data_x[:, 1].argsort(0)],
        color="yellow",
        linewidth=5,
    )
    plt.title("Local Weighted Regression")
    plt.xlabel(cola_name)
    plt.ylabel(colb_name)
    plt.show()
if __name__ == "__main__":
    training_data_x, mcol_b, col_a, col_b = load_data("tips", "total_bill", "tip")
    predictions = get_preds(training_data_x, mcol_b, 0.5)
    plot_preds(training_data_x, predictions, col_a, col_b, "total_bill", "tip")
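For a quick check without downloading the seaborn "tips" dataset, the regression can also be run directly on a few hand-made points (assuming the file above is saved as local_weighted_learning.py; the values mirror the doctest):

```python
import numpy as np

from local_weighted_learning import local_weight_regression

x = np.mat([[16.99, 10.34], [21.01, 23.68], [24.59, 25.69]])  # training inputs
y = np.mat([[1.01, 1.66, 3.5]])  # training outputs
print(local_weight_regression(x, y, 0.6))
# array([1.07173261, 1.65970737, 3.50160179]), matching the doctest above
```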