From 508589e3fc3fa93312b131c30c77ecd61460a430 Mon Sep 17 00:00:00 2001
From: Venkatesh Tantravahi
<64308188+venkateshtantravahi@users.noreply.github.com>
Date: Sun, 31 Oct 2021 16:57:50 +0530
Subject: [PATCH] Local Weighted Learning (#5615)
* Local Weighted Learning Added
* Delete LWL directory
* Local Weighted Learning Added
* local weighted learning added
* Delete LWL directory
* Delete local_weighted_learning.py
* rephrased code added
* local weight learning updated
* local weight learning updated
* Updated dir
* updated codespell
* import modification
* Doctests added
* doctests updated
* lcl updated
* doctests updated
* doctest values updated
---
.../local_weighted_learning/__init__.py | 0
.../local_weighted_learning.md | 66 +++++++++
.../local_weighted_learning.py | 135 ++++++++++++++++++
3 files changed, 201 insertions(+)
create mode 100644 machine_learning/local_weighted_learning/__init__.py
create mode 100644 machine_learning/local_weighted_learning/local_weighted_learning.md
create mode 100644 machine_learning/local_weighted_learning/local_weighted_learning.py
diff --git a/machine_learning/local_weighted_learning/__init__.py b/machine_learning/local_weighted_learning/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/machine_learning/local_weighted_learning/local_weighted_learning.md b/machine_learning/local_weighted_learning/local_weighted_learning.md
new file mode 100644
index 000000000..5c7895e75
--- /dev/null
+++ b/machine_learning/local_weighted_learning/local_weighted_learning.md
@@ -0,0 +1,66 @@
+# Locally Weighted Linear Regression
+Locally weighted linear regression is a non-parametric ML algorithm that, unlike **linear regression**, does not learn a fixed set of parameters. \
+So what is *linear regression*? \
+**Linear regression** is a supervised learning algorithm used for modeling the linear relationship between input (X) and output (Y).
+
+### Terminology Involved
+
+number_of_features(i) = Number of features involved. \
+number_of_training_examples(m) = Number of training examples. \
+output_sequence(y) = Output Sequence. \
+$\theta^T x$ = predicted point. \
+$J(\theta)$ = cost function of the point.
+
+The steps involved in ordinary linear regression are:
+
+Training phase: compute $\theta$ to minimize the cost \
+$J(\theta) = \sum_{i=1}^m (\theta^T x^i - y^i)^2$
+
+Predict output: for a given query point x, \
+ return: $\theta^T x$
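+
+As a quick aside, the two steps above can be written in a few lines of NumPy. This is a minimal sketch with made-up data, not part of the implementation below:
+
+```python
+import numpy as np
+
+# made-up training data: a bias column followed by one feature
+x = np.array([[1.0, 1.0], [1.0, 2.0], [1.0, 3.0]])
+y = np.array([2.0, 4.0, 6.0])
+
+# training phase: theta = (X^T X)^{-1} X^T y minimizes J(theta)
+theta = np.linalg.inv(x.T @ x) @ x.T @ y
+
+# prediction phase: return theta^T x for a query point x
+query = np.array([1.0, 4.0])
+print(theta @ query)  # ~8.0, since y = 2 * x in this data
+```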
+
+This training phase works when the data points are linear, but there again comes a question: can we also predict a non-linear relationship between x and y?
+
+So, here comes the role of a non-parametric algorithm, which doesn't compute predictions from a fixed set of parameters. Rather, the parameters $\theta$ are computed individually for each query point x.
+
+
+While computing $\theta$, a higher "preference" is given to the points in the vicinity of x than to the points farther from x.
+
+Cost function: $J(\theta) = \sum_{i=1}^m w^i (\theta^T x^i - y^i)^2$
+
+$w^i$ is a non-negative weight associated with the training point $x^i$. \
+$w^i$ is large for $x^i$'s lying closer to the query point x. \
+$w^i$ is small for $x^i$'s lying farther from the query point x.
+
+A typical weight can be computed using
+
+$w^i = \exp\left(-\frac{(x^i - x)(x^i - x)^T}{2\tau^2}\right)$
+
+where $\tau$ is the bandwidth parameter that controls how quickly $w^i$ falls off with the distance of $x^i$ from x.
+
+Let's look at an example:
+
+Suppose we have a query point x=5.0 and training points $x^1$=4.9 and $x^2$=3.0; then we can calculate the weights as:
+
+$w^i = \exp\left(-\frac{(x^i - x)(x^i - x)^T}{2\tau^2}\right)$ with $\tau$=0.5
+
+$w^1 = \exp\left(-\frac{(4.9 - 5)^2}{2(0.5)^2}\right) = 0.9802$
+
+$w^2 = \exp\left(-\frac{(3 - 5)^2}{2(0.5)^2}\right) = 0.000335$
+
+So, $J(\theta) = 0.9802 \, (\theta^T x^1 - y^1)^2 + 0.000335 \, (\theta^T x^2 - y^2)^2$
+
+Hereby we can conclude that the weight falls exponentially as the distance between x and $x^i$ increases, and so does the contribution of the error in prediction for $x^i$ to the cost.
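+
+The weights from this example can be checked with a few lines of Python (a minimal sketch; the values are the ones worked out above):
+
+```python
+import numpy as np
+
+tau = 0.5
+x = 5.0  # query point
+for x_i in (4.9, 3.0):  # training points from the example
+    w_i = np.exp(-((x_i - x) ** 2) / (2 * tau**2))
+    print(round(w_i, 6))  # 0.980199, then 0.000335
+```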
+
+The steps involved in LWL are: \
+Compute $\theta$ to minimize the cost \
+$J(\theta) = \sum_{i=1}^m w^i (\theta^T x^i - y^i)^2$ \
+Predict output: for a given query point x, \
+return: $\theta^T x$
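+
+Since $J(\theta)$ is quadratic in $\theta$, the minimizing $\theta$ for each query point can be computed in closed form via the weighted normal equation, which is what `local_weighted_learning.py` below implements (with W the diagonal matrix of the weights $w^i$):
+
+$\theta = (X^T W X)^{-1} X^T W y$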
+
+
diff --git a/machine_learning/local_weighted_learning/local_weighted_learning.py b/machine_learning/local_weighted_learning/local_weighted_learning.py
new file mode 100644
index 000000000..af8694bf8
--- /dev/null
+++ b/machine_learning/local_weighted_learning/local_weighted_learning.py
@@ -0,0 +1,135 @@
+# Required imports to run this file
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+# weighted matrix
+def weighted_matrix(point: np.mat, training_data_x: np.mat, bandwidth: float) -> np.mat:
+ """
+ Calculate the weight for every point in the
+ data set. It takes training_point , query_point, and tau
+ Here Tau is not a fixed value it can be varied depends on output.
+ tau --> bandwidth
+ xmat -->Training data
+ point --> the x where we want to make predictions
+ >>> weighted_matrix(np.array([1., 1.]),np.mat([[16.99, 10.34], [21.01,23.68],
+ ... [24.59,25.69]]), 0.6)
+ matrix([[1.43807972e-207, 0.00000000e+000, 0.00000000e+000],
+ [0.00000000e+000, 0.00000000e+000, 0.00000000e+000],
+ [0.00000000e+000, 0.00000000e+000, 0.00000000e+000]])
+ """
+ # m is the number of training samples
+ m, n = np.shape(training_data_x)
+ # Initializing weights as identity matrix
+ weights = np.mat(np.eye(m))
+ # calculating weights for all training examples [x(i)'s]
+ for j in range(m):
+ diff = point - training_data_x[j]
+ weights[j, j] = np.exp(diff * diff.T / (-2.0 * bandwidth ** 2))
+ return weights
+
+
+def local_weight(
+ point: np.mat, training_data_x: np.mat, training_data_y: np.mat, bandwidth: float
+) -> np.mat:
+ """
+ Calculate the local weights (the regression coefficients) for a query point
+ using the weighted_matrix function on the training data.
+ >>> local_weight(np.array([1., 1.]),np.mat([[16.99, 10.34], [21.01,23.68],
+ ... [24.59,25.69]]),np.mat([[1.01, 1.66, 3.5]]), 0.6)
+ matrix([[0.00873174],
+ [0.08272556]])
+ """
+ weight = weighted_matrix(point, training_data_x, bandwidth)
+ # closed-form weighted least-squares solution: theta = (X^T W X)^{-1} X^T W y
+ theta = (training_data_x.T * (weight * training_data_x)).I * (
+ training_data_x.T * weight * training_data_y.T
+ )
+ return theta
+
+
+def local_weight_regression(
+ training_data_x: np.mat, training_data_y: np.mat, bandwidth: float
+) -> np.ndarray:
+ """
+ Calculate predictions for each point in the training data.
+ >>> local_weight_regression(np.mat([[16.99, 10.34], [21.01,23.68],
+ ... [24.59,25.69]]),np.mat([[1.01, 1.66, 3.5]]), 0.6)
+ array([1.07173261, 1.65970737, 3.50160179])
+ """
+ m, n = np.shape(training_data_x)
+ ypred = np.zeros(m)
+
+ for i, item in enumerate(training_data_x):
+ ypred[i] = item * local_weight(
+ item, training_data_x, training_data_y, bandwidth
+ )
+
+ return ypred
+
+
+def load_data(dataset_name: str, cola_name: str, colb_name: str) -> tuple:
+ """
+ Load the dataset from seaborn and split it into x and y points.
+ >>> pass # this function has no doctest
+ """
+ import seaborn as sns
+
+ data = sns.load_dataset(dataset_name)
+ col_a = np.array(data[cola_name]) # total_bill
+ col_b = np.array(data[colb_name]) # tip
+
+ mcol_a = np.mat(col_a)
+ mcol_b = np.mat(col_b)
+
+ m = np.shape(mcol_b)[1]
+ one = np.ones((1, m), dtype=int)
+
+ # horizontal stacking
+ training_data_x = np.hstack((one.T, mcol_a.T))
+
+ return training_data_x, mcol_b, col_a, col_b
+
+
+def get_preds(training_data_x: np.mat, mcol_b: np.mat, tau: float) -> np.ndarray:
+ """
+ Get predictions with minimum error for each training data point.
+ >>> get_preds(np.mat([[16.99, 10.34], [21.01,23.68],
+ ... [24.59,25.69]]),np.mat([[1.01, 1.66, 3.5]]), 0.6)
+ array([1.07173261, 1.65970737, 3.50160179])
+ """
+ ypred = local_weight_regression(training_data_x, mcol_b, tau)
+ return ypred
+
+
+def plot_preds(
+ training_data_x: np.mat,
+ predictions: np.ndarray,
+ col_x: np.ndarray,
+ col_y: np.ndarray,
+ cola_name: str,
+ colb_name: str,
+) -> None:
+ """
+ Plot the predictions and display the graph.
+ >>> pass # this function has no doctest
+ """
+ xsort = training_data_x.copy()
+ xsort.sort(axis=0)
+ plt.scatter(col_x, col_y, color="blue")
+ plt.plot(
+ xsort[:, 1],
+ predictions[training_data_x[:, 1].argsort(0)],
+ color="yellow",
+ linewidth=5,
+ )
+ plt.title("Local Weighted Regression")
+ plt.xlabel(cola_name)
+ plt.ylabel(colb_name)
+ plt.show()
+
+
+if __name__ == "__main__":
+ training_data_x, mcol_b, col_a, col_b = load_data("tips", "total_bill", "tip")
+ predictions = get_preds(training_data_x, mcol_b, 0.5)
+ plot_preds(training_data_x, predictions, col_a, col_b, "total_bill", "tip")