diff --git a/machine_learning/local_weighted_learning/__init__.py b/machine_learning/local_weighted_learning/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/machine_learning/local_weighted_learning/local_weighted_learning.md b/machine_learning/local_weighted_learning/local_weighted_learning.md
new file mode 100644
index 000000000..5c7895e75
--- /dev/null
+++ b/machine_learning/local_weighted_learning/local_weighted_learning.md
@@ -0,0 +1,66 @@
+# Locally Weighted Linear Regression
+Locally weighted linear regression is a non-parametric ML algorithm: unlike ordinary **linear regression**, it does not learn a fixed set of parameters. \
+So what is *linear regression*? \
+**Linear regression** is a supervised learning algorithm used for computing a linear relationship between the input (X) and the output (Y).
+
+### Terminology Involved
+
+number_of_features(i) = Number of features involved. \
+number_of_training_examples(m) = Number of training examples. \
+output_sequence(y) = Output sequence. \
+$\theta^T x$ = predicted point. \
+J($\theta$) = Cost function of point.
+
+The steps involved in ordinary linear regression are:
+
+Training phase: Compute $\theta$ to minimize the cost \
+J($\theta$) = $\sum_{i=1}^m (\theta^T x^i - y^i)^2$
+
+Predict output: for a given query point x, \
+return: $\theta^T x$
+
+*(figure: Linear Regression)*
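+
+As a concrete illustration (not part of this module), here is a minimal numpy sketch of these two steps, solving the normal equations on a toy dataset:
+
+```python
+import numpy as np
+
+# toy training data: the first column is the intercept (bias) term
+x = np.array([[1.0, 1.0], [1.0, 2.0], [1.0, 3.0]])
+y = np.array([1.1, 1.9, 3.2])
+
+# training phase: theta minimizing J(theta), via the normal equations
+theta = np.linalg.pinv(x.T @ x) @ x.T @ y
+
+# predict output for a query point x
+x_query = np.array([1.0, 2.5])
+print(theta @ x_query)  # predicted y at x = 2.5
+```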
+
+This training phase works when the data points are linear, but it raises another question: can we predict a non-linear relationship between x and y, as shown below?
+
+*(figure: Non-linear Data)*
+
+This is where a non-parametric algorithm comes in: it does not compute predictions from a fixed set of parameters. Rather, the parameters $\theta$ are computed individually for each query point x, as sketched below.
+
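+To make "one $\theta$ per query point" concrete, here is a hedged sketch of that workflow; `fit_theta_at` is a hypothetical stand-in for the weighted fit defined below:
+
+```python
+import numpy as np
+
+def fit_theta_at(x_query: np.ndarray, x: np.ndarray, y: np.ndarray) -> np.ndarray:
+    # stand-in: plain least squares for every query; the weighting that
+    # makes the fit local to x_query is introduced in the next section
+    return np.linalg.pinv(x.T @ x) @ x.T @ y
+
+def predict(x_query: np.ndarray, x: np.ndarray, y: np.ndarray) -> float:
+    theta = fit_theta_at(x_query, x, y)  # theta is recomputed per query point
+    return float(theta @ x_query)        # prediction uses this local theta
+```
+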
+While computing $\theta$, a higher "preference" is given to the points in the vicinity of x than to the points farther from x.
+
+Cost function: J($\theta$) = $\sum_{i=1}^m w^i (\theta^T x^i - y^i)^2$
+
+$w^i$ is a non-negative weight associated with the training point $x^i$. \
+$w^i$ is large for $x^i$'s lying closer to the query point x. \
+$w^i$ is small for $x^i$'s lying farther from the query point x.
+
+A typical weight can be computed using \
+
+$w^i = \exp\left(-\frac{(x^i-x)(x^i-x)^T}{2\tau^2}\right)$
+
+where $\tau$ is the bandwidth parameter that controls how quickly $w^i$ falls off with the distance from x.
+
+Let's look at an example:
+
+Suppose we have a query point x=5.0 and training points $x^1$=4.9 and $x^2$=3.0; then with $\tau$=0.5 we can calculate the weights as:
+
+$w^i = \exp\left(-\frac{(x^i-x)(x^i-x)^T}{2\tau^2}\right)$
+
+$w^1 = \exp\left(-\frac{(4.9-5)^2}{2(0.5)^2}\right) = 0.9802$
+
+$w^2 = \exp\left(-\frac{(3-5)^2}{2(0.5)^2}\right) = 0.000335$
+
+So, J($\theta$) = 0.9802 $(\theta^T x^1 - y^1)^2$ + 0.000335 $(\theta^T x^2 - y^2)^2$
+
+From this we can conclude that the weights fall exponentially as the distance between x and $x^i$ increases, and so does the contribution of the error in the prediction for $x^i$ to the cost.
+
+The steps involved in LWL are: \
+Compute $\theta$ to minimize the cost \
+J($\theta$) = $\sum_{i=1}^m w^i (\theta^T x^i - y^i)^2$ \
+Predict output: for a given query point x, \
+return: $\theta^T x$
+
+*(figure: LWL)*
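+
+As a quick sanity check of the worked example above (illustrative, not part of this module), the weights can be reproduced with numpy:
+
+```python
+import numpy as np
+
+tau = 0.5
+x_query = 5.0
+x_train = np.array([4.9, 3.0])
+
+# Gaussian weights: w^i = exp(-(x^i - x)^2 / (2 * tau^2))
+weights = np.exp(-((x_train - x_query) ** 2) / (2 * tau ** 2))
+print(weights)  # approximately [0.980199, 0.000335]
+```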
diff --git a/machine_learning/local_weighted_learning/local_weighted_learning.py b/machine_learning/local_weighted_learning/local_weighted_learning.py
new file mode 100644
index 000000000..af8694bf8
--- /dev/null
+++ b/machine_learning/local_weighted_learning/local_weighted_learning.py
@@ -0,0 +1,135 @@
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+def weighted_matrix(point: np.mat, training_data_x: np.mat, bandwidth: float) -> np.mat:
+    """
+    Calculate the weight of every training point with respect to a query point.
+    The bandwidth tau is not a fixed value; it can be tuned to the data.
+    bandwidth --> tau, controls how quickly weights fall off with distance
+    training_data_x --> training data
+    point --> the x where we want to make a prediction
+    >>> weighted_matrix(np.array([1., 1.]),np.mat([[16.99, 10.34], [21.01,23.68],
+    ... [24.59,25.69]]), 0.6)
+    matrix([[1.43807972e-207, 0.00000000e+000, 0.00000000e+000],
+            [0.00000000e+000, 0.00000000e+000, 0.00000000e+000],
+            [0.00000000e+000, 0.00000000e+000, 0.00000000e+000]])
+    """
+    # m is the number of training samples
+    m, _ = np.shape(training_data_x)
+    # initialize weights as an identity matrix
+    weights = np.mat(np.eye(m))
+    # calculate the Gaussian weight for every training example x(i)
+    for j in range(m):
+        diff = point - training_data_x[j]
+        weights[j, j] = np.exp(diff * diff.T / (-2.0 * bandwidth ** 2))
+    return weights
+
+
+def local_weight(
+    point: np.mat, training_data_x: np.mat, training_data_y: np.mat, bandwidth: float
+) -> np.mat:
+    """
+    Compute the local parameters theta for a query point by solving the
+    weighted normal equations: theta = (X^T W X)^-1 X^T W y.
+    >>> local_weight(np.array([1., 1.]),np.mat([[16.99, 10.34], [21.01,23.68],
+    ... [24.59,25.69]]),np.mat([[1.01, 1.66, 3.5]]), 0.6)
+    matrix([[0.00873174],
+            [0.08272556]])
+    """
+    weight = weighted_matrix(point, training_data_x, bandwidth)
+    theta = (training_data_x.T * (weight * training_data_x)).I * (
+        training_data_x.T * weight * training_data_y.T
+    )
+    return theta
+
+
+def local_weight_regression(
+    training_data_x: np.mat, training_data_y: np.mat, bandwidth: float
+) -> np.mat:
+    """
+    Calculate a prediction for every point in the training data.
+    >>> local_weight_regression(np.mat([[16.99, 10.34], [21.01,23.68],
+    ... [24.59,25.69]]),np.mat([[1.01, 1.66, 3.5]]), 0.6)
+    array([1.07173261, 1.65970737, 3.50160179])
+    """
+    m, _ = np.shape(training_data_x)
+    ypred = np.zeros(m)
+
+    # fit a local theta at each training point and predict with it
+    for i, item in enumerate(training_data_x):
+        ypred[i] = item * local_weight(
+            item, training_data_x, training_data_y, bandwidth
+        )
+
+    return ypred
+
+
+def load_data(dataset_name: str, cola_name: str, colb_name: str) -> tuple:
+    """
+    Load a seaborn dataset and split it into x and y points.
+    No doctest: this function downloads a dataset over the network.
+    """
+    import seaborn as sns
+
+    data = sns.load_dataset(dataset_name)
+    col_a = np.array(data[cola_name])  # total_bill
+    col_b = np.array(data[colb_name])  # tip
+
+    mcol_a = np.mat(col_a)
+    mcol_b = np.mat(col_b)
+
+    m = np.shape(mcol_b)[1]
+    one = np.ones((1, m), dtype=int)
+
+    # horizontally stack a column of ones (the intercept term) with col_a
+    training_data_x = np.hstack((one.T, mcol_a.T))
+
+    return training_data_x, mcol_b, col_a, col_b
+
+
+def get_preds(training_data_x: np.mat, mcol_b: np.mat, tau: float) -> np.ndarray:
+    """
+    Get predictions with minimum error for each training point.
+    >>> get_preds(np.mat([[16.99, 10.34], [21.01,23.68],
+    ... [24.59,25.69]]),np.mat([[1.01, 1.66, 3.5]]), 0.6)
+    array([1.07173261, 1.65970737, 3.50160179])
+    """
+    ypred = local_weight_regression(training_data_x, mcol_b, tau)
+    return ypred
+
+
+def plot_preds(
+    training_data_x: np.mat,
+    predictions: np.ndarray,
+    col_x: np.ndarray,
+    col_y: np.ndarray,
+    cola_name: str,
+    colb_name: str,
+) -> None:
+    """
+    Plot the predictions alongside the original data and display the graph.
+    No doctest: this function opens a plot window.
+    """
+    xsort = training_data_x.copy()
+    xsort.sort(axis=0)
+    plt.scatter(col_x, col_y, color="blue")
+    plt.plot(
+        xsort[:, 1],
+        predictions[training_data_x[:, 1].argsort(0)],
+        color="yellow",
+        linewidth=5,
+    )
+    plt.title("Local Weighted Regression")
+    plt.xlabel(cola_name)
+    plt.ylabel(colb_name)
+    plt.show()
+
+
+if __name__ == "__main__":
+    training_data_x, mcol_b, col_a, col_b = load_data("tips", "total_bill", "tip")
+    predictions = get_preds(training_data_x, mcol_b, 0.5)
+    plot_preds(training_data_x, predictions, col_a, col_b, "total_bill", "tip")