local_weighted_learning.py: fix mypy errors and more (#8073)

This commit is contained in:
Tianyi Zheng 2023-05-16 17:05:55 -07:00 committed by GitHub
parent c0892a0651
commit 8102424950
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,14 +1,55 @@
"""
Locally weighted linear regression, also called local regression, is a type of
non-parametric linear regression that prioritizes data closest to a given
prediction point. The algorithm estimates the vector of model coefficients β
using weighted least squares regression:
β = (XᵀWX)⁻¹(XᵀWy),
where X is the design matrix, y is the response vector, and W is the diagonal
weight matrix.
This implementation calculates wᵢ, the weight of the ith training sample, using
the Gaussian weight:
wᵢ = exp(-‖xᵢ - x‖²/(2τ²)),
where xᵢ is the ith training sample, x is the prediction point, τ is the
"bandwidth", and ‖x‖ is the Euclidean norm (also called the 2-norm or the
L² norm). The bandwidth τ controls how quickly the weight of a training sample
decreases as its distance from the prediction point increases. One can think of
the Gaussian weight as a bell curve centered around the prediction point: a
training sample is weighted lower if it's farther from the center, and τ
controls the spread of the bell curve.
Other types of locally weighted regression such as locally estimated scatterplot
smoothing (LOESS) typically use different weight functions.
References:
- https://en.wikipedia.org/wiki/Local_regression
- https://en.wikipedia.org/wiki/Weighted_least_squares
- https://cs229.stanford.edu/notes2022fall/main_notes.pdf
"""
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import numpy as np import numpy as np
def weighted_matrix( def weight_matrix(point: np.ndarray, x_train: np.ndarray, tau: float) -> np.ndarray:
point: np.array, training_data_x: np.array, bandwidth: float
) -> np.array:
""" """
Calculate the weight for every point in the data set. Calculate the weight of every point in the training data around a given
point --> the x value at which we want to make predictions prediction point
>>> weighted_matrix(
Args:
point: x-value at which the prediction is being made
x_train: ndarray of x-values for training
tau: bandwidth value, controls how quickly the weight of training values
decreases as the distance from the prediction point increases
Returns:
m x m weight matrix around the prediction point, where m is the size of
the training set
>>> weight_matrix(
... np.array([1., 1.]), ... np.array([1., 1.]),
... np.array([[16.99, 10.34], [21.01,23.68], [24.59,25.69]]), ... np.array([[16.99, 10.34], [21.01,23.68], [24.59,25.69]]),
... 0.6 ... 0.6
@ -17,25 +58,30 @@ def weighted_matrix(
[0.00000000e+000, 0.00000000e+000, 0.00000000e+000], [0.00000000e+000, 0.00000000e+000, 0.00000000e+000],
[0.00000000e+000, 0.00000000e+000, 0.00000000e+000]]) [0.00000000e+000, 0.00000000e+000, 0.00000000e+000]])
""" """
m, _ = np.shape(training_data_x) # m is the number of training samples m = len(x_train) # Number of training samples
weights = np.eye(m) # Initializing weights as identity matrix weights = np.eye(m) # Initialize weights as identity matrix
# calculating weights for all training examples [x(i)'s]
for j in range(m): for j in range(m):
diff = point - training_data_x[j] diff = point - x_train[j]
weights[j, j] = np.exp(diff @ diff.T / (-2.0 * bandwidth**2)) weights[j, j] = np.exp(diff @ diff.T / (-2.0 * tau**2))
return weights return weights
def local_weight( def local_weight(
point: np.array, point: np.ndarray, x_train: np.ndarray, y_train: np.ndarray, tau: float
training_data_x: np.array, ) -> np.ndarray:
training_data_y: np.array,
bandwidth: float,
) -> np.array:
""" """
Calculate the local weights using the weight_matrix function on training data. Calculate the local weights at a given prediction point using the weight
Return the weighted matrix. matrix for that point
Args:
point: x-value at which the prediction is being made
x_train: ndarray of x-values for training
y_train: ndarray of y-values for training
tau: bandwidth value, controls how quickly the weight of training values
decreases as the distance from the prediction point increases
Returns:
ndarray of local weights
>>> local_weight( >>> local_weight(
... np.array([1., 1.]), ... np.array([1., 1.]),
... np.array([[16.99, 10.34], [21.01,23.68], [24.59,25.69]]), ... np.array([[16.99, 10.34], [21.01,23.68], [24.59,25.69]]),
@ -45,19 +91,28 @@ def local_weight(
array([[0.00873174], array([[0.00873174],
[0.08272556]]) [0.08272556]])
""" """
weight = weighted_matrix(point, training_data_x, bandwidth) weight_mat = weight_matrix(point, x_train, tau)
w = np.linalg.inv(training_data_x.T @ (weight @ training_data_x)) @ ( weight = np.linalg.inv(x_train.T @ weight_mat @ x_train) @ (
training_data_x.T @ weight @ training_data_y.T x_train.T @ weight_mat @ y_train.T
) )
return w return weight
def local_weight_regression( def local_weight_regression(
training_data_x: np.array, training_data_y: np.array, bandwidth: float x_train: np.ndarray, y_train: np.ndarray, tau: float
) -> np.array: ) -> np.ndarray:
""" """
Calculate predictions for each data point on axis Calculate predictions for each point in the training data
Args:
x_train: ndarray of x-values for training
y_train: ndarray of y-values for training
tau: bandwidth value, controls how quickly the weight of training values
decreases as the distance from the prediction point increases
Returns:
ndarray of predictions
>>> local_weight_regression( >>> local_weight_regression(
... np.array([[16.99, 10.34], [21.01, 23.68], [24.59, 25.69]]), ... np.array([[16.99, 10.34], [21.01, 23.68], [24.59, 25.69]]),
... np.array([[1.01, 1.66, 3.5]]), ... np.array([[1.01, 1.66, 3.5]]),
@ -65,77 +120,57 @@ def local_weight_regression(
... ) ... )
array([1.07173261, 1.65970737, 3.50160179]) array([1.07173261, 1.65970737, 3.50160179])
""" """
m, _ = np.shape(training_data_x) y_pred = np.zeros(len(x_train)) # Initialize array of predictions
ypred = np.zeros(m) for i, item in enumerate(x_train):
y_pred[i] = item @ local_weight(item, x_train, y_train, tau)
for i, item in enumerate(training_data_x): return y_pred
ypred[i] = item @ local_weight(
item, training_data_x, training_data_y, bandwidth
)
return ypred
def load_data( def load_data(
dataset_name: str, cola_name: str, colb_name: str dataset_name: str, x_name: str, y_name: str
) -> tuple[np.array, np.array, np.array, np.array]: ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
""" """
Load data from seaborn and split it into x and y points Load data from seaborn and split it into x and y points
>>> pass # No doctests, function is for demo purposes only
""" """
import seaborn as sns import seaborn as sns
data = sns.load_dataset(dataset_name) data = sns.load_dataset(dataset_name)
col_a = np.array(data[cola_name]) # total_bill x_data = np.array(data[x_name])
col_b = np.array(data[colb_name]) # tip y_data = np.array(data[y_name])
mcol_a = col_a.copy() one = np.ones(len(y_data))
mcol_b = col_b.copy()
one = np.ones(np.shape(mcol_b)[0], dtype=int) # pairing elements of one and x_data
x_train = np.column_stack((one, x_data))
# pairing elements of one and mcol_a return x_train, x_data, y_data
training_data_x = np.column_stack((one, mcol_a))
return training_data_x, mcol_b, col_a, col_b
def get_preds(training_data_x: np.array, mcol_b: np.array, tau: float) -> np.array:
"""
Get predictions with minimum error for each training data
>>> get_preds(
... np.array([[16.99, 10.34], [21.01, 23.68], [24.59, 25.69]]),
... np.array([[1.01, 1.66, 3.5]]),
... 0.6
... )
array([1.07173261, 1.65970737, 3.50160179])
"""
ypred = local_weight_regression(training_data_x, mcol_b, tau)
return ypred
def plot_preds( def plot_preds(
training_data_x: np.array, x_train: np.ndarray,
predictions: np.array, preds: np.ndarray,
col_x: np.array, x_data: np.ndarray,
col_y: np.array, y_data: np.ndarray,
cola_name: str, x_name: str,
colb_name: str, y_name: str,
) -> plt.plot: ) -> None:
""" """
Plot predictions and display the graph Plot predictions and display the graph
>>> pass # No doctests, function is for demo purposes only
""" """
xsort = training_data_x.copy() x_train_sorted = np.sort(x_train, axis=0)
xsort.sort(axis=0) plt.scatter(x_data, y_data, color="blue")
plt.scatter(col_x, col_y, color="blue")
plt.plot( plt.plot(
xsort[:, 1], x_train_sorted[:, 1],
predictions[training_data_x[:, 1].argsort(0)], preds[x_train[:, 1].argsort(0)],
color="yellow", color="yellow",
linewidth=5, linewidth=5,
) )
plt.title("Local Weighted Regression") plt.title("Local Weighted Regression")
plt.xlabel(cola_name) plt.xlabel(x_name)
plt.ylabel(colb_name) plt.ylabel(y_name)
plt.show() plt.show()
@ -144,6 +179,7 @@ if __name__ == "__main__":
doctest.testmod() doctest.testmod()
training_data_x, mcol_b, col_a, col_b = load_data("tips", "total_bill", "tip") # Demo with a dataset from the seaborn module
predictions = get_preds(training_data_x, mcol_b, 0.5) training_data_x, total_bill, tip = load_data("tips", "total_bill", "tip")
plot_preds(training_data_x, predictions, col_a, col_b, "total_bill", "tip") predictions = local_weight_regression(training_data_x, tip, 5)
plot_preds(training_data_x, predictions, total_bill, tip, "total_bill", "tip")