Source code for mlearn.algorithms.linreg

"""This module contains simple functions for linear regressions."""

import numpy as np
import transform

def sum_of_squared_residuals(x, y, beta):
    """
    Calculate the sum of squared residuals for observed :math:`y` values,
    given :math:`x` predictor matrix and provided :math:`\\beta` parameters.

    Parameters
    ----------
    x : np.array
        Corresponds to a matrix :math:`x` which contains all :math:`x_i`
        values (including :math:`x_0=1`).
    y : np.array
        Corresponds to vector :math:`y` which refers to the observed values.
    beta : np.array
        Corresponds to vector :math:`\\beta` which contains the parameters
        to calculate the predicted values.

    Returns
    -------
    ssr : numpy.array

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Residual_sum_of_squares

    Notes
    -----
    .. math:: SSR(\\beta) = \\displaystyle\\sum_{i=1}^{n}(x_i\\beta-y_i)^2

    .. math:: SSR(\\beta) = (x\\beta-y)^T (x\\beta-y)

    Examples
    --------
    >>> # The predictor matrix
    >>> x = np.array([[1, 11, 104], [1, 15, 99], [1, 22, 89], [1, 27, 88]])
    >>> # The observed values vector
    >>> y = np.array([[12], [15], [19], [22]])
    >>> # The parameters vector
    >>> beta_zero = np.zeros((3, 1))
    >>> sum_of_squared_residuals(x, y, beta_zero)
    [ 1214.]
    """
    diff_score = np.dot(x, beta) - y
    ssr = np.dot(diff_score.transpose(), diff_score)
    return ssr.ravel()
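
# Illustrative sketch (not part of the original module): the matrix form used
# in sum_of_squared_residuals gives the same value as the element-wise sum in
# the first Notes formula. The small data set is taken from the docstring
# example above.
#
# >>> x = np.array([[1, 11, 104], [1, 15, 99], [1, 22, 89], [1, 27, 88]])
# >>> y = np.array([[12], [15], [19], [22]])
# >>> beta_zero = np.zeros((3, 1))
# >>> np.sum((np.dot(x, beta_zero) - y) ** 2)    # sum form of SSR(beta)
# 1214.0
# >>> sum_of_squared_residuals(x, y, beta_zero)  # matrix form, same value
# [ 1214.]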

def gradient_descent(x, y, beta_init=None, gamma=0.01, max_iter=200,
                     threshold=0.01, scaling=True, regularize=False):
    """
    Numerically estimates the unknown parameters :math:`\\beta_i` for a
    linear regression model where :math:`x` refers to the predictor matrix
    and :math:`y` to the observed values vector.

    The first derivative of the sum of squared residuals is used to
    calculate the gradient for each parameter. In every iteration, the
    parameters are moved against the gradient with the given step size
    :math:`\\gamma`. This is repeated until the maximum number of iterations
    is reached or the difference of the sum of squared residuals between two
    iterations falls below a given threshold.

    Parameters
    ----------
    x : np.array
        Corresponds to a matrix :math:`x` which contains all :math:`x_i`
        values (including :math:`x_0=1`).
    y : np.array
        Corresponds to vector :math:`y` which refers to the observed values.
    beta_init : np.array, optional
        Initial :math:`\\beta_i` values may be provided. Otherwise they are
        set to zero.
    gamma : float, optional
        The step size :math:`\\gamma` of the gradient descent. Determines
        how much the parameters change per iteration.
    max_iter : int, optional
        Sets the maximum number of iterations.
    threshold : float, optional
        Defines the threshold for convergence. If the difference of the sum
        of squared residuals between two consecutive iterations falls below
        this value, the gradient descent has converged and the function
        stops.
    scaling : boolean, optional
        By default, the predictors are z-transformed. This improves the
        gradient descent performance because all predictors then behave on
        the same scale.
    regularize : float, optional
        Apply the regularization term :math:`\\lambda` to the estimation of
        :math:`\\beta`. It can prevent overfitting when :math:`x` contains a
        large number of higher order predictors. Increasing :math:`\\lambda`
        will decrease the :math:`\\beta_i` values, which causes the decision
        boundary to be smoother.

    Returns
    -------
    beta : numpy.array

    Notes
    -----
    .. math:: f(\\beta)=\\frac{1}{2n}\\
              \\displaystyle\\sum_{i=1}^{n}(x_i\\beta-y_i)^2 = \\
              \\frac{1}{2n}(x\\beta-y)^T (x\\beta-y)=\\frac{1}{2n}SSR(\\beta)

    .. math:: f'(\\beta)=\\beta-\\
              \\frac{\\gamma}{n}\\displaystyle\\sum_{i=1}^{n}\\
              (x_i\\beta_m-y_i) x_i = \\beta-\\gamma\\frac{1}{n}x^T(x\\beta-y)

    .. math:: f'_{reg}(\\beta)=\\beta(1-\\gamma \\frac{\\lambda}{n} \\
              \\beta_{reg})-\\frac{\\gamma}{n}x^T(x\\beta-y) \\text{ where } \\
              \\beta_{reg} = \\begin{bmatrix} 0 \\\\ 1 \\\\ \\vdots \\\\ 1_m \\
              \\end{bmatrix}

    References
    ----------
    .. [4] https://en.wikipedia.org/wiki/Gradient_descent
    """
    n, m = x.shape
    if scaling:
        # Standardize the predictors (without the intercept column) and
        # re-attach a column of ones afterwards.
        x = transform.standardize(x[:, 1:])
        x = np.append(np.ones((x.shape[0], 1)), x, axis=1)
    if beta_init is None:
        beta_init = np.zeros((m, 1))
    reg_term = np.ones((m, 1))
    if regularize:
        # The intercept parameter is excluded from regularization.
        beta_reg = np.ones((m, 1))
        beta_reg[0, 0] = 0
        reg_term = 1 - (gamma * (regularize / float(n)) * beta_reg)
    beta = beta_init
    current_cost = None
    n_iter = 0
    while True:
        right_term = (gamma / float(n))
        right_term = right_term * np.dot(np.transpose(x), np.dot(x, beta) - y)
        left_term = beta * reg_term
        beta = left_term - right_term
        cost = sum_of_squared_residuals(x, y, beta)
        if current_cost is None:
            current_cost = cost
        elif np.abs(cost - current_cost) < threshold:
            break
        else:
            current_cost = cost
        if n_iter == max_iter:
            break
        else:
            n_iter += 1
    return beta
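
# Illustrative usage sketch (not part of the original module; it mirrors the
# commented-out demo in the __main__ block below). The values for gamma,
# max_iter and threshold are arbitrary choices. With scaling=True the returned
# beta refers to the standardized predictors, so scaling is disabled here to
# compare the result against the raw-scale SSR.
#
# >>> x = np.array([[1., 11.], [1., 15.], [1., 22.], [1., 27.]])
# >>> y = np.array([[12.], [15.], [19.], [22.]])
# >>> beta_grad = gradient_descent(x, y, scaling=False, gamma=0.001,
# ...                              max_iter=10000, threshold=1e-9)
# >>> sum_of_squared_residuals(x, y, beta_grad)  # approaches the OLS minimum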

def ordinary_least_squares(x, y, regularize=False):
    """
    Analytically calculate the unknown parameters :math:`\\beta` for a
    linear regression model where :math:`x` refers to the predictor matrix
    and :math:`y` to the observed values vector.

    Parameters
    ----------
    x : np.array
        Corresponds to a matrix :math:`x` which contains all :math:`x_i`
        values (including :math:`x_0=1`).
    y : np.array
        Corresponds to vector :math:`y` which refers to the observed values.
    regularize : float, optional
        Apply the regularization term :math:`\\lambda` to the estimation of
        :math:`\\beta`. It can prevent overfitting when :math:`x` contains a
        large number of higher order predictors. Increasing :math:`\\lambda`
        will decrease the :math:`\\beta_i` values, which causes the decision
        boundary to be smoother.

    Returns
    -------
    beta : numpy.array

    Notes
    -----
    .. math:: \\hat{\\beta} = (x^Tx)^{-1}x^Ty

    .. math:: \\text{Regularization with m predictors: } \\hat{\\beta} = \\
              (x^Tx + \\lambda \\
              \\begin{bmatrix} 0 & 0 & 0 & 0 \\\\ 0 & 1 & 0 & 0 \\\\\\
              0 & 0 & \\ddots & \\vdots \\\\ 0 & 0 & \\cdots & 1_{m+1} \\
              \\end{bmatrix})\\
              ^{-1}x^Ty

    References
    ----------
    .. [2] https://en.wikipedia.org/wiki/Ordinary_least_squares
    .. [3] https://en.wikipedia.org/wiki/Regularization_%28mathematics%29

    Examples
    --------
    >>> # The predictor matrix
    >>> x = np.array([[1, 11, 104], [1, 15, 99], [1, 22, 89], [1, 27, 88]])
    >>> # The observed values vector
    >>> y = np.array([[12], [15], [19], [22]])
    >>> # The parameters vector
    >>> beta_zero = np.zeros((3, 1))
    >>> sum_of_squared_residuals(x, y, beta_zero)
    [ 1214.]
    >>> beta_min = ordinary_least_squares(x, y)
    >>> sum_of_squared_residuals(x, y, beta_min)
    [ 0.14455509]
    """
    x_t = x.transpose()
    x_sum = np.dot(x_t, x)
    if regularize:
        # Regularize all parameters except the intercept.
        lambda_identity = np.identity(x.shape[1])
        lambda_identity[0, 0] = 0
        lambda_regularization = regularize * lambda_identity
        inverse = np.linalg.pinv(x_sum + lambda_regularization)
    else:
        inverse = np.linalg.pinv(x_sum)
    beta = np.dot(inverse, np.dot(x_t, y))
    return beta
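
# Illustrative sketch of the regularized variant (not part of the original
# module; the lambda value 10.0 is an arbitrary choice). Regularization shrinks
# beta_1..beta_m, so the resulting fit is typically slightly worse than the
# unregularized solution shown in the docstring example.
#
# >>> x = np.array([[1, 11, 104], [1, 15, 99], [1, 22, 89], [1, 27, 88]])
# >>> y = np.array([[12], [15], [19], [22]])
# >>> beta_ridge = ordinary_least_squares(x, y, regularize=10.0)
# >>> sum_of_squared_residuals(x, y, beta_ridge)  # larger SSR than beta_min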
if __name__ == "__main__": """ x = np.array([[1, 11, 104], [1, 15, 99], [1, 22, 89], [1, 27, 88]]) x2 = np.array([[1, 11], [1, 15], [1, 22], [1, 27]]) y = np.array([[12], [15], [19], [22]]) beta_zero = np.zeros((2,1)) print(sum_of_squared_residuals(x2, y, beta_zero)) beta_min = ordinary_least_squares(x2, y) print(sum_of_squared_residuals(x2, y, beta_min)) beta_grad = gradient_descent(x2, y, scaling=False, gamma=0.001) print(sum_of_squared_residuals(x2, y, beta_grad)) """ from sklearn import datasets, linear_model diabetes = datasets.load_diabetes() # Create linear regression object regr = linear_model.LinearRegression() # Train the model using the training sets regr.fit(diabetes.data, diabetes.target) print(sum_of_squared_residuals(diabetes.data, diabetes.target, regr.coef_)) x = np.append(np.ones((diabetes.data.shape[0], 1)), diabetes.data, axis=1) y = diabetes.target ols = ordinary_least_squares(x, y) print(sum_of_squared_residuals(x, y, ols)) gd = gradient_descent(x,np.reshape(y,(442,1))) print(sum_of_squared_residuals(x, y, gd.ravel()))