Adaptive Linear Neuron (ADALINE) implementation.

#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

Include dependency graph for adaline_learning.c:

Data Structures
struct	adaline

Macros
#define	MAX_ITER 500

#define	ACCURACY 1e-5
	convergence accuracy \(=1\times10^{-5}\)

Functions
struct adaline	new_adaline (const int num_features, const double eta)

void	delete_adaline (struct adaline *ada)

int	activation (double x)

char *	get_weights_str (struct adaline *ada)

int	predict (struct adaline *ada, const double *x, double *out)

double	fit_sample (struct adaline *ada, const double *x, const int y)

void	fit (struct adaline *ada, double **X, const int *y, const int N)

void	test1 (double eta)

void	test2 (double eta)

void	test3 (double eta)

int	main (int argc, char **argv)

Detailed Description

Adaptive Linear Neuron (ADALINE) implementation

Author: Krishna Vedala

ADALINE is one of the first and simplest single-layer artificial neural networks. The algorithm essentially implements a linear function

\[ f\left(x_0,x_1,x_2,\ldots\right) = \sum_j x_jw_j+\theta \]

where \(x_j\) are the input features of a sample, \(w_j\) are the coefficients of the linear function and \(\theta\) is a constant. If the \(w_j\) are known, the output \(y = f\left(x_0,x_1,x_2,\ldots\right)\) can be computed for any given set of features. Computing the \(w_j\) is a supervised learning problem: a set of feature vectors and their corresponding outputs is given, and the weights are computed using the stochastic gradient descent method.

Function Documentation

◆ activation()

int activation ( double x )

Threshold (Heaviside-style) activation function: returns +1 for \(x>0\) and -1 otherwise.

94 { return x > 0 ? 1 : -1; }

◆ delete_adaline()

void delete_adaline ( struct adaline * ada )

delete dynamically allocated memory

Parameters

[in] ada model from which the memory is to be freed.

 {
     if (ada == NULL)
         return;
  
     free(ada->weights);
 };

◆ fit()

void fit	(	struct adaline *	ada,
		double **	X,
		const int *	y,
		const int	N
	)

Update the weights of the model using supervised learning for an array of vectors.

Parameters

[in]	ada	adaline model to train
[in]	X	array of feature vector
[in]	y	known output value for each feature vector
[in]	N	number of training samples

 {
     double avg_pred_error = 1.f;
  
     int iter;
     for (iter = 0; (iter < MAX_ITER) && (avg_pred_error > ACCURACY); iter++)
     {
         avg_pred_error = 0.f;
  
         // perform fit for each sample
         for (int i = 0; i < N; i++)
         {
             double err = fit_sample(ada, X[i], y[i]);
             avg_pred_error += fabs(err);
         }
         avg_pred_error /= N;
  
         // Print updates every 200th iteration
         // if (iter % 100 == 0)
         printf("\tIter %3d: Training weights: %s\tAvg error: %.4f\n", iter,
                get_weights_str(ada), avg_pred_error);
     }
  
     if (iter < MAX_ITER)
         printf("Converged after %d iterations.\n", iter);
     else
         printf("Did not converged after %d iterations.\n", iter);
 }

◆ fit_sample()

double fit_sample	(	struct adaline *	ada,
		const double *	x,
		const int	y
	)

Update the weights of the model using supervised learning for one feature vector

Parameters

[in]	ada	adaline model to fit
[in]	x	feature vector
[in]	y	known output value

Returns: correction factor

 {
     /* output of the model with current weights */
     int p = predict(ada, x, NULL);
     int prediction_error = y - p;  // error in estimation
     double correction_factor = ada->eta * prediction_error;
  
     /* update each weight, the last weight is the bias term */
     for (int i = 0; i < ada->num_weights - 1; i++)
     {
         ada->weights[i] += correction_factor * x[i];
     }
     ada->weights[ada->num_weights - 1] += correction_factor;  // update bias
  
     return correction_factor;
 }

Here is the call graph for this function:

◆ get_weights_str()

char* get_weights_str ( struct adaline * ada )

Format the weights of the model as a printable string. The returned pointer refers to a static buffer that is overwritten by the next call.

 {
     static char out[100];  // static so the value is persistent
  
     sprintf(out, "<");
     for (int i = 0; i < ada->num_weights; i++)
     {
         sprintf(out, "%s%.4g", out, ada->weights[i]);
         if (i < ada->num_weights - 1)
             sprintf(out, "%s, ", out);
     }
     sprintf(out, "%s>", out);
     return out;
 }

◆ main()

int main	(	int	argc,
		char **	argv
	)

Main function

 {
     srand(time(NULL));  // initialize random number generator
  
     double eta = 0.1;  // default value of eta
     if (argc == 2)     // read eta value from commandline argument if present
         eta = strtof(argv[1], NULL);
  
     test1(eta);
  
     printf("Press ENTER to continue...\n");
     getchar();
  
     test2(eta);
  
     printf("Press ENTER to continue...\n");
     getchar();
  
     test3(eta);
  
     return 0;
 }

Here is the call graph for this function:

◆ new_adaline()

struct adaline new_adaline	(	const int	num_features,
		const double	eta
	)

Default constructor

Parameters

[in]	num_features	number of features present
[in]	eta	learning rate; must satisfy 0 < eta < 1

Returns: new adaline model

 {
     if (eta <= 0.f || eta >= 1.f)
     {
         fprintf(stderr, "learning rate should be > 0 and < 1\n");
         exit(EXIT_FAILURE);
     }
  
     // additional weight is for the constant bias term
     int num_weights = num_features + 1;
     struct adaline ada;
     ada.eta = eta;
     ada.num_weights = num_weights;
     ada.weights = (double *)malloc(num_weights * sizeof(double));
     if (!ada.weights)
     {
         perror("Unable to allocate error for weights!");
         return ada;
     }
  
     // initialize with random weights in the range [-50, 49]
     for (int i = 0; i < num_weights; i++) ada.weights[i] = 1.f;
     // ada.weights[i] = (double)(rand() % 100) - 50);
  
     return ada;
 }

◆ predict()

int predict	(	struct adaline *	ada,
		const double *	x,
		double *	out
	)

predict the output of the model for given set of features

Parameters

[in]	ada	adaline model to predict
[in]	x	input vector
[out]	out	optional argument to return neuron output before applying activation function (`NULL` to ignore)

Returns: model prediction output

 {
     double y = ada->weights[ada->num_weights - 1];  // assign bias value
  
     for (int i = 0; i < ada->num_weights - 1; i++) y += x[i] * ada->weights[i];
  
     if (out)  // if out variable is not NULL
         *out = y;
  
     return activation(y);  // quantizer: apply ADALINE threshold function
 }

Here is the call graph for this function:

◆ test1()

void test1 ( double eta )

test function to predict points in a 2D coordinate system above the line \(x=y\) as +1 and others as -1. Note that each point is defined by 2 values or 2 features.

Parameters

[in] eta learning rate (main() passes 0.1 unless a value is given on the command line)

 {
     struct adaline ada = new_adaline(2, eta);  // 2 features
  
     const int N = 10;  // number of sample points
     const double saved_X[10][2] = {{0, 1},  {1, -2},   {2, 3},   {3, -1},
                                    {4, 1},  {6, -5},   {-7, -3}, {-8, 5},
                                    {-9, 2}, {-10, -15}};
  
     double **X = (double **)malloc(N * sizeof(double *));
     const int Y[10] = {1,  -1, 1, -1, -1,
                        -1, 1,  1, 1,  -1};  // corresponding y-values
     for (int i = 0; i < N; i++)
     {
         X[i] = (double *)saved_X[i];
     }
  
     printf("------- Test 1 -------\n");
     printf("Model before fit: %s", get_weights_str(&ada));
  
     fit(&ada, X, Y, N);
     printf("Model after fit: %s\n", get_weights_str(&ada));
  
     double test_x[] = {5, -3};
     int pred = predict(&ada, test_x, NULL);
     printf("Predict for x=(5,-3): % d", pred);
     assert(pred == -1);
     printf(" ...passed\n");
  
     double test_x2[] = {5, 8};
     pred = predict(&ada, test_x2, NULL);
     printf("Predict for x=(5, 8): % d", pred);
     assert(pred == 1);
     printf(" ...passed\n");
  
     // for (int i = 0; i < N; i++)
     //     free(X[i]);
     free(X);
     delete_adaline(&ada);
 }

Here is the call graph for this function:

◆ test2()

void test2 ( double eta )

test function to predict points in a 2D coordinate system above the line \(x+3y=-1\) as +1 and others as -1. Note that each point is defined by 2 values or 2 features. The function will create random sample points for training and test purposes.

Parameters

[in] eta learning rate (main() passes 0.1 unless a value is given on the command line)

 {
     struct adaline ada = new_adaline(2, eta);  // 2 features
  
     const int N = 50;  // number of sample points
  
     double **X = (double **)malloc(N * sizeof(double *));
     int *Y = (int *)malloc(N * sizeof(int));  // corresponding y-values
     for (int i = 0; i < N; i++) X[i] = (double *)malloc(2 * sizeof(double));
  
     // generate sample points in the interval
     // [-range2/100 , (range2-1)/100]
     int range = 500;          // sample points full-range
     int range2 = range >> 1;  // sample points half-range
     for (int i = 0; i < N; i++)
     {
         double x0 = ((rand() % range) - range2) / 100.f;
         double x1 = ((rand() % range) - range2) / 100.f;
         X[i][0] = x0;
         X[i][1] = x1;
         Y[i] = (x0 + 3. * x1) > -1 ? 1 : -1;
     }
  
     printf("------- Test 2 -------\n");
     printf("Model before fit: %s", get_weights_str(&ada));
  
     fit(&ada, X, Y, N);
     printf("Model after fit: %s\n", get_weights_str(&ada));
  
     int N_test_cases = 5;
     double test_x[2];
     for (int i = 0; i < N_test_cases; i++)
     {
         double x0 = ((rand() % range) - range2) / 100.f;
         double x1 = ((rand() % range) - range2) / 100.f;
  
         test_x[0] = x0;
         test_x[1] = x1;
         int pred = predict(&ada, test_x, NULL);
         printf("Predict for x=(% 3.2f,% 3.2f): % d", x0, x1, pred);
  
         int expected_val = (x0 + 3. * x1) > -1 ? 1 : -1;
         assert(pred == expected_val);
         printf(" ...passed\n");
     }
  
     for (int i = 0; i < N; i++) free(X[i]);
     free(X);
     free(Y);
     delete_adaline(&ada);
 }

Here is the call graph for this function:

◆ test3()

void test3 ( double eta )

test function to predict points in a 3D coordinate system lying within the sphere of radius 1 and centre at origin as +1 and others as -1. Note that each point is defined by 3 values but we use 6 features: the three coordinates and their squares. The function will create random sample points for training and test purposes. The sphere centred at the origin with radius 1 is defined as \(x^2+y^2+z^2=r^2=1\); a point with \(x^2+y^2+z^2\le 1\) is labelled as lying within the sphere, otherwise outside.

Parameters

[in] eta learning rate (main() passes 0.1 unless a value is given on the command line)

 {
     struct adaline ada = new_adaline(6, eta);  // 2 features
  
     const int N = 50;  // number of sample points
  
     double **X = (double **)malloc(N * sizeof(double *));
     int *Y = (int *)malloc(N * sizeof(int));  // corresponding y-values
     for (int i = 0; i < N; i++) X[i] = (double *)malloc(6 * sizeof(double));
  
     // generate sample points in the interval
     // [-range2/100 , (range2-1)/100]
     int range = 200;          // sample points full-range
     int range2 = range >> 1;  // sample points half-range
     for (int i = 0; i < N; i++)
     {
         double x0 = ((rand() % range) - range2) / 100.f;
         double x1 = ((rand() % range) - range2) / 100.f;
         double x2 = ((rand() % range) - range2) / 100.f;
         X[i][0] = x0;
         X[i][1] = x1;
         X[i][2] = x2;
         X[i][3] = x0 * x0;
         X[i][4] = x1 * x1;
         X[i][5] = x2 * x2;
         Y[i] = (x0 * x0 + x1 * x1 + x2 * x2) <= 1 ? 1 : -1;
     }
  
     printf("------- Test 3 -------\n");
     printf("Model before fit: %s", get_weights_str(&ada));
  
     fit(&ada, X, Y, N);
     printf("Model after fit: %s\n", get_weights_str(&ada));
  
     int N_test_cases = 5;
     double test_x[6];
     for (int i = 0; i < N_test_cases; i++)
     {
         double x0 = ((rand() % range) - range2) / 100.f;
         double x1 = ((rand() % range) - range2) / 100.f;
         double x2 = ((rand() % range) - range2) / 100.f;
         test_x[0] = x0;
         test_x[1] = x1;
         test_x[2] = x2;
         test_x[3] = x0 * x0;
         test_x[4] = x1 * x1;
         test_x[5] = x2 * x2;
         int pred = predict(&ada, test_x, NULL);
         printf("Predict for x=(% 3.2f,% 3.2f): % d", x0, x1, pred);
  
         int expected_val = (x0 * x0 + x1 * x1 + x2 * x2) <= 1 ? 1 : -1;
         assert(pred == expected_val);
         printf(" ...passed\n");
     }
  
     for (int i = 0; i < N; i++) free(X[i]);
     free(X);
     free(Y);
     delete_adaline(&ada);
 }

Here is the call graph for this function:

Data Structures

Macros

Functions

Detailed Description

Function Documentation

◆ activation()

◆ delete_adaline()

◆ fit()

◆ fit_sample()

◆ get_weights_str()

◆ main()

◆ new_adaline()

◆ predict()

◆ test1()

◆ test2()

◆ test3()