Algorithms_in_C  1.0.0
Set of algorithms implemented in C.
kohonen_som_trace.c File Reference

Kohonen self organizing map (data tracing) More...

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
Include dependency graph for kohonen_som_trace.c:

Macros

#define _USE_MATH_DEFINES
 
#define max(a, b)   (((a) > (b)) ? (a) : (b))
 
#define min(a, b)   (((a) < (b)) ? (a) : (b))
 

Functions

double _random (double a, double b)
 
int save_nd_data (const char *fname, double **X, int num_points, int num_features)
 
void get_min_1d (double const *X, int N, double *val, int *idx)
 
void update_weights (double const *x, double *const *W, double *D, int num_out, int num_features, double alpha, int R)
 
void kohonen_som_tracer (double **X, double *const *W, int num_samples, int num_features, int num_out, double alpha_min)
 
void test_circle (double *const *data, int N)
 
void test1 ()
 
void test_lamniscate (double *const *data, int N)
 
void test2 ()
 
void test_3d_classes (double *const *data, int N)
 
void test3 ()
 
double get_clock_diff (clock_t start_t, clock_t end_t)
 
int main (int argc, char **argv)
 

Detailed Description

Kohonen self organizing map (data tracing)

Author
Krishna Vedala

This example implements a powerful self organizing map algorithm. The algorithm creates a connected network of weights that closely follows the given data points. This this creates a chain of nodes that resembles the given input shape.

See also
kohonen_som_topology.c

Macro Definition Documentation

◆ _USE_MATH_DEFINES

#define _USE_MATH_DEFINES

required for MS Visual C

◆ max

#define max (   a,
 
)    (((a) > (b)) ? (a) : (b))

shorthand for maximum value \

◆ min

#define min (   a,
 
)    (((a) < (b)) ? (a) : (b))

shorthand for minimum value \

Function Documentation

◆ _random()

double _random ( double  a,
double  b 
)

Helper function to generate a random number in a given interval.
Steps:

  1. r1 = rand() % 100 gets a random number between 0 and 99
  2. r2 = r1 / 100 converts random number to be between 0 and 0.99
  3. scale and offset the random number to given range of \([a,b)\)

    \[ y = (b - a) \times \frac{\text{(random number between 0 and RAND_MAX)} \; \text{mod}\; 100}{100} + a \]

Parameters
[in]alower limit
[in]bupper limit
Returns
random number in the range \([a,b)\)
65 {

◆ get_clock_diff()

double get_clock_diff ( clock_t  start_t,
clock_t  end_t 
)

Convert clock cycle difference to time in seconds

Parameters
[in]start_tstart clock
[in]end_tend clock
Returns
time difference in seconds
507 {

◆ get_min_1d()

void get_min_1d ( double const *  X,
int  N,
double *  val,
int *  idx 
)

Get minimum value and index of the value in a vector

Parameters
[in]xvector to search
[in]Nnumber of points in the vector
[out]valminimum value found
[out]idxindex where minimum value was found
102  {
103  if (X[i] < val[0]) // if a lower value is found
104  { // save the value and its index
105  idx[0] = i;
106  val[0] = X[i];
107  }
108  }
109 }
110 
111 /**

◆ kohonen_som_tracer()

void kohonen_som_tracer ( double **  X,
double *const *  W,
int  num_samples,
int  num_features,
int  num_out,
double  alpha_min 
)

Apply incremental algorithm with updating neighborhood and learning rates on all samples in the given datset.

Parameters
[in]Xdata set
[in,out]Wweights matrix
[in]Dtemporary vector to store distances
[in]num_samplesnumber of output points
[in]num_featuresnumber of features per input sample
[in]num_outnumber of output points
[in]alpha_minterminal value of alpha
175 {
176  int R = num_out >> 2, iter = 0;
177  double alpha = 1.f;
178  double *D = (double *)malloc(num_out * sizeof(double));
179 
180  // Loop alpha from 1 to slpha_min
181  for (; alpha > alpha_min; alpha -= 0.01, iter++)
182  {
183  // Loop for each sample pattern in the data set
184  for (int sample = 0; sample < num_samples; sample++)
185  {
186  const double *x = X[sample];
187  // update weights for the current input pattern sample
188  update_weights(x, W, D, num_out, num_features, alpha, R);
189  }
190 
191  // every 10th iteration, reduce the neighborhood range
192  if (iter % 10 == 0 && R > 1)
193  R--;
194  }
195 
196  free(D);
197 }
198 
199 /** Creates a random set of points distributed *near* the circumference
Here is the call graph for this function:

◆ main()

int main ( int  argc,
char **  argv 
)

Main function

529  : Calculated times include: creating test sets, training "
530  "model and writing files to disk.)\n\n");
531  return 0;
532 }

◆ save_nd_data()

int save_nd_data ( const char *  fname,
double **  X,
int  num_points,
int  num_features 
)

Save a given n-dimensional data martix to file.

Parameters
[in]fnamefilename to save in (gets overwriten without confirmation)
[in]Xmatrix to save
[in]num_pointsrows in the matrix = number of points
[in]num_featurescolumns in the matrix = dimensions of points
Returns
0 if all ok
-1 if file creation failed
65 {
66  FILE *fp = fopen(fname, "wt");
67  if (!fp) // error with fopen
68  {
69  char msg[120];
70  sprintf(msg, "File error (%s): ", fname);
71  perror(msg);
72  return -1;
73  }
74 
75  for (int i = 0; i < num_points; i++) // for each point in the array
76  {
77  for (int j = 0; j < num_features; j++) // for each feature in the array
78  {
79  fprintf(fp, "%.4g", X[i][j]); // print the feature value
80  if (j < num_features - 1) // if not the last feature
81  fprintf(fp, ","); // suffix comma
82  }
83  if (i < num_points - 1) // if not the last row
84  fprintf(fp, "\n"); // start a new line
85  }
86  fclose(fp);
87  return 0;
88 }
89 
90 /**

◆ test1()

void test1 ( )

Test that creates a random set of points distributed near the circumference of a circle and trains an SOM that finds that circular pattern. The following CSV files are created to validate the execution:

  • test1.csv: random test samples points with a circular pattern
  • w11.csv: initial random map
  • w12.csv: trained SOM map

The outputs can be readily plotted in gnuplot using the following snippet

set datafile separator ','
plot "test1.csv" title "original", \
"w11.csv" title "w1", \
"w12.csv" title "w2"

Sample execution
output

263  {
264  if (i < N) // only add new arrays if i < N
265  X[i] = (double *)malloc(features * sizeof(double));
266  if (i < num_out) // only add new arrays if i < num_out
267  {
268  W[i] = (double *)malloc(features * sizeof(double));
269 #ifdef _OPENMP
270 #pragma omp for
271 #endif
272  // preallocate with random initial weights
273  for (j = 0; j < features; j++) W[i][j] = _random(-1, 1);
274  }
275  }
276 
277  test_circle(X, N); // create test data around circumference of a circle
278  save_nd_data("test1.csv", X, N, features); // save test data points
279  save_nd_data("w11.csv", W, num_out,
280  features); // save initial random weights
281  kohonen_som_tracer(X, W, N, features, num_out, 0.1); // train the SOM
282  save_nd_data("w12.csv", W, num_out,
283  features); // save the resultant weights
284 
285  for (int i = 0; i < max(num_out, N); i++)
286  {
287  if (i < N)
288  free(X[i]);
289  if (i < num_out)
290  free(W[i]);
291  }
292 }
293 
294 /** Creates a random set of points distributed *near* the locus

◆ test2()

void test2 ( )

Test that creates a random set of points distributed near the locus of the Lamniscate of Gerono and trains an SOM that finds that circular pattern. The following CSV files are created to validate the execution:

  • test2.csv: random test samples points with a lamniscate pattern
  • w21.csv: initial random map
  • w22.csv: trained SOM map

The outputs can be readily plotted in gnuplot using the following snippet

set datafile separator ','
plot "test2.csv" title "original", \
"w21.csv" title "w1", \
"w22.csv" title "w2"

Sample execution
output

355  {
356  if (i < N) // only add new arrays if i < N
357  X[i] = (double *)malloc(features * sizeof(double));
358  if (i < num_out) // only add new arrays if i < num_out
359  {
360  W[i] = (double *)malloc(features * sizeof(double));
361 
362 #ifdef _OPENMP
363 #pragma omp for
364 #endif
365  // preallocate with random initial weights
366  for (j = 0; j < features; j++) W[i][j] = _random(-1, 1);
367  }
368  }
369 
370  test_lamniscate(X, N); // create test data around the lamniscate
371  save_nd_data("test2.csv", X, N, features); // save test data points
372  save_nd_data("w21.csv", W, num_out,
373  features); // save initial random weights
374  kohonen_som_tracer(X, W, N, features, num_out, 0.01); // train the SOM
375  save_nd_data("w22.csv", W, num_out,
376  features); // save the resultant weights
377 
378  for (int i = 0; i < max(num_out, N); i++)
379  {
380  if (i < N)
381  free(X[i]);
382  if (i < num_out)
383  free(W[i]);
384  }
385  free(X);
386  free(W);
387 }
388 
389 /** Creates a random set of points distributed in four clusters in

◆ test3()

void test3 ( )

Test that creates a random set of points distributed in six clusters in 3D space. The following CSV files are created to validate the execution:

  • test3.csv: random test samples points with a circular pattern
  • w31.csv: initial random map
  • w32.csv: trained SOM map

The outputs can be readily plotted in gnuplot using the following snippet

set datafile separator ','
plot "test3.csv" title "original", \
"w31.csv" title "w1", \
"w32.csv" title "w2"

Sample execution
output

459  {
460  if (i < N) // only add new arrays if i < N
461  X[i] = (double *)malloc(features * sizeof(double));
462  if (i < num_out) // only add new arrays if i < num_out
463  {
464  W[i] = (double *)malloc(features * sizeof(double));
465 
466 #ifdef _OPENMP
467 #pragma omp for
468 #endif
469  // preallocate with random initial weights
470  for (j = 0; j < features; j++) W[i][j] = _random(-1, 1);
471  }
472  }
473 
474  test_3d_classes(X, N); // create test data around the lamniscate
475  save_nd_data("test3.csv", X, N, features); // save test data points
476  save_nd_data("w31.csv", W, num_out,
477  features); // save initial random weights
478  kohonen_som_tracer(X, W, N, features, num_out, 0.01); // train the SOM
479  save_nd_data("w32.csv", W, num_out,
480  features); // save the resultant weights
481 
482  for (int i = 0; i < max(num_out, N); i++)
483  {
484  if (i < N)
485  free(X[i]);
486  if (i < num_out)
487  free(W[i]);
488  }
489  free(X);
490  free(W);
491 }
492 
493 /**

◆ test_3d_classes()

void test_3d_classes ( double *const *  data,
int  N 
)

Creates a random set of points distributed in four clusters in 3D space with centroids at the points

  • \((0,5, 0.5, 0.5)\)
  • \((0,5,-0.5, -0.5)\)
  • \((-0,5, 0.5, 0.5)\)
  • \((-0,5,-0.5, -0.5)\)
Parameters
[out]datamatrix to store data in
[in]Nnumber of points required
404  {
405  // centres of each class cluster
406  {.5, .5, .5}, // centre of class 1
407  {.5, -.5, -.5}, // centre of class 2
408  {-.5, .5, .5}, // centre of class 3
409  {-.5, -.5 - .5} // centre of class 4
410  };
411 
412 #ifdef _OPENMP
413 #pragma omp for
414 #endif
415  for (i = 0; i < N; i++)
416  {
417  int class =
418  rand() % num_classes; // select a random class for the point
419 
420  // create random coordinates (x,y,z) around the centre of the class
421  data[i][0] = _random(centres[class][0] - R, centres[class][0] + R);
422  data[i][1] = _random(centres[class][1] - R, centres[class][1] + R);
423  data[i][2] = _random(centres[class][2] - R, centres[class][2] + R);
424 
425  /* The follosing can also be used
426  for (int j = 0; j < 3; j++)
427  data[i][j] = _random(centres[class][j] - R, centres[class][j] + R);
428  */
429  }
430 }
431 
432 /** Test that creates a random set of points distributed in six clusters in

◆ test_circle()

void test_circle ( double *const *  data,
int  N 
)

Creates a random set of points distributed near the circumference of a circle and trains an SOM that finds that circular pattern. The generating function is

\begin{eqnarray*} r &\in& [1-\delta r, 1+\delta r)\\ \theta &\in& [0, 2\pi)\\ x &=& r\cos\theta\\ y &=& r\sin\theta \end{eqnarray*}

Parameters
[out]datamatrix to store data in
[in]Nnumber of points required
223  {
224  double r = _random(a_r, b_r); // random radius
225  double theta = _random(a_t, b_t); // random theta
226  data[i][0] = r * cos(theta); // convert from polar to cartesian
227  data[i][1] = r * sin(theta);
228  }
229 }
230 
231 /** Test that creates a random set of points distributed *near* the
Here is the call graph for this function:

◆ test_lamniscate()

void test_lamniscate ( double *const *  data,
int  N 
)

Creates a random set of points distributed near the locus of the Lamniscate of Gerono.

\begin{eqnarray*} \delta r &=& 0.2\\ \delta x &\in& [-\delta r, \delta r)\\ \delta y &\in& [-\delta r, \delta r)\\ \theta &\in& [0, \pi)\\ x &=& \delta x + \cos\theta\\ y &=& \delta y + \frac{\sin(2\theta)}{2} \end{eqnarray*}

Parameters
[out]datamatrix to store data in
[in]Nnumber of points required
317  {
318  double dx = _random(-dr, dr); // random change in x
319  double dy = _random(-dr, dr); // random change in y
320  double theta = _random(0, M_PI); // random theta
321  data[i][0] = dx + cos(theta); // convert from polar to cartesian
322  data[i][1] = dy + sin(2. * theta) / 2.f;
323  }
324 }
325 
326 /** Test that creates a random set of points distributed *near* the locus
Here is the call graph for this function:

◆ update_weights()

void update_weights ( double const *  x,
double *const *  W,
double *  D,
int  num_out,
int  num_features,
double  alpha,
int  R 
)

Update weights of the SOM using Kohonen algorithm

Parameters
[in]Xdata point
[in,out]Wweights matrix
[in,out]Dtemporary vector to store distances
[in]num_outnumber of output points
[in]num_featuresnumber of features per input sample
[in]alphalearning rate \(0<\alpha\le1\)
[in]Rneighborhood range
124 {
125  int j, k;
126 
127 #ifdef _OPENMP
128 #pragma omp for
129 #endif
130  // step 1: for each output point
131  for (j = 0; j < num_out; j++)
132  {
133  D[j] = 0.f;
134  // compute Euclidian distance of each output
135  // point from the current sample
136  for (k = 0; k < num_features; k++)
137  D[j] += (W[j][k] - x[k]) * (W[j][k] - x[k]);
138  }
139 
140  // step 2: get closest node i.e., node with snallest Euclidian distance to
141  // the current pattern
142  int d_min_idx;
143  double d_min;
144  get_min_1d(D, num_out, &d_min, &d_min_idx);
145 
146  // step 3a: get the neighborhood range
147  int from_node = max(0, d_min_idx - R);
148  int to_node = min(num_out, d_min_idx + R + 1);
149 
150  // step 3b: update the weights of nodes in the
151  // neighborhood
152 #ifdef _OPENMP
153 #pragma omp for
154 #endif
155  for (j = from_node; j < to_node; j++)
156  for (k = 0; k < num_features; k++)
157  // update weights of nodes in the neighborhood
158  W[j][k] += alpha * (x[k] - W[j][k]);
159 }
160 
161 /**
Here is the call graph for this function:
save_nd_data
int save_nd_data(const char *fname, double **X, int num_points, int num_features)
Definition: kohonen_som_trace.c:65
max
#define max(a, b)
Definition: kohonen_som_trace.c:24
test_circle
void test_circle(double *const *data, int N)
Definition: kohonen_som_trace.c:214
data
Definition: prime_factoriziation.c:25
kohonen_som_tracer
void kohonen_som_tracer(double **X, double *const *W, int num_samples, int num_features, int num_out, double alpha_min)
Definition: kohonen_som_trace.c:175
update_weights
void update_weights(double const *x, double *const *W, double *D, int num_out, int num_features, double alpha, int R)
Definition: kohonen_som_trace.c:124
N
#define N
Definition: sol1.c:109
get_min_1d
void get_min_1d(double const *X, int N, double *val, int *idx)
Definition: kohonen_som_trace.c:99
_random
double _random(double a, double b)
Definition: kohonen_som_trace.c:50
test_lamniscate
void test_lamniscate(double *const *data, int N)
Definition: kohonen_som_trace.c:310
min
#define min(a, b)
Definition: kohonen_som_trace.c:30
test_3d_classes
void test_3d_classes(double *const *data, int N)
Definition: kohonen_som_trace.c:401