Algorithms_in_C  1.0.0
Set of algorithms implemented in C.
kohonen_som_topology.c File Reference

Kohonen self organizing map (topological map) More...

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
Include dependency graph for kohonen_som_topology.c:

Data Structures

struct  array_3d
 

Macros

#define _USE_MATH_DEFINES
 
#define max(a, b)   (((a) > (b)) ? (a) : (b))
 
#define min(a, b)   (((a) < (b)) ? (a) : (b))
 

Functions

double * data_3d (const struct array_3d *arr, int x, int y, int z)
 
double _random (double a, double b)
 
int save_2d_data (const char *fname, double **X, int num_points, int num_features)
 
int save_u_matrix (const char *fname, struct array_3d *W)
 
void get_min_2d (double **X, int N, double *val, int *x_idx, int *y_idx)
 
double update_weights (const double *X, struct array_3d *W, double **D, int num_out, int num_features, double alpha, int R)
 
void kohonen_som (double **X, struct array_3d *W, int num_samples, int num_features, int num_out, double alpha_min)
 
void test_2d_classes (double *const *data, int N)
 
void test1 ()
 
void test_3d_classes1 (double *const *data, int N)
 
void test2 ()
 
void test_3d_classes2 (double *const *data, int N)
 
void test3 ()
 
double get_clock_diff (clock_t start_t, clock_t end_t)
 
int main (int argc, char **argv)
 

Detailed Description

Kohonen self organizing map (topological map)

Author
Krishna Vedala This example implements a powerful unsupervised learning algorithm called as a self organizing map. The algorithm creates a connected network of weights that closely follows the given data points. This thus creates a topological map of the given data i.e., it maintains the relationship between varipus data points in a much higher dimesional space by creating an equivalent in a 2-dimensional space. Trained topological maps for the test cases in the program
Warning
MSVC 2019 compiler generates code that does not execute as expected. However, MinGW, Clang for GCC and Clang for MSVC compilers on windows perform as expected. Any insights and suggestions should be directed to the author.
See also
kohonen_som_trace.c

Macro Definition Documentation

◆ _USE_MATH_DEFINES

#define _USE_MATH_DEFINES

required for MS Visual C

◆ max

#define max (   a,
 
)    (((a) > (b)) ? (a) : (b))

shorthand for maximum value \

◆ min

#define min (   a,
 
)    (((a) < (b)) ? (a) : (b))

shorthand for minimum value \

Function Documentation

◆ _random()

double _random ( double  a,
double  b 
)

Helper function to generate a random number in a given interval.
Steps:

  1. r1 = rand() % 100 gets a random number between 0 and 99
  2. r2 = r1 / 100 converts random number to be between 0 and 0.99
  3. scale and offset the random number to given range of \([a,b)\)

    \[ y = (b - a) \times \frac{\text{(random number between 0 and RAND_MAX)} \; \text{mod}\; 100}{100} + a \]

Parameters
[in]alower limit
[in]bupper limit
Returns
random number in the range \([a,b)\)
99 {

◆ data_3d()

double* data_3d ( const struct array_3d arr,
int  x,
int  y,
int  z 
)

Function that returns the pointer to (x, y, z) ^th location in the linear 3D array given by:

\[ X_{i,j,k} = i\times M\times N + j\times N + k \]

where \(L\), \(M\) and \(N\) are the 3D matrix dimensions.

Parameters
[in]arrpointer to array_3d structure
[in]xfirst index
[in]ysecond index
[in]zthird index
Returns
pointer to (x,y,z)^th location of data
83 {

◆ get_clock_diff()

double get_clock_diff ( clock_t  start_t,
clock_t  end_t 
)

Convert clock cycle difference to time in seconds

Parameters
[in]start_tstart clock
[in]end_tend clock
Returns
time difference in seconds
661 {

◆ get_min_2d()

void get_min_2d ( double **  X,
int  N,
double *  val,
int *  x_idx,
int *  y_idx 
)

Get minimum value and index of the value in a matrix

Parameters
[in]Xmatrix to search
[in]Nnumber of points in the vector
[out]valminimum value found
[out]idx_xx-index where minimum value was found
[out]idx_yy-index where minimum value was found
204  {
205  for (int j = 0; j < N; j++) // traverse each y-index
206  {
207  if (X[i][j] < val[0]) // if a lower value is found
208  { // save the value and its index
209  x_idx[0] = i;
210  y_idx[0] = j;
211  val[0] = X[i][j];
212  }
213  }
214  }
215 }
216 
217 /**

◆ kohonen_som()

void kohonen_som ( double **  X,
struct array_3d W,
int  num_samples,
int  num_features,
int  num_out,
double  alpha_min 
)

Apply incremental algorithm with updating neighborhood and learning rates on all samples in the given datset.

Parameters
[in]Xdata set
[in,out]Wweights matrix
[in]Dtemporary vector to store distances
[in]num_samplesnumber of output points
[in]num_featuresnumber of features per input sample
[in]num_outnumber of output points
[in]alpha_minterminal value of alpha
311 {
312  int R = num_out >> 2, iter = 0;
313  double **D = (double **)malloc(num_out * sizeof(double *));
314  for (int i = 0; i < num_out; i++)
315  D[i] = (double *)malloc(num_out * sizeof(double));
316 
317  double dmin = 1.f; // average minimum distance of all samples
318 
319  // Loop alpha from 1 to slpha_min
320  for (double alpha = 1.f; alpha > alpha_min && dmin > 1e-3;
321  alpha -= 0.001, iter++)
322  {
323  dmin = 0.f;
324  // Loop for each sample pattern in the data set
325  for (int sample = 0; sample < num_samples; sample++)
326  {
327  // update weights for the current input pattern sample
328  dmin += update_weights(X[sample], W, D, num_out, num_features,
329  alpha, R);
330  }
331 
332  // every 20th iteration, reduce the neighborhood range
333  if (iter % 100 == 0 && R > 1)
334  R--;
335 
336  dmin /= num_samples;
337  printf("iter: %5d\t alpha: %.4g\t R: %d\td_min: %.4g\r", iter, alpha, R,
338  dmin);
339  }
340  putchar('\n');
341 
342  for (int i = 0; i < num_out; i++)
343  free(D[i]);
344  free(D);
345 }
346 
347 /** Creates a random set of points distributed in four clusters in
Here is the call graph for this function:

◆ main()

int main ( int  argc,
char **  argv 
)

Main function

687  : Calculated times include: writing files to disk.)\n\n");
688  return 0;
689 }

◆ save_2d_data()

int save_2d_data ( const char *  fname,
double **  X,
int  num_points,
int  num_features 
)

Save a given n-dimensional data martix to file.

Parameters
[in]fnamefilename to save in (gets overwriten without confirmation)
[in]Xmatrix to save
[in]num_pointsrows in the matrix = number of points
[in]num_featurescolumns in the matrix = dimensions of points
Returns
0 if all ok
-1 if file creation failed
99 {
100  FILE *fp = fopen(fname, "wt");
101  if (!fp) // error with fopen
102  {
103  char msg[120];
104  sprintf(msg, "File error (%s): ", fname);
105  perror(msg);
106  return -1;
107  }
108 
109  for (int i = 0; i < num_points; i++) // for each point in the array
110  {
111  for (int j = 0; j < num_features; j++) // for each feature in the array
112  {
113  fprintf(fp, "%.4g", X[i][j]); // print the feature value
114  if (j < num_features - 1) // if not the last feature
115  fputc(',', fp); // suffix comma
116  }
117  if (i < num_points - 1) // if not the last row
118  fputc('\n', fp); // start a new line
119  }
120  fclose(fp);
121  return 0;
122 }
123 
124 /**

◆ save_u_matrix()

int save_u_matrix ( const char *  fname,
struct array_3d W 
)

Create the distance matrix or U-matrix from the trained weights and save to disk.

Parameters
[in]fnamefilename to save in (gets overwriten without confirmation)
[in]Wmodel matrix to save
Returns
0 if all ok
-1 if file creation failed
138  {
139  char msg[120];
140  sprintf(msg, "File error (%s): ", fname);
141  perror(msg);
142  return -1;
143  }
144 
145  int R = max(W->dim1 >> 3, 2); /* neighborhood range */
146 
147  for (int i = 0; i < W->dim1; i++) // for each x
148  {
149  for (int j = 0; j < W->dim2; j++) // for each y
150  {
151  double distance = 0.f;
152  int k;
153 
154  int from_x = max(0, i - R);
155  int to_x = min(W->dim1, i + R + 1);
156  int from_y = max(0, j - R);
157  int to_y = min(W->dim2, j + R + 1);
158  int l;
159 #ifdef _OPENMP
160 #pragma omp parallel for reduction(+ : distance)
161 #endif
162  for (l = from_x; l < to_x; l++) // scan neighborhoor in x
163  {
164  for (int m = from_y; m < to_y; m++) // scan neighborhood in y
165  {
166  double d = 0.f;
167  for (k = 0; k < W->dim3; k++) // for each feature
168  {
169  double *w1 = data_3d(W, i, j, k);
170  double *w2 = data_3d(W, l, m, k);
171  d += (w1[0] - w2[0]) * (w1[0] - w2[0]);
172  // distance += w1[0] * w1[0];
173  }
174  distance += sqrt(d);
175  // distance += d;
176  }
177  }
178 
179  distance /= R * R; // mean distance from neighbors
180  fprintf(fp, "%.4g", distance); // print the mean separation
181  if (j < W->dim2 - 1) // if not the last column
182  fputc(',', fp); // suffix comma
183  }
184  if (i < W->dim1 - 1) // if not the last row
185  fputc('\n', fp); // start a new line
186  }
187  fclose(fp);
188  return 0;
189 }
190 
191 /**

◆ test1()

void test1 ( )

Test that creates a random set of points distributed in four clusters in 2D space and trains an SOM that finds the topological pattern. The following CSV files are created to validate the execution:

  • test1.csv: random test samples points with a circular pattern
  • w11.csv: initial random U-matrix
  • w12.csv: trained SOM U-matrix
414  {
415  if (i < N) // only add new arrays if i < N
416  X[i] = (double *)malloc(features * sizeof(double));
417  if (i < num_out) // only add new arrays if i < num_out
418  {
419  for (int k = 0; k < num_out; k++)
420  {
421 #ifdef _OPENMP
422 #pragma omp for
423 #endif
424  // preallocate with random initial weights
425  for (j = 0; j < features; j++)
426  {
427  double *w = data_3d(&W, i, k, j);
428  w[0] = _random(-5, 5);
429  }
430  }
431  }
432  }
433 
434  test_2d_classes(X, N); // create test data around circumference of a circle
435  save_2d_data("test1.csv", X, N, features); // save test data points
436  save_u_matrix("w11.csv", &W); // save initial random weights
437  kohonen_som(X, &W, N, features, num_out, 1e-4); // train the SOM
438  save_u_matrix("w12.csv", &W); // save the resultant weights
439 
440  for (int i = 0; i < N; i++)
441  free(X[i]);
442  free(X);
443  free(W.data);
444 }
445 
446 /** Creates a random set of points distributed in four clusters in

◆ test2()

void test2 ( )

Test that creates a random set of points distributed in 4 clusters in 3D space and trains an SOM that finds the topological pattern. The following CSV files are created to validate the execution:

  • test2.csv: random test samples points
  • w21.csv: initial random U-matrix
  • w22.csv: trained SOM U-matrix
514  {
515  if (i < N) // only add new arrays if i < N
516  X[i] = (double *)malloc(features * sizeof(double));
517  if (i < num_out) // only add new arrays if i < num_out
518  {
519  for (int k = 0; k < num_out; k++)
520  {
521 #ifdef _OPENMP
522 #pragma omp for
523 #endif
524  for (j = 0; j < features; j++)
525  { // preallocate with random initial weights
526  double *w = data_3d(&W, i, k, j);
527  w[0] = _random(-5, 5);
528  }
529  }
530  }
531  }
532 
533  test_3d_classes1(X, N); // create test data
534  save_2d_data("test2.csv", X, N, features); // save test data points
535  save_u_matrix("w21.csv", &W); // save initial random weights
536  kohonen_som(X, &W, N, features, num_out, 1e-4); // train the SOM
537  save_u_matrix("w22.csv", &W); // save the resultant weights
538 
539  for (int i = 0; i < N; i++)
540  free(X[i]);
541  free(X);
542  free(W.data);
543 }
544 
545 /** Creates a random set of points distributed in four clusters in

◆ test3()

void test3 ( )

Test that creates a random set of points distributed in eight clusters in 3D space and trains an SOM that finds the topological pattern. The following CSV files are created to validate the execution:

  • test3.csv: random test samples points
  • w31.csv: initial random U-matrix
  • w32.csv: trained SOM U-matrix
615  {
616  if (i < N) // only add new arrays if i < N
617  X[i] = (double *)malloc(features * sizeof(double));
618  if (i < num_out) // only add new arrays if i < num_out
619  {
620  for (int k = 0; k < num_out; k++)
621  {
622 #ifdef _OPENMP
623 #pragma omp for
624 #endif
625  // preallocate with random initial weights
626  for (j = 0; j < features; j++)
627  {
628  double *w = data_3d(&W, i, k, j);
629  w[0] = _random(-5, 5);
630  }
631  }
632  }
633  }
634 
635  test_3d_classes2(X, N); // create test data around the lamniscate
636  save_2d_data("test3.csv", X, N, features); // save test data points
637  save_u_matrix("w31.csv", &W); // save initial random weights
638  kohonen_som(X, &W, N, features, num_out, 0.01); // train the SOM
639  save_u_matrix("w32.csv", &W); // save the resultant weights
640 
641  for (int i = 0; i < N; i++)
642  free(X[i]);
643  free(X);
644  free(W.data);
645 }
646 
647 /**

◆ test_2d_classes()

void test_2d_classes ( double *const *  data,
int  N 
)

Creates a random set of points distributed in four clusters in 3D space with centroids at the points

  • \((0,5, 0.5, 0.5)\)
  • \((0,5,-0.5, -0.5)\)
  • \((-0,5, 0.5, 0.5)\)
  • \((-0,5,-0.5, -0.5)\)
Parameters
[out]datamatrix to store data in
[in]Nnumber of points required
362  {
363  // centres of each class cluster
364  {.5, .5}, // centre of class 1
365  {.5, -.5}, // centre of class 2
366  {-.5, .5}, // centre of class 3
367  {-.5, -.5} // centre of class 4
368  };
369 
370 #ifdef _OPENMP
371 #pragma omp for
372 #endif
373  for (i = 0; i < N; i++)
374  {
375  int class = rand() % num_classes; // select a random class for the point
376 
377  // create random coordinates (x,y,z) around the centre of the class
378  data[i][0] = _random(centres[class][0] - R, centres[class][0] + R);
379  data[i][1] = _random(centres[class][1] - R, centres[class][1] + R);
380 
381  /* The follosing can also be used
382  for (int j = 0; j < 2; j++)
383  data[i][j] = _random(centres[class][j] - R, centres[class][j] + R);
384  */
385  }
386 }
387 
388 /** Test that creates a random set of points distributed in four clusters in

◆ test_3d_classes1()

void test_3d_classes1 ( double *const *  data,
int  N 
)

Creates a random set of points distributed in four clusters in 3D space with centroids at the points

  • \((0,5, 0.5, 0.5)\)
  • \((0,5,-0.5, -0.5)\)
  • \((-0,5, 0.5, 0.5)\)
  • \((-0,5,-0.5, -0.5)\)
Parameters
[out]datamatrix to store data in
[in]Nnumber of points required
461  {
462  // centres of each class cluster
463  {.5, .5, .5}, // centre of class 1
464  {.5, -.5, -.5}, // centre of class 2
465  {-.5, .5, .5}, // centre of class 3
466  {-.5, -.5 - .5} // centre of class 4
467  };
468 
469 #ifdef _OPENMP
470 #pragma omp for
471 #endif
472  for (i = 0; i < N; i++)
473  {
474  int class = rand() % num_classes; // select a random class for the point
475 
476  // create random coordinates (x,y,z) around the centre of the class
477  data[i][0] = _random(centres[class][0] - R, centres[class][0] + R);
478  data[i][1] = _random(centres[class][1] - R, centres[class][1] + R);
479  data[i][2] = _random(centres[class][2] - R, centres[class][2] + R);
480 
481  /* The follosing can also be used
482  for (int j = 0; j < 3; j++)
483  data[i][j] = _random(centres[class][j] - R, centres[class][j] + R);
484  */
485  }
486 }
487 
488 /** Test that creates a random set of points distributed in 4 clusters in

◆ test_3d_classes2()

void test_3d_classes2 ( double *const *  data,
int  N 
)

Creates a random set of points distributed in four clusters in 3D space with centroids at the points

  • \((0,5, 0.5, 0.5)\)
  • \((0,5,-0.5, -0.5)\)
  • \((-0,5, 0.5, 0.5)\)
  • \((-0,5,-0.5, -0.5)\)
Parameters
[out]datamatrix to store data in
[in]Nnumber of points required
560  {
561  // centres of each class cluster
562  {.5, .5, .5}, // centre of class 1
563  {.5, .5, -.5}, // centre of class 2
564  {.5, -.5, .5}, // centre of class 3
565  {.5, -.5, -.5}, // centre of class 4
566  {-.5, .5, .5}, // centre of class 5
567  {-.5, .5, -.5}, // centre of class 6
568  {-.5, -.5, .5}, // centre of class 7
569  {-.5, -.5, -.5} // centre of class 8
570  };
571 
572 #ifdef _OPENMP
573 #pragma omp for
574 #endif
575  for (i = 0; i < N; i++)
576  {
577  int class = rand() % num_classes; // select a random class for the point
578 
579  // create random coordinates (x,y,z) around the centre of the class
580  data[i][0] = _random(centres[class][0] - R, centres[class][0] + R);
581  data[i][1] = _random(centres[class][1] - R, centres[class][1] + R);
582  data[i][2] = _random(centres[class][2] - R, centres[class][2] + R);
583 
584  /* The follosing can also be used
585  for (int j = 0; j < 3; j++)
586  data[i][j] = _random(centres[class][j] - R, centres[class][j] + R);
587  */
588  }
589 }
590 
591 /** Test that creates a random set of points distributed in eight clusters in

◆ update_weights()

double update_weights ( const double *  X,
struct array_3d W,
double **  D,
int  num_out,
int  num_features,
double  alpha,
int  R 
)

Update weights of the SOM using Kohonen algorithm

Parameters
[in]Xdata point
[in,out]Wweights matrix
[in,out]Dtemporary vector to store distances
[in]num_outnumber of output points
[in]num_featuresnumber of features per input sample
[in]alphalearning rate \(0<\alpha\le1\)
[in]Rneighborhood range
Returns
minimum distance of sample and trained weights
231 {
232  int x, y, k;
233  double d_min = 0.f;
234 
235 #ifdef _OPENMP
236 #pragma omp for
237 #endif
238  // step 1: for each 2D output point
239  for (x = 0; x < num_out; x++)
240  {
241  for (y = 0; y < num_out; y++)
242  {
243  D[x][y] = 0.f;
244  // compute Euclidian distance of each output
245  // point from the current sample
246  for (k = 0; k < num_features; k++)
247  {
248  double *w = data_3d(W, x, y, k);
249  D[x][y] += (w[0] - X[k]) * (w[0] - X[k]);
250  }
251  D[x][y] = sqrt(D[x][y]);
252  }
253  }
254 
255  // step 2: get closest node i.e., node with smallest Euclidian distance to
256  // the current pattern
257  int d_min_x, d_min_y;
258  get_min_2d(D, num_out, &d_min, &d_min_x, &d_min_y);
259 
260  // step 3a: get the neighborhood range
261  int from_x = max(0, d_min_x - R);
262  int to_x = min(num_out, d_min_x + R + 1);
263  int from_y = max(0, d_min_y - R);
264  int to_y = min(num_out, d_min_y + R + 1);
265 
266  // step 3b: update the weights of nodes in the
267  // neighborhood
268 #ifdef _OPENMP
269 #pragma omp for
270 #endif
271  for (x = from_x; x < to_x; x++)
272  {
273  for (y = from_y; y < to_y; y++)
274  {
275  /* you can enable the following normalization if needed.
276  personally, I found it detrimental to convergence */
277  // const double s2pi = sqrt(2.f * M_PI);
278  // double normalize = 1.f / (alpha * s2pi);
279 
280  /* apply scaling inversely proportional to distance from the
281  current node */
282  double d2 =
283  (d_min_x - x) * (d_min_x - x) + (d_min_y - y) * (d_min_y - y);
284  double scale_factor = exp(-d2 / (2.f * alpha * alpha));
285 
286  for (k = 0; k < num_features; k++)
287  {
288  double *w = data_3d(W, x, y, k);
289  // update weights of nodes in the neighborhood
290  w[0] += alpha * scale_factor * (X[k] - w[0]);
291  }
292  }
293  }
294  return d_min;
295 }
296 
297 /**
Here is the call graph for this function:
min
#define min(a, b)
Definition: kohonen_som_topology.c:37
update_weights
double update_weights(const double *X, struct array_3d *W, double **D, int num_out, int num_features, double alpha, int R)
Definition: kohonen_som_topology.c:231
data
Definition: prime_factoriziation.c:25
data_3d
double * data_3d(const struct array_3d *arr, int x, int y, int z)
Definition: kohonen_som_topology.c:64
test_3d_classes1
void test_3d_classes1(double *const *data, int N)
Definition: kohonen_som_topology.c:458
test_3d_classes2
void test_3d_classes2(double *const *data, int N)
Definition: kohonen_som_topology.c:557
N
#define N
Definition: sol1.c:111
save_u_matrix
int save_u_matrix(const char *fname, struct array_3d *W)
Definition: kohonen_som_topology.c:136
array_3d::dim3
int dim3
Definition: kohonen_som_topology.c:48
array_3d::dim1
int dim1
Definition: kohonen_som_topology.c:46
get_min_2d
void get_min_2d(double **X, int N, double *val, int *x_idx, int *y_idx)
Definition: kohonen_som_topology.c:201
_random
double _random(double a, double b)
Definition: kohonen_som_topology.c:84
save_2d_data
int save_2d_data(const char *fname, double **X, int num_points, int num_features)
Definition: kohonen_som_topology.c:99
max
#define max(a, b)
Definition: kohonen_som_topology.c:31
array_3d::dim2
int dim2
Definition: kohonen_som_topology.c:47
kohonen_som
void kohonen_som(double **X, struct array_3d *W, int num_samples, int num_features, int num_out, double alpha_min)
Definition: kohonen_som_topology.c:311
test_2d_classes
void test_2d_classes(double *const *data, int N)
Definition: kohonen_som_topology.c:359