Algorithms_in_C  1.0.0
Set of algorithms implemented in C.
Kohonen SOM topology algorithm
Collaboration diagram for Kohonen SOM topology algorithm:

Data Structures

struct  kohonen_array_3d
 to store info regarding 3D arrays More...
 

Macros

#define max(a, b)   (((a) > (b)) ? (a) : (b))
 shorthand for maximum value
 
#define min(a, b)   (((a) < (b)) ? (a) : (b))
 shorthand for minimum value
 

Functions

double * kohonen_data_3d (const struct kohonen_array_3d *arr, int x, int y, int z)
 Function that returns the pointer to (x, y, z) ^th location in the linear 3D array given by: More...
 
double _random (double a, double b)
 Helper function to generate a random number in a given interval. More...
 
int save_2d_data (const char *fname, double **X, int num_points, int num_features)
 Save a given n-dimensional data martix to file. More...
 
int save_u_matrix (const char *fname, struct kohonen_array_3d *W)
 Create the distance matrix or U-matrix from the trained weights and save to disk. More...
 
void get_min_2d (double **X, int N, double *val, int *x_idx, int *y_idx)
 Get minimum value and index of the value in a matrix. More...
 
double kohonen_update_weights (const double *X, struct kohonen_array_3d *W, double **D, int num_out, int num_features, double alpha, int R)
 Update weights of the SOM using Kohonen algorithm. More...
 
void kohonen_som (double **X, struct kohonen_array_3d *W, int num_samples, int num_features, int num_out, double alpha_min)
 Apply incremental algorithm with updating neighborhood and learning rates on all samples in the given datset. More...
 

Detailed Description

Function Documentation

◆ _random()

double _random ( double  a,
double  b 
)

Helper function to generate a random number in a given interval.


Steps:

  1. r1 = rand() % 100 gets a random number between 0 and 99
  2. r2 = r1 / 100 converts random number to be between 0 and 0.99
  3. scale and offset the random number to given range of \([a,b)\)

    \[ y = (b - a) \times \frac{\text{(random number between 0 and RAND_MAX)} \; \text{mod}\; 100}{100} + a \]

Parameters
[in]alower limit
[in]bupper limit
Returns
random number in the range \([a,b)\)
88 {
89  return ((b - a) * (rand() % 100) / 100.f) + a;
90 }

◆ get_min_2d()

void get_min_2d ( double **  X,
int  N,
double *  val,
int *  x_idx,
int *  y_idx 
)

Get minimum value and index of the value in a matrix.

Parameters
[in]Xmatrix to search
[in]Nnumber of points in the vector
[out]valminimum value found
[out]x_idxx-index where minimum value was found
[out]y_idxy-index where minimum value was found
205 {
206  val[0] = INFINITY; // initial min value
207 
208  for (int i = 0; i < N; i++) // traverse each x-index
209  {
210  for (int j = 0; j < N; j++) // traverse each y-index
211  {
212  if (X[i][j] < val[0]) // if a lower value is found
213  { // save the value and its index
214  x_idx[0] = i;
215  y_idx[0] = j;
216  val[0] = X[i][j];
217  }
218  }
219  }
220 }

◆ kohonen_data_3d()

double* kohonen_data_3d ( const struct kohonen_array_3d arr,
int  x,
int  y,
int  z 
)

Function that returns the pointer to (x, y, z) ^th location in the linear 3D array given by:

\[ X_{i,j,k} = i\times M\times N + j\times N + k \]

where \(L\), \(M\) and \(N\) are the 3D matrix dimensions.

Parameters
[in]arrpointer to kohonen_array_3d structure
[in]xfirst index
[in]ysecond index
[in]zthird index
Returns
pointer to (x,y,z)^th location of data
68 {
69  int offset = (x * arr->dim2 * arr->dim3) + (y * arr->dim3) + z;
70  return arr->data + offset;
71 }
int dim3
lengths of thirddimension
Definition: kohonen_som_topology.c:51
int dim2
lengths of second dimension
Definition: kohonen_som_topology.c:50
double * data
pointer to data
Definition: kohonen_som_topology.c:52

◆ kohonen_som()

void kohonen_som ( double **  X,
struct kohonen_array_3d W,
int  num_samples,
int  num_features,
int  num_out,
double  alpha_min 
)

Apply incremental algorithm with updating neighborhood and learning rates on all samples in the given datset.

Parameters
[in]Xdata set
[in,out]Wweights matrix
[in]num_samplesnumber of output points
[in]num_featuresnumber of features per input sample
[in]num_outnumber of output points
[in]alpha_minterminal value of alpha
316 {
317  int R = num_out >> 2, iter = 0;
318  double **D = (double **)malloc(num_out * sizeof(double *));
319  for (int i = 0; i < num_out; i++)
320  D[i] = (double *)malloc(num_out * sizeof(double));
321 
322  double dmin = 1.f; // average minimum distance of all samples
323 
324  // Loop alpha from 1 to slpha_min
325  for (double alpha = 1.f; alpha > alpha_min && dmin > 1e-3;
326  alpha -= 0.001, iter++)
327  {
328  dmin = 0.f;
329  // Loop for each sample pattern in the data set
330  for (int sample = 0; sample < num_samples; sample++)
331  {
332  // update weights for the current input pattern sample
333  dmin += kohonen_update_weights(X[sample], W, D, num_out,
334  num_features, alpha, R);
335  }
336 
337  // every 20th iteration, reduce the neighborhood range
338  if (iter % 100 == 0 && R > 1)
339  R--;
340 
341  dmin /= num_samples;
342  printf("iter: %5d\t alpha: %.4g\t R: %d\td_min: %.4g\r", iter, alpha, R,
343  dmin);
344  }
345  putchar('\n');
346 
347  for (int i = 0; i < num_out; i++) free(D[i]);
348  free(D);
349 }
double kohonen_update_weights(const double *X, struct kohonen_array_3d *W, double **D, int num_out, int num_features, double alpha, int R)
Update weights of the SOM using Kohonen algorithm.
Definition: kohonen_som_topology.c:234
#define malloc(bytes)
This macro replace the standard malloc function with malloc_dbg.
Definition: malloc_dbg.h:18
#define free(ptr)
This macro replace the standard free function with free_dbg.
Definition: malloc_dbg.h:26
Here is the call graph for this function:

◆ kohonen_update_weights()

double kohonen_update_weights ( const double *  X,
struct kohonen_array_3d W,
double **  D,
int  num_out,
int  num_features,
double  alpha,
int  R 
)

Update weights of the SOM using Kohonen algorithm.

Parameters
[in]Xdata point
[in,out]Wweights matrix
[in,out]Dtemporary vector to store distances
[in]num_outnumber of output points
[in]num_featuresnumber of features per input sample
[in]alphalearning rate \(0<\alpha\le1\)
[in]Rneighborhood range
Returns
minimum distance of sample and trained weights
237 {
238  int x, y, k;
239  double d_min = 0.f;
240 
241 #ifdef _OPENMP
242 #pragma omp for
243 #endif
244  // step 1: for each 2D output point
245  for (x = 0; x < num_out; x++)
246  {
247  for (y = 0; y < num_out; y++)
248  {
249  D[x][y] = 0.f;
250  // compute Euclidian distance of each output
251  // point from the current sample
252  for (k = 0; k < num_features; k++)
253  {
254  double *w = kohonen_data_3d(W, x, y, k);
255  D[x][y] += (w[0] - X[k]) * (w[0] - X[k]);
256  }
257  D[x][y] = sqrt(D[x][y]);
258  }
259  }
260 
261  // step 2: get closest node i.e., node with smallest Euclidian distance to
262  // the current pattern
263  int d_min_x, d_min_y;
264  get_min_2d(D, num_out, &d_min, &d_min_x, &d_min_y);
265 
266  // step 3a: get the neighborhood range
267  int from_x = max(0, d_min_x - R);
268  int to_x = min(num_out, d_min_x + R + 1);
269  int from_y = max(0, d_min_y - R);
270  int to_y = min(num_out, d_min_y + R + 1);
271 
272  // step 3b: update the weights of nodes in the
273  // neighborhood
274 #ifdef _OPENMP
275 #pragma omp for
276 #endif
277  for (x = from_x; x < to_x; x++)
278  {
279  for (y = from_y; y < to_y; y++)
280  {
281  /* you can enable the following normalization if needed.
282  personally, I found it detrimental to convergence */
283  // const double s2pi = sqrt(2.f * M_PI);
284  // double normalize = 1.f / (alpha * s2pi);
285 
286  /* apply scaling inversely proportional to distance from the
287  current node */
288  double d2 =
289  (d_min_x - x) * (d_min_x - x) + (d_min_y - y) * (d_min_y - y);
290  double scale_factor = exp(-d2 / (2.f * alpha * alpha));
291 
292  for (k = 0; k < num_features; k++)
293  {
294  double *w = kohonen_data_3d(W, x, y, k);
295  // update weights of nodes in the neighborhood
296  w[0] += alpha * scale_factor * (X[k] - w[0]);
297  }
298  }
299  }
300  return d_min;
301 }
double * kohonen_data_3d(const struct kohonen_array_3d *arr, int x, int y, int z)
Function that returns the pointer to (x, y, z) ^th location in the linear 3D array given by:
Definition: kohonen_som_topology.c:67
#define min(a, b)
shorthand for minimum value
Definition: kohonen_som_topology.c:43
void get_min_2d(double **X, int N, double *val, int *x_idx, int *y_idx)
Get minimum value and index of the value in a matrix.
Definition: kohonen_som_topology.c:204
#define max(a, b)
shorthand for maximum value
Definition: kohonen_som_topology.c:39
Here is the call graph for this function:

◆ save_2d_data()

int save_2d_data ( const char *  fname,
double **  X,
int  num_points,
int  num_features 
)

Save a given n-dimensional data martix to file.

Parameters
[in]fnamefilename to save in (gets overwritten without confirmation)
[in]Xmatrix to save
[in]num_pointsrows in the matrix = number of points
[in]num_featurescolumns in the matrix = dimensions of points
Returns
0 if all ok
-1 if file creation failed
104 {
105  FILE *fp = fopen(fname, "wt");
106  if (!fp) // error with fopen
107  {
108  char msg[120];
109  sprintf(msg, "File error (%s): ", fname);
110  perror(msg);
111  return -1;
112  }
113 
114  for (int i = 0; i < num_points; i++) // for each point in the array
115  {
116  for (int j = 0; j < num_features; j++) // for each feature in the array
117  {
118  fprintf(fp, "%.4g", X[i][j]); // print the feature value
119  if (j < num_features - 1) // if not the last feature
120  fputc(',', fp); // suffix comma
121  }
122  if (i < num_points - 1) // if not the last row
123  fputc('\n', fp); // start a new line
124  }
125  fclose(fp);
126  return 0;
127 }

◆ save_u_matrix()

int save_u_matrix ( const char *  fname,
struct kohonen_array_3d W 
)

Create the distance matrix or U-matrix from the trained weights and save to disk.

Parameters
[in]fnamefilename to save in (gets overwriten without confirmation)
[in]Wmodel matrix to save
Returns
0 if all ok
-1 if file creation failed
140 {
141  FILE *fp = fopen(fname, "wt");
142  if (!fp) // error with fopen
143  {
144  char msg[120];
145  sprintf(msg, "File error (%s): ", fname);
146  perror(msg);
147  return -1;
148  }
149 
150  int R = max(W->dim1 >> 3, 2); /* neighborhood range */
151 
152  for (int i = 0; i < W->dim1; i++) // for each x
153  {
154  for (int j = 0; j < W->dim2; j++) // for each y
155  {
156  double distance = 0.f;
157  int k;
158 
159  int from_x = max(0, i - R);
160  int to_x = min(W->dim1, i + R + 1);
161  int from_y = max(0, j - R);
162  int to_y = min(W->dim2, j + R + 1);
163  int l;
164 #ifdef _OPENMP
165 #pragma omp parallel for reduction(+ : distance)
166 #endif
167  for (l = from_x; l < to_x; l++) // scan neighborhoor in x
168  {
169  for (int m = from_y; m < to_y; m++) // scan neighborhood in y
170  {
171  double d = 0.f;
172  for (k = 0; k < W->dim3; k++) // for each feature
173  {
174  double *w1 = kohonen_data_3d(W, i, j, k);
175  double *w2 = kohonen_data_3d(W, l, m, k);
176  d += (w1[0] - w2[0]) * (w1[0] - w2[0]);
177  // distance += w1[0] * w1[0];
178  }
179  distance += sqrt(d);
180  // distance += d;
181  }
182  }
183 
184  distance /= R * R; // mean distance from neighbors
185  fprintf(fp, "%.4g", distance); // print the mean separation
186  if (j < W->dim2 - 1) // if not the last column
187  fputc(',', fp); // suffix comma
188  }
189  if (i < W->dim1 - 1) // if not the last row
190  fputc('\n', fp); // start a new line
191  }
192  fclose(fp);
193  return 0;
194 }
int dim1
lengths of first dimension
Definition: kohonen_som_topology.c:49
Here is the call graph for this function: