Algorithms_in_C
1.0.0
Set of algorithms implemented in C.
|
Kohonen self organizing map (topological map)
More...
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
|
double * | data_3d (const struct array_3d *arr, int x, int y, int z) |
|
double | _random (double a, double b) |
|
int | save_2d_data (const char *fname, double **X, int num_points, int num_features) |
|
int | save_u_matrix (const char *fname, struct array_3d *W) |
|
void | get_min_2d (double **X, int N, double *val, int *x_idx, int *y_idx) |
|
double | update_weights (const double *X, struct array_3d *W, double **D, int num_out, int num_features, double alpha, int R) |
|
void | kohonen_som (double **X, struct array_3d *W, int num_samples, int num_features, int num_out, double alpha_min) |
|
void | test_2d_classes (double *const *data, int N) |
|
void | test1 () |
|
void | test_3d_classes1 (double *const *data, int N) |
|
void | test2 () |
|
void | test_3d_classes2 (double *const *data, int N) |
|
void | test3 () |
|
double | get_clock_diff (clock_t start_t, clock_t end_t) |
|
int | main (int argc, char **argv) |
|
Kohonen self organizing map (topological map)
- Author
- Krishna Vedala This example implements a powerful unsupervised learning algorithm called a self-organizing map. The algorithm creates a connected network of weights that closely follows the given data points. This creates a topological map of the given data, i.e., it maintains the relationship between various data points in a much higher-dimensional space by creating an equivalent in a 2-dimensional space.
- Warning
- MSVC 2019 compiler generates code that does not execute as expected. However, MinGW, Clang for GCC and Clang for MSVC compilers on Windows perform as expected. Any insights and suggestions should be directed to the author.
- See also
- kohonen_som_trace.c
◆ _USE_MATH_DEFINES
#define _USE_MATH_DEFINES |
◆ max
#define max |
( |
|
a, |
|
|
|
b |
|
) |
| (((a) > (b)) ? (a) : (b)) |
shorthand for maximum value
◆ min
#define min |
( |
|
a, |
|
|
|
b |
|
) |
| (((a) < (b)) ? (a) : (b)) |
shorthand for minimum value
◆ _random()
double _random |
( |
double |
a, |
|
|
double |
b |
|
) |
| |
Helper function to generate a random number in a given interval.
Steps:
r1 = rand() % 100
gets a random number between 0 and 99
r2 = r1 / 100
converts random number to be between 0 and 0.99
- scale and offset the random number to given range of \([a,b)\)
\[ y = (b - a) \times \frac{\text{(random number between 0 and RAND_MAX)} \; \text{mod}\; 100}{100} + a \]
- Parameters
-
[in] | a | lower limit |
[in] | b | upper limit |
- Returns
- random number in the range \([a,b)\)
◆ data_3d()
double* data_3d |
( |
const struct array_3d * |
arr, |
|
|
int |
x, |
|
|
int |
y, |
|
|
int |
z |
|
) |
| |
Function that returns the pointer to the (x, y, z)-th location in the linear 3D array given by:
\[ X_{i,j,k} = i\times M\times N + j\times N + k \]
where \(L\), \(M\) and \(N\) are the 3D matrix dimensions.
- Parameters
-
[in] | arr | pointer to array_3d structure |
[in] | x | first index |
[in] | y | second index |
[in] | z | third index |
- Returns
- pointer to (x,y,z)^th location of data
◆ get_clock_diff()
double get_clock_diff |
( |
clock_t |
start_t, |
|
|
clock_t |
end_t |
|
) |
| |
Convert clock cycle difference to time in seconds
- Parameters
-
[in] | start_t | start clock |
[in] | end_t | end clock |
- Returns
- time difference in seconds
◆ get_min_2d()
void get_min_2d |
( |
double ** |
X, |
|
|
int |
N, |
|
|
double * |
val, |
|
|
int * |
x_idx, |
|
|
int * |
y_idx |
|
) |
| |
Get minimum value and index of the value in a matrix
- Parameters
-
[in] | X | matrix to search |
[in] | N | number of rows and columns in the (square) matrix |
[out] | val | minimum value found |
[out] | x_idx | x-index where minimum value was found |
[out] | y_idx | y-index where minimum value was found |
205 for (
int j = 0; j <
N; j++)
207 if (X[i][j] < val[0])
◆ kohonen_som()
void kohonen_som |
( |
double ** |
X, |
|
|
struct array_3d * |
W, |
|
|
int |
num_samples, |
|
|
int |
num_features, |
|
|
int |
num_out, |
|
|
double |
alpha_min |
|
) |
| |
Apply incremental algorithm with updating neighborhood and learning rates on all samples in the given dataset.
- Parameters
-
[in] | X | data set |
[in,out] | W | weights matrix |
[in] | D | temporary vector to store distances |
[in] | num_samples | number of samples in the data set |
[in] | num_features | number of features per input sample |
[in] | num_out | number of output points |
[in] | alpha_min | terminal value of alpha |
312 int R = num_out >> 2, iter = 0;
313 double **D = (
double **)malloc(num_out *
sizeof(
double *));
314 for (
int i = 0; i < num_out; i++)
315 D[i] = (
double *)malloc(num_out *
sizeof(
double));
320 for (
double alpha = 1.f; alpha > alpha_min && dmin > 1e-3;
321 alpha -= 0.001, iter++)
325 for (
int sample = 0; sample < num_samples; sample++)
333 if (iter % 100 == 0 && R > 1)
337 printf(
"iter: %5d\t alpha: %.4g\t R: %d\td_min: %.4g\r", iter, alpha, R,
342 for (
int i = 0; i < num_out; i++)
◆ main()
int main |
( |
int |
argc, |
|
|
char ** |
argv |
|
) |
| |
Main function
687 : Calculated times include: writing files to disk.)\n\n
");
◆ save_2d_data()
int save_2d_data |
( |
const char * |
fname, |
|
|
double ** |
X, |
|
|
int |
num_points, |
|
|
int |
num_features |
|
) |
| |
Save a given n-dimensional data matrix to file.
- Parameters
-
[in] | fname | filename to save in (gets overwritten without confirmation) |
[in] | X | matrix to save |
[in] | num_points | rows in the matrix = number of points |
[in] | num_features | columns in the matrix = dimensions of points |
- Returns
- 0 if all ok
-
-1 if file creation failed
100 FILE *fp = fopen(fname,
"wt");
104 sprintf(msg,
"File error (%s): ", fname);
109 for (
int i = 0; i < num_points; i++)
111 for (
int j = 0; j < num_features; j++)
113 fprintf(fp,
"%.4g", X[i][j]);
114 if (j < num_features - 1)
117 if (i < num_points - 1)
◆ save_u_matrix()
int save_u_matrix |
( |
const char * |
fname, |
|
|
struct array_3d * |
W |
|
) |
| |
Create the distance matrix or U-matrix from the trained weights and save to disk.
- Parameters
-
[in] | fname | filename to save in (gets overwritten without confirmation) |
[in] | W | model matrix to save |
- Returns
- 0 if all ok
-
-1 if file creation failed
140 sprintf(msg,
"File error (%s): ", fname);
147 for (
int i = 0; i < W->
dim1; i++)
149 for (
int j = 0; j < W->
dim2; j++)
151 double distance = 0.f;
154 int from_x =
max(0, i - R);
155 int to_x =
min(W->
dim1, i + R + 1);
156 int from_y =
max(0, j - R);
157 int to_y =
min(W->
dim2, j + R + 1);
160 #pragma omp parallel for reduction(+ : distance)
162 for (l = from_x; l < to_x; l++)
164 for (
int m = from_y; m < to_y; m++)
167 for (k = 0; k < W->
dim3; k++)
169 double *w1 =
data_3d(W, i, j, k);
170 double *w2 =
data_3d(W, l, m, k);
171 d += (w1[0] - w2[0]) * (w1[0] - w2[0]);
180 fprintf(fp,
"%.4g", distance);
◆ test1()
Test that creates a random set of points distributed in four clusters in 2D space and trains an SOM that finds the topological pattern. The following CSV files are created to validate the execution:
test1.csv
: random test samples points with a circular pattern
w11.csv
: initial random U-matrix
w12.csv
: trained SOM U-matrix
416 X[i] = (
double *)malloc(features *
sizeof(
double));
419 for (
int k = 0; k < num_out; k++)
425 for (j = 0; j < features; j++)
427 double *w =
data_3d(&W, i, k, j);
440 for (
int i = 0; i <
N; i++)
◆ test2()
Test that creates a random set of points distributed in 4 clusters in 3D space and trains an SOM that finds the topological pattern. The following CSV files are created to validate the execution:
test2.csv
: random test samples points
w21.csv
: initial random U-matrix
w22.csv
: trained SOM U-matrix
516 X[i] = (
double *)malloc(features *
sizeof(
double));
519 for (
int k = 0; k < num_out; k++)
524 for (j = 0; j < features; j++)
526 double *w =
data_3d(&W, i, k, j);
539 for (
int i = 0; i <
N; i++)
◆ test3()
Test that creates a random set of points distributed in eight clusters in 3D space and trains an SOM that finds the topological pattern. The following CSV files are created to validate the execution:
test3.csv
: random test samples points
w31.csv
: initial random U-matrix
w32.csv
: trained SOM U-matrix
617 X[i] = (
double *)malloc(features *
sizeof(
double));
620 for (
int k = 0; k < num_out; k++)
626 for (j = 0; j < features; j++)
628 double *w =
data_3d(&W, i, k, j);
641 for (
int i = 0; i <
N; i++)
◆ test_2d_classes()
void test_2d_classes |
( |
double *const * |
data, |
|
|
int |
N |
|
) |
| |
Creates a random set of points distributed in four clusters in 2D space with centroids at the points
- \((0.5, 0.5)\)
- \((0.5,-0.5)\)
- \((-0.5, 0.5)\)
- \((-0.5,-0.5)\)
- Parameters
-
[out] | data | matrix to store data in |
[in] | N | number of points required |
373 for (i = 0; i <
N; i++)
375 int class = rand() % num_classes;
378 data[i][0] =
_random(centres[
class][0] - R, centres[
class][0] + R);
379 data[i][1] =
_random(centres[
class][1] - R, centres[
class][1] + R);
◆ test_3d_classes1()
void test_3d_classes1 |
( |
double *const * |
data, |
|
|
int |
N |
|
) |
| |
Creates a random set of points distributed in four clusters in 3D space with centroids at the points
- \((0.5, 0.5, 0.5)\)
- \((0.5,-0.5, -0.5)\)
- \((-0.5, 0.5, 0.5)\)
- \((-0.5,-0.5, -0.5)\)
- Parameters
-
[out] | data | matrix to store data in |
[in] | N | number of points required |
472 for (i = 0; i <
N; i++)
474 int class = rand() % num_classes;
477 data[i][0] =
_random(centres[
class][0] - R, centres[
class][0] + R);
478 data[i][1] =
_random(centres[
class][1] - R, centres[
class][1] + R);
479 data[i][2] =
_random(centres[
class][2] - R, centres[
class][2] + R);
◆ test_3d_classes2()
void test_3d_classes2 |
( |
double *const * |
data, |
|
|
int |
N |
|
) |
| |
Creates a random set of points distributed in four clusters in 3D space with centroids at the points
- \((0.5, 0.5, 0.5)\)
- \((0.5,-0.5, -0.5)\)
- \((-0.5, 0.5, 0.5)\)
- \((-0.5,-0.5, -0.5)\)
- Parameters
-
[out] | data | matrix to store data in |
[in] | N | number of points required |
575 for (i = 0; i <
N; i++)
577 int class = rand() % num_classes;
580 data[i][0] =
_random(centres[
class][0] - R, centres[
class][0] + R);
581 data[i][1] =
_random(centres[
class][1] - R, centres[
class][1] + R);
582 data[i][2] =
_random(centres[
class][2] - R, centres[
class][2] + R);
◆ update_weights()
double update_weights |
( |
const double * |
X, |
|
|
struct array_3d * |
W, |
|
|
double ** |
D, |
|
|
int |
num_out, |
|
|
int |
num_features, |
|
|
double |
alpha, |
|
|
int |
R |
|
) |
| |
Update weights of the SOM using Kohonen algorithm
- Parameters
-
[in] | X | data point |
[in,out] | W | weights matrix |
[in,out] | D | temporary vector to store distances |
[in] | num_out | number of output points |
[in] | num_features | number of features per input sample |
[in] | alpha | learning rate \(0<\alpha\le1\) |
[in] | R | neighborhood range |
- Returns
- minimum distance of sample and trained weights
239 for (x = 0; x < num_out; x++)
241 for (y = 0; y < num_out; y++)
246 for (k = 0; k < num_features; k++)
248 double *w =
data_3d(W, x, y, k);
249 D[x][y] += (w[0] - X[k]) * (w[0] - X[k]);
251 D[x][y] = sqrt(D[x][y]);
257 int d_min_x, d_min_y;
258 get_min_2d(D, num_out, &d_min, &d_min_x, &d_min_y);
261 int from_x =
max(0, d_min_x - R);
262 int to_x =
min(num_out, d_min_x + R + 1);
263 int from_y =
max(0, d_min_y - R);
264 int to_y =
min(num_out, d_min_y + R + 1);
271 for (x = from_x; x < to_x; x++)
273 for (y = from_y; y < to_y; y++)
283 (d_min_x - x) * (d_min_x - x) + (d_min_y - y) * (d_min_y - y);
284 double scale_factor = exp(-d2 / (2.f * alpha * alpha));
286 for (k = 0; k < num_features; k++)
288 double *w =
data_3d(W, x, y, k);
290 w[0] += alpha * scale_factor * (X[k] - w[0]);
#define min(a, b)
Definition: kohonen_som_topology.c:37
double update_weights(const double *X, struct array_3d *W, double **D, int num_out, int num_features, double alpha, int R)
Definition: kohonen_som_topology.c:231
Definition: prime_factoriziation.c:25
double * data_3d(const struct array_3d *arr, int x, int y, int z)
Definition: kohonen_som_topology.c:64
void test_3d_classes1(double *const *data, int N)
Definition: kohonen_som_topology.c:458
void test_3d_classes2(double *const *data, int N)
Definition: kohonen_som_topology.c:557
#define N
Definition: sol1.c:111
int save_u_matrix(const char *fname, struct array_3d *W)
Definition: kohonen_som_topology.c:136
int dim3
Definition: kohonen_som_topology.c:48
int dim1
Definition: kohonen_som_topology.c:46
void get_min_2d(double **X, int N, double *val, int *x_idx, int *y_idx)
Definition: kohonen_som_topology.c:201
double _random(double a, double b)
Definition: kohonen_som_topology.c:84
int save_2d_data(const char *fname, double **X, int num_points, int num_features)
Definition: kohonen_som_topology.c:99
#define max(a, b)
Definition: kohonen_som_topology.c:31
int dim2
Definition: kohonen_som_topology.c:47
void kohonen_som(double **X, struct array_3d *W, int num_samples, int num_features, int num_out, double alpha_min)
Definition: kohonen_som_topology.c:311
void test_2d_classes(double *const *data, int N)
Definition: kohonen_som_topology.c:359