diff --git a/Computer Oriented Statistical Methods/statistic/README.md b/Computer Oriented Statistical Methods/statistic/README.md
new file mode 100644
index 00000000..da312ae5
--- /dev/null
+++ b/Computer Oriented Statistical Methods/statistic/README.md
@@ -0,0 +1,116 @@
+# Statistic-library for C
+
+This repository contains a statistics library for the C programming language that provides useful functions for dealing with the average, the standard deviation, etc. The library is platform-independent, so you can use it with any C compiler.
+
+### Usage
+
+Put the files ```statistic.h``` and ```statistic.c``` in your project directory. After that, include the header file ```statistic.h```
+in your project. Then you can use the functions of this library. You will find the files ```statistic.h``` and ```statistic.c``` in the directory **src**.
+
+### Overview of the functions
+
+The first int-argument represents the size of the sample (double-array).
+
+```c
+/*
+  Computes the average of a given sample.
+  The sample is a set of double values.
+  The average-function gets a variable number of arguments.
+  The first argument must be the number of values that follow!
+  The average_Array-function instead gets a double-array of values and an int-number that
+  represents the size of the double-array.
+*/ +double average_Array(int,const double[]); +double average(int,...); +``` + +```c +/* + Computes the standard deviation (n-1) +*/ +double standard_deviation(int,...); +double standard_deviation_array(int, const double[]); + +/* + Computes the standard deviation (n) +*/ +double standard_deviation_N(int,...); +double standard_deviation_N_array(int, const double[]); +``` + +```c +/* + variance: computes the variance (n-1) + variance_N: computes the variance (n) +*/ +double variance(int, const double[]); +double variance_N(int, const double[]); +``` + +```c +/* + gets the max (min) element of the sample +*/ +double max(int, const double[]); +double min(int , const double[]); +``` + +```c +/* + computes the median of the sample +*/ +double median(int, const double[]); +``` + +```c +/* + adds up all values of the sample. +*/ +double sum(int,const double[]); +``` + +```c +/* + computes the range of the sample. +*/ +double range(int, const double[]); +``` + +```c +/* + gets the frequency of the last argument (double) of that sample. +*/ +double frequency_of(int, const double[], double); +``` + +```c +/* + quartile_I: computes the first quartile. + quartile_III: computes the third quartile. + The second quartile is the median! 
+*/
+double quartile_I(int, const double[]);
+double quartile_III(int, const double[]);
+```
+
+```c
+/*
+  computes the quartile distance
+*/
+double quartile_distance(int, const double[]);
+```
+
+
+### Running the tests
+
+Navigate to the directory of this repository and type in the console:
+
+```bash
+gcc -o myTests test/test.c src/statistic.c -lcunit -lm && ./myTests
+```
+
+#### Dependencies for tests
+
+* CUnit
+* gcc
+
diff --git a/Computer Oriented Statistical Methods/statistic/src/statistic.c b/Computer Oriented Statistical Methods/statistic/src/statistic.c
new file mode 100644
index 00000000..42047653
--- /dev/null
+++ b/Computer Oriented Statistical Methods/statistic/src/statistic.c
@@ -0,0 +1,439 @@
+/*
+    author: Christian Bender
+    This file contains the implementation part of the statistic-library.
+
+    Every function returns NAN (math.h) if the sample is too small for the
+    requested value or if no memory for a temporary copy is available.
+*/
+
+#include <math.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "statistic.h"
+
+/*
+    private helper-function for comparing two double values (for qsort).
+    Returns -1, 0 or 1. The difference of both values must not be returned
+    directly: the conversion to int would turn every difference smaller
+    than 1 into 0 (= "equal") and overflows for very large differences.
+*/
+static int compare_doubles(const void *a, const void *b)
+{
+    double x = *(const double *)a;
+    double y = *(const double *)b;
+
+    return (x > y) - (x < y);
+}
+
+/*
+    private helper-function: returns a sorted copy of the sample.
+    The copy lives on the heap and must be released with free(...).
+    Returns NULL if the sample is empty or the allocation failed.
+*/
+static double *sorted_copy(int n, const double values[])
+{
+    double *copy;
+
+    if (n <= 0)
+    {
+        return NULL;
+    }
+
+    /* calloc checks the multiplication n * sizeof(double) for overflow */
+    copy = calloc((size_t)n, sizeof *copy);
+    if (copy == NULL)
+    {
+        return NULL;
+    }
+
+    memcpy(copy, values, (size_t)n * sizeof *copy);
+
+    /* sorts the copy with quicksort from stdlib.h */
+    qsort(copy, (size_t)n, sizeof *copy, compare_doubles);
+
+    return copy;
+}
+
+/*
+    private helper-function: adds up the squared deviations of all values
+    of the sample from their average. Expects n > 0.
+*/
+static double squared_deviations(int n, const double values[])
+{
+    double avg = average_Array(n, values);
+    double total = 0;
+    int i;
+
+    for (i = 0; i < n; i++)
+    {
+        total += (values[i] - avg) * (values[i] - avg);
+    }
+
+    return total;
+}
+
+/*
+    private helper-function: like squared_deviations(...), but reads the
+    sample from the variable arguments of the caller. The arguments are
+    read twice (the second time through a copy of the list), so that no
+    temporary array is needed. Expects n > 0.
+    Afterwards the caller may only pass 'args' to va_end.
+*/
+static double squared_deviations_va(int n, va_list args)
+{
+    va_list second_pass;
+    double avg = 0;
+    double total = 0;
+    double value;
+    int i;
+
+    /* remembers the start of the list before the first pass consumes it */
+    va_copy(second_pass, args);
+
+    /* first pass: computes the average */
+    for (i = 0; i < n; i++)
+    {
+        avg += va_arg(args, double);
+    }
+    avg /= n;
+
+    /* second pass: adds up the squared deviations */
+    for (i = 0; i < n; i++)
+    {
+        value = va_arg(second_pass, double);
+        total += (value - avg) * (value - avg);
+    }
+
+    va_end(second_pass);
+
+    return total;
+}
+
+/*
+    private helper-function: computes the first (number = 1) or the third
+    (number = 3) quartile of the sample.
+    With p = number / 4 the quartile sits at position n * p of the sorted
+    sample. If n * p is a whole number, the average of the value at that
+    position and its successor is returned, otherwise the value at the
+    next higher position. median(...) uses the same rule for p = 1/2.
+*/
+static double quartile(int n, const double values[], int number)
+{
+    double *sorted = sorted_copy(n, values);
+    size_t scaled;
+    size_t pos;
+    double result;
+
+    if (sorted == NULL)
+    {
+        return NAN;
+    }
+
+    scaled = (size_t)n * (size_t)number;
+
+    /* integer division: 0-based index of the next higher position */
+    pos = scaled / 4;
+
+    if (scaled % 4 == 0)
+    {
+        /* n * p is a whole number, so n >= 4 and pos >= 1 */
+        result = (sorted[pos - 1] + sorted[pos]) / 2;
+    }
+    else
+    {
+        result = sorted[pos];
+    }
+
+    free(sorted);
+
+    return result;
+}
+
+/*
+    computes the average of the n double values that follow 'n'.
+*/
+double average(int n, ...)
+{
+    va_list valist;
+    double total = 0;
+    int i;
+
+    if (n <= 0)
+    {
+        return NAN;
+    }
+
+    /* initializes valist for n arguments */
+    va_start(valist, n);
+
+    /* adds up all double values of the sample. */
+    for (i = 0; i < n; i++)
+    {
+        total += va_arg(valist, double);
+    }
+
+    /* cleans memory reserved for valist */
+    va_end(valist);
+
+    return total / n;
+}
+
+/*
+    computes the average of the first n elements of 'values'.
+*/
+double average_Array(int n, const double values[])
+{
+    if (n <= 0)
+    {
+        return NAN;
+    }
+
+    return sum(n, values) / n;
+}
+
+/*
+    computes the standard deviation (n-1) of the n double values that
+    follow 'n'. Needs at least two values.
+*/
+double standard_deviation(int n, ...)
+{
+    va_list valist;
+    double var;
+
+    if (n < 2)
+    {
+        return NAN;
+    }
+
+    va_start(valist, n);
+    var = squared_deviations_va(n, valist) / (double)(n - 1);
+    va_end(valist);
+
+    return sqrt(var);
+}
+
+/*
+    computes the standard deviation (n-1) of the first n elements of
+    'values'. Needs at least two values.
+*/
+double standard_deviation_array(int n, const double values[])
+{
+    return sqrt(variance(n, values));
+}
+
+/*
+    computes the standard deviation (n) of the n double values that
+    follow 'n'.
+*/
+double standard_deviation_N(int n, ...)
+{
+    va_list valist;
+    double var;
+
+    if (n <= 0)
+    {
+        return NAN;
+    }
+
+    va_start(valist, n);
+    var = squared_deviations_va(n, valist) / (double)n;
+    va_end(valist);
+
+    return sqrt(var);
+}
+
+/*
+    computes the standard deviation (n) of the first n elements of 'values'.
+*/
+double standard_deviation_N_array(int n, const double values[])
+{
+    return sqrt(variance_N(n, values));
+}
+
+/*
+    computes the variance (n-1) of the first n elements of 'values'.
+    Needs at least two values.
+*/
+double variance(int n, const double values[])
+{
+    if (n < 2)
+    {
+        return NAN;
+    }
+
+    return squared_deviations(n, values) / (double)(n - 1);
+}
+
+/*
+    computes the variance (n) of the first n elements of 'values'.
+*/
+double variance_N(int n, const double values[])
+{
+    if (n <= 0)
+    {
+        return NAN;
+    }
+
+    return squared_deviations(n, values) / (double)n;
+}
+
+/*
+    gets the largest element of the sample.
+*/
+double max(int n, const double values[])
+{
+    double largest;
+    int i;
+
+    if (n <= 0)
+    {
+        return NAN;
+    }
+
+    largest = values[0];
+
+    /* iterates over all remaining elements in 'values' */
+    for (i = 1; i < n; i++)
+    {
+        if (values[i] > largest)
+        {
+            largest = values[i];
+        }
+    }
+
+    return largest;
+}
+
+/*
+    gets the smallest element of the sample.
+*/
+double min(int n, const double values[])
+{
+    double smallest;
+    int i;
+
+    if (n <= 0)
+    {
+        return NAN;
+    }
+
+    smallest = values[0];
+
+    /* iterates over all remaining elements in 'values' */
+    for (i = 1; i < n; i++)
+    {
+        if (values[i] < smallest)
+        {
+            smallest = values[i];
+        }
+    }
+
+    return smallest;
+}
+
+/*
+    computes the median of the sample. 'values' itself is not changed,
+    a sorted copy is used.
+*/
+double median(int n, const double values[])
+{
+    double *sorted = sorted_copy(n, values);
+    double result;
+
+    if (sorted == NULL)
+    {
+        return NAN;
+    }
+
+    if (n % 2 != 0) /* n is odd */
+    {
+        /* integer division */
+        result = sorted[n / 2];
+    }
+    else /* n is even */
+    {
+        result = (sorted[n / 2] + sorted[(n / 2) - 1]) / 2;
+    }
+
+    free(sorted);
+
+    return result;
+}
+
+/*
+    adds up the first n elements of 'values'.
+*/
+double sum(int n, const double values[])
+{
+    double total = 0;
+    int i;
+
+    /* actual adding procedure */
+    for (i = 0; i < n; i++)
+    {
+        total += values[i];
+    }
+
+    return total;
+}
+
+/*
+    computes the range (max - min) of the sample.
+*/
+double range(int n, const double values[])
+{
+    return max(n, values) - min(n, values);
+}
+
+/*
+    gets the relative frequency of 'val' in the sample, that is the number
+    of elements that are exactly equal (==) to 'val' divided by n.
+*/
+double frequency_of(int n, const double values[], double val)
+{
+    int i;
+    double counter = 0;
+
+    if (n <= 0)
+    {
+        return NAN;
+    }
+
+    /* counts the number of occurrences */
+    for (i = 0; i < n; i++)
+    {
+        if (values[i] == val)
+        {
+            counter++;
+        }
+    }
+
+    return counter / n;
+}
+
+/*
+    computes the first quartile of the sample.
+*/
+double quartile_I(int n, const double values[])
+{
+    return quartile(n, values, 1);
+}
+
+/*
+    computes the third quartile of the sample.
+*/
+double quartile_III(int n, const double values[])
+{
+    return quartile(n, values, 3);
+}
+
+/*
+    computes the quartile distance (third quartile - first quartile).
+*/
+double quartile_distance(int n, const double values[])
+{
+    return quartile_III(n, values) - quartile_I(n, values);
+}
\ No newline at end of file
diff --git a/Computer Oriented Statistical Methods/statistic/src/statistic.h b/Computer Oriented Statistical Methods/statistic/src/statistic.h
new file mode 100644
index 00000000..bf7032bb
--- /dev/null
+++ b/Computer Oriented Statistical Methods/statistic/src/statistic.h
@@ -0,0 +1,79 @@
+/*
+    author: Christian Bender
+    This file contains the public interface for the statistic-library.
+*/
+
+#ifndef STATISTIC_H
+#define STATISTIC_H
+
+/*
+    Computes the average of a given sample.
+    The sample is a set of double values.
+    The average-function gets a variable number of arguments. The first argument
+    must be the number of values that follow!
+    The average_Array-function instead gets a double-array of values and an int-number that
+    represents the size of the double-array.
+*/
+double average_Array(int n, const double values[]);
+double average(int n, ...);
+
+/*
+    computes the standard deviation (n-1)
+*/
+double standard_deviation(int n, ...);
+double standard_deviation_array(int n, const double values[]);
+
+/*
+    computes the standard deviation (n)
+*/
+double standard_deviation_N(int n, ...);
+double standard_deviation_N_array(int n, const double values[]);
+
+/*
+    variance: computes the variance (n-1)
+    variance_N: computes the variance (n)
+*/
+
+double variance(int n, const double values[]);
+double variance_N(int n, const double values[]);
+
+/*
+    gets the max (min) element of the sample
+*/
+double max(int n, const double values[]);
+double min(int n, const double values[]);
+
+/*
+    computes the median of the sample
+*/
+double median(int n, const double values[]);
+
+/*
+    adds up all values of the sample.
+*/
+double sum(int n, const double values[]);
+
+/*
+    computes the range of the sample.
+*/
+double range(int n, const double values[]);
+
+/*
+    gets the frequency of the last argument (double) of that sample.
+*/
+double frequency_of(int n, const double values[], double val);
+
+/*
+    quartile_I: computes the first quartile.
+    quartile_III: computes the third quartile.
+    The second quartile is the median!
+*/
+double quartile_I(int n, const double values[]);
+double quartile_III(int n, const double values[]);
+
+/*
+    computes the quartile distance
+*/
+double quartile_distance(int n, const double values[]);
+
+#endif
\ No newline at end of file
diff --git a/Computer Oriented Statistical Methods/statistic/test/test.c b/Computer Oriented Statistical Methods/statistic/test/test.c
new file mode 100644
index 00000000..0396dc85
--- /dev/null
+++ b/Computer Oriented Statistical Methods/statistic/test/test.c
@@ -0,0 +1,190 @@
+/*
+    author: Christian Bender
+    This file contains a CUnit test suite for the statistic-library
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include "CUnit/Basic.h"
+
+#include "../src/statistic.h"
+
+/* test for function average(...) */
+void test_average(void)
+{
+    CU_ASSERT_DOUBLE_EQUAL(average(3,1.0,2.5,3.5),2.333,0.01);
+}
+
+/* test for function average_Array(...) */
+void test_average_Array(void)
+{
+    double values[] = {1.0, 2.5, 3.5};
+    CU_ASSERT_DOUBLE_EQUAL(average_Array(3, values), 2.333, 0.01);
+}
+
+/* test for function standard_deviation(...) */
+void test_standard_deviation(void)
+{
+    CU_ASSERT_DOUBLE_EQUAL(standard_deviation(4, 15.0, 70.0, 25.0, 50.0), 24.8328, 0.01);
+}
+
+/* test for function standard_deviation_array() */
+void test_standard_deviation_array(void)
+{
+    double values[] = {15.0, 70.0, 25.0, 50.0};
+    CU_ASSERT_DOUBLE_EQUAL(standard_deviation_array(4, values), 24.8328, 0.01);
+}
+
+/* test for function standard_deviation_N(...) */
+void test_standard_deviation_N(void)
+{
+    CU_ASSERT_DOUBLE_EQUAL(standard_deviation_N(4, 15.0, 70.0, 25.0, 50.0), 21.5058, 0.01);
+}
+
+/* test for function standard_deviation_N_array() */
+void test_standard_deviation_N_array(void)
+{
+    double values[] = {15.0, 70.0, 25.0, 50.0};
+    CU_ASSERT_DOUBLE_EQUAL(standard_deviation_N_array(4, values), 21.5058, 0.01);
+}
+
+/* test for the function variance(...) (n-1) */
+void test_variance(void)
+{
+    double values[] = {15.0, 70.0, 25.0, 50.0};
+    CU_ASSERT_DOUBLE_EQUAL(variance(4, values), 616.6667, 0.01);
+}
+
+/* test for the function variance_N(...) (n) */
+void test_variance_N(void)
+{
+    double values[] = {15.0, 70.0, 25.0, 50.0};
+    CU_ASSERT_DOUBLE_EQUAL(variance_N(4, values), 462.5, 0.01);
+}
+
+/* test for the max(...) function */
+void test_max(void)
+{
+    double values[] = {15.0, 70.0, 25.0, 50.0};
+    CU_ASSERT_DOUBLE_EQUAL(max(4, values), 70.0, 0.01);
+}
+
+/* test for the min(...) function */
+void test_min(void)
+{
+    double values[] = {15.0, 70.0, 25.0, 50.0};
+    CU_ASSERT_DOUBLE_EQUAL(min(4, values), 15.0, 0.01);
+}
+
+/* test for the median(...)-function (even sample size, unsorted input) */
+void test_median(void)
+{
+    double values[] = {15.0, 70.0, 25.0, 50.0};
+    CU_ASSERT_DOUBLE_EQUAL(median(4, values), 37.5, 0.01);
+}
+
+
+/* test for the sum(...)-function */
+void test_sum(void)
+{
+    double values[] = {15.0, 70.0, 25.0, 50.0};
+    CU_ASSERT_DOUBLE_EQUAL(sum(4, values), 160, 0.01);
+}
+
+
+/* test for the range(...)-function */
+void test_range(void)
+{
+    double values[] = {15.0, 70.0, 25.0, 50.0};
+    CU_ASSERT_DOUBLE_EQUAL(range(4, values), 55, 0.01);
+}
+
+
+/* test of frequency_of(...)-function (relative frequency: 2 of 5, 1 of 5) */
+void test_frequency_of(void)
+{
+    double values[] = {1.0,7.0,2.5,2.5,6.0};
+    CU_ASSERT_DOUBLE_EQUAL(frequency_of(5, values,2.5), 0.4, 0.01);
+    CU_ASSERT_DOUBLE_EQUAL(frequency_of(5, values,6.0), 0.2, 0.01);
+}
+
+
+/* test of quartile_I(...) and quartile_III(...)-function (both samples are already sorted) */
+void test_quartile(void)
+{
+    double values[] = {3.0,4.0,5.0,7.0,7.0,7.0,8.0,9.0,11.0,13.0,13.0,13.0,15.0,16.0};
+    double sample[] = {1600.0,2300.0,2300.0,2400.0,2900.0,3200,3500,4500,4600,5200,6500,12000};
+    CU_ASSERT_DOUBLE_EQUAL(quartile_I(14, values), 7.0, 0.01);
+    CU_ASSERT_DOUBLE_EQUAL(quartile_III(14, values), 13.0, 0.01);
+    CU_ASSERT_DOUBLE_EQUAL(quartile_III(12, sample), 4900.0, 0.01);
+}
+
+
+/* test for quartile_distance(...)-function (13.0 - 7.0 for this sample) */
+void test_quartile_distance(void)
+{
+    double values[] = {3.0,4.0,5.0,7.0,7.0,7.0,8.0,9.0,11.0,13.0,13.0,13.0,15.0,16.0};
+    CU_ASSERT_DOUBLE_EQUAL(quartile_distance(14, values), 6.0, 0.01);
+}
+
+/*
+    init suite: nothing to set up, always reports success (0)
+*/
+int init_suite1(void)
+{
+    return 0;
+}
+
+/*
+    clean suite: nothing to tear down, always reports success (0)
+*/
+int clean_suite1(void)
+{
+    return 0;
+}
+
+/* test runner: registers all tests in one suite and runs them in verbose mode */
+int main(void)
+{
+    CU_pSuite pSuite = NULL;
+
+    /* initializes CUnit */
+    if (CUE_SUCCESS != CU_initialize_registry())
+        return CU_get_error();
+
+    /* adds the suite "Test for statistic" to the registry */
+    pSuite = CU_add_suite("Test for statistic", init_suite1, clean_suite1);
+    if (NULL == pSuite)
+    {
+        CU_cleanup_registry();
+        return CU_get_error();
+    }
+
+    /* registers the individual tests to the test-suite; stops at the first failed registration */
+    if ((NULL == CU_add_test(pSuite, "test of average()", test_average))
+        || (NULL == CU_add_test(pSuite, "test of average_Array()", test_average_Array))
+        || (NULL == CU_add_test(pSuite, "test of standard_deviation()", test_standard_deviation))
+        || (NULL == CU_add_test(pSuite, "test of standard_deviation_array()", test_standard_deviation_array))
+        || (NULL == CU_add_test(pSuite, "test of standard_deviation_N_array()", test_standard_deviation_N_array))
+        || (NULL == CU_add_test(pSuite, "test of standard_deviation_N()", test_standard_deviation_N))
+        || (NULL == CU_add_test(pSuite, "test of variance()", test_variance))
+        || (NULL == CU_add_test(pSuite, "test of variance_N()", test_variance_N))
+        || (NULL == CU_add_test(pSuite, "test of max()", test_max))
+        || (NULL == CU_add_test(pSuite, "test of min()", test_min))
+        || (NULL == CU_add_test(pSuite, "test of median()", test_median))
+        || (NULL == CU_add_test(pSuite, "test of sum()", test_sum))
+        || (NULL == CU_add_test(pSuite, "test of range()", test_range))
+        || (NULL == CU_add_test(pSuite, "test of frequency_of()", test_frequency_of))
+        || (NULL == CU_add_test(pSuite, "test of quartile_I() and quartile_III()", test_quartile))
+        || (NULL == CU_add_test(pSuite, "test of quartile_distance()", test_quartile_distance)))
+    {
+        CU_cleanup_registry();
+        return CU_get_error();
+    }
+
+    /* runs tests */
+    CU_basic_set_mode(CU_BRM_VERBOSE);
+    CU_basic_run_tests();
+    CU_cleanup_registry();
+    return CU_get_error();
+}
\ No newline at end of file