Multiscale Entropy Analysis 1.0
(12,038 bytes)
/* file: mse.c M. Costa 1 August 2004
Last revised: 4 August 2004 (GBM)
-------------------------------------------------------------------------------
mse: calculates multiscale entropy (MSE) of one or multiple data sets
Copyright (C) 2004 Madalena Costa
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place - Suite 330, Boston, MA 02111-1307, USA.
You may contact the author by e-mail (mcosta@fsa.harvard.edu). For updates to
this software, please visit PhysioNet (http://www.physionet.org/).
_______________________________________________________________________________
Compile this program by
gcc -o mse -O mse.c -lm
There are two major steps in the calculations performed by mse:
1. Time series are coarse-grained.
2. Sample entropy (SampEn) is calculated for each coarse-grained time series.
Output file:
1st line: shows the r value.
2nd line: shows the m values.
After the 2nd line there are several columns: the first column (of integers)
is the scale factor. The following columns are SampEn values for coarse-grained
time series calculated for the values of r and m specified. If the option for
calculating MSE for several r values is chosen a new line containing the new r
value and new columns with the corresponding results are written.
*/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#define MAXSTR 1250 /* maximum string size */
#define DATA_MAX 200000 /* maximum number of data points */
#define M_MAX 10 /* maximum value of the parameter m */
#define SCALE_MAX 40 /* maximum scale value */
#define R_STEP_MAX 10 /* maximum number of different r values */
#define FILEMAX 100 /* maximum number of data files in the list file*/
/* Global variables */
char *prog, line[MAXSTR];
double SE[FILEMAX][R_STEP_MAX][SCALE_MAX][M_MAX];
double *u, *y, r_min, r_max, r_step;
int c, nlin, m_min, m_max, m_step, scale_max, scale_step, i_min, i_max;
static char file[500];
FILE *fl;
FILE *pin;
main(argc, argv)
int argc;
char *argv[];
{
int i, j, k, l, m, nfile, flag;
double r, sd, tmp;
/* Function prototypes */
void ReadData(void);
double *array(int n);
double StandardDeviation(void);
void CoarseGraining (int j);
void SampleEntropy (int ll, double r, double sd, int j);
void PrintResults (int nfile);
void PrintAverageResults (int nfile);
/* Initialize variables. */
prog = argv[0];
scale_max = 20;
scale_step = 1;
m_min = 2;
m_max = 2;
m_step = 1;
i_min = 0;
i_max = 39999;
r_min = 0.15;
r_max = 0.15;
r_step = 0.05;
c = 0;
nfile = 0;
flag = 0;
/* Read and interpret the command line. */
i = 0;
while (++i < argc && *argv[i] == '-') {
switch(argv[i][1]) {
case 'F':
if ((fl = fopen(argv[++i], "r")) == NULL) {
fprintf(stderr, "%s [-F]: can't open input file %s\n",
prog, argv[i]);
exit(1);
}
flag=1;
break;
case 'n':
if ((scale_max=atoi(argv[++i])) <= 0 || scale_max > SCALE_MAX) {
fprintf(stderr,
"%s [-n]: maximum scale must be between 1 and %d (default: 20)\n",
prog, SCALE_MAX);
exit(1);
}
break;
case 'a':
if ((scale_step=atoi(argv[++i])) <= 0 || scale_step >= SCALE_MAX) {
fprintf(stderr,
"%s [-a]: scale increment must be between 1 and %d (default: 1)\n",
prog, SCALE_MAX);
exit(1);
}
break;
case 'm':
if ((m_min = atoi(argv[++i])) >= M_MAX || m_min <= 0 ) {
fprintf(stderr,
"%s [-m]: minimum m value must be between 1 and %d (default: 2)\n",
prog, M_MAX);
exit(1);
}
break;
case 'M':
if ((m_max = atoi(argv[++i])) >= M_MAX || m_max <= 0) {
fprintf(stderr,
"%s [-M]: maximum m value must be between 1 and %d (default: 2)\n",
prog, M_MAX);
exit(1);
}
break;
case 'b':
if ((m_step = atoi(argv[++i])) <= 0 || m_step > M_MAX) {
fprintf(stderr,
"%s [-b]: m increment must be between 1 and %d (default: 1)\n",
prog, M_MAX);
exit(1);
}
break;
case 'r':
if ((r_min = atof(argv[++i])) <= 0) {
fprintf(stderr,
"%s [-r]: minimum r must be greater than 0 (default: 0.15)\n",
prog);
exit(1);
}
break;
case 'R':
if ((r_max = atof(argv[++i])) <= 0) {
fprintf(stderr,
"%s [-R]: maximum r must be greater than 0 (default: 0.15)\n",
prog);
exit(1);
}
break;
case 'c':
if ((r_step=atof(argv[++i]))<=0||r_step<(r_max-r_min)/R_STEP_MAX) {
fprintf(stderr,
"%s [-c]: r increment must be greater than %g (default: 0.05)\n",
prog, (r_max-r_min)/R_STEP_MAX);
}
exit(1);
break;
case 'i':
if ((i_min = atoi(argv[++i])) < 0) {
fprintf(stderr,
"%s [-i]: minimum i must not be less than 0 (default: 0)\n",
prog);
exit(1);
}
break;
case 'I':
if ((i_max = atoi(argv[++i])) <= 0 || i_max <= i_min) {
fprintf(stderr,
"%s [-I]: maximum i must be greater than %d "
"(default: number of data points)\n",
prog, i_min);
exit(1);
}
break;
default:
usage();
exit(1);
}
}
if (m_max < m_min) {
tmp = m_max;
m_max = m_min;
m_min = tmp;
}
if (r_max < r_min){
tmp = r_max;
r_max = r_min;
r_min = tmp;
}
/* Memory allocation. */
u = array(DATA_MAX);
y = array(DATA_MAX);
/* Process a single data file. */
if (flag == 0) {
l = 0;
nfile = 1;
/* Read data from stdin. */
pin = stdin;
ReadData();
/* Calculate standard deviation. */
sd = StandardDeviation();
/* Perform the coarse-graining procedure. */
for (j = 1; j <= scale_max; j += scale_step){
CoarseGraining(j);
/* Calculate SampEn for each scale and each r value. */
c = 0;
for (r = r_min; r <= (r_max*1.0000000001); r += r_step){
SampleEntropy(l, r, sd, j);
c++;
}
}
/* Print results. */
PrintResults(nfile);
}
/* Process multiple data files. */
if (flag == 1) {
/* Read the list of data files. */
for (l = 0; fscanf(fl, "%s", file) == 1; l++) {
nfile++; /*count the number of data files*/
if ((pin = fopen(file, "r")) == NULL) { /* open each data file */
fprintf(stderr, "%s : Cannot open file %s\n", prog, file);
exit(1);
}
/* Read the data. */
ReadData();
/* Calculate the standard deviation. */
sd = StandardDeviation ();
/* Perform the coarse-graining procedure. */
for (j = 1; j <= scale_max; j += scale_step) {
CoarseGraining(j);
c = 0;
/* Calculate SampEn for each scale and each r value. */
for (r = r_min; r <= (r_max*1.0000000001) ; r += r_step) {
SampleEntropy (l, r, sd, j);
c++;
}
}
}
/* Print results. */
PrintResults(nfile);
/* Print average results. */
if (nfile > 1)
PrintAverageResults(nfile);
fclose(pin);
fclose(fl);
}
free(u);
free(y);
exit(0);
}
double *array(int n)
{
int i;
double *a;
if ((a = calloc (n, sizeof (double))) == NULL) {
fprintf(stderr, "%s : insufficient memory\n", prog);
exit(2);
}
return (a);
}
void ReadData(void)
{
int j = -1;
while (fgets(line, MAXSTR, pin) != NULL) {
j++;
if (j >= i_min && j <= i_max) {
sscanf(line, "%lf", &u[j-i_min]);
nlin=j-i_min+1;
}
}
}
double StandardDeviation(void)
{
double sum=0.0, sum2=0.0, sd;
int j;
for (j = 0; j < nlin; j++) {
sum += u[j];
sum2 += u[j] * u[j];
}
sd = sqrt((sum2 - sum*sum/nlin)/(nlin - 1));
return (sd);
}
void CoarseGraining(int j)
{
int i, k;
for (i = 0; i < nlin/j; i++) {
y[i] = 0;
for (k = 0; k < j; k++)
y[i] += u[i*j+k];
y[i] /= j;
}
}
void SampleEntropy(int ll, double r, double sd, int j)
{
int i, k, l, nlin_j;
int cont[M_MAX+1];
double r_new;
nlin_j = (nlin/j) - m_max;
r_new = r*sd;
for (i = 0; i < M_MAX; i++)
cont[i]=0;
for (i = 0; i < nlin_j; ++i) {
for (l = i+1; l < nlin_j; ++l) { /*self-matches are not counted*/
k = 0;
while (k < m_max && fabs(y[i+k] - y[l+k]) <= r_new)
cont[++k]++;
if (k == m_max && fabs(y[i+m_max] - y[l+m_max]) <= r_new)
cont[m_max+1]++;
}
}
for (i = 1; i <= m_max; i++)
if (cont[i+1] == 0 || cont[i] == 0)
SE[ll][c][j][i] = -log((double)1/((nlin_j)*(nlin_j-1)));
else
SE[ll][c][j][i] = -log((double)cont[i+1]/cont[i]);
}
void PrintResults(int nfile)
{
int j, m, k, l, i;
printf ("\n");
for (m = m_min; m <= m_max; m += m_step)
for (k = 0; k < c; k++) {
printf ("\nm = %d, r = %.3f\n\n", m, r_min+k*r_step);
if (nfile > 1) {
fseek(fl, 0, SEEK_SET);
for(l = 0; fscanf(fl, "%s", file) == 1; l++)
printf("\t%.6s", file);
printf("\n");
}
for (j = 1; j <= scale_max; j += scale_step) {
printf("%d\t", j);
for (l=0; l<nfile; l++)
printf("%.3lf\t", SE[l][k][j][m]);
printf("\n");
}
}
}
void PrintAverageResults(int nfile)
{
int k, m, j, i, l;
double av, av2, sd1;
printf("\n**************************\n");
printf("Mean and SD over all files\n");
printf("**************************\n");
for (k = 0; k < c; k++) {
printf("\n");
for (m = m_min; m <= m_max; m += m_step)
printf("\tm=%d, r=%5.3lf", m, r_min+k*r_step);
printf("\n");
for (m = m_min; m <= m_max; m += m_step)
printf("\tmean\tsd");
printf("\n");
for (j = 1; j <= scale_max; j += scale_step) {
printf("%d\t", j);
for (i = m_min; i <= m_max; i += m_step) {
av = 0.0;
av2 = 0.0;
/* Calculate entropy mean values and SD over all files. */
for (l = 0; l < nfile; l++) {
av += SE[l][k][j][i];
av2 += (SE[l][k][j][i]*SE[l][k][j][i]);
}
sd1 = sqrt((av2-av*av/nfile) / (nfile-1));
av /= nfile;
printf("%.3lf\t%.3lf\t", av, sd1);
}
printf("\n");
}
}
}
usage()
{
fprintf(stderr, "usage: %s [options]\n", prog);
fprintf(stderr, "\nTo calculate MSE for a single data file:\n"
" %s <datafile >outputfile\n"
"To calculate MSE for multiple data files:\n"
" %s -F listfile >outputfile\n"
"(where listfile contains a list of data files).\n\n", prog, prog);
fprintf(stderr, "Data files should contain a single column of numbers\n");
fprintf(stderr, "Options may include:\n");
fprintf(stderr, " -a N set scale increment to N [1-%d; default: 1]\n",
SCALE_MAX);
fprintf(stderr, " -b N set m increment to N [1-%d; default: 1]\n",
M_MAX);
fprintf(stderr, " -c X set r increment to X [>%g; default: 0.05]\n",
(r_max-r_min)/R_STEP_MAX);
fprintf(stderr, " -i N set minimum i to N [0-39998; default: 0]\n");
fprintf(stderr, " -I N set maximum i to N [1-39999: default: 39999]\n");
fprintf(stderr, " -m N set minimum m to N [1-%d; default: 2]\n", M_MAX);
fprintf(stderr, " -M N set maximum m to N [1-%d; default: 2]\n", M_MAX);
fprintf(stderr, " -n N set maximum scale to N [1-%d; default: 20]\n",
SCALE_MAX);
fprintf(stderr, " -r X set minimum r to X [>0; default: 0.15]\n");
fprintf(stderr, " -R X set maximum r to X [>0; default: 0.15]\n");
fprintf(stderr,
"Option arguments indicated as N are integers; those shown as X may be given"
"\nin any floating point format. For further information, visit:\n"
" http://physionet.org/physiotools/mse/\n");
}