%\documentclass[onecolumn,preprintnumbers,amsmath,amssymb]{revtex4}
\documentclass{article}
\usepackage{rawfonts}
\makeatletter
\IfFileExists{times.sty}{\usepackage{times}}{\@missingfileerror{times}{sty}}
\makeatother
\topmargin -0.5in
\hoffset -1in
\textheight 650pt
\footskip 48pt
\textwidth 7in
\usepackage{epsfig}
\usepackage{html}
\newcommand{\htmlexternallink}[2]{%
\htmladdnormallink{#1}{#2}}
\begin{htmlonly}
\newcommand{\htmlexternallink}[2]{%
\htmladdnormallink{#1}{#2" target="other}}
\end{htmlonly}
%\usepackage{graphicx}% Include figure files
%\usepackage{bm}% bold math
\begin{document}
%\preprint{}
\title{Multiscale Entropy Analysis (MSE)}
\author{Madalena Costa, Ary L. Goldberger and C.-K. Peng\\
Beth Israel Deaconess Medical Center, Boston, USA}
\date{}
\maketitle
\noindent
A detailed description of the multiscale entropy algorithm and its
application can be found in:
\begin{itemize}
\item
Costa M., Goldberger A.L., Peng C.-K.
\htmladdnormallink{Multiscale entropy analysis of biological
signals}{http://physionet.org/physiotools/mse/papers/pre-2005.pdf}.
{\em Phys Rev E} 2005;{\bf{71}}:021906.
\item
Costa M., Goldberger A.L., Peng C.-K.
\htmladdnormallink{Multiscale entropy analysis of physiologic time
series.}{http://physionet.org/physiotools/mse/papers/prl-2002.pdf}
{\em{Phys Rev Lett}} 2002; {\bf{89}}:062102.
\end{itemize}
Please cite these publications when referencing this material, and also
include the standard citation for PhysioNet:
\begin{itemize}
\item
Goldberger AL, Amaral
LAN, Glass L, Hausdorff JM, Ivanov PCh, Mark RG, Mietus JE, Moody GB,
Peng C-K, Stanley HE. PhysioBank, PhysioToolkit, and PhysioNet:
components of a new research resource for complex physiologic
signals. {\em{Circulation}} {\bf{101}}(23):e215-e220 [Circulation
Electronic Pages;
\htmlexternallink{http://circ.ahajournals.org/cgi/content/full/101/23/e215}{http://circ.ahajournals.org/cgi/content/full/101/23/e215}]; 2000 (June
13)
\end{itemize}
Readers of this tutorial may also wish to read:
\begin{itemize}
\item
Costa M, Peng C-K, Goldberger AL, Hausdorff JM.
\htmladdnormallink{Multiscale entropy analysis of human gait
dynamics.}{http://physionet.org/physiotools/mse/papers/pha-2003.pdf}
{\em{Physica A}} 2003;{\bf{330}}:53-60.
\end{itemize}
\begin{htmlonly}
A \htmladdnormallink{PDF}{tutorial.pdf} version of this tutorial is
also available.
\end{htmlonly}
The software described in this tutorial is available
\htmladdnormallink{here}{http://physionet.org/physiotools/mse/mse.c}.
\section{Background}
Multiscale entropy (MSE) analysis~\cite{Costa-PRE, Costa-PRL} is a
new method of measuring the complexity of finite length time series. This
computational tool can be applied both to physical and physiologic
data sets, and can be used with a variety of measures of entropy. We
have developed and applied MSE for the analysis of physiologic time
series, for which we prefer to estimate entropy using the
\htmladdnormallink{sample
entropy}{http://physionet.org/physiotools/sampen/} (SampEn)
measure~\cite{SampEn}. SampEn is a refinement of the approximate
entropy family of statistics introduced by Pincus~\cite{ApEn}. Both
have been widely used for the analysis of physiologic data
sets~\cite{Lake-2002, Pincus-2002}.
Traditional entropy measures quantify only the regularity
(predictability) of time series on a single scale. There is no
straightforward correspondence, however, between regularity and
complexity. Neither completely predictable (e.g., periodic) signals,
which have minimum entropy, nor completely unpredictable (e.g.,
uncorrelated random) signals, which have maximum entropy, are
truly complex, since they can be described very compactly. There is no
consensus definition of complexity. Intuitively, complexity is
associated with ``meaningful structural richness''~\cite{Grassberger}
incorporating correlations over multiple spatio-temporal scales.
For example, we and others have observed that traditional single-scale
entropy estimates tend to yield lower entropy in time series of
physiologic data such as inter-beat (RR) interval series than in
surrogate series formed by shuffling the original physiologic data.
This happens because the shuffled data are more irregular and less
predictable than the original series, which typically contain
correlations at many time scales. The process of generating surrogate
data destroys the correlations and degrades the information content of the
original signal; if one supposes that greater entropy is
characteristic of greater complexity, such results are profoundly
misleading. The MSE method, in contrast, shows that the original
time series are more complex than the surrogate ones, by revealing
the dependence of entropy measures on
scale~\cite{Costa-CinC-2002, Costa-PhysicaA, Costa-CinC-2003, Nikulin,
Costa-Reply-Nikulin}.
The MSE method incorporates two procedures:
\begin{enumerate}
\item
A ``coarse-graining'' process is applied to the time series. For a
given time series, multiple coarse-grained time series are constructed
by averaging the data points within non-overlapping windows of
increasing length, $\tau$ (see Figure~\ref{Tutorial-Coarse}). Each
element of the coarse-grained time series, $y^{(\tau)}_j$, is
calculated according to the equation:
\begin{equation}
y_j^{(\tau)}=\frac{1}{\tau} \sum_{i=(j-1)\tau+1}^{j\tau}x_i
\label{eq:coarse-graining}
\end{equation}
where $\tau$ represents the scale factor and $1\leq j \leq
N/\tau$. The length of each coarse-grained time series is
$N/\tau$. For scale 1, the coarse-grained time series is simply the
original time series.
\item
SampEn is calculated for each coarse-grained time series, and then
plotted as a function of the scale factor. SampEn is a ``regularity
statistic.'' It ``looks for patterns'' in a time series and quantifies
its degree of predictability or regularity (see Figure~\ref{SampEn}).
\end{enumerate}
\begin{figure}
\centerline{\epsfig{file=figures/coarse,width=.5\linewidth}}
%\includegraphics[scale=0.5]{figures/coarse}
\caption{\label{Tutorial-Coarse}Schematic illustration of the
coarse-graining procedure for scales 2 and 3. Adapted from
reference~\cite{Costa-CinC-2002}.}
\end{figure}
\begin{figure}
\centerline{\epsfig{file=figures/sampen,width=.9\linewidth}}
\caption{\label{SampEn}A simulated time series {u[1], \ldots, u[N]} is
shown to illustrate the procedure for calculating sample entropy
(SampEn) for the case in which the pattern length, {\em m}, is 2, and
the similarity criterion, {\em r}, is 20. ({\em r} is a given positive
real value that is typically chosen to be between 10\% and 20\% of the
sample deviation of the time series.) Dotted horizontal lines around
data points u[1], u[2] and u[3] represent u[1]~$\pm$~{\em r},
u[2]~$\pm$~{\em r}, and u[3]~$\pm$~{\em r}, respectively. Two data
values match each other, that is, they are indistinguishable, if the
absolute difference between them is $\leq$~{\em r}. All green points
represent data points that match the data point u[1]. Similarly, all
red and blue points match the data points u[2] and u[3],
respectively. Consider the 2-component green-red template sequence
(u[1], u[2]) and the 3-component green-red-blue (u[1], u[2], u[3])
template sequence. For the segment shown, there are two green-red
sequences, (u[13], u[14]) and (u[43], u[44]), that match the template
sequence (u[1], u[2]) but only one green-red-blue sequence that
matches the template sequence (u[1], u[2], u[3]). Therefore, in this
case, the number of sequences matching the 2-component template
sequence is two and the number of sequences matching the 3-component
template sequence is one. These calculations are repeated for the next
2-component and 3-component template sequences, which are (u[2], u[3])
and (u[2], u[3], u[4]), respectively. The numbers of sequences that
match each of the 2- and 3-component template sequences are again
counted and added to the previous values. This procedure is then
repeated for all other possible template sequences, (u[3], u[4],
u[5]), \ldots, (u[N-2], u[N-1], u[N]), to determine the ratio between
the total number of 2-component template matches and the total number
of 3-component template matches. SampEn is the natural logarithm of
this ratio and reflects the probability that sequences that match each
other for the first two data points will also match for the next
point.}
\end{figure}
\section{MSE analysis of simulated white and 1/f noise}
Figure~\ref{MSE-noise} presents the MSE results for simulated
uncorrelated (white) and long-range correlated (1/f) noise. Note
that for scale one, a higher value of SampEn is obtained for white
noise time series than for 1/f time series. Although
the value of entropy for the coarse-grained 1/f series remains
almost constant for all scales, the value of entropy for the
coarse-grained white noise time series monotonically decreases, such
that for scales above 4, it becomes smaller than the corresponding
values for 1/f noise. In contrast with the conclusions drawn from
single-scale entropy-based analyses, the MSE results are consistent
with the fact that, unlike white noise, 1/f noise contains
correlations across multiple time scales and is, therefore, more
complex than white noise~\cite{Zhang}.
\begin{figure}
\centerline{\epsfig{file=figures/noise,width=.7\linewidth}}
%\includegraphics[scale=0.8]{figures/noise}
\caption{\label{MSE-noise}MSE analysis of simulated white and 1/f
noise time series. Symbols represent mean values over 30 time
series. Parameters to calculate sample entropy are: {\em m}~=~2, {\em
r}~=~0.15, and {\em N}~=~30,000. Adapted from reference~\cite{Costa-PRL}.}
\end{figure}
\section{Software for MSE analysis}
Download \htmladdnormallink{{\tt
mse.c}}{http://physionet.org/physiotools/mse/mse.c}, the C language
source for a program that performs multiscale entropy analysis. The
program can be compiled using any ANSI/ISO C compiler, and should be
linked to the C math library (it uses only the {\tt sqrt} function
from that library). For example, using the freely available GNU C
compiler, {\tt mse.c} can be compiled into an executable {\tt mse} by
the command:
\begin{verbatim}
gcc -o mse -O mse.c -lm
\end{verbatim}
\subsection*{Preparing data for MSE analysis}
In this tutorial, we illustrate the use of {\tt mse} to analyze time
series of intervals between consecutive heart beats (RR intervals).
RR interval lists as used by {\tt mse} are in text format, consisting
of one column (the RR intervals). Interval lists in this format can be
prepared from beat annotation files using
\htmladdnormallink{ann2rr}{http://physionet.org/physiotools/wag/ann2rr-1.htm}.
Use a command of the form:
\begin{verbatim}
ann2rr -r RECORD -a ANNOTATOR -A -i s4 >RECORD.rr
\end{verbatim}
where {\tt RECORD} is the record name and {\tt ANNOTATOR} is the
annotator name of the beat annotation file you wish to study. (If you
choose a PhysioBank record and have not previously downloaded the
annotation file into a local directory, ann2rr obtains the annotations
directly from PhysioNet. For details on the options used in this
command, see
\htmladdnormallink{ann2rr}{http://physionet.org/physiotools/wag/ann2rr-1.htm}
in the \htmladdnormallink{WFDB Applications
Guide}{http://physionet.org/physiotools/wag/}.) For example, the
command line:
\begin{verbatim}
ann2rr -r nsr2db/nsr040 -a ecg -A -i s4 >nsr040.rr
\end{verbatim}
creates an interval list from the {\tt ecg} beat annotations for record
{\tt nsr040} of the \htmladdnormallink{Normal Sinus Rhythm RR
Interval
Database}{http://physionet.org/physiobank/database/nsr2db/}. The first
few lines of output from this command are:
\begin{verbatim}
0.8984
0.7109
0.7188
0.7188
0.7109
0.7031
0.7031
0.7031
0.7031
...
\end{verbatim}
Of course, {\tt mse} can accept text files containing any similarly
formatted series; it is not restricted to use with RR interval time
series.
\subsection*{Setting parameters for {\tt mse}}
In order to calculate entropy, the values of parameters {\em{m}} and
{\em{r}} defining the pattern length and the similarity
criterion~\cite{SampEn}, respectively, have to be fixed. The default
values for these parameters are {\em m}~=~2 and {\em r}~=~0.15. The
options {\bf -m} and {\bf -r} may be used to change the default
values. It is possible to run MSE for a set of different {\em{m}} and
{\em{r}} values using the options {\bf -M}, {\bf -b}, {\bf -R} and
{\bf -c}, which specify, respectively, the maximum {\em{m}} value, the
difference between consecutive {\em{m}} values, the maximum {\em{r}}
value, and the difference between consecutive {\em{r}} values. For
example, the command line:
\begin{verbatim}
mse -m 2 -M 4 -b 1 -r 0.15 -R 0.2 -c 0.01 <nsr040.rr >nsr040.mse
\end{verbatim}
calculates the MSE curves for the file {\tt nsr040.rr} for all
combinations of {\em{m}} (2, 3, and 4) and {\em{r}}
(0.15, 0.16, 0.17, \ldots, 0.2) values.
The entire time series or a segment may be selected for the MSE
analysis. By default the first 40,000 data points (or the entire time
series if it contains fewer than 40,000 data points) are selected. The
user may select a different segment using the options {\bf -i} and
{\bf -I} that specify the first and the last points of the segment.
The MSE curve is calculated for a range of scales, typically from 1 to
20 data points. Each scale defines the length of the window used for
building the coarse-grained time series. The user may change the
maximum scale value and the difference between consecutive scale
values using the options {\bf -n} and {\bf -a} respectively. For
example, if we run the command line:
\begin{verbatim}
mse -n 10 -a 2 <nsr040.rr >mse-nsr040
\end{verbatim}
we obtain an output file ({\tt mse-nsr040}) containing:
\begin{verbatim}
m = 2, r = 0.150
1 0.235
3 0.192
5 0.238
7 0.257
9 0.277
\end{verbatim}
In this output, the first column contains the scale factors, and the
second column provides the corresponding entropy values.
Several time series may be analyzed simultaneously with the option
{\bf -F}. For this purpose a list with the names of the data files (one
per line) should be saved as a text file. For example, we can generate
an RR interval series for record nsr047 following the same method as
for nsr040, above, and then create a file named {\tt filelist},
containing:
\begin{verbatim}
nsr040.rr
nsr047.rr
\end{verbatim}
We can then process both of these files using {\tt mse} by the command:
\begin{verbatim}
mse -n 10 -a 2 -F filelist >filelist.mse
\end{verbatim}
to obtain this output:
\begin{verbatim}
m = 2, r = 0.150
nsr040 nsr047
1 0.235 0.796
3 0.192 1.053
5 0.238 1.218
7 0.257 1.228
9 0.277 1.201
**************************
Mean and SD over all files
**************************
m=2, r=0.150
mean sd
1 0.515 0.397
3 0.623 0.609
5 0.728 0.693
7 0.742 0.686
9 0.739 0.654
\end{verbatim}
For each scale, the mean and the sample deviation of the entropy values
over all data files are calculated.
\subsection*{Summary of options and default values for {\tt mse}}
\noindent
{\bf Scale factor}
{\bf -n}: maximum scale factor (default: 20)
{\bf -a}: difference between consecutive scale factors (default: 1)
\\
\noindent
{\bf Parameter {\em m}}
{\bf -m}: minimum m value (default: 2)
{\bf -M}: maximum m value (default: 2)
{\bf -b}: difference between consecutive m values (default: 1)
\\
\noindent
{\bf Parameter {\em r}}
{\bf -r}: minimum r value (default: 0.15)
{\bf -R}: maximum r value (default: 0.15)
{\bf -c}: difference between consecutive r values (default: 0.05)
\\
\noindent
{\bf Segment selection}
{\bf -i}: starting data point (default: 0)
{\bf -I}: ending data point (default: 39999)
\\
\noindent
{\bf Multiple data files}
{\bf -F}: text file; each line lists the name of a data file
\section{Effect of outliers on the MSE curves}
Outliers may affect the entropy values because they change the time
series standard deviation and therefore, the value of the parameter
{\em{r}} that defines the similarity criterion. Figure~\ref{Contour}
shows that a small number of outliers with high amplitude has similar
effects on the variance as a higher percentage of outliers with lower
amplitude.
\begin{figure}
\centerline{\epsfig{file=figures/contour,width=.7\linewidth}}
%\includegraphics[scale=0.8]{figures/contour}
\caption{\label{Contour}Contour plot showing how the percentage of
outliers and their amplitude (relative to the mean value of the time
series) affect the variance of the time series. Lines connect $(x,y)$
pairs of values that change the variance by the same amount.}
\end{figure}
Figure~\ref{TimeSeries} presents three RR interval time series derived
from a 24 hour Holter recording of a healthy subject (nsr020). We
calculate the MSE curves for a segment of the original time series
(file 1) and two filtered time series (file 2 and file 3). File 1
contains the first 30,000 data points (RR intervals) of the original
time series. File 2 contains the same data as file 1, but excluding
the 6 RR intervals that exceed 2s. Similarly, file 3 contains these
same intervals, but excluding the 43 RR intervals that are less than
0.3s or greater than 1s.
Using {\tt mse}, we can obtain the following MSE analysis of these
three files:
\begin{verbatim}
Scale File 1 File 2 File 3
1 0.009 0.734 0.734
3 0.012 0.937 0.933
5 0.012 1.137 1.140
7 0.011 1.138 1.144
9 0.011 1.222 1.210
11 0.011 1.174 1.224
13 0.011 1.204 1.186
15 0.011 1.199 1.186
17 0.010 1.183 1.189
19 0.009 1.186 1.212
\end{verbatim}
File 1 includes 6 outliers (225.8, 4.43, 5.24, 4.65, 4.40, 8.61) at
least one order of magnitude higher than the mean value of the time
series. The sample deviations of the contents of files 1, 2 and 3 are
1.3, 0.62 and 0.60, respectively. For file 1, any two data points
$x_i$ and $x_j$ such that $|x_i-x_j| \leq 0.2$s are not
distinguishable. Therefore, this time series seems to be very regular
and the entropy values are close to zero. File 3 contains 37 fewer
outliers than file 2. However, since the difference between their
sample deviations is less than 0.05\%, the entropy values are very
close. We note that the inclusion of a low percentage of outliers does
not significantly affect MSE analysis unless their differences from
the mean value of the time series are orders of magnitude larger than
the sample deviation.
\begin{figure}
\centerline{\epsfig{file=figures/timeseries,width=.9\linewidth}}
%\includegraphics[scale=0.99]{figures/timeseries}
\caption{\label{TimeSeries}Top panel: cardiac interbeat (RR) interval
time series from a healthy subject (nsr020). One outlier (225.8) is
not represented. Middle panel: Time series obtained from the time
series presented in the top panel excluding the 6 RR intervals $>
2$s. Bottom panel: Time series obtained from the time series presented
in the top panel excluding the 43 RR intervals outside the interval
0.3 to 1s.}
\end{figure}
\begin{thebibliography}{14}
\bibitem{Costa-PRE}
Costa M., Goldberger A.L., Peng C.-K.
\htmladdnormallink{Multiscale entropy analysis of biological
signals}{http://physionet.org/physiotools/mse/papers/pre-2005.pdf}.
{\em Phys Rev E} 2005;{\bf{71}}:021906.
\bibitem{Costa-PRL}
Costa M., Goldberger A.L., Peng C.-K. \htmladdnormallink{Multiscale entropy
analysis of physiologic time
series}{http://physionet.org/physiotools/mse/papers/prl-2002.pdf}.
{\em{Phys Rev Lett}} 2002;{\bf{89}}:062102.
\bibitem{SampEn}
Richman J.S., Moorman J.R. \htmlexternallink{Physiological time-series analysis
using approximate entropy and sample
entropy}{http://ajpheart.physiology.org/cgi/content/full/278/6/H2039}. {\em Am
J Physiol Heart Circ Physiol} 2000;{\bf{278}}(6):H2039-H2049.
\bibitem{ApEn}
Pincus S.M. \htmlexternallink{Approximate entropy as a measure of system
complexity}{http://www.pnas.org/cgi/reprint/88/6/2297.pdf}. {\em Proc Natl Acad
Sci USA} 1991;{\bf{88}}:2297-2301.
\bibitem{Lake-2002}
Lake D.E., Richman J.S., Griffin M.P., Moorman J.R. \htmlexternallink{Sample
entropy analysis of neonatal heart rate
variability}{http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=12185014}. {\em
Am J Physiol Regul Integr Comp Physiol} 2002;{\bf{283}}(3):R789-97.
\bibitem{Pincus-2002}
Pincus S.M. \htmlexternallink{Assessing serial irregularity and its
implications for
health}{http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=11797860}. {\em
Ann N Y Acad Sci} 2002;{\bf{954}}:245-67.
\bibitem{Grassberger}
Grassberger P. Information and complexity measures in dynamical systems, in
Atmanspacher H, and Scheingraber H (eds.), {\em Information Dynamics}.
New York: Plenum Press, 1991; 15-33.
\bibitem{Costa-CinC-2002}
Costa M., Goldberger A.L., Peng C.-K. \htmladdnormallink{Multiscale
entropy to distinguish between physiologic and synthetic RR time
series}{http://physionet.org/physiotools/mse/papers/cinc-2002.pdf}. {\em
Computers in Cardiology} 2002;{\bf{29}}:137-140.
\bibitem{Costa-PhysicaA}
Costa M, Peng C-K, Goldberger AL, Hausdorff JM. \htmladdnormallink{Multiscale
entropy analysis of human gait
dynamics.}{http://physionet.org/physiotools/mse/papers/pha-2003.pdf}
{\em{Physica A}} 2003;{\bf{330}}:53-60.
\bibitem{Costa-CinC-2003}
Costa M., Healey J.A. \htmladdnormallink{Multiscale entropy analysis of
complex heart rate dynamics: discrimination of age and heart failure
effects}{http://physionet.org/physiotools/mse/papers/cinc-2003.pdf}. {\em Computers in Cardiology}
2003;{\bf{30}}:705-708.
\bibitem{Nikulin}
Nikulin V.V., Brismar T. \htmlexternallink{Comment on ``Multiscale entropy
analysis of complex physiologic time
series''}{http://link.aps.org/abstract/PRL/v92/e089803}. {\em Phys Rev Lett}
2004;{\bf{92}}(8):089803.
\bibitem{Costa-Reply-Nikulin}
Costa M., Goldberger A.L., Peng C.-K.
\htmlexternallink{Reply}{http://link.aps.org/abstract/PRL/v92/e089804}. {\em
Phys Rev Lett} 2004;{\bf{92}}(8):089804.
\bibitem{Zhang}
Zhang Y.C. \htmlexternallink{Complexity and 1/f noise: a phase space
approach}{http://www.edpsciences.org/articles/jp1/pdf/1991/07/jp1v1p971.pdf}.
{\em J Phys I France} 1991;{\bf{1}}:971-977.
\end{thebibliography}
\end{document}