Software for generating the PhysioBank Index 1.0.0

File: <base>/pbindex.c (19,419 bytes)
/* file: pbindex.c		G. Moody	5 March 2008
				Last revised:  13 March 2012

-------------------------------------------------------------------------------
pbindex.c: create index entries for records
Copyright (C) 2008-2012 George B. Moody

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place - Suite 330, Boston, MA 02111-1307, USA.

You may contact the author by e-mail (george@mit.edu) or postal mail
(MIT Room E25-505A, Cambridge, MA 02139 USA).  For updates to this software,
please visit PhysioNet (http://www.physionet.org/).
_______________________________________________________________________________

This program reads a list of PhysioBank records from its standard input and
writes an index of their contents on its standard output.

The (text) output of this program consists of one line per signal and annotation
file, containing these tab-separated fields:
  record name
  class
  signal or annotator name
  sampling frequency (Hz)
  gain (A/D units per physical unit), or number of annotations
  duration (in seconds)
  time intervals during which samples or annotations are present (in seconds)

  * If the gain is not recorded in the header, 'no calibration' is reported
    in place of the gain.
  * If the type of physical unit is not recorded, it is reported as 'mV'.
    In variable-format multisegment records, the recorded gain may not be
    constant;  in these records, the gain recorded in the layout header is
    reported.
  * Signal loss is reported only for variable-format multisegment records.
    In all other cases, the reported time interval is the duration of the
    record.
  * In the most common case, the signal is present throughout the record,
    and the final field (time intervals) is omitted.

The gain field contains an embedded space between the numerical value and
the units, and the time intervals field contains embedded spaces separating
disjoint intervals.

Note that this program does not process remote variable-layout records, or
remote records in which the header does not specify the record length
(because it uses ordinary file I/O to read the segment headers in the first
case, or to determine the length of the record from seeking in the signal
file in the second case).
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wfdb/wfdb.h>
#include <wfdb/ecgcodes.h>
#include <wfdb/ecgmap.h>

#define AFTYPES   "/home/physionet/html/physiobank/database/aftypes"
#define SIGTYPES  "/home/physionet/html/physiobank/database/sigtypes"

/* Lookup tables filled in by init():  signame[i] is a signal description and
   sigclass[i] the class assigned to it;  aname[i] is an annotator name and
   annclass[i] the class assigned to it.  pname is the program name, used as
   the prefix of diagnostic messages. */
char **aname = NULL, **annclass, *pname, **signame = NULL, **sigclass;
/* Sizes of the annotator and signal tables above, as set by init(). */
int annclasses = 0, nclasses = 0;
/* Annotator description passed to annopen() by process(); its name field is
   set to each known annotator name in turn. */
WFDB_Anninfo ai;
/* The annotation most recently read by getann() in process(). */
WFDB_Annotation annot;
/* Sampling frequency of the record being processed; ltimstr() divides sample
   counts by it to obtain seconds. */
WFDB_Frequency sfreq;
/* Times of the first and last annotations of the annotator being processed. */
WFDB_Time t0, tf;

/* Per-label annotation statistics.  process() keeps one node per distinct
   label in a singly-linked list while it reads an annotation file.
   as0 is the head of that list, as the node currently being examined, asp the
   node visited just before it during a list search, and lastr the node of the
   most recent RHYTHM annotation (NULL if none has been seen). */
struct anstats {
  char *name;		/* label (heap-allocated copy) */
  int n, anntyp;	/* n: annotations counted for this label;
			   anntyp: type of the first annotation with it */
  WFDB_Time t0, tf;	/* for RHYTHM labels, t0 is the time of the latest
			   occurrence and tf accumulates elapsed time;
			   otherwise t0 and tf are the times of the first and
			   latest occurrences */
  struct anstats *next;	/* next node in the list, or NULL */
} *as, *asp, *as0, *lastr;

/* Function prototypes (definitions follow below). */
void init(void);			/* load the sigtypes and aftypes tables */
char *signaltype(char *name, char *units); /* class of a signal */
char *annotatortype(char *name);	/* class of an annotator */
int process(char *record);		/* write index entries for one record */
int processms(char *record, char *sdesc, WFDB_Time tf); /* intervals in which
					   a signal appears in the segments of
					   a multisegment record */
void process_info(char *record);	/* index the header's info strings */
char *token(char *p);			/* advance to the next token */
char *ltimstr(WFDB_Time t);		/* sample count -> seconds, as text */
char *prog_name(char *s);		/* strip directories from a path */
void cleanup(void);			/* release the tables loaded by init */

/*
 * Program entry point.
 *
 * Loads the signal and annotator class tables, then reads record names from
 * the standard input (one per line) and writes the index entries for each
 * record on the standard output.  Failures reported by process() for
 * individual records are ignored, so the exit status is always 0 once the
 * tables have been loaded.
 */
int main(int argc, char **argv)
{
    static char record[1024];

    (void)argc;
    pname = prog_name(argv[0]);
    init();

    while (fgets(record, sizeof(record), stdin)) {
	/* Remove the line terminator if there is one.  (Blindly deleting the
	   last character would damage a final line that has no newline.) */
	record[strcspn(record, "\r\n")] = '\0';
	if (record[0] == '\0')
	    continue;		/* nothing to look up on a blank line */
	(void)process(record);
    }

    cleanup();
    return 0;
}

void init() {
    char buf[256], *p, *prog_name();
    FILE *ifile;
    int i = 0;

    if ((ifile = fopen(SIGTYPES, "r")) == NULL) {
      fprintf(stderr, "%s: can't open %s\n", pname, SIGTYPES);
	exit(1);
    }
    while (fgets(buf, sizeof(buf), ifile))
	nclasses++;
    rewind(ifile);
    signame = calloc(nclasses, sizeof(char *));
    sigclass = calloc(nclasses, sizeof(char *));
    if (sigclass == NULL || signame == NULL) {
	fclose(ifile);
	fprintf(stderr, "%s: insufficient memory\n", pname);
	exit(2);
    }
    while (fgets(buf, sizeof(buf), ifile)) {
	p = strstr(buf, "\t");
	if (p == NULL) continue;
	*p++ = '\0';
	sigclass[i] = calloc(strlen(buf)+1, sizeof(char));
	strcpy(sigclass[i], buf);
	signame[i] = calloc(strlen(p), sizeof(char));
	p[strlen(p)-1] = '\0';
	strcpy(signame[i++], p);
    }
    fclose(ifile);

    if ((ifile = fopen(AFTYPES, "r")) == NULL) {
	fprintf(stderr, "%s: can't open aftypes\n", pname);
	exit(1);
    }
    while (fgets(buf, sizeof(buf), ifile))
	annclasses++;
    rewind(ifile);
    aname = calloc(annclasses, sizeof(char *));
    annclass = calloc(annclasses, sizeof(char *));
    if (annclass == NULL || aname == NULL) {
	fclose(ifile);
	fprintf(stderr, "%s: insufficient memory\n", pname);
	exit(2);
    }
    i = 0;
    while (fgets(buf, sizeof(buf), ifile)) {
	p = strstr(buf, "\t");
	if (p == NULL) continue;
	*p++ = '\0';
	annclass[i] = calloc(strlen(buf)+1, sizeof(char));
	strcpy(annclass[i], buf);
	aname[i] = calloc(strlen(p), sizeof(char));
	p[strlen(p)-1] = '\0';
	strcpy(aname[i++], p);
    }
    fclose(ifile);

    ai.stat = WFDB_READ;
}

/*
 * Return the class of the signal whose description is `name`.
 *
 * The description is looked up in signame[]; on an exact match the
 * corresponding sigclass[] entry is returned.  Otherwise the class is guessed
 * from the physical units: "mV" gives "ECG", "mmHg" gives "BP", and anything
 * else gives "unknown".
 */
char *signaltype(char *name, char *units)
{
    int k = 0;

    /* An exact match in the table takes precedence. */
    while (k < nclasses) {
	if (!strcmp(signame[k], name))
	    return (sigclass[k]);
	k++;
    }

    /* No match: fall back on the physical units. */
    if (!strcmp(units, "mV"))
	return ("ECG");
    if (!strcmp(units, "mmHg"))
	return ("BP");
    return ("unknown");
}

/*
 * Return the class of the annotator called `name`: the annclass[] entry
 * whose aname[] entry matches exactly, or "AnnU" if the name is not in the
 * table.
 */
char *annotatortype(char *name)
{
    int k = 0;

    while (k < annclasses) {
	if (!strcmp(aname[k], name))
	    return (annclass[k]);
	k++;
    }
    return ("AnnU");
}

int process(char *record)
{
    char *p, *pname;
    int anum = 0, i, n, nsig = 0, vlmsrec = 0;
    int nbp = 0, nco = 0, nco2 = 0, necg = 0, neeg = 0, nemg = 0, neog = 0,
        nep = 0, nflow = 0, nhr = 0, nnoise = 0, no2 = 0, npleth = 0, npos = 0,
        nresp = 0, nsound = 0, nst = 0, nstatus = 0, nstim = 0, nsv = 0,
      ntemp = 0, nscg=0;
    FILE *ifile;
    WFDB_Siginfo *s;
    WFDB_Time t;

    anum = nsig = nbp = nco = nco2 = necg = neeg = nemg = neog = nep = nflow =
      nhr = nnoise = no2 = npleth = npos = nresp = nsound = nst = nstatus =
      nstim = nsv = ntemp = 0;

    /* Discover the number of signals defined in the header. */
    wfdbquiet();
    if ((nsig = isigopen(record, NULL, 0)) < 0) {
        wfdbquit();
	return (1);
    }
    wfdbverbose();

    /* Index metadata from the header. */
    process_info(record);

    if (nsig == 0)	/* no signals -- sfreq needed for annotations */
      sfreq = sampfreq(record);
    else {

      /* Allocate storage for nsig signal information structures. */
      if ((s = malloc(nsig * sizeof(WFDB_Siginfo))) == NULL) {
	fprintf(stderr, "%s: insufficient memory\n", pname);
	return (2);
      }
      
      if ((nsig = isigopen(record, s, nsig)) <= 0)
	return (0);
      
      setgvmode(WFDB_LOWRES);
      sfreq = sampfreq(NULL);
      t = strtim("e");
      if (*(s[0].fname) != '~') p = wfdbfile(s[0].fname, NULL);
      else p = NULL;

      if (s[0].nsamp != t && s[0].nsamp == 0) 
        vlmsrec = 1;  /* it's a variable-layout multisegment record */
      else if (t == 0 && p &&  /* length unspecified in header file */
	       (ifile = fopen(p, "r")) && (fseek(ifile, 0L, 2) == 0)) {
        int framesize = 0;
	long nbytes = ftell(ifile) - wfdbgetstart(0); /* # data bytes */
	
	fclose(ifile);
	for (i = 0; i < nsig && s[i].group == 0; i++)
	  framesize += s[i].spf;	/* frame size in samples */
	switch (s[0].fmt) {
	case 8:
	case 80:
	  t = nbytes / framesize;
	  break;
	default:
	case 16:
	case 61:
	case 160:
	  t = nbytes / (2*framesize);
	  break;
	case 212:
	  t = (2L * nbytes) / (3*framesize);
	  break;
	case 310:
	case 311:
	  t = (3L * nbytes) / (4*framesize);
	  break;
	}
      }

      for (i = 0; i < nsig; i++) {
        (void)printf("%s\t", record);
	if (s[i].units == NULL) s[i].units = "mV";
	p = signaltype(s[i].desc, s[i].units);
	if (strcmp(p, "BP") == 0)
	  (void)printf("BP%d\t", ++nbp);
	else if (strcmp(p, "CO") == 0)
	  (void)printf("CO-%d\t", ++nco);
	else if (strcmp(p, "CO2") == 0)
	  (void)printf("CO2-%d\t", ++nco2);
	else if (strcmp(p, "ECG") == 0)
	  (void)printf("ECG%d\t", ++necg);
	else if (strcmp(p, "EEG") == 0)
	  (void)printf("EEG%d\t", ++neeg);
	else if (strcmp(p, "EMG") == 0)
	  (void)printf("EMG%d\t", ++nemg);
	else if (strcmp(p, "EOG") == 0)
	  (void)printf("EOG%d\t", ++neog);
	else if (strcmp(p, "EP") == 0)
	  (void)printf("EP%d\t", ++nep);
	else if (strcmp(p, "Flow") == 0)
	  (void)printf("Flow%d\t", ++nflow);
	else if (strcmp(p, "HR") == 0)
	  (void)printf("HR%d\t", ++nhr);
	else if (strcmp(p, "Noise") == 0)
	  (void)printf("Noise%d\t", ++nnoise);
	else if (strcmp(p, "O2") == 0)
	  (void)printf("O2-%d\t", ++no2);
	else if (strcmp(p, "PLETH") == 0)
	  (void)printf("PLETH%d\t", ++npleth);
	else if (strcmp(p, "Pos") == 0)
	  (void)printf("Pos%d\t", ++npos);
	else if (strcmp(p, "Resp") == 0)
	  (void)printf("Resp%d\t", ++nresp);
	else if (strcmp(p, "SCG") == 0)
          (void)printf("SCG%d\t", ++nscg);
	else if (strcmp(p, "Sound") == 0)
	  (void)printf("Sound%d\t", ++nsound);
	else if (strcmp(p, "ST") == 0)
	  (void)printf("ST%d\t", ++nst);
	else if (strcmp(p, "Status") == 0)
	  (void)printf("Status%d\t", ++nstatus);
	else if (strcmp(p, "Stim") == 0)
	  (void)printf("Stim%d\t", ++nstim);
	else if (strcmp(p, "SV") == 0)
	  (void)printf("SV%d\t", ++nsv);
	else if (strcmp(p, "Temp") == 0)
	  (void)printf("Temp%d\t", ++ntemp);

	else
	  (void)printf("%s\t", p);
	(void)printf("%s\t", s[i].desc);
	printf("%g\t", sfreq * s[i].spf);
	if (s[i].gain)
	  printf("%g adu/%s\t", s[i].gain, s[i].units?s[i].units:"mV");
	else
	  printf("no calibration\t");
	if (vlmsrec) {
	  char sdesc[256];
	  sprintf(sdesc, " %s\r\n", s[i].desc);
	  processms(record, sdesc, t);
	}
	else
	  (void)printf("%s\n", ltimstr(t));
      }
    }

    wfdbquiet();	/* suppress WFDB library error messages */
    for (i = 0; i < annclasses; i++) {
	ai.name = aname[i];
        if (annopen(record, &ai, 1) < 0)
	    continue;	/* file doesn't exist, move on */
	if (getann(0, &annot) < 0)
	    continue;	/* file is empty, move on */
	t0 = tf = annot.time;
	n = 1;
	asp = lastr = NULL;
	as0 = as = calloc(sizeof(struct anstats), 1);
	if (annot.anntyp == RHYTHM) {
	    p = annot.aux+1;
	    lastr = as;
	    as->tf = 0;
	}
	else {
	     p = annstr(annot.anntyp);
	     as->tf = annot.time;
	}
	as->name = calloc(strlen(p) + 1, 1);
	strcpy(as->name, p);
	as->n = 1;
	as->anntyp = annot.anntyp;
	as->t0 = annot.time;
	while (getann(0, &annot) >= 0) {
	    n++;
	    tf = annot.time;
	    if (annot.anntyp == RHYTHM) p = annot.aux+1;
	    else (p = annstr(annot.anntyp));
	    as = as0;
	    while (as) {
	        if (strcmp(as->name, p) == 0) {
		    as->n++;
		    if (annot.anntyp == RHYTHM) {
		        as->t0 = annot.time;
			if (lastr)
			    lastr->tf += annot.time - lastr->t0;
			else
			    as->tf = annot.time;
			lastr = as;
		    }
		    else
		        as->tf = annot.time;
		    break;
		}
		asp = as;
		as = as->next;
	    }
	    if (as == NULL) {
	        as = calloc(sizeof(struct anstats), 1);
		as->name = calloc(strlen(p) + 1, 1);
		strcpy(as->name, p);
		as->n = 1;
		as->anntyp = annot.anntyp;
		as->t0 = annot.time;
		if (annot.anntyp == RHYTHM) {
		    lastr = as;
		    as->tf = 0;
		}
		else
		    as->tf = annot.time;
		if (asp) asp->next = as;
		else as0 = as;
	    }
	}
	if (lastr) lastr->tf += tf - lastr->t0;
	printf("%s\t%s%d\t%s\t%g\t%ld\t",
	       record, annclass[i], ++anum, ai.name, sfreq, n);
	printf("%s\t", ltimstr(tf - t0));
	printf("%s-", ltimstr(t0));
	printf("%s\n", ltimstr(tf));
	while (as = as0) {
	    printf("%s\t%s%d\t%s/%s\t%g\t%ld\t",
		   record, annclass[i], anum, ai.name, as->name, sfreq, as->n);
	    if (as->anntyp == RHYTHM)
		printf("%s\n", ltimstr(as->tf));
	    else {
		printf("%s\t", ltimstr(as->tf - as->t0));
		printf("%s-", ltimstr(as->t0));
		printf("%s\n", ltimstr(as->tf));
	    }
	    as0 = as->next;
	    free(as->name);
	    free(as);
	}
	as0 = NULL;
	iannclose(0);
    }
    wfdbverbose();   /* reenable error messages */
    wfdbquit();
    return (0);
}

/*
 * Append `sep` followed by the time `t` (in seconds, as formatted by
 * ltimstr()) at `tbp`, never writing at or beyond `tbend`.  Returns a pointer
 * to the new end of the text.
 */
static char *append_time(char *tbp, char *tbend, const char *sep, WFDB_Time t)
{
    snprintf(tbp, (size_t)(tbend - tbp), "%s%s", sep, ltimstr(t));
    return (tbp + strlen(tbp));
}

/*
 * Complete the output line for one signal of a multisegment record.
 *
 * The record's header file is read line by line after its first two lines.
 * A line that begins with '~' is taken as a null segment (no signals);  any
 * other line names a segment, whose own header ("<segment>.hea", in the same
 * directory as the record's header) is searched for a line that ends with
 * `sdesc`.  The number that follows the segment name is the segment's length
 * in samples.
 *
 * The line is completed with the total time during which the signal is
 * present and, after a tab, the list of "start-end" intervals (in seconds,
 * separated by spaces).  If the list outgrows its buffer it is ended with
 * " ...", but the total still covers the whole record.
 *
 * `tf` is the length of the record;  it is used only if the record's header
 * cannot be read, in which case it is written as the duration so that the
 * output line is still properly terminated.
 *
 * Returns 0 on success, or -1 if the header cannot be located or opened or
 * if memory is exhausted.
 */
int processms(char *record, char *sdesc, WFDB_Time tf)
{
  char buf[256], *d, *p, *tbp, *tbpmax, *tbend;
  char *hfname = NULL, *shfname = NULL;
  FILE *ifile = NULL, *sfile;
  int full = 0, signalon = 0, status = -1;
  size_t dlen = strlen(sdesc);
  static char tbuf[204800];
  WFDB_Time t = 0, t0 = 0, tsum = 0;

  if ((p = wfdbfile("hea", record)) == NULL) {
    fprintf(stderr, "%s: can't find header file for %s\n", pname, record);
    goto fail;
  }
  /* Work on private copies;  p belongs to the WFDB library.  shfname has
     room for the directory, a segment name read into buf, and ".hea". */
  hfname = calloc(strlen(p) + 1, 1);
  shfname = calloc(strlen(p) + sizeof(buf) + 8, 1);
  if (hfname == NULL || shfname == NULL) {
    fprintf(stderr, "%s: insufficient memory\n", pname);
    goto fail;
  }
  strcpy(hfname, p);
  if ((ifile = fopen(hfname, "r")) == NULL) {
    fprintf(stderr, "%s: can't open %s\n", pname, hfname);
    goto fail;
  }

  /* Keep only the directory part of the header's pathname (nothing at all if
     it contains no '/');  d points to where segment names are appended. */
  strcpy(shfname, hfname);
  d = strrchr(shfname, '/');
  d = d ? d + 1 : shfname;
  *d = '\0';

  tbp = tbuf; *tbp = '\0';
  tbend = tbuf + sizeof(tbuf);
  tbpmax = tbend - 50;	/* leaves room for one more interval and " ..." */
  (void)fgets(buf, sizeof(buf), ifile);  /* read and ignore first two lines */
  (void)fgets(buf, sizeof(buf), ifile);

  while (fgets(buf, sizeof(buf), ifile)) {	/* read a segment descriptor */
    char *tp;
    int present = 0;	/* does the signal appear in this segment? */
    long seglen;	/* length of this segment, in samples */

    if (buf[0] == '~')	/* segment is null (all signals off) */
      seglen = atol(buf+2);
    else {
      /* Split "<name> <length>" at the first space after the name. */
      for (tp = buf+1; *tp != '\0' && *tp != ' '; tp++)
	;
      if (*tp == '\0')
	continue;	/* no length field: not a segment descriptor */
      *tp = '\0';
      snprintf(d, sizeof(buf) + 5, "%s.hea", buf);
      /* NOTE(review): as before, a segment whose header cannot be opened is
	 skipped without adding its length to t -- confirm that this is
	 intended. */
      if ((sfile = fopen(shfname, "r")) == NULL)
	continue;
      {
	char sbuf[256];

	(void)fgets(sbuf, sizeof(sbuf), sfile); /* ignore the first line */
	/* Look at the following lines, stopping at the first comment. */
	while (fgets(sbuf, sizeof(sbuf), sfile) && *sbuf != '#') {
	  size_t len = strlen(sbuf);

	  if (len >= dlen && strcmp(sbuf + len - dlen, sdesc) == 0) {
	    present = 1;	/* the specified signal is found */
	    break;
	  }
	}
      }
      fclose(sfile);
      seglen = atol(tp+1);
    }

    if (present && !signalon) {		/* signal begins here */
      if (!full)
	tbp = append_time(tbp, tbend, " ", t);
      t0 = t;
      signalon = 1;
    }
    else if (!present && signalon) {	/* signal ends here */
      if (!full) {
	tbp = append_time(tbp, tbend, "-", t);
	if (tbp >= tbpmax) {	/* no room for another interval */
	  snprintf(tbp, (size_t)(tbend - tbp), " ...");
	  full = 1;
	}
      }
      tsum += t - t0;
      signalon = 0;
    }
    t += seglen;
  }

  if (signalon) {	/* signal is present up to the end of the record */
    if (!full)
      (void)append_time(tbp, tbend, "-", t);
    tsum += t - t0;
  }
  printf("%s", ltimstr(tsum));
  if (tbuf[0])
    printf("\t%s", tbuf+1);	/* skip the leading space */
  printf("\n");
  status = 0;
  goto done;

fail:
  /* Terminate the line that the caller has begun. */
  printf("%s\n", ltimstr(tf));
done:
  if (ifile)
    fclose(ifile);
  free(shfname);
  free(hfname);
  return (status);
}

/*
 * Write index entries for the info strings of `record`.
 *
 * getinfo(record) supplies the first info string and getinfo(NULL) each
 * following one, so the order of the getinfo() calls below is significant.
 * Each entry is one line of tab-separated fields that begins with the record
 * name:
 *   InfoN   text that was not recognized as anything else
 *   MedsN   medications
 *   DiagN   diagnoses
 *   AgeSex  age and sex (written once, after all other entries, and only if
 *           at least one of the two was found)
 * where N counts the entries of that kind for this record.  Nothing is
 * written if the record has no info strings.
 */
void process_info(char *record)
{
    char *info, *p, *sex = NULL;
    int ndiag = 0, ninfo = 0, nmeds = 0;
    double age = -1.0;		/* negative: age not known (yet) */

    if (info = getinfo(record)) {
      /* Find the first non-space in the first info string. */
      for (p = info; *p && *p == ' '; p++)
	;
      if ('0' <= *p && *p <= '9') {
	/* If the first token of the first info string is numeric,
	   and the second token is 'f', 'F', 'm', or 'M', the
	   current .hea file does not have tagged info, and the first
	   and second tokens are the age and sex; and the second info
	   string (if present) contains the medications. Handle this
	   case first. */
	sscanf(p, "%lf", &age);
	p = token(p); /* go to the next token */
	if (p && (*p == 'm' || *p == 'M')) sex = "M";
	else if (p && (*p == 'f' || *p == 'F')) sex = "F";
	if (sex == NULL) {   /* it wasn't age and sex after all! */
	  age = -1.0;
	  p = info;
	}
	/* If there are any more tokens, save them as 'Info'. */
	if (p = token(p))
	  printf("%s\tInfo%d\t%s\n", record, ++ninfo, p);
	/* The second info string, if any, is written as medications; the
	   loop below then starts with the third. */
	if (info = getinfo((char *)NULL)) {
	  printf("%s\tMeds%d\t%s\n", record, ++nmeds, info);
	  info = getinfo((char *)NULL);
	}
      }
      /* process standard (tagged) info */
      while (info) {
	/* Look for an age only until one has been found. */
	if (age < 0) {
	  if ((p = strstr(info, "age")) || (p = strstr(info, "Age"))) {
	    if (p = token(p)) {
	      sscanf(p, "%lf", &age);
	    }
	    /* Additional tagged data may follow age.  Continue processing
	       the remainder of this info string below. */
	    if (!(info = token(p)))
	      /* If there is nothing else, get the next info if any. */
	      info = getinfo((char *)NULL);
	  }
	}
	/* Likewise, look for the sex only until it has been found. */
	if (sex == NULL) {
	  if (info &&
	      ((p = strstr(info, "sex")) || (p = strstr(info, "Sex")))) {
	    if ((p = token(p)) && (*p == 'm' || *p == 'M')) sex = "M";
	    else if (p && (*p == 'f' || *p == 'F')) sex = "F";
	    /* Additional tagged data may follow sex.  Continue processing
	       the remainder of this info string. */
	    if (!(info = token(p)))
	      /* If there is nothing else, get the next info if any. */
	      info = getinfo((char *)NULL);
	  }
	}
	/* Diagnoses may be present in more than one info string. */
	if (info && *info &&
	    ((p=strstr(info,"diagnos")) || (p=strstr(info,"Diagnos")))) {
	  if ((p = token(p)) == NULL)
	    /* If nothing follows the 'diagnosis' tag, assume the next info
	       is the diagnosis. */
	    p = getinfo((char *)NULL);
	  if (p) {
	    printf("%s\tDiag%d\t%s\n", record, ++ndiag, p);
	    /* This info has been consumed;  get the next info if any. */
	    info = getinfo((char *)NULL);
	    continue;
	  }
	}
	if (info && *info && 
	    ((p=strstr(info,"medication"))||(p=strstr(info,"Medication")))) {
	  if ((p = token(p)) == NULL)
	    /* If nothing follows the 'medication' tag, assume the next info
	       is the medication. */
	    p = getinfo((char *)NULL);
	  if (p) {
	    printf("%s\tMeds%d\t%s\n", record, ++nmeds, p);
	    /* This info has been consumed;  get the next info if any. */
	    info = getinfo((char *)NULL);
	    continue;
	  }
	}
	/* Process any info that was not identified above. */
	if (info && *info)
	  printf("%s\tInfo%d\t%s\n", record, ++ninfo, info);
	info = getinfo((char *)NULL);
      }
      /* An age without a sex is reported with '?' as the sex;  a sex without
	 an age is reported with the age still at its initial value of -1. */
      if (age > -1.0 && sex == NULL) sex = "?";
      if (sex) printf("%s\tAgeSex\t%g\t%s\n", record, age, sex);
    }
}

/*
 * Return a pointer to the token that follows the one at `p`.
 *
 * Characters up to the next space, tab, or newline are skipped, and then the
 * run of such whitespace characters that follows.  The result is NULL if `p`
 * is NULL or if the end of the string is reached before another token is
 * found.
 */
char *token(char *p)
{
  char *q;

  if (p == NULL)
    return (NULL);

  /* Pass over the rest of the current token ... */
  for (q = p; *q != '\0'; q++)
    if (*q == ' ' || *q == '\t' || *q == '\n')
      break;
  /* ... and over the whitespace that separates it from the next one. */
  for ( ; *q == ' ' || *q == '\t' || *q == '\n'; q++)
    ;
  return (*q != '\0' ? q : NULL);
}

/*
 * Convert the sample count `t` to seconds (t divided by the current sampling
 * frequency `sfreq`, plus 0.5, truncated to a long) and return it as a
 * decimal string.
 *
 * The string is kept in a static buffer that is overwritten by the next
 * call, so the result must be used (or copied) before ltimstr() is called
 * again.  If sfreq is zero the conversion is not defined, and "0" is
 * returned.
 */
char *ltimstr(WFDB_Time t)
{
  static char p[32];	/* large enough for any long, sign included */
  long seconds = 0;

  if (sfreq != 0)
    seconds = (long)(t/sfreq + 0.5);
  snprintf(p, sizeof(p), "%ld", seconds);
  return (p);
}

/*
 * Return the part of the pathname `s` that follows its last directory
 * separator ('/', or '\\' or ':' if MSDOS is defined), or `s` itself if it
 * contains no separator.  The result points into `s`.
 *
 * If MSDOS is defined, the name is also modified in place: it is cut at any
 * '.' (which removes the extension) and converted to lower case.
 *
 * An empty string is returned if `s` is NULL.
 */
char *prog_name(char *s)
{
    char *p;

    if (s == NULL)
	return ("");
    p = s + strlen(s);

    /* Scan backwards, examining p[-1] so that p never moves before s. */
#ifdef MSDOS
    while (p > s && p[-1] != '\\' && p[-1] != ':') {
	p--;
	if (*p == '.')
	    *p = '\0';		/* strip off extension */
	if ('A' <= *p && *p <= 'Z')
	    *p += 'a' - 'A';	/* convert to lower case */
    }
#else
    while (p > s && p[-1] != '/')
	p--;
#endif
    return (p);
}

void cleanup()
{
    if (signame) {
	while (nclasses > 0) {
	    free(signame[--nclasses]);
	    free(sigclass[nclasses]);
	}
	free(sigclass);
	free(signame);
	signame = NULL;
   }
}