""" Module for parsing header files. This module will eventually replace _header.py """ import datetime import re from typing import List, Tuple class HeaderSyntaxError(ValueError): """Invalid syntax found in a WFDB header file.""" # Record line pattern. Format: # RECORD_NAME/NUM_SEG NUM_SIG SAMP_FREQ/COUNT_FREQ(BASE_COUNT_VAL) SAMPS_PER_SIG BASE_TIME BASE_DATE rx_record = re.compile( r""" [ \t]* (?P[-\w]+) /?(?P\d*) [ \t]+ (?P\d+) [ \t]* (?P\d*\.?\d*) /*(?P-?\d*\.?\d*) \(?(?P-?\d*\.?\d*)\)? [ \t]* (?P\d*) [ \t]* (?P\d{,2}:?\d{,2}:?\d{,2}\.?\d{,6}) [ \t]* (?P\d{,2}/?\d{,2}/?\d{,4}) """, re.VERBOSE, ) # Signal line pattern. Format: # FILE_NAME FORMATxSAMP_PER_FRAME:SKEW+BYTE_OFFSET ADC_GAIN(BASELINE)/UNITS ADC_RES ADC_ZERO CHECKSUM BLOCK_SIZE DESCRIPTION rx_signal = re.compile( r""" [ \t]* (?P~?[-\w]*\.?[\w]*) [ \t]+ (?P\d+) x?(?P\d*) :?(?P\d*) \+?(?P\d*) [ \t]* (?P-?\d*\.?\d*e?[\+-]?\d*) \(?(?P-?\d*)\)? /?(?P[\w\^\-\?%\/]*) [ \t]* (?P\d*) [ \t]* (?P-?\d*) [ \t]* (?P-?\d*) [ \t]* (?P-?\d*) [ \t]* (?P\d*) [ \t]* (?P[\S]?[^\t\n\r\f\v]*) """, re.VERBOSE, ) # Segment line rx_segment = re.compile( r""" [ \t]* (?P[-\w]*~?) [ \t]+ (?P\d+) """, re.VERBOSE, ) def wfdb_strptime(time_string: str) -> datetime.time: """ Given a time string in an acceptable WFDB format, return a datetime.time object. Valid formats: SS, MM:SS, HH:MM:SS, all with and without microsec. Parameters ---------- time_string : str The time to be converted to a datetime.time object. Returns ------- datetime.time object The time converted from str format. """ n_colons = time_string.count(":") if n_colons == 0: time_fmt = "%S" elif n_colons == 1: time_fmt = "%M:%S" elif n_colons == 2: time_fmt = "%H:%M:%S" if "." in time_string: time_fmt += ".%f" return datetime.datetime.strptime(time_string, time_fmt).time() def parse_header_content( header_content: str, ) -> Tuple[List[str], List[str]]: """ Parse the text of a header file. Parameters ---------- header_content: str The string content of the full header file Returns ------- header_lines : List[str] A list of all the non-comment lines comment_lines : List[str] A list of all the comment lines """ header_lines, comment_lines = [], [] for line in header_content.splitlines(): line = line.strip() # Comment line if line.startswith("#"): comment_lines.append(line) # Non-empty non-comment line = header line. elif line: header_lines.append(line) return header_lines, comment_lines