import datetime import os import struct import numpy as np from wfdb.io.record import rdrecord def wfdb_to_mat( record_name, pn_dir=None, sampfrom=0, sampto=None, channels=None ): """ This program converts the signals of any PhysioNet record (or one in any compatible format) into a .mat file that can be read directly using any version of Matlab, and a short text file containing information about the signals (names, gains, baselines, units, sampling frequency, and start time/date if known). If the input record name is REC, the output files are RECm.mat and RECm.hea. The output files can also be read by any WFDB application as record RECm. This program does not convert annotation files; for that task, 'rdann' is recommended. The output .mat file contains a single matrix named `val` containing raw (unshifted, unscaled) samples from the selected record. Using various options, you can select any time interval within a record, or any subset of the signals, which can be rearranged as desired within the rows of the matrix. Since .mat files are written in column-major order (i.e., all of column n precedes all of column n+1), each vector of samples is written as a column rather than as a row, so that the column number in the .mat file equals the sample number in the input record (minus however many samples were skipped at the beginning of the record, as specified using the `start_time` option). If this seems odd, transpose your matrix after reading it! This program writes version 5 MAT-file format output files, as documented in http://www.mathworks.com/access/helpdesk/help/pdf_doc/matlab/matfile_format.pdf The samples are written as 32-bit signed integers (mattype=20 below) in little-endian format if the record contains any format 24 or format 32 signals, as 8-bit unsigned integers (mattype=50) if the record contains only format 80 signals, or as 16-bit signed integers in little-endian format (mattype=30) otherwise. The maximum size of the output variable is 2^31 bytes. 
`wfdb2mat` from versions 10.5.24 and earlier of the original WFDB software package writes version 4 MAT-files, which have the additional constraint of 100,000,000 elements per variable. The output files (recordm.mat + recordm.hea) are still WFDB-compatible, given the .hea file constructed by this program. Parameters ---------- record_name : str The name of the input WFDB record to be read. pn_dir : str, optional Option used to stream data from PhysioNet. The PhysioNet database directory from which to find the required record files. e.g., for record '100' in 'http://physionet.org/content/mitdb', pn_dir='mitdb'. sampfrom : int, optional The starting sample number to read for all channels. sampto : int, 'end', optional The sample number at which to stop reading for all channels. Reads the entire duration by default. channels : list, optional List of integer indices specifying the channels to be read. Reads all channels by default. Returns ------- N/A Notes ----- The entire file is composed of: Bytes 0 - 127: descriptive text Bytes 128 - 131: master tag (data type = matrix) Bytes 132 - 135: master tag (data size) Bytes 136 - 151: array flags (4 byte tag with data type, 4 byte tag with subelement size, 8 bytes of content) Bytes 152 - 167: array dimension (4 byte tag with data type, 4 byte tag with subelement size, 8 bytes of content) Bytes 168 - 183: array name (4 byte tag with data type, 4 byte tag with subelement size, 8 bytes of content) Bytes 184 - ...: array content (4 byte tag with data type, 4 byte tag with subelement size, ... 
bytes of content) Examples -------- >>> wfdb2mat('100', pn_dir='pwave') The output file name is 100m.mat and 100m.hea """ record = rdrecord( record_name, pn_dir=pn_dir, sampfrom=sampfrom, sampto=sampto ) record_name_out = record_name.split(os.sep)[-1].replace("-", "_") + "m" # Some variables describing the format of the .mat file field_version = 256 # 0x0100 or 256 endian_indicator = b"IM" # little endian master_type = 14 # matrix sub1_type = 6 # UINT32 sub2_type = 5 # INT32 sub3_type = 1 # INT8 sub1_class = 6 # double precision array # Determine if we can write 8-bit unsigned samples, or if 16 or 32 bits # are needed per sample bytes_per_element = 1 for i in range(record.n_sig): if record.adc_res[i] > 0: if record.adc_res[i] > 16: bytes_per_element = 4 elif (record.adc_res[i] > 8) and (bytes_per_element < 2): bytes_per_element = 2 else: # adc_res not specified.. try to guess from format if (record.fmt[i] == "24") or (record.fmt[i] == "32"): bytes_per_element = 4 elif (record.fmt[i] != "80") and (bytes_per_element < 2): bytes_per_element = 2 if bytes_per_element == 1: sub4_type = 2 # MAT8 out_type = " max_length: raise Exception("Can't write .mat file: data size exceeds 2GB limit") # Bytes of actual data bytes_of_data = bytes_per_element * record.n_sig * desired_length # This is the remaining number of bytes that don't fit into integer # multiple of 8: i.e. if 18 bytes, bytes_remain = 2, from 17 to 18 bytes_remain = bytes_of_data % 8 # master_bytes = (8 + 8) + (8 + 8) + (8 + 8) + (8 + bytes_of_data) + padding # Must be integer multiple 8 if bytes_remain == 0: master_bytes = bytes_of_data + 56 else: master_bytes = bytes_of_data + 64 - (bytes_remain) # Start writing the file output_file = record_name_out + ".mat" with open(output_file, "wb") as f: # Descriptive text (124 bytes) f.write(struct.pack("<124s", b"MATLAB 5.0")) # Version (2 bytes) f.write(struct.pack("