#!/usr/bin/env python3
"""
Created on Thu Mar 29 13:47:45 2018

AUTHORS: Mohammad M. Ghassemi
       : Benjamin E. Moody

PURPOSE: This script prepares an entry for the PhysioNet 2018 Challenge.
         Run as a script, it performs four steps in order: train the
         classifier, score it on the training set, write predictions for
         the test set, and package everything into ``entry.zip``.

REQUIREMENTS: We assume that you have downloaded the data from
              https://physionet.org/physiobank/database/challenge/2018/#files
"""

import numpy as np
import os
import sys
import physionetchallenge2018_lib as phyc
from score2018 import Challenge2018Score

# ``find`` is not used anywhere in this script, and recent Matplotlib
# releases no longer export it from pylab.  Keep the legacy import
# best-effort so that a modern install does not abort the whole run.
try:
    from pylab import find
except ImportError:
    find = None

from sklearn.metrics import precision_recall_curve, auc, roc_auc_score
from zipfile import ZipFile, ZIP_DEFLATED
import gc
import train_classifier as T
import run_my_classifier as R

# File-name suffixes that are bundled into the submission archive.
_ENTRY_SUFFIXES = ('.vec', '.py', '.pkl', '.txt')


# -----------------------------------------------------------------------------
# Generate the data to train the classifier
# -----------------------------------------------------------------------------
def train() -> None:
    """Preprocess every training record through ``train_classifier``.

    Calls ``T.init()`` once, ``T.preprocess_record(record_name)`` for each
    training record, and ``T.finish()`` once at the end.
    """
    T.init()

    # Generate a data frame that points to the challenge files
    tr_files, _ = phyc.get_files()
    total = np.size(tr_files, 0)

    # For each subject in the training set...
    for count, header in enumerate(tr_files.header.values, start=1):
        gc.collect()
        print('Preprocessing training subject: %d/%d' % (count, total))
        # Drop the last four characters of the header path
        # (presumably the '.hea' extension -- TODO confirm).
        record_name = header[:-4]
        T.preprocess_record(record_name)

    T.finish()


# -----------------------------------------------------------------------------
# Run the classifier on each training subject, and compute the mean performance
# -----------------------------------------------------------------------------
def score_training_set() -> None:
    """Classify each training record and print per-record and gross scores.

    For every training record the predictions from ``R.classify_record`` are
    scored against the flattened reference arousals using a
    ``Challenge2018Score`` accumulator; the per-record AUROC/AUPRC are printed
    as they are computed, followed by the gross AUROC/AUPRC over all records.
    """
    # Generate a data frame that points to the challenge files
    tr_files, _ = phyc.get_files()
    total = np.size(tr_files, 0)

    score = Challenge2018Score()
    records = zip(tr_files.header.values, tr_files.arousal.values)
    for count, (header, arousal_file) in enumerate(records, start=1):
        gc.collect()
        # No trailing newline: the scores are appended to this same line.
        sys.stdout.write('Evaluating training subject: %d/%d'
                         % (count, total))
        sys.stdout.flush()

        record_name = header[:-4]
        predictions = R.classify_record(record_name)

        arousals = np.ravel(phyc.import_arousals(arousal_file))

        score.score_record(arousals, predictions, record_name)
        auroc = score.record_auroc(record_name)
        auprc = score.record_auprc(record_name)
        print(' AUROC:%f AUPRC:%f' % (auroc, auprc))

    print()
    auroc_g = score.gross_auroc()
    auprc_g = score.gross_auprc()
    print('Training AUROC Performance (gross): %f' % auroc_g)
    print('Training AUPRC Performance (gross): %f' % auprc_g)
    print()


# -----------------------------------------------------------------------------
# Run the classifier on each test subject, and save the predictions
# for submission
# -----------------------------------------------------------------------------
def evaluate_test_set() -> None:
    """Classify each test record and save its predictions as a ``.vec`` file.

    The output file is named after the record's base name and is written to
    the current working directory, one value per line with three decimals.
    """
    # Generate a data frame that points to the challenge files
    _, te_files = phyc.get_files()
    total = np.size(te_files, 0)

    for count, header in enumerate(te_files.header.values, start=1):
        gc.collect()
        print('Evaluating test subject: %d/%d' % (count, total))
        record_name = header[:-4]
        output_file = os.path.basename(record_name) + '.vec'
        predictions = R.classify_record(record_name)
        np.savetxt(output_file, predictions, fmt='%.3f')


# -----------------------------------------------------------------------------
# Build a zip file for submission to the Challenge
# -----------------------------------------------------------------------------
def package_entry() -> None:
    """Create ``entry.zip`` from the files under the current directory.

    Walks ``.`` recursively and adds every file whose name ends in
    ``.vec``, ``.py``, ``.pkl`` or ``.txt`` to a deflate-compressed archive.
    """
    with ZipFile('entry.zip', 'w', ZIP_DEFLATED) as myzip:
        for dir_path, _subdirs, filenames in os.walk('.'):
            for fname in filenames:
                if fname.endswith(_ENTRY_SUFFIXES):
                    myzip.write(os.path.join(dir_path, fname))


def main() -> None:
    """Run the full pipeline: train, score, evaluate, then package."""
    train()
    score_training_set()
    evaluate_test_set()
    package_entry()


if __name__ == '__main__':
    main()