##---------------------------------------------------------------------------
## Submission for PhysioNet 2012 Challenge
## To call the file against a single patient:
## cat set-b/.txt | Rscript physionet2012.R
## Output format prints RecordID, the binary prediction, and the risk estimate to stdout
##
## Team: Natalia M. Arzeno, Joyce C. Ho, Cheng H. Lee
##---------------------------------------------------------------------------

# Imputation for Event 2 Model
#
# Named list keyed by measurement name. Each element shown here is a numeric
# vector of 18 fallback values used when a measurement is missing.
# NOTE(review): the 18 slots presumably index gender x age-group cells
# (the variable name suggests so) -- confirm against the lookup code.
# The numeric vectors are kept on single lines on purpose: they are generated
# constants and must not be re-wrapped or re-typed by hand.
model2.genderAge.raw <- list(
  Platelets = c(222.153846153846,224.049180327869,211.988764044944,206.744318181818,219.52,212.22384341637,184.367346938776,199.886010362694,222.89247311828,159.952380952381,176.59375,190.041666666667,194.124031007752,179.9972899729,191.957537154989,172.327450980392,183.87037037037,185.441176470588),
  NIMAP = c(78.7508333333333,80.1334,80.1371604938272,79.0186075949367,77.2689371980676,73.4796370967742,72.8062647058824,74.4854093567251,76.5491954022988,78.2136363636364,82.3342352941176,80.6461445783133,80.1981222707424,79.9435240963855,77.3094660194175,75.6358351893096,75.9144128113879,74.065737704918),
  SysABP = c(120.666666666667,121.75,106.344827586207,117.582608695652,116.350993377483,119.314720812183,122.164983164983,120.799256505576,131.263157894737,135.25,128.848484848485,122.71186440678,120.473684210526,116.777385159011,119.371584699454,122.871794871795,123.612334801762,124.405405405405),
  Lactate = c(1.53333333333333,1.88974358974359,2.16734693877551,2.17105263157895,2.03303571428571,1.77295597484277,1.99834821428571,1.78700483091787,1.47954545454545,2.34166666666667,2.34754098360656,1.87884615384615,1.92108974358974,2.13239436619718,1.96918819188192,1.85883802816901,1.85456790123457,1.67777777777778),
  MAP =
c(84.5,94.9032258064516,85.1034482758621,85.295652173913,83.6308724832215,83.5510204081633,81.9560810810811,83.5576208178439,84.6052631578947,89.5625,88.1060606060606,84.3389830508475,84.4315789473684,82.354609929078,79.275956284153,79.8410256410256,79.2212389380531,86.1081081081081), Glucose = c(121.923076923077,129.377049180328,116.775280898876,125.217142857143,131.538461538462,131.169675090253,129.811369509044,123.80310880829,127.849462365591,116.5,121.652631578947,121.610526315789,125.893700787402,127.801104972376,132.00641025641,134.241035856574,128.595611285266,116.328358208955), BUN = c(10.8461538461538,11.5901639344262,15.0666666666667,17.7272727272727,21.8177777777778,23.7330960854093,26.4162436548223,27.0440414507772,30.0215053763441,10.9,14.1666666666667,17.25,21.9571984435798,24.710027100271,26.3411016949153,29.8203125,34.6141975308642,36.1323529411765), HCO3 = c(24.1538461538462,23.3934426229508,23.0795454545455,23.88,24.4330357142857,24.711743772242,24.1933842239186,24.1943005181347,23.6989247311828,26.55,25.40625,25.25,24.6731517509728,24.41689373297,24.3659574468085,24.3654223968566,23.6327160493827,22.5588235294118), HR = c(84.3076923076923,94.625,90.4494382022472,87.9488636363636,86.7092511013216,86.9822695035461,84.4015151515152,83.2447916666667,82.7741935483871,87.9565217391304,92.62,93.2553191489362,88.945945945946,85.5108108108108,86.5940803382664,84.3222003929273,82.3425925925926,83.7462686567164), NISysABP = c(117.416666666667,118.26,114.135802469136,114.177215189873,117.057971014493,114.692,118.355882352941,120.539358600583,124.16091954023,123.5,124.367816091954,120.674698795181,118.212121212121,116.09009009009,117.256038647343,117.274944567628,120.459074733096,117.44262295082), WBC = 
c(11.2692307692308,11.5754098360656,13.4561797752809,12.0868181818182,11.8648888888889,12.7498220640569,12.8139540816327,13.0257142857143,12.2655913978495,11.2238095238095,11.1875,12.1229166666667,11.5937743190661,11.5097826086957,11.7189361702128,12.2687254901961,12.5429012345679,13.7313432835821), Urine = c(151.692307692308,261.625,218.370786516854,163.080924855491,159.773755656109,136.018050541516,111.645244215938,96.7842105263158,91.2934782608696,225.869565217391,232.68,220.064516129032,173.492248062016,161.695890410959,150.156182212581,120.329365079365,118.83125,86.5522388059701), Weight = c(68.8363636363636,80.4254901960784,78.36925,80.3198214285714,84.380193236715,80.8066917293233,76.2579787234042,67.2857954545455,65.0373493975904,81.1619047619048,88.5326136363636,90.5059523809524,94.2208,92.7296551724138,92.1778336980306,87.8658895705521,80.0354368932039,73.2983870967742), Bilirubin = c(1.88,1.68666666666667,1.52352941176471,2.61758241758242,2.99509803921569,1.36238532110092,1.15497076023392,0.766,1.52727272727273,1.44285714285714,1.76486486486486,2.12,2.86764705882353,2.71230769230769,2.21306532663317,1.6980487804878,1.725,1.57857142857143), NIDiasABP = c(62.4166666666667,64.24,62.7901234567901,61.5886075949367,58.7487922705314,55.1526104417671,51.6205882352941,52.874269005848,54.9540229885057,58.0454545454545,61.8352941176471,62.9518072289157,62.4217391304348,62.2259036144578,58.7572815533981,55.2583518930958,55.8291814946619,54.5573770491803), Albumin = c(2.81666666666667,2.92413793103448,2.95581395348837,2.87625,2.88555555555556,2.82636363636364,2.96666666666667,2.92435897435897,2.9875,3.275,3.246875,3.08684210526316,2.97207207207207,3.06404494382022,2.99020618556701,2.90732984293194,2.92460317460317,2.79032258064516), Na = 
c(137.307692307692,139.475409836066,138.741573033708,138.545454545455,139.236607142857,138.487544483986,138.954198473282,139.186528497409,140.408602150538,139.55,139.21875,138.833333333333,138.78125,138.577656675749,138.582978723404,138.520628683694,138.854938271605,140.220588235294), pH = c(7.41333333333333,7.40511627906977,7.39074626865672,7.3896875,7.38746913580247,7.7948,7.39366771159875,7.69348122866894,7.37176470588235,7.40176470588235,7.41275362318841,7.38820895522388,7.40253807106599,7.39880398671096,7.40279069767442,7.40340579710145,7.40267489711934,7.40857142857143), FiO2 = c(0.43125,0.413333333333333,0.486440677966102,0.496759259259259,0.483733333333333,0.514705882352941,0.491901408450704,0.487142857142857,0.519464285714286,0.458888888888889,0.50234375,0.503230769230769,0.48505376344086,0.503946360153257,0.512890173410405,0.504761904761905,0.518190476190476,0.494791666666667), PaCO2 = c(38.3333333333333,35.953488372093,39.2537313432836,41.1507936507937,41.141975308642,40.8348214285714,40.1167192429022,40.4241379310345,39.8235294117647,43,40.8059701492537,42.0149253731343,40.2233502538071,40.3966666666667,40.0989583333333,39.6966019417476,38.5289256198347,36.530612244898), DiasABP = c(64.8333333333333,75.40625,59.8793103448276,66.1565217391304,63.3046357615894,59.1116751269036,56.7542087542088,56.6579925650558,59.6052631578947,69.6875,67.4545454545455,66.6779661016949,65.5947368421053,61.8233215547703,59.0655737704918,58.6794871794872,55.5506607929515,58.1351351351351), ALP = c(99.2,83.7142857142857,105,136.758241758242,117.922330097087,104.759259259259,95.3941176470588,99.6,110.8125,92.75,72.5428571428571,92.325,91.1851851851852,107.814814814815,104.577889447236,103.78,94.7355371900826,178.724137931034), Mg = 
c(1.93076923076923,1.97049180327869,1.99204545454545,1.97714285714286,2.00495495495495,2.06690647482014,2.07704081632653,2.06831168831169,2.02150537634409,1.955,1.91368421052632,1.90421052631579,2.02918287937743,2.04806629834254,2.055,2.07583497053045,2.11645962732919,2.16417910447761), Creatinine = c(0.838461538461538,0.926229508196721,1.00111111111111,1.09943181818182,1.25555555555556,1.23202846975089,1.27868020304569,1.13031088082902,1.2247311827957,1.105,1.11979166666667,1.61875,1.42607003891051,1.49159891598916,1.54279661016949,1.5603515625,1.58672839506173,1.56764705882353), K = c(3.79230769230769,3.80163934426229,3.94333333333333,3.93125,4.04260089686099,4.1,4.09102564102564,4.03626943005181,3.95161290322581,3.85714285714286,3.90625,3.94895833333333,4.03110236220472,4.09917355371901,4.13297872340426,4.1718253968254,4.11097178683386,3.98970588235294), GCS = c(11.4615384615385,12.3392857142857,11.7865168539326,12.6590909090909,12.5550660792952,12.8014184397163,12.1843434343434,12.2916666666667,12.9139784946237,11.7391304347826,11.79,12,12.1853281853282,12.0648648648649,12.5623678646934,12.5559921414538,12.4489164086687,11.955223880597), ALT = c(671.666666666667,862.535714285714,375.5,245.826086956522,209.657142857143,144.018348623853,124.364705882353,107.431506849315,43.4117647058824,383.75,595.472222222222,242.675,171.814285714286,126.609375,166.45145631068,133.830097087379,75.827868852459,54.1), HCT = c(29.7153846153846,30.3098360655738,29.5455555555556,30.2579545454545,30.1702222222222,30.1056939501779,30.4272264631043,31.0518134715026,31.3956989247312,32.0333333333333,32.1773195876289,32.259375,31.8942084942085,31.1635869565217,30.6511677282378,30.9508806262231,31.0993827160494,30.7985294117647), Temp = 
# (values of the `Temp` element opened on the previous line)
c(37.1692307692308,37.2214285714286,37.1696629213483,37.1170454545455,37.0453744493392,37.0606382978723,37.0166666666667,36.7377604166667,36.710752688172,37.3,37.342,37.3574468085106,37.2355212355212,37.022972972973,37.0868921775899,36.9345776031434,36.940866873065,36.710447761194),
  AST = c(677.833333333333,797.607142857143,295.9,347.239130434783,277.628571428571,167.917431192661,189.959064327485,153.575342465753,54.9705882352941,149.25,382.888888888889,234.725,258.821428571429,240.243523316062,248.965853658537,166.966183574879,80.7258064516129,74.6),
  PaO2 = c(162,129.976744186047,127.805970149254,114.865079365079,123.932098765432,118.513392857143,118.249211356467,120.334482758621,114.538461538462,156.588235294118,134.132352941176,122.059701492537,114.944162436548,116.12,113.485639686684,111.970873786408,112.55601659751,122.367346938776))

## The data for the logistic regression used to classify the data
# NOTE(review): the threshold is presumably the cutoff applied to the risk
# estimate to obtain the binary prediction -- confirm where it is consumed.
model2.logr.threshold <- 0.3006986
model2.logr.intercept <- 1.74745572492957

# Feature names for the model 2 coefficients. The order matters: these names
# are assigned positionally to the columns built from model2.coeff.values.
model2.coeff.names <- c("DiasABP","FiO2","GCS","HR","MAP","NIDiasABP","NIMAP","NISysABP","SysABP","Temp","Urine","Weight","Albumin","ALP","ALT","AST","Bilirubin","BUN","Creatinine","Glucose","HCO3","HCT","K","Lactate","Mg","Na","PaCO2","PaO2","pH","Platelets","WBC","Gender","Age","TotObs","LastHrObs","RespRate","TroponinI","TroponinT","MechVent","SaO2")

# Coefficient values, one per entry of model2.coeff.names (same order);
# the vector literal follows on the next line.
model2.coeff.values <-
# (right-hand side of the `model2.coeff.values <-` assignment opened on the
# previous line; 40 values, matching the 40 names in model2.coeff.names)
c(-0.00791711531684091,0.685905115382091,-0.262809571293153,0.010674025407591,-0.000489899602329878,-0.0055025949691382,-0.000599398701014135,-0.00342628300576963,0.00135237660459052,-0.066614652351121,-0.000692128540414467,-0.0086028561411428,-0.334789136858762,0.0016129738451976,0.000115047391804305,-2.60304122592535e-05,0.0751351223030456,0.0190394058623855,-0.0948234213897181,0.0044914351780612,-0.02120708547697,0.00528570788412731,-0.0361830950606304,0.165171064016769,-0.00782961654014021,0.00792816137892864,0.0123263065099425,0.00143026172298546,-0.0254845202022271,0.000257789938992586,0.0128369863101664,0.00234052378455844,0.241690178630741,-0.000557052647543139,-0.00679362794323282,-0.920625682400111,0.580478679200097,0.379647001845034,-1.04054390821889,-0.0916402239054046)

# Bind the coefficient vector onto an empty data frame, then label the columns
# positionally with the feature names.
# NOTE(review): this relies on rbind() with an empty data frame producing a
# single row with one column per coefficient -- confirm on the target R version.
model2.logr.coeff <- rbind(data.frame(), model2.coeff.values)
names(model2.logr.coeff) <- model2.coeff.names

## DATA FOR MODEL 1
## The data to impute the missing values based on age + gender
# Named list keyed by feature name; each element shown here is a numeric
# vector of 18 fallback values. Numeric vectors are kept on single lines on
# purpose: they are generated constants and must not be re-typed by hand.
model1.genderAge.raw <- list(
  Platelets = c(222.153846153846,224.049180327869,211.988764044944,206.744318181818,219.52,212.22384341637,184.367346938776,199.886010362694,222.89247311828,159.952380952381,176.59375,190.041666666667,194.124031007752,179.9972899729,191.957537154989,172.327450980392,183.87037037037,185.441176470588),
  mean.ALT = c(224.342592592593,840.10119047619,443.755857142857,252.275,170.535608465608,160.629860200961,125.091638655462,109.480593607306,39.5245098039216,525.328125,658.747937710438,273.229166666667,159.55244047619,120.652703373016,157.36879334258,114.585841423948,72.5450819672131,60.6722222222222),
  mean.Na = c(138.685256410256,140.076031982589,138.991079582091,138.501711719467,139.152033085962,138.546670055923,138.864322064704,139.063056172383,139.810855094726,139.279226190476,139.613840037278,139.19840969216,138.640950661751,138.594038199872,138.609938365417,138.615087728251,138.945592053694,140.185994397759),
  NIMAP =
c(78.7508333333333,80.1334,80.1371604938272,79.0186075949367,77.2689371980676,73.4796370967742,72.8062647058824,74.4854093567251,76.5491954022988,78.2136363636364,82.3342352941176,80.6461445783133,80.1981222707424,79.9435240963855,77.3094660194175,75.6358351893096,75.9144128113879,74.065737704918), SysABP = c(120.666666666667,121.75,106.344827586207,117.582608695652,116.350993377483,119.314720812183,122.164983164983,120.799256505576,131.263157894737,135.25,128.848484848485,122.71186440678,120.473684210526,116.777385159011,119.371584699454,122.871794871795,123.612334801762,124.405405405405), mean.AST = c(227.175925925926,935.819047619048,411.143142857143,389.422101449275,266.528337112623,222.809392747925,206.82261208577,170.417237442922,51.8700980392157,364.1875,511.639676527177,400.666666666667,259.940603741497,241.849438687392,257.975025164537,141.665458937198,84.3274193548387,88.0472222222222), mean.FiO2 = c(0.533971688034188,0.501068860830823,0.536756234942175,0.528799870379453,0.540341337907509,0.555817364661469,0.535510300886623,0.531684670186377,0.552948502446835,0.498114106753813,0.551537820527855,0.547925926721176,0.555748771169579,0.548035336293561,0.568461328691204,0.554219306240199,0.568646245886826,0.546344112427462), Lactate = c(1.53333333333333,1.88974358974359,2.16734693877551,2.17105263157895,2.03303571428571,1.77295597484277,1.99834821428571,1.78700483091787,1.47954545454545,2.34166666666667,2.34754098360656,1.87884615384615,1.92108974358974,2.13239436619718,1.96918819188192,1.85883802816901,1.85456790123457,1.67777777777778), MAP = c(84.5,94.9032258064516,85.1034482758621,85.295652173913,83.6308724832215,83.5510204081633,81.9560810810811,83.5576208178439,84.6052631578947,89.5625,88.1060606060606,84.3389830508475,84.4315789473684,82.354609929078,79.275956284153,79.8410256410256,79.2212389380531,86.1081081081081), mean.HCT = 
c(30.5112454212454,31.2356515979467,30.1459204144621,30.3787356637996,30.9412834486501,30.6533527603278,30.7799569401478,31.3733800899734,31.419169925057,33.4078621246478,33.4652018056399,33.4004435639592,32.6863108083276,32.1849926096921,31.4317583224749,31.3438985427488,31.3501901622041,31.1872799953315), mean.HCO3 = c(22.1570512820513,21.6649924301564,22.5379361799816,23.2211981034838,23.7379942602041,24.134011184545,23.7118128761259,23.7488002714039,23.708064516129,25.1466666666667,24.0926700036075,24.3553255772006,23.7857621518127,23.8588422752471,23.7146632504611,23.6671474781494,23.3933103076622,22.1480042016807), Glucose = c(121.923076923077,129.377049180328,116.775280898876,125.217142857143,131.538461538462,131.169675090253,129.811369509044,123.80310880829,127.849462365591,116.5,121.652631578947,121.610526315789,125.893700787402,127.801104972376,132.00641025641,134.241035856574,128.595611285266,116.328358208955), BUN = c(10.8461538461538,11.5901639344262,15.0666666666667,17.7272727272727,21.8177777777778,23.7330960854093,26.4162436548223,27.0440414507772,30.0215053763441,10.9,14.1666666666667,17.25,21.9571984435798,24.710027100271,26.3411016949153,29.8203125,34.6141975308642,36.1323529411765), mean.NIMAP = c(75.575942782853,79.9538433303199,79.1493312814645,77.7315856624493,77.3852865623665,73.3508905139635,72.5672032479551,73.740665730756,73.6350727975515,79.2382943029338,81.921865584695,81.203981159036,80.6094297690925,80.4296950062287,76.4160005234167,75.6980719761924,74.1214603289599,72.6132222441187), mean.ALP = c(95.7777777777778,86.8619047619048,103.141643323996,136.351098901099,122.215002311604,105.016710758377,95.90581232493,101.071609195402,111.768229166667,91.015625,71.419387755102,88.2041666666667,91.1796502057613,110.046243806164,97.9440735423148,104.38775,100.754132231405,180.962643678161), mean.Albumin = 
c(2.76111111111111,3.03074712643678,2.94145348837209,2.87645833333333,2.9267619047619,2.84716666666667,2.98341111111111,2.94267094017094,2.98,3.3,3.26899305555556,3.10263157894737,2.98536036036036,3.07801498127341,3.01005154639175,2.92193717277487,2.95939153439153,2.80591397849462), mean.6.NIDiasABP = c(61.5883333333333,63.3354166666667,63.3228650793651,61.7987207987208,59.6897442252404,56.1074659966169,52.8882916928776,53.8261588876284,53.3609744990893,56.3936507936508,63.5942048893136,65.5194003527337,63.0711619013702,63.0230229661937,59.4262070028011,56.2928388144883,56.0298905110937,57.4132219169719), mean.6.Temp = c(37.1448717948718,37.2834401709402,37.1690617913832,37.0212255892256,37.0599184303351,37.0794025294966,37.017934010066,36.829082132315,36.7383712121212,37.1903968253968,37.3508680555556,37.3913964686998,37.124204957938,37.0778201471455,37.0901880608131,36.9795801316739,36.8902053926891,36.709978021978), HCO3 = c(24.1538461538462,23.3934426229508,23.0795454545455,23.88,24.4330357142857,24.711743772242,24.1933842239186,24.1943005181347,23.6989247311828,26.55,25.40625,25.25,24.6731517509728,24.41689373297,24.3659574468085,24.3654223968566,23.6327160493827,22.5588235294118), mean.6.GCS = c(11.3461538461538,12.2939393939394,11.5793650793651,12.5126705653021,12.4845622119816,12.8087009803922,12.1429469901168,12.3529891304348,12.7083333333333,11.7121212121212,11.6355902777778,12.035393258427,12.1031420765027,12.0109065155807,12.4635119047619,12.449075167257,12.2588779956427,11.9416666666667), HR = c(84.3076923076923,94.625,90.4494382022472,87.9488636363636,86.7092511013216,86.9822695035461,84.4015151515152,83.2447916666667,82.7741935483871,87.9565217391304,92.62,93.2553191489362,88.945945945946,85.5108108108108,86.5940803382664,84.3222003929273,82.3425925925926,83.7462686567164), NISysABP = 
c(117.416666666667,118.26,114.135802469136,114.177215189873,117.057971014493,114.692,118.355882352941,120.539358600583,124.16091954023,123.5,124.367816091954,120.674698795181,118.212121212121,116.09009009009,117.256038647343,117.274944567628,120.459074733096,117.44262295082), mean.K = c(3.90055860805861,3.86480679156909,3.98142684784351,3.97402197707595,4.08979802832942,4.15777960748285,4.14935307285307,4.08702010856156,4.04465181771633,3.98051587301587,4.01850942460317,4.08461268187831,4.14681789776278,4.13796192005477,4.22662626321669,4.22617195194576,4.18417214012042,4.09641631652661), mean.PaCO2 = c(40.4472222222222,36.1926536311985,37.9967108298978,40.6684469358839,41.4315609787402,41.3170867868275,40.566034502494,39.7647287517047,40.6556736836149,41.7765573693592,40.4160054020796,41.9143106795653,41.0690260743986,40.9431979764223,40.5528548097649,40.2936480536722,39.261181930456,37.1340336867348), mean.BUN = c(11.8397435897436,12.0835870413739,15.5021693121693,17.9807777449823,22.6855343915344,23.576335931763,26.1264845701394,27.4259807549963,30.6680235535074,11.6447023809524,15.0252397486772,17.9904205747956,22.2071178713202,24.7524132146083,26.6960522293149,29.5681110068317,34.2905827454439,37.2164740896359), mean.Temp = c(37.2162877921475,36.907871171689,37.1438443124131,37.0474987398665,36.9972524510018,36.9912192646824,36.9359294548837,36.8275490971757,36.6819564070759,37.3089557361143,37.28879717932,37.2599921624071,37.0746119752498,37.0871970210354,36.9846502634328,36.9434488553197,36.8083121948207,36.7570839451185), mean.SysABP = c(115.859601776286,117.403194769062,114.634029450014,116.814659378203,117.107723746229,116.458429386841,118.873075937585,120.466988257412,125.894582223254,128.280779303262,126.035229821708,121.490820968741,119.865351550741,116.027914350449,117.550911619303,119.183886182849,119.343987283034,116.762666873643), mean.6.DiasABP = 
c(69.0634920634921,76.6163492063492,65.8224126984127,65.510582010582,63.7298259088582,59.6058437328254,57.059402996665,56.5553297326496,55.7882724301842,65.5720238095238,69.4999452654625,65.520207194766,66.9076360809476,63.2666334843608,59.1617161296395,58.4539886297849,56.0077779755972,57.797905444229), WBC = c(11.2692307692308,11.5754098360656,13.4561797752809,12.0868181818182,11.8648888888889,12.7498220640569,12.8139540816327,13.0257142857143,12.2655913978495,11.2238095238095,11.1875,12.1229166666667,11.5937743190661,11.5097826086957,11.7189361702128,12.2687254901961,12.5429012345679,13.7313432835821), mean.6.FiO2 = c(0.441666666666667,0.398666666666667,0.470416666666667,0.491510416666667,0.50018115942029,0.526279257465698,0.486984585741811,0.464535907859079,0.469032258064516,0.5,0.507246376811594,0.48375,0.502704545454545,0.503009554140127,0.516885521885522,0.504351395730706,0.521463293650794,0.478787878787879), Urine = c(151.692307692308,261.625,218.370786516854,163.080924855491,159.773755656109,136.018050541516,111.645244215938,96.7842105263158,91.2934782608696,225.869565217391,232.68,220.064516129032,173.492248062016,161.695890410959,150.156182212581,120.329365079365,118.83125,86.5522388059701), mean.6.NIMAP = c(78.5630416666667,80.6638229166667,81.1178828571429,79.8098884199134,78.3215734029043,74.9402621295546,74.8660650979333,76.0608736517265,74.7471089643508,77.3414523809524,84.0130007166746,83.4352352292769,81.3667988033722,81.1108655769705,77.9616195665439,77.0305688538473,76.2861008937796,76.0301160804473), Weight = c(68.8363636363636,80.4254901960784,78.36925,80.3198214285714,84.380193236715,80.8066917293233,76.2579787234042,67.2857954545455,65.0373493975904,81.1619047619048,88.5326136363636,90.5059523809524,94.2208,92.7296551724138,92.1778336980306,87.8658895705521,80.0354368932039,73.2983870967742), Bilirubin = 
c(1.88,1.68666666666667,1.52352941176471,2.61758241758242,2.99509803921569,1.36238532110092,1.15497076023392,0.766,1.52727272727273,1.44285714285714,1.76486486486486,2.12,2.86764705882353,2.71230769230769,2.21306532663317,1.6980487804878,1.725,1.57857142857143), NIDiasABP = c(62.4166666666667,64.24,62.7901234567901,61.5886075949367,58.7487922705314,55.1526104417671,51.6205882352941,52.874269005848,54.9540229885057,58.0454545454545,61.8352941176471,62.9518072289157,62.4217391304348,62.2259036144578,58.7572815533981,55.2583518930958,55.8291814946619,54.5573770491803), mean.Urine = c(163.42736797942,168.287583867802,196.027820756457,151.500931015918,138.407438678817,125.650835928444,102.146319317826,95.072922064395,79.6993437591609,215.220023655542,211.523399886107,221.056147146593,169.100711928556,164.129379936953,138.445764876339,119.417434334788,114.52678957638,83.8711410790284), mean.Glucose = c(130.646062271062,138.20007333286,128.684512460074,128.862910946197,143.676050420168,140.023629018394,139.417746400886,135.294356032568,130.600409626216,120.94375,126.037953216374,126.062687780056,134.302032416402,137.93832253896,139.789221272555,141.447005915731,138.845098770961,138.351670220327), Albumin = c(2.81666666666667,2.92413793103448,2.95581395348837,2.87625,2.88555555555556,2.82636363636364,2.96666666666667,2.92435897435897,2.9875,3.275,3.246875,3.08684210526316,2.97207207207207,3.06404494382022,2.99020618556701,2.90732984293194,2.92460317460317,2.79032258064516), mean.Platelets = c(226.864102564103,243.519017457952,217.219970572499,213.27398989899,229.936281531983,224.924107444552,201.299508247563,215.107616548679,230.991039426523,169.186507936508,188.402430555556,202.40775462963,207.96221014128,196.120646000592,204.967541850106,185.21587594867,192.033212081129,197.083928571429), Na = 
c(137.307692307692,139.475409836066,138.741573033708,138.545454545455,139.236607142857,138.487544483986,138.954198473282,139.186528497409,140.408602150538,139.55,139.21875,138.833333333333,138.78125,138.577656675749,138.582978723404,138.520628683694,138.854938271605,140.220588235294), mean.Creatinine = c(0.856730769230769,0.874735380739479,0.987571669071669,1.0998538961039,1.29928888888889,1.23765237530362,1.25577844251068,1.14195379965458,1.24093189964158,1.03304166666667,1.16283151455026,1.607414246633,1.45773926030735,1.49393839559311,1.56733001167853,1.53315886262175,1.56025511953753,1.59738270308123), pH = c(7.41333333333333,7.40511627906977,7.39074626865672,7.3896875,7.38746913580247,7.7948,7.39366771159875,7.69348122866894,7.37176470588235,7.40176470588235,7.41275362318841,7.38820895522388,7.40253807106599,7.39880398671096,7.40279069767442,7.40340579710145,7.40267489711934,7.40857142857143), mean.6.HR = c(86.2047619047619,95.4405129335685,90.8226441342821,88.7034724799011,87.7773258346592,87.6254845680114,84.9334301413854,84.2122480386392,82.6974055918664,86.7425324675325,93.7588941528993,94.5454742829031,89.3138726442549,86.4170448434383,87.0143503994206,84.4868518054218,82.998766648809,82.9129188682744), Train = c(6,5.6031746031746,5.42391304347826,5.40223463687151,5.48051948051948,5.36140350877193,5.63157894736842,5.26020408163265,5.53125,6.8695652173913,5.00990099009901,5.48453608247423,5.6551724137931,5.6,5.37974683544304,5.52131782945736,5.71732522796353,5.47058823529412), FiO2 = c(0.43125,0.413333333333333,0.486440677966102,0.496759259259259,0.483733333333333,0.514705882352941,0.491901408450704,0.487142857142857,0.519464285714286,0.458888888888889,0.50234375,0.503230769230769,0.48505376344086,0.503946360153257,0.512890173410405,0.504761904761905,0.518190476190476,0.494791666666667), mean.NISysABP = 
c(113.405712652302,119.595498127708,115.529842470613,114.33787656562,117.255104008961,114.146276974469,117.401821045556,119.481398057465,121.22219008621,124.537354860717,123.378236586644,121.603214297397,118.088742788746,116.926186133188,115.238425902877,117.659346517536,116.820144086657,115.081350389409), mean.6.Urine = c(174.5125,200.629318394024,218.936071428571,159.30550232288,140.632738095238,128.920532708033,104.638884644767,92.9217179689555,84.6856862745098,256.329166666667,240.928890525282,223.248946360153,172.660883424408,154.78465916979,139.953684152645,120.462528701208,112.397711811998,87.8434798534798), PaCO2 = c(38.3333333333333,35.953488372093,39.2537313432836,41.1507936507937,41.141975308642,40.8348214285714,40.1167192429022,40.4241379310345,39.8235294117647,43,40.8059701492537,42.0149253731343,40.2233502538071,40.3966666666667,40.0989583333333,39.6966019417476,38.5289256198347,36.530612244898), DiasABP = c(64.8333333333333,75.40625,59.8793103448276,66.1565217391304,63.3046357615894,59.1116751269036,56.7542087542088,56.6579925650558,59.6052631578947,69.6875,67.4545454545455,66.6779661016949,65.5947368421053,61.8233215547703,59.0655737704918,58.6794871794872,55.5506607929515,58.1351351351351), mean.DiasABP = c(63.6485383690938,67.9935579476544,64.3532386425766,64.1271453501732,62.1150801027089,58.1095680229491,56.0516693708688,55.786609712619,55.8860852629923,67.0490030709054,66.7022119029625,66.0715534496917,65.7919830550318,62.4830189924834,58.9498981592993,57.8610763276221,55.1598277336243,55.3453021024812), ALP = c(99.2,83.7142857142857,105,136.758241758242,117.922330097087,104.759259259259,95.3941176470588,99.6,110.8125,92.75,72.5428571428571,92.325,91.1851851851852,107.814814814815,104.577889447236,103.78,94.7355371900826,178.724137931034), mean.pH = 
c(7.35745261437908,7.35064582980939,7.38853863043843,7.37804928131381,7.36973640576463,7.77462045213759,7.56901812539554,7.73202316536896,7.36755693489517,7.38219914760162,7.38811473646294,7.37730361512836,7.42739090088195,7.37875092756162,7.3850004676551,7.67672696675783,7.5678280608292,7.38721818470978), Mg = c(1.93076923076923,1.97049180327869,1.99204545454545,1.97714285714286,2.00495495495495,2.06690647482014,2.07704081632653,2.06831168831169,2.02150537634409,1.955,1.91368421052632,1.90421052631579,2.02918287937743,2.04806629834254,2.055,2.07583497053045,2.11645962732919,2.16417910447761), Creatinine = c(0.838461538461538,0.926229508196721,1.00111111111111,1.09943181818182,1.25555555555556,1.23202846975089,1.27868020304569,1.13031088082902,1.2247311827957,1.105,1.11979166666667,1.61875,1.42607003891051,1.49159891598916,1.54279661016949,1.5603515625,1.58672839506173,1.56764705882353), mean.WBC = c(12.8802747252747,12.2918696330991,13.4400561797753,12.1078523465171,12.3396915985249,12.9028372309778,12.9510727142047,13.1857059369202,12.6516308243728,12.5575925925926,11.9693543320106,12.5427033730159,12.2393598295349,12.0345474592587,12.1062462006079,12.3232895269219,12.7225312316285,14.0862094290452), mean.Mg = c(1.86923076923077,1.94351223002862,1.95108643250689,1.96043296227582,1.97506864006864,2.03658922212879,2.04886044298672,2.03469913419913,2.01426523297491,1.81441666666667,1.86021720969089,1.88854344193818,1.98215850837446,2.02181564062089,2.03622438672439,2.05841043752144,2.06545166617372,2.09961265103056), mean.6.SysABP = c(121.610317460317,127.852238095238,116.819047619048,120.023380769932,120.746675479538,121.379163911969,124.047012819254,124.349432818928,128.735001273237,129.534027777778,133.794047619048,125.165610105581,124.978106612032,120.516476769983,121.266279474079,125.241675732158,123.013012994642,124.079175925178), K = 
c(3.79230769230769,3.80163934426229,3.94333333333333,3.93125,4.04260089686099,4.1,4.09102564102564,4.03626943005181,3.95161290322581,3.85714285714286,3.90625,3.94895833333333,4.03110236220472,4.09917355371901,4.13297872340426,4.1718253968254,4.11097178683386,3.98970588235294), mean.PaO2 = c(189.8435500516,149.706248565796,146.481525669969,144.563658677017,145.606663817688,142.293931272946,151.986014245921,147.663898909956,137.775573585771,196.73753869969,158.635750280864,149.970127692033,149.694621160316,152.347606784355,148.955989818692,147.236720641522,141.787807046112,137.149255294303), mean.6.MAP = c(87.6035714285714,94.5656746031746,84.3690833333333,84.7945244729728,84.3416082013864,81.1632201216805,81.36080660283,82.3407932904351,82.3597519310755,84.1934523809524,89.2302730696799,84.0158587980647,86.003289306479,82.3622051933574,79.5511069793304,80.7184865203806,78.4238408504903,82.6232982856604), mean.Weight = c(68.6585123966942,79.6958356399348,78.2191230555261,80.8564271116016,84.209664216898,79.9520528184514,75.4539656832202,66.989264126428,64.8069905369784,81.4938758465312,88.5963224577083,90.4784079616821,94.0739929182695,92.4571744316132,91.9464418199293,87.5256133185439,79.5591408733865,72.8564930158427), mean.Bilirubin = c(1.7525,1.51777777777778,1.47423902894491,2.61635531135531,2.66823218176159,1.43901703800786,1.1639933166249,0.807822222222222,1.5030303030303,1.44464285714286,1.7350858000858,2.16125,2.73848039215686,2.71997171347171,2.16415809204754,1.66817886178862,1.7255859375,1.5297619047619), GCS = c(11.4615384615385,12.3392857142857,11.7865168539326,12.6590909090909,12.5550660792952,12.8014184397163,12.1843434343434,12.2916666666667,12.9139784946237,11.7391304347826,11.79,12,12.1853281853282,12.0648648648649,12.5623678646934,12.5559921414538,12.4489164086687,11.955223880597), ALT = 
c(671.666666666667,862.535714285714,375.5,245.826086956522,209.657142857143,144.018348623853,124.364705882353,107.431506849315,43.4117647058824,383.75,595.472222222222,242.675,171.814285714286,126.609375,166.45145631068,133.830097087379,75.827868852459,54.1), mean.MAP = c(81.9472681151236,88.2195666984006,87.3530228661303,83.959007164689,82.7128179874923,81.6209945984216,79.8240626048238,81.5179169352223,81.36321143093,87.1230983693768,86.528725370311,83.7945484314346,83.7460867360245,81.9853756480636,78.8595032097729,79.1817547857215,77.483107783349,78.210553438412), HCT = c(29.7153846153846,30.3098360655738,29.5455555555556,30.2579545454545,30.1702222222222,30.1056939501779,30.4272264631043,31.0518134715026,31.3956989247312,32.0333333333333,32.1773195876289,32.259375,31.8942084942085,31.1635869565217,30.6511677282378,30.9508806262231,31.0993827160494,30.7985294117647), mean.GCS = c(11.4135151173613,11.7447985950548,11.3917629235843,11.8612327784982,11.8982262010925,12.0727553120268,11.3908605747231,11.6119214617439,12.3831900918792,10.6198830875374,11.3089549863586,11.2605745095694,11.4087832521043,11.2660580351665,11.6312957388546,11.5996598314882,11.8185657494693,11.0770282798405), mean.6.NISysABP = c(119.273333333333,118.579166666667,119.100111111111,116.693416628015,119.096988131362,117.963517614461,124.042122529303,124.374637474767,123.476626333594,121.89253968254,129.329323936933,124.571009700176,119.765616752075,119.234200252283,119.04056085222,120.021698674856,121.266817682429,121.280409798535), Temp = c(37.1692307692308,37.2214285714286,37.1696629213483,37.1170454545455,37.0453744493392,37.0606382978723,37.0166666666667,36.7377604166667,36.710752688172,37.3,37.342,37.3574468085106,37.2355212355212,37.022972972973,37.0868921775899,36.9345776031434,36.940866873065,36.710447761194), mean.Lactate = 
c(1.97444444444444,2.30419719169719,2.59858033689666,2.38367167919799,2.21745338393921,2.23621997836093,2.38027859332606,2.19170665888057,1.6948538961039,2.69484126984127,2.71161864127402,2.56614234056542,2.32636527925912,2.46302024397095,2.31858015805433,2.29480328876502,2.19588820387431,2.36889408431075), mean.HR = c(93.6526736594408,100.425416023074,94.82034825511,90.2831432094122,88.8614422192853,88.0954038545092,84.4181724316646,84.0832220784835,83.5244757586384,89.5717546803492,95.7655167930664,95.8777929838439,91.6012687110631,87.6333771542474,86.78690363953,84.6316673040563,82.6339158140302,83.1896595048917), AST = c(677.833333333333,797.607142857143,295.9,347.239130434783,277.628571428571,167.917431192661,189.959064327485,153.575342465753,54.9705882352941,149.25,382.888888888889,234.725,258.821428571429,240.243523316062,248.965853658537,166.966183574879,80.7258064516129,74.6), mean.NIDiasABP = c(59.444175563946,63.2263452147626,61.844335864457,60.5051149516552,58.8560113442476,55.2056844079461,52.0465028616702,52.5491349220078,52.1804975012114,58.5352050237867,62.0029646210696,63.2767460821034,63.1902493965042,63.0032025324066,58.2626985185924,55.9137865082005,54.7214304155324,54.2824969003908), PaO2 = c(162,129.976744186047,127.805970149254,114.865079365079,123.932098765432,118.513392857143,118.249211356467,120.334482758621,114.538461538462,156.588235294118,134.132352941176,122.059701492537,114.944162436548,116.12,113.485639686684,111.970873786408,112.55601659751,122.367346938776), mean.6.Weight = c(71.9,82.6245614035088,81.5008064516129,82.2150862068966,86.2476519916143,81.8629531810782,78.4334804246848,68.0729565054368,63.9102083333333,82.2727272727273,88.8541666666667,94.0238461538462,94.3724714019686,93.6931501057082,93.6904973544974,89.7314904261609,81.057270370387,73.66875)) ## The raw data for the logistic regression model model1.logr.threshold = 0.3078326788; model1.logr.intercept = 19.4057603538017; model1.coeff.names <- 
c("Age","Gender","RespRate","SaO2","TroponinI","TroponinT","MechVent","Albumin","ALP","ALT","AST","Bilirubin","BUN","Creatinine","DiasABP","FiO2","GCS","Glucose","HCO3","HCT","HR","K","Lactate","MAP","Mg","Na","NIDiasABP","NIMAP","NISysABP","PaCO2","PaO2","pH","Platelets","SysABP","Temp","Urine","WBC","Weight","mean.Albumin","mean.ALP","mean.ALT","mean.AST","mean.Bilirubin","mean.BUN","mean.Creatinine","mean.DiasABP","mean.FiO2","mean.GCS","mean.Glucose","mean.HCO3","mean.HCT","mean.HR","mean.K","mean.Lactate","mean.MAP","mean.Mg","mean.Na","mean.NIDiasABP","mean.NIMAP","mean.NISysABP","mean.PaCO2","mean.PaO2","mean.pH","mean.Platelets","mean.SysABP","mean.Temp","mean.Urine","mean.WBC","mean.Weight","TotObs","LastHrObs","mean.6.DiasABP","mean.6.FiO2","mean.6.GCS","mean.6.HR","mean.6.MAP","mean.6.NIDiasABP","mean.6.NIMAP","mean.6.NISysABP","mean.6.SysABP","mean.6.Temp","mean.6.Urine","mean.6.Weight"); model1.coeff.values <- c(0.221974693539886,0.0226069675203702,-0.594130093202961,0.151438371382316,0.67887473985775,0.524200878887242,-0.97568571887081,0.316156141287406,0.00500560158362028,-0.00066923854088726,0.00033777276698252,0.0659403138983737,0.0374100850983807,-0.343243631211176,-0.0154683880280529,0.810020109207058,-0.221683998648513,0.0043091864970345,-0.0729113807618519,-0.0167723280215336,-0.00123621200153142,0.0209406297701917,0.0645154182233022,-0.0151148755042465,0.182009998081006,0.0808404973573679,0.0152691532909765,-0.0231325002699865,-0.000390431356492978,0.0280216538156045,0.00288250795547651,-0.841558244670487,0.00262499407612986,0.0107273668149931,0.00840461411638613,0.00013360607817502,0.0236482011006626,-0.0299413496614949,-0.703728153199155,-0.00332651849059798,0.000730621831486575,-0.000283544634152196,0.00583218102179827,-0.0191273327268737,0.230944800899337,-0.0231626676493692,-0.631190332368138,-0.0625949069858484,0.000455747329140102,0.0866649979039666,0.0197234769633188,0.00687766501650615,-0.101833173062073,0.107314278125317,0.01752338599
35339,-0.262159967677371,-0.0886741961666364,-0.0246994104073873,0.0121456844099438,0.00243445793255346,-0.0398303705923974,-0.0033320124386313,0.0421313069450813,-0.00250400890533759,0.00364300752709667,-0.212044674602792,-5.55361274657072e-05,-0.0115775063054973,0.0249282364529987,-0.000896585141949711,-0.0170248035564242,0.0246464819603304,0.364429771245359,-0.0117475996764385,0.0112803510103849,0.0163841273539294,-0.0170511100239075,0.0265462557854755,-0.00705384979903976,-0.0187573043703427,-0.111106769965224,-0.00115148084982555,-0.00185688665458167) model1.logr.coeff = rbind(data.frame(), model1.coeff.values); names(model1.logr.coeff) <- model1.coeff.names; ## Prior probability for Male/Female split prob.male = 0.5618; ## -------------------------------------------------------------- ## Variables for the features from the data file ## -------------------------------------------------------------- ## The last hour of observation time.lastHour <- 48; # The name of the parameters that we don't care about excludeFeat.vars <- c("Age", "RecordID", "RespRate", "SaO2", "Cholesterol", "TroponinI", "TroponinT", "Height", 'Gender', "MechVent"); # The name of the parameters to use a boolean flag to denote if they exist or not boolFeat.vars <- c("RespRate", "TroponinI", "TroponinT", "SaO2"); # The name of the features that we're requiring in our model # This in addution to the boolFeats constitutes our model feat.vars <- c("Albumin", "ALP", "ALT", "AST", "Bilirubin", "BUN", "Creatinine", "Glucose", "HCO3", "HCT", "K", "Lactate", "Mg", "Na", "PaCO2", "PaO2", "pH", "Platelets", "WBC", "DiasABP", "FiO2", "GCS", "HR", "MAP", "NIDiasABP","NIMAP","NISysABP", "SysABP", "Temp", "Urine", "Weight"); # Define the variables that we will treat as lab # (so only do the last value + mean of 48 hours) lab.vars <- c("Albumin", "ALP", "ALT", "AST", "Bilirubin", "BUN", "Creatinine", "Glucose", "HCO3", "HCT", "K", "Lactate", "Mg", "Na", "PaCO2", "PaO2", "pH", "Platelets", "WBC"); # Define 
# the variables that we will treat as clinical
# (model 1 uses: last value + mean over the record + mean over the last 6 hours)
clinical.vars <- c("DiasABP", "FiO2", "GCS", "HR", "MAP", "NIDiasABP",
                   "NIMAP", "NISysABP", "SysABP", "Temp", "Urine", "Weight")

## --------------------------------------------------------------
## Gender/age imputation tables
## --------------------------------------------------------------

# Column labels of the nine age-decile bins used by the imputation tables
genderAge.binLabels <- c("(0,1]", "(1,2]", "(2,3]", "(3,4]", "(4,5]",
                         "(5,6]", "(6,7]", "(7,8]", "(8,9]")

# Convert every raw imputation vector into a 2x9 matrix
# (rows = Gender "0"/"1", columns = age-decile bins).
#
# The raw vectors hold the nine age bins of gender 0 followed by the nine age
# bins of gender 1 (e.g. BUN rises monotonically inside each half, Weight and
# HCT are higher throughout the second half), so the matrix must be filled row
# by row.  The former column-major fill put the values into the wrong
# gender/age cells.
# NOTE(review): model1.genderAge.raw is defined outside this section and is
# assumed to share the layout of model2.genderAge.raw -- confirm.
build.genderAge.tables <- function(raw) {
  lapply(raw, function(x) {
    y <- matrix(x, nrow = 2, ncol = 9, byrow = TRUE)
    rownames(y) <- c("0", "1")
    colnames(y) <- genderAge.binLabels
    y
  })
}

model1.genderAge.tables <- build.genderAge.tables(model1.genderAge.raw)
model2.genderAge.tables <- build.genderAge.tables(model2.genderAge.raw)

# Fill every NA column of a one-row feature frame from the given tables.
#   data   : one-row data.frame with numeric Gender (0/1) and Age (decile)
#   tables : named list of 2x9 matrices (see build.genderAge.tables)
# Returns the data.frame with the NA cells replaced.
impute.from.tables <- function(data, tables) {
  # data$Gender is 0/1; add 1 to get the matrix row
  gender.row <- data$Gender + 1
  # Bin k of cut() is (k-1, k], so the integer code equals the age decile
  age.col <- as.integer(cut(data$Age, breaks = seq(0, 10, by = 1),
                            right = TRUE))

  for (i in names(data)[which(is.na(data))]) {
    data[1, i] <- tables[[i]][gender.row, age.col]
  }
  data
}

# Impute the missing model 1 features from the model 1 gender/age tables
impute.model1.Data <- function(data) {
  impute.from.tables(data, model1.genderAge.tables)
}

# Impute the missing model 2 features from the model 2 gender/age tables
impute.model2.Data <- function(data) {
  impute.from.tables(data, model2.genderAge.tables)
}

## --------------------------------------------------------------
## Helpers that read single values out of the long-format record
## --------------------------------------------------------------

# Find the age decile: floor(median age / 10), with ages above 90 capped at 90.
# Returns NA when the record has no "Age" row.
findAgeDecile <- function(data) {
  indices <- which(data$Parameter == "Age")
  if (length(indices) == 0) {
    return(NA)
  }
  # Smash all the ages > 90 into the same decile
  medianAge <- min(median(data$Value[indices]), 90)
  floor(medianAge / 10)
}

# Get the gender (last recorded "Gender" value), or NA when there is none
findGender <- function(data) {
  indices <- which(data$Parameter == "Gender")
  if (length(indices) == 0) {
    return(NA)
  }
  tail(data$Value[indices], 1)
}

# Return 1 if the record contains the parameter at least once, otherwise 0
findBoolFlag <- function(tab, parameterName) {
  as.numeric(parameterName %in% tab$Parameter)
}

# Count the observations with time.lastHour - 1 <= Time < time.lastHour
findCountLastHr <- function(tab) {
  length(which(tab$Time >= (time.lastHour - 1) & tab$Time < time.lastHour))
}

# Convert "HH:MM" time stamps to fractional hours
convert.time.to.hours <- function(timeStamps) {
  vapply(strsplit(timeStamps, ":"), function(y) {
    y <- as.numeric(y)
    y[1] + y[2] / 60
  }, numeric(1))
}

# Remove rows with a negative or missing value, or an empty parameter name
drop.invalid.rows <- function(physio) {
  idx.bad <- which(physio$Value < 0 | is.na(physio$Value) |
                     physio$Parameter == "")
  if (length(idx.bad) > 0) {
    physio <- physio[-idx.bad, ]
  }
  physio
}

# Remove the rows of parameters listed in excludeFeat.vars.
# A logical mask is used on purpose: physio[-integer(0), ] would drop every
# row when nothing matches.
drop.excluded.params <- function(physio) {
  physio[!(physio$Parameter %in% excludeFeat.vars), ]
}

# Add Age (decile), Gender, the boolean presence flags and MechVent to df
add.static.features <- function(df, physio) {
  # Take care of the age by using the median and binning
  df[1, "Age"] <- findAgeDecile(physio)
  df[1, "Gender"] <- findGender(physio)

  # Presence flags (1 = parameter was recorded at least once)
  for (v in boolFeat.vars) {
    df[1, v] <- findBoolFlag(physio, v)
  }

  # For the mechanical ventilation see if there's a 1
  df[1, "MechVent"] <- as.numeric(any(physio$Parameter == "MechVent" &
                                        physio$Value == 1, na.rm = TRUE))
  df
}

## --------------------------------------------------------------
## Feature extraction
## --------------------------------------------------------------

# Build the one-row model 1 feature frame for a patient record.
#   physio : data.frame with columns Time ("HH:MM"), Parameter and Value
# Returns a one-row data.frame holding RecordID, the static features, the last
# value and the mean of every remaining parameter, the mean of the clinical
# variables over the last 6 hours, TotObs and LastHrObs.  Features without
# observations stay NA.
get.model1.FeatureData <- function(physio) {
  # df will hold the patient data
  df <- data.frame()

  physio$Time <- convert.time.to.hours(physio$Time)

  df[1, "RecordID"] <- physio$Value[which(physio$Parameter == "RecordID")]
  df[, c(lab.vars, clinical.vars,
         paste("mean", lab.vars, sep = "."),
         paste("mean", clinical.vars, sep = "."),
         paste("mean.6", clinical.vars, sep = "."))] <- NA

  physio <- drop.invalid.rows(physio)
  df <- add.static.features(df, physio)
  physio <- drop.excluded.params(physio)

  # Take the last value of each variable that is in the file
  last.var <- tapply(X = physio$Value, INDEX = physio$Parameter,
                     function(x) tail(x, 1))
  df[, names(last.var)] <- last.var
  df[, "TotObs"] <- nrow(physio)
  df[, "LastHrObs"] <- findCountLastHr(physio)

  if (nrow(physio) > 0) {
    # Mean of every variable over the whole record
    mean.var <- tapply(X = physio$Value, INDEX = physio$Parameter, mean)
    names(mean.var) <- paste("mean", names(mean.var), sep = ".")
    df[, names(mean.var)] <- mean.var

    # Mean of the clinical variables only, over the last 6 hours
    idx <- which(physio$Parameter %in% clinical.vars &
                   physio$Time >= time.lastHour - 6)
    if (length(idx) > 0) {
      lastXHour.x <- tapply(X = physio$Value[idx],
                            INDEX = physio$Parameter[idx], mean)
      names(lastXHour.x) <- paste("mean.6", names(lastXHour.x), sep = ".")
      df[, names(lastXHour.x)] <- lastXHour.x
    }
  }

  df
}

# Build the one-row model 2 feature frame for a patient record.
#   physio : data.frame with columns Time ("HH:MM"), Parameter and Value
# Returns a one-row data.frame holding RecordID, the feat.vars columns (last
# observed value, NA when never observed), the static features, TotObs and
# LastHrObs.
get.model2.FeatureData <- function(physio) {
  # df will hold the patient data
  df <- data.frame()
  df[1, "RecordID"] <- NA
  df[, c(feat.vars)] <- NA

  physio$Time <- convert.time.to.hours(physio$Time)

  df[1, "RecordID"] <- physio$Value[which(physio$Parameter == "RecordID")]

  physio <- drop.invalid.rows(physio)
  df <- add.static.features(df, physio)
  physio <- drop.excluded.params(physio)

  # Take the last value of each variable that is in the file
  last.var <- tapply(X = physio$Value, INDEX = physio$Parameter,
                     function(x) tail(x, 1))
  df[, names(last.var)] <- last.var
  df[1, "TotObs"] <- nrow(physio)
  df[1, "LastHrObs"] <- findCountLastHr(physio)

  df
}

## --------------------------------------------------------------
## Logistic regression models
## --------------------------------------------------------------

# Evaluate f(z) = 1 / (1 + e^-z) with z = b0 + b1*x1 + b2*x2 + ...
#   intercept : b0
#   coeff     : one-row data.frame of coefficients, named by feature
#   data      : one-row data.frame holding (at least) those features
logr.probability <- function(intercept, coeff, data) {
  z <- intercept
  for (b in names(coeff)) {
    z <- z + coeff[1, b] * data[1, b]
  }
  1 / (1 + exp(-z))
}

# Model 1: returns list(class, prob); class is 1 when prob exceeds
# model1.logr.threshold, otherwise 0
predict.model1.Value <- function(data) {
  fz <- logr.probability(model1.logr.intercept, model1.logr.coeff, data)
  pred <- if (fz > model1.logr.threshold) 1 else 0
  list(class = pred, prob = fz)
}

# Model 2: returns the probability only (its intercept and coefficients are
# defined elsewhere in the file)
predict.model2.Value <- function(data) {
  logr.probability(model2.logr.intercept, model2.logr.coeff, data)
}

# Read the record as csv and pass it to each feature extractor.
# Returns list(id, model1.features, model2.features).
getFeatureData <- function(inputFile) {
  physio <- read.csv(file = inputFile, header = TRUE,
                     stringsAsFactors = FALSE)
  model1.patient <- get.model1.FeatureData(physio)
  model2.patient <- get.model2.FeatureData(physio)
  list(id = model1.patient$RecordID,
       model1.features = model1.patient,
       model2.features = model2.patient)
}

## --------------------------------------------------------------
## Predict risk and class using provided feature vector
## --------------------------------------------------------------

# Model 1 gives the binary classification, model 2 gives the predicted risk.
# When the gender is unknown both models are evaluated once as female
# (Gender = 0) and once as male (Gender = 1) and the results are marginalized
# with the prior prob.male.
# Returns list(class, prob).
predict.risk <- function(model1.data, model2.data) {
  gender.known <- ("Gender" %in% colnames(model1.data)) &&
    !is.na(model1.data[1, "Gender"])

  if (gender.known) {
    model1.features <- impute.model1.Data(model1.data)
    model2.features <- impute.model2.Data(model2.data)
    # model 1 gives you the classification
    pred <- predict.model1.Value(model1.features)$class
    # model 2 gives you the predicted risk
    prob <- predict.model2.Value(model2.features)
    return(list(class = pred, prob = prob))
  }

  # Impute and predict with the gender forced to the given value
  predict.as <- function(gender) {
    model1.data$Gender <- gender
    model2.data$Gender <- gender
    list(m1 = predict.model1.Value(impute.model1.Data(model1.data))$prob,
         m2 = predict.model2.Value(impute.model2.Data(model2.data)))
  }
  female <- predict.as(0)
  male <- predict.as(1)

  # Marginalize over gender.  The model 1 female term previously reused the
  # male probability, which ignored the female prediction altogether.
  marg.m1.prob <- prob.male * male$m1 + (1 - prob.male) * female$m1
  pred <- if (marg.m1.prob > model1.logr.threshold) 1 else 0
  prob <- prob.male * male$m2 + (1 - prob.male) * female$m2

  list(class = pred, prob = prob)
}

# Script entry point: read one patient record from stdin and print
# "RecordID,class,risk"
patient <- getFeatureData(file("stdin"))
pred <- predict.risk(patient$model1.features, patient$model2.features)
cat(sprintf("%i,%i,%f\n", patient$id, pred$class, pred$prob))