Alex Rubinsteyn Merge branch 'master' of github.com:hammerlab/mhcflurry-paper1  about 8 years ago

Commit id: 88d9259e4376975eb9336f39366cf347a85bbe84

  title = {{Multiple imputation by chained equations: what is it and how does it work?}},
  journal = {International Journal of Methods in Psychiatric Research},
}

@article{lundegaard2008accurate,
  title = {{Accurate approximation method for prediction of class I MHC affinities for peptides of length 8, 10 and 11 using prediction tools trained on 9mers}},
  author = {Lundegaard, Claus and Lund, Ole and Nielsen, Morten},
  journal = {Bioinformatics},
  volume = {24},
  number = {11},
  pages = {1397--1398},
  year = {2008},
  publisher = {Oxford Univ Press},
}

@article{Anderson_2004,
  doi = {10.4049/jimmunol.172.10.5851},
  url = {http://dx.doi.org/10.4049/jimmunol.172.10.5851},
  year = {2004},
  month = {may},
  publisher = {The American Association of Immunologists},
  volume = {172},
  number = {10},
  pages = {5851--5860},
  author = {M. K. Anderson and R. Pant and A. L. Miracle and X. Sun and C. A. Luer and C. J. Walsh and J. C. Telfer and G. W. Litman and E. V. Rothenberg},
  title = {{Evolutionary Origins of Lymphocytes: Ensembles of T Cell and B Cell Transcriptional Regulators in a Cartilaginous Fish}},
  journal = {The Journal of Immunology},
}

activation,allele,allele_size,dropout_probability,embedding_output_dim,layer_sizes,test_auc,test_f1,test_tau,train_auc,train_f1,train_size,train_tau,layer0_size,combined
tanh,HLA-A0201,32876,0.5,32,[64],0.948859224635,0.829284584403,0.611111788519,0.952787573501,0.835376335904,21917.3333333,0.619511045449,64.0,2.38925559756
tanh,HLA-A0201,32876,0.0,8,[4],0.949783907887,0.824546421325,0.610722720647,0.961369550594,0.852806489037,21917.3333333,0.637738834691,4.0,2.38505304986
tanh,HLA-A0201,32876,0.5,32,[4],0.947045743882,0.825316226587,0.606809392519,0.95090676537,0.830576863532,21917.3333333,0.614604905044,4.0,2.37917136299
tanh,HLA-A0201,32876,0.0,32,[4],0.945431851684,0.814543482548,0.594808046713,0.968297946562,0.862478716967,21917.3333333,0.653041195915,4.0,2.35478338095
tanh,HLA-A0201,32876,0.5,8,[4],0.939210925562,0.797945924441,0.594154537435,0.941786515167,0.80147407161,21917.3333333,0.598982352368,4.0,2.33131138744
tanh,HLA-A0201,32876,0.5,8,[64],0.940983539736,0.79144903494,0.597841012914,0.943402333279,0.800677140665,21917.3333333,0.602517362108,64.0,2.33027358759
tanh,HLA-A0201,32876,0.0,8,[64],0.904228551551,0.749725486519,0.531197414445,0.995129449276,0.950996665072,21917.3333333,0.775690619396,64.0,2.18515145252
tanh,HLA-A0201,32876,0.0,32,[64],0.894236762069,0.736074577248,0.502893177031,0.99843884762,0.968539688086,21917.3333333,0.856814617992,64.0,2.13320451635
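This is the cross-fold summary the notebook writes out via `summary.to_csv("../data/cv_hla0201_summary.csv")`; each row averages the three CV folds for one hyperparameter configuration, and `combined` is defined in the notebook as `cv_df.test_auc + cv_df.test_f1 + cv_df.test_tau`. A minimal sketch of re-ranking configurations from this file (the path is the one the notebook uses; treat it as an assumption if run elsewhere):

```python
import pandas as pd

# Load the per-configuration CV summary (path taken from the
# notebook's to_csv call; assumed relative to the notebook directory).
summary = pd.read_csv("../data/cv_hla0201_summary.csv")

# "combined" is simply the sum of the three held-out test metrics.
assert ((summary.combined -
         (summary.test_auc + summary.test_f1 + summary.test_tau))
        .abs() < 1e-9).all()

# Rank configurations from best to worst combined score.
ranked = summary.sort_values("combined", ascending=False)
print(ranked[["dropout_probability", "embedding_output_dim",
              "layer_sizes", "combined"]])
```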

sectionContent_Text_.tex
sectionSection_title.tex
section_Network_architecture_begin_figure__.tex
section_Evaluating_a_predictor_Throughout__.tex
section_Evaluating_the_performance_of__.tex
section_Matrix_completion_and_imputation__.tex
section_Pretraining_an_artificial_neural__.tex
sectionAcknowledgeme.tex

"cells": [  {  "cell_type": "code",  "execution_count": 28, 25,  "metadata": {  "collapsed": false  }, 

},  {  "cell_type": "code",  "execution_count": 29, 26,  "metadata": {  "collapsed": true  }, 

},  {  "cell_type": "code",  "execution_count": 30, 27,  "metadata": {  "collapsed": false  }, 

},  {  "cell_type": "code",  "execution_count": 31, 28,  "metadata": {  "collapsed": false  }, 

},  {  "cell_type": "code",  "execution_count": 32, 29,  "metadata": {  "collapsed": false  }, 

"0.36749263596306014"  ]  },  "execution_count": 32, 29,  "metadata": {},  "output_type": "execute_result"  } 

},
{
  "cell_type": "code",
  "execution_count": 33 → 30,
  "metadata": { "collapsed": false },

"{8, 9, 10, 11, 12, 13, 14, 15}"
]
},
"execution_count": 33 → 30,
"metadata": {},
"output_type": "execute_result"
}

},
{
  "cell_type": "code",
  "execution_count": 34 → 31,
  "metadata": { "collapsed": false },

"{9}"
]
},
"execution_count": 34 → 31,
"metadata": {},
"output_type": "execute_result"
}

},
{
  "cell_type": "code",
  "execution_count": 35 → 32,
  "metadata": { "collapsed": false },

},
{
  "cell_type": "code",
  "execution_count": 36 → 33,
  "metadata": { "collapsed": false },

},
{
  "cell_type": "code",
  "execution_count": 37 → 45,
  "metadata": { "collapsed": false },

"name": "stdout",
"output_type": "stream",
"text": [
  "48 models" → "8 models"
]
},
{

"{'activation', 'dropout_probability', 'embedding_output_dim', 'layer_sizes'}"  ]  },  "execution_count": 37, 45,  "metadata": {},  "output_type": "execute_result"  }  ],  "source": [  "dropout_probabilities = [0.0,0.1,  0.5]\n", "\n",  "embedding_output_dims = [16, 32, 64, 128]\n", [8, 32]\n",  "#embedding_output_dims = [4, 32]\n",  "\n",  "#layer_sizes = [[4], [8], [16], [64], [128]]\n",  "layer_sizes_list = [[16], [64], [100], [128]]\n", [[4], [64]]\n",  "\n",  "activations = [\"tanh\"]\n",  "\n", 

},
{
  "cell_type": "code",
  "execution_count": 38 → 41,
  "metadata": { "collapsed": true },
  "outputs": [],
  "source": []
},
{
  "cell_type": "code",
  "execution_count": 46,
  "metadata": { "collapsed": false, "scrolled": false },

"text": [  "Allele: HLA-A0201\n",  "-- fold #1/3\n",  " HLA-A0201 fold 0 [ 0 / 48] 8]  train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 16, 8,  'dropout_probability': 0.0, 'layer_sizes': [16]}\n", [4]}\n",  "-- # unique peptides = 6377\n",  "-- # unique peptides = 3188\n",  "train tau: 0.635373\n",  "train auc: 0.961440\n",  "train f1: 0.857345\n",  "test tau: 0.612421\n",  "test auc: 0.947852\n",  "test f1: 0.812253\n",  " HLA-A0201 fold 0 [ 1 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 8, 'dropout_probability': 0.5, 'layer_sizes': [4]}\n",  "-- # unique peptides = 6377\n",  "-- # unique peptides = 3188\n",  "train tau: 0.595930\n",  "train auc: 0.941433\n",  "train f1: 0.816062\n",  "test tau: 0.593591\n",  "test auc: 0.934945\n",  "test f1: 0.794962\n",  " HLA-A0201 fold 0 [ 2 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.0, 'layer_sizes': [64]}\n",  "-- # unique peptides = 6377\n",  "-- # unique peptides = 3188\n",  "train tau: 0.853621\n",  "train auc: 0.998382\n",  "train f1: 0.969092\n",  "test tau: 0.504335\n",  "test auc: 0.893769\n",  "test f1: 0.727977\n",  " HLA-A0201 fold 0 [ 3 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.5, 'layer_sizes': [4]}\n",  "-- # unique peptides = 6377\n",  "-- # unique peptides = 3188\n",  "train tau: 0.612068\n",  "train auc: 0.951109\n",  "train f1: 0.828886\n",  "test tau: 0.609418\n",  "test auc: 0.945726\n",  "test f1: 0.810010\n",  " HLA-A0201 fold 0 [ 4 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.0, 'layer_sizes': [4]}\n",  "-- # unique peptides = 6377\n",  "-- # unique peptides = 3188\n",  "train tau: 0.649106\n",  "train auc: 0.968903\n",  "train f1: 0.851287\n",  "test tau: 0.592833\n",  "test auc: 0.941599\n",  "test f1: 0.793750\n",  " HLA-A0201 fold 0 [ 5 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 8, 'dropout_probability': 0.5, 'layer_sizes': [64]}\n",  "-- # unique peptides = 6377\n",  "-- # unique peptides = 3188\n",  "train tau: 0.598930\n",  "train auc: 0.943413\n",  "train f1: 0.809371\n",  "test tau: 0.596894\n",  "test auc: 0.937999\n",  "test f1: 0.786494\n",  " HLA-A0201 fold 0 [ 6 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 8, 'dropout_probability': 0.0, 'layer_sizes': [64]}\n",  "-- # unique peptides = 6377\n",  "-- # unique peptides = 3188\n",  "train tau: 0.775955\n",  "train auc: 0.995456\n",  "train f1: 0.955315\n",  "test tau: 0.522877\n",  "test auc: 0.897357\n",  "test f1: 0.732064\n",  " HLA-A0201 fold 0 [ 7 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.5, 'layer_sizes': [64]}\n",  "-- # unique peptides = 6377\n",  "-- # unique peptides = 3188\n",  "train tau: 0.721963\n", 0.617173\n",  "train auc: 0.987146\n", 0.952590\n",  "train f1: 0.917336\n", 0.843105\n",  "test tau: 0.559493\n", 0.613136\n",  "test auc: 0.915950\n", 0.947580\n",  "test f1: 0.770642\n", 0.822068\n",  "-- fold #2/3\n",  " HLA-A0201 fold 1 [  0 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.0, 
'layer_sizes': [4]}\n",  "-- # unique peptides = 6376\n",  "-- # unique peptides = 3189\n",  "train tau: 0.658287\n",  "train auc: 0.966108\n",  "train f1: 0.860940\n",  "test tau: 0.599771\n",  "test auc: 0.951407\n",  "test f1: 0.826462\n",  " HLA-A0201 fold 1 [ 1 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.5, 'layer_sizes': [4]}\n",  "-- # unique peptides = 6376\n",  "-- # unique peptides = 3189\n",  "train tau: 0.617840\n",  "train auc: 0.949856\n",  "train f1: 0.829387\n",  "test tau: 0.603994\n",  "test auc: 0.949710\n",  "test f1: 0.841608\n",  " HLA-A0201 fold 1  [ 2 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.0, 'layer_sizes': [64]}\n",  "-- # unique peptides = 6376\n",  "-- # unique peptides = 3189\n",  "train tau: 0.862390\n",  "train auc: 0.998619\n",  "train f1: 0.968971\n",  "test tau: 0.496758\n",  "test auc: 0.893675\n",  "test f1: 0.737198\n",  " HLA-A0201 fold  1 [ 3  / 48] 8]  train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 16, 8, 'dropout_probability': 0.0, 'layer_sizes': [4]}\n",  "-- # unique peptides = 6376\n",  "-- # unique peptides = 3189\n",  "train tau: 0.641526\n",  "train auc: 0.959798\n",  "train f1: 0.844069\n",  "test tau: 0.607306\n",  "test auc: 0.953694\n",  "test f1: 0.836469\n",  " HLA-A0201 fold 1 [ 4 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 8, 'dropout_probability': 0.0, 'layer_sizes': [64]}\n",  "-- # unique peptides = 6376\n",  "-- # unique peptides = 3189\n",  "train tau: 0.785620\n",  "train auc: 0.995677\n",  "train f1: 0.953636\n",  "test tau: 0.519690\n",  "test auc: 0.900378\n",  "test f1: 0.745602\n",  " HLA-A0201 fold 1 [ 5 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.5, 'layer_sizes': [64]}\n",  "-- # unique peptides = 6376\n",  "-- # unique peptides = 3189\n",  "train tau: 0.622023\n",  "train auc: 0.951691\n",  "train f1: 0.830517\n",  "test tau: 0.606459\n",  "test auc: 0.951017\n",  "test f1: 0.837929\n",  " HLA-A0201 fold 1 [ 6 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 8, 'dropout_probability': 0.5, 'layer_sizes': [64]}\n",  "-- # unique peptides = 6376\n",  "-- # unique peptides = 3189\n",  "train tau: 0.606570\n",  "train auc: 0.942179\n",  "train f1: 0.789515\n",  "test tau: 0.592905\n",  "test auc: 0.943026\n",  "test f1: 0.785859\n",  " HLA-A0201 fold 1 [ 7 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 8, 'dropout_probability': 0.5, 'layer_sizes': [4]}\n",  "-- # unique peptides = 6376\n",  "-- # unique peptides = 3189\n",  "train tau: 0.601921\n",  "train auc: 0.940715\n",  "train f1: 0.800403\n",  "test tau: 0.589560\n",  "test auc: 0.942098\n",  "test f1: 0.802542\n",  "-- fold #3/3\n",  " HLA-A0201 fold 2 [ 0 / 8] train_size=21918 test_size=10958 impute=False model={'activation': 'tanh', 'embedding_output_dim': 8,  'dropout_probability': 0.0, 'layer_sizes': [64]}\n" [4]}\n",  "-- # unique peptides = 6377\n",  "-- # unique peptides = 3188\n",  "train tau: 0.636318\n",  "train auc: 0.962871\n",  "train f1: 0.857006\n",  "test tau: 0.612441\n",  "test auc: 0.947806\n",  "test f1: 0.824917\n",  " HLA-A0201 fold 2 [ 1 / 8] train_size=21918 
test_size=10958 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.5, 'layer_sizes': [64]}\n",  "-- # unique peptides = 6377\n",  "-- # unique peptides = 3188\n",  "train tau: 0.619337\n",  "train auc: 0.954082\n",  "train f1: 0.832507\n",  "test tau: 0.613741\n",  "test auc: 0.947980\n",  "test f1: 0.827857\n",  " HLA-A0201 fold 2 [ 2 / 8] train_size=21918 test_size=10958 impute=False model={'activation': 'tanh', 'embedding_output_dim': 8, 'dropout_probability': 0.5, 'layer_sizes': [64]}\n",  "-- # unique peptides = 6377\n",  "-- # unique peptides = 3188\n",  "train tau: 0.602052\n",  "train auc: 0.944615\n",  "train f1: 0.803146\n",  "test tau: 0.603724\n",  "test auc: 0.941925\n",  "test f1: 0.801995\n",  " HLA-A0201 fold 2 [ 3 / 8] train_size=21918 test_size=10958 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.5, 'layer_sizes': [4]}\n",  "-- # unique peptides = 6377\n",  "-- # unique peptides = 3188\n",  "train tau: 0.613908\n",  "train auc: 0.951755\n",  "train f1: 0.833457\n",  "test tau: 0.607017\n",  "test auc: 0.945700\n",  "test f1: 0.824331\n",  " HLA-A0201 fold 2 [ 4 / 8] train_size=21918 test_size=10958 impute=False model={'activation': 'tanh', 'embedding_output_dim': 8, 'dropout_probability': 0.0, 'layer_sizes': [64]}\n",  "-- # unique peptides = 6377\n",  "-- # unique peptides = 3188\n",  "train tau: 0.765497\n",  "train auc: 0.994255\n",  "train f1: 0.944039\n",  "test tau: 0.551025\n",  "test auc: 0.914951\n",  "test f1: 0.771511\n",  " HLA-A0201 fold 2 [ 5 / 8] train_size=21918 test_size=10958 impute=False model={'activation': 'tanh', 'embedding_output_dim': 8, 'dropout_probability': 0.5, 'layer_sizes': [4]}\n",  "-- # unique peptides = 6377\n",  "-- # unique peptides = 3188\n",  "train tau: 0.599096\n",  "train auc: 0.943211\n",  "train f1: 0.787957\n",  "test tau: 0.599313\n",  "test auc: 0.940589\n",  "test f1: 0.796334\n",  " HLA-A0201 fold 2 [ 6 / 8] train_size=21918 test_size=10958 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.0, 'layer_sizes': [4]}\n",  "-- # unique peptides = 6377\n",  "-- # unique peptides = 3188\n",  "train tau: 0.651730\n",  "train auc: 0.969883\n",  "train f1: 0.875209\n",  "test tau: 0.591820\n",  "test auc: 0.943290\n",  "test f1: 0.823418\n",  " HLA-A0201 fold 2 [ 7 / 8] train_size=21918 test_size=10958 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.0, 'layer_sizes': [64]}\n",  "-- # unique peptides = 6377\n",  "-- # unique peptides = 3188\n",  "train tau: 0.854433\n",  "train auc: 0.998315\n",  "train f1: 0.967556\n",  "test tau: 0.507586\n",  "test auc: 0.895266\n",  "test f1: 0.743049\n",  "2240.77424288\n"  ]  },  {  "ename": "KeyboardInterrupt",  "evalue": "",  "output_type": "error",  "traceback": "data": {  "text/html": [  "
\n",  "\n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " 
\n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  "
activationalleleallele_sizedropout_probabilityembedding_output_dimfit_timelayer_sizesmodel_paramstest_auctest_f1test_tautrain_auctrain_f1train_sizetrain_taulayer0_size
0tanhHLA-A0201328760.0884.926343[4]{u'activation': u'tanh', u'embedding_output_di...0.9478520.8122530.6124210.9614400.857345219170.6353734
1tanhHLA-A0201328760.5896.526969[4]{u'activation': u'tanh', u'embedding_output_di...0.9349450.7949620.5935910.9414330.816062219170.5959304
2tanhHLA-A0201328760.03287.635577[64]{u'activation': u'tanh', u'embedding_output_di...0.8937690.7279770.5043350.9983820.969092219170.85362164
3tanhHLA-A0201328760.53299.237046[4]{u'activation': u'tanh', u'embedding_output_di...0.9457260.8100100.6094180.9511090.828886219170.6120684
4tanhHLA-A0201328760.03287.175426[4]{u'activation': u'tanh', u'embedding_output_di...0.9415990.7937500.5928330.9689030.851287219170.6491064
5tanhHLA-A0201328760.5898.160665[64]{u'activation': u'tanh', u'embedding_output_di...0.9379990.7864940.5968940.9434130.809371219170.59893064
6tanhHLA-A0201328760.0886.410095[64]{u'activation': u'tanh', u'embedding_output_di...0.8973570.7320640.5228770.9954560.955315219170.77595564
7tanhHLA-A0201328760.532100.420329[64]{u'activation': u'tanh', u'embedding_output_di...0.9475800.8220680.6131360.9525900.843105219170.61717364
8tanhHLA-A0201328760.03286.968521[4]{u'activation': u'tanh', u'embedding_output_di...0.9514070.8264620.5997710.9661080.860940219170.6582874
9tanhHLA-A0201328760.53298.788730[4]{u'activation': u'tanh', u'embedding_output_di...0.9497100.8416080.6039940.9498560.829387219170.6178404
10tanhHLA-A0201328760.03287.397654[64]{u'activation': u'tanh', u'embedding_output_di...0.8936750.7371980.4967580.9986190.968971219170.86239064
11tanhHLA-A0201328760.0884.933377[4]{u'activation': u'tanh', u'embedding_output_di...0.9536940.8364690.6073060.9597980.844069219170.6415264
12tanhHLA-A0201328760.0886.291791[64]{u'activation': u'tanh', u'embedding_output_di...0.9003780.7456020.5196900.9956770.953636219170.78562064
13tanhHLA-A0201328760.532100.035493[64]{u'activation': u'tanh', u'embedding_output_di...0.9510170.8379290.6064590.9516910.830517219170.62202364
14tanhHLA-A0201328760.5898.413581[64]{u'activation': u'tanh', u'embedding_output_di...0.9430260.7858590.5929050.9421790.789515219170.60657064
15tanhHLA-A0201328760.5897.010899[4]{u'activation': u'tanh', u'embedding_output_di...0.9420980.8025420.5895600.9407150.800403219170.6019214
16tanhHLA-A0201328760.0885.211551[4]{u'activation': u'tanh', u'embedding_output_di...0.9478060.8249170.6124410.9628710.857006219180.6363184
17tanhHLA-A0201328760.532100.653926[64]{u'activation': u'tanh', u'embedding_output_di...0.9479800.8278570.6137410.9540820.832507219180.61933764
18tanhHLA-A0201328760.5898.564194[64]{u'activation': u'tanh', u'embedding_output_di...0.9419250.8019950.6037240.9446150.803146219180.60205264
19tanhHLA-A0201328760.53299.454345[4]{u'activation': u'tanh', u'embedding_output_di...0.9457000.8243310.6070170.9517550.833457219180.6139084
20tanhHLA-A0201328760.0886.658918[64]{u'activation': u'tanh', u'embedding_output_di...0.9149510.7715110.5510250.9942550.944039219180.76549764
21tanhHLA-A0201328760.5896.809296[4]{u'activation': u'tanh', u'embedding_output_di...0.9405890.7963340.5993130.9432110.787957219180.5990964
22tanhHLA-A0201328760.03287.281114[4]{u'activation': u'tanh', u'embedding_output_di...0.9432900.8234180.5918200.9698830.875209219180.6517304
23tanhHLA-A0201328760.03287.568679[64]{u'activation': u'tanh', u'embedding_output_di...0.8952660.7430490.5075860.9983150.967556219180.85443364
\n",
  "
"
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input> in <module>()
     39             sample_weights=weights_cv_train,
     40             verbose=False,
---> 41             n_training_epochs=200)
     42         fit_time += time.time()
     43

/Users/tim/sinai/git/mhcflurry/mhcflurry/class1_binding_predictor.py in fit(self, X, Y, sample_weights, X_pretrain, Y_pretrain, sample_weights_pretrain, n_training_epochs, verbose, batch_size)
    331                 verbose=0,
    332                 batch_size=batch_size,
--> 333                 shuffle=True)
    334
    335     @classmethod

/Users/tim/venvs/analysis-venv-2.7/lib/python2.7/site-packages/keras/models.pyc in fit(self, x, y, batch_size, nb_epoch, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, **kwargs)
    403                     shuffle=shuffle,
    404                     class_weight=class_weight,
--> 405                     sample_weight=sample_weight)
    406
    407     def evaluate(self, x, y, batch_size=32, verbose=1,

/Users/tim/venvs/analysis-venv-2.7/lib/python2.7/site-packages/keras/engine/training.pyc in fit(self, x, y, batch_size, nb_epoch, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight)
   1044                               verbose=verbose, callbacks=callbacks,
   1045                               val_f=val_f, val_ins=val_ins, shuffle=shuffle,
-> 1046                               callback_metrics=callback_metrics)
   1047
   1048     def evaluate(self, x, y, batch_size=32, verbose=1, sample_weight=None):

/Users/tim/venvs/analysis-venv-2.7/lib/python2.7/site-packages/keras/engine/training.pyc in _fit_loop(self, f, ins, out_labels, batch_size, nb_epoch, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics)
    788                     batch_logs[l] = o
    789
--> 790                 callbacks.on_batch_end(batch_index, batch_logs)
    791
    792             epoch_logs = {}

/Users/tim/venvs/analysis-venv-2.7/lib/python2.7/site-packages/keras/callbacks.pyc in on_batch_end(self, batch, logs)
     58         t_before_callbacks = time.time()
     59         for callback in self.callbacks:
---> 60             callback.on_batch_end(batch, logs)
     61         self._delta_ts_batch_end.append(time.time() - t_before_callbacks)
     62         delta_t_median = np.median(self._delta_ts_batch_end)

/Users/tim/venvs/analysis-venv-2.7/lib/python2.7/site-packages/keras/callbacks.pyc in on_batch_end(self, batch, logs)
    146         for k, v in logs.items():
    147             if k in self.totals:
--> 148                 self.totals[k] += v * batch_size
    149             else:
    150                 self.totals[k] = v * batch_size

KeyboardInterrupt:
]
},
{
  "data": {
    "text/plain": [
  (model_params column: {u'activation': u'tanh', u'embedding_output_di... in every row)
idx  activation  allele     allele_size  dropout_probability  embedding_output_dim  fit_time    layer_sizes  test_auc  test_f1   test_tau  train_auc  train_f1  train_size  train_tau  layer0_size
0    tanh        HLA-A0201  32876        0.0                   8                     84.926343  [4]          0.947852  0.812253  0.612421  0.961440   0.857345  21917       0.635373    4
1    tanh        HLA-A0201  32876        0.5                   8                     96.526969  [4]          0.934945  0.794962  0.593591  0.941433   0.816062  21917       0.595930    4
2    tanh        HLA-A0201  32876        0.0                  32                     87.635577  [64]         0.893769  0.727977  0.504335  0.998382   0.969092  21917       0.853621   64
3    tanh        HLA-A0201  32876        0.5                  32                     99.237046  [4]          0.945726  0.810010  0.609418  0.951109   0.828886  21917       0.612068    4
4    tanh        HLA-A0201  32876        0.0                  32                     87.175426  [4]          0.941599  0.793750  0.592833  0.968903   0.851287  21917       0.649106    4
5    tanh        HLA-A0201  32876        0.5                   8                     98.160665  [64]         0.937999  0.786494  0.596894  0.943413   0.809371  21917       0.598930   64
6    tanh        HLA-A0201  32876        0.0                   8                     86.410095  [64]         0.897357  0.732064  0.522877  0.995456   0.955315  21917       0.775955   64
7    tanh        HLA-A0201  32876        0.5                  32                    100.420329  [64]         0.947580  0.822068  0.613136  0.952590   0.843105  21917       0.617173   64
8    tanh        HLA-A0201  32876        0.0                  32                     86.968521  [4]          0.951407  0.826462  0.599771  0.966108   0.860940  21917       0.658287    4
9    tanh        HLA-A0201  32876        0.5                  32                     98.788730  [4]          0.949710  0.841608  0.603994  0.949856   0.829387  21917       0.617840    4
10   tanh        HLA-A0201  32876        0.0                  32                     87.397654  [64]         0.893675  0.737198  0.496758  0.998619   0.968971  21917       0.862390   64
11   tanh        HLA-A0201  32876        0.0                   8                     84.933377  [4]          0.953694  0.836469  0.607306  0.959798   0.844069  21917       0.641526    4
12   tanh        HLA-A0201  32876        0.0                   8                     86.291791  [64]         0.900378  0.745602  0.519690  0.995677   0.953636  21917       0.785620   64
13   tanh        HLA-A0201  32876        0.5                  32                    100.035493  [64]         0.951017  0.837929  0.606459  0.951691   0.830517  21917       0.622023   64
14   tanh        HLA-A0201  32876        0.5                   8                     98.413581  [64]         0.943026  0.785859  0.592905  0.942179   0.789515  21917       0.606570   64
15   tanh        HLA-A0201  32876        0.5                   8                     97.010899  [4]          0.942098  0.802542  0.589560  0.940715   0.800403  21917       0.601921    4
16   tanh        HLA-A0201  32876        0.0                   8                     85.211551  [4]          0.947806  0.824917  0.612441  0.962871   0.857006  21918       0.636318    4
17   tanh        HLA-A0201  32876        0.5                  32                    100.653926  [64]         0.947980  0.827857  0.613741  0.954082   0.832507  21918       0.619337   64
18   tanh        HLA-A0201  32876        0.5                   8                     98.564194  [64]         0.941925  0.801995  0.603724  0.944615   0.803146  21918       0.602052   64
19   tanh        HLA-A0201  32876        0.5                  32                     99.454345  [4]          0.945700  0.824331  0.607017  0.951755   0.833457  21918       0.613908    4
20   tanh        HLA-A0201  32876        0.0                   8                     86.658918  [64]         0.914951  0.771511  0.551025  0.994255   0.944039  21918       0.765497   64
21   tanh        HLA-A0201  32876        0.5                   8                     96.809296  [4]          0.940589  0.796334  0.599313  0.943211   0.787957  21918       0.599096    4
22   tanh        HLA-A0201  32876        0.0                  32                     87.281114  [4]          0.943290  0.823418  0.591820  0.969883   0.875209  21918       0.651730    4
23   tanh        HLA-A0201  32876        0.0                  32                     87.568679  [64]         0.895266  0.743049  0.507586  0.998315   0.967556  21918       0.854433   64
    ]
  },
  "execution_count": 46,
  "metadata": {},
  "output_type": "execute_result"
}
],
"source": [

" \n",  " original_peptides_train = original_peptides[train_indices]\n",  " original_peptides_test = original_peptides[test_indices]\n",  " impute = False\n",  " \n",  " np.random.shuffle(models_params_list)\n",  " for (i, model_params) in enumerate(models_params_list):\n",  " print(\"%10s fold %3d [%3d / %3d] train_size=%d test_size=%d impute=%s model=%s\" %\n",  " (allele, fold_num, i, len(models_params_list), len(train_indices), len(test_indices), impute, model_params))\n", 

" Y_cv_train,\n",  " sample_weights=weights_cv_train,\n",  " verbose=False,\n",  " n_training_epochs=200)\n", n_training_epochs=250)\n",  " fit_time += time.time()\n",  " \n",  " Y_cv_train_9mer_predictions = predictor.predict(X_cv_train)\n", 

},
{
  "cell_type": "code",
  "execution_count": null → 51,
  "metadata": { "collapsed": false },
  "outputs": [
  {
    "data": { "text/plain": [ "(24, 17)" ] },
    "execution_count": 51,
    "metadata": {},
    "output_type": "execute_result"
  }
  ],
  "source": [
    cv_df.shape
  ]
},
{
  "cell_type": "code",
  "execution_count": 78,
  "metadata": { "collapsed": false },
  "outputs": [
  {
    "name": "stdout",
    "output_type": "stream",
    "text": [
      Index([u'activation', u'allele', u'allele_size', u'dropout_probability',
             u'embedding_output_dim', u'fit_time', u'layer_sizes', u'model_params',
             u'test_auc', u'test_f1', u'test_tau', u'train_auc', u'train_f1',
             u'train_size', u'train_tau', u'layer0_size', u'combined'],
            dtype='object')
    ]
  },
  {
    "name": "stderr",
    "output_type": "stream",
    "text": [
      /home/tim/anaconda2/envs/standard-2.7/lib/python2.7/site-packages/ipykernel/__main__.py:9: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
    ]
  }
  ],
  "source": [
    cv_df_str = cv_df.copy()
    print(cv_df_str.columns)
    del cv_df_str['model_params']
    del cv_df_str['fit_time']

    for col in ["layer_sizes"]:
        cv_df_str[col] = [str(x) for x in cv_df_str[col]]
    summary = cv_df_str.groupby(list(cv_df_str.columns[:6])).mean()  #.reset_index()
    summary.sort("combined", ascending=False, inplace=True)
    summary.to_csv("../data/cv_hla0201_summary.csv")
  ]
},
{
  "cell_type": "code",
  "execution_count": 49,
  "metadata": { "collapsed": true },
  "outputs": [],
  "source": [
    train_data["HLA-A0201"].X_index.shape → cv_df["combined"] = cv_df.test_auc + cv_df.test_f1 + cv_df.test_tau
  ]
},
{
  "cell_type": "code",
  "execution_count": null → 50,
  "metadata": { "collapsed": false },
  "outputs": [
  {
    "name": "stderr",
    "output_type": "stream",
    "text": [
      /home/tim/anaconda2/envs/standard-2.7/lib/python2.7/site-packages/ipykernel/__main__.py:1: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
        if __name__ == '__main__':
    ]
  },
  {
    "data": {
\n",  "\n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " 
\n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  "
activationalleleallele_sizedropout_probabilityembedding_output_dimfit_timelayer_sizesmodel_paramstest_auctest_f1test_tautrain_auctrain_f1train_sizetrain_taulayer0_sizecombined
11tanhHLA-A0201328760.0884.933377[4]{u'activation': u'tanh', u'embedding_output_di...0.9536940.8364690.6073060.9597980.844069219170.64152642.397469
13tanhHLA-A0201328760.532100.035493[64]{u'activation': u'tanh', u'embedding_output_di...0.9510170.8379290.6064590.9516910.830517219170.622023642.395405
9tanhHLA-A0201328760.53298.788730[4]{u'activation': u'tanh', u'embedding_output_di...0.9497100.8416080.6039940.9498560.829387219170.61784042.395312
17tanhHLA-A0201328760.532100.653926[64]{u'activation': u'tanh', u'embedding_output_di...0.9479800.8278570.6137410.9540820.832507219180.619337642.389578
16tanhHLA-A0201328760.0885.211551[4]{u'activation': u'tanh', u'embedding_output_di...0.9478060.8249170.6124410.9628710.857006219180.63631842.385164
7tanhHLA-A0201328760.532100.420329[64]{u'activation': u'tanh', u'embedding_output_di...0.9475800.8220680.6131360.9525900.843105219170.617173642.382784
8tanhHLA-A0201328760.03286.968521[4]{u'activation': u'tanh', u'embedding_output_di...0.9514070.8264620.5997710.9661080.860940219170.65828742.377641
19tanhHLA-A0201328760.53299.454345[4]{u'activation': u'tanh', u'embedding_output_di...0.9457000.8243310.6070170.9517550.833457219180.61390842.377048
0tanhHLA-A0201328760.0884.926343[4]{u'activation': u'tanh', u'embedding_output_di...0.9478520.8122530.6124210.9614400.857345219170.63537342.372526
3tanhHLA-A0201328760.53299.237046[4]{u'activation': u'tanh', u'embedding_output_di...0.9457260.8100100.6094180.9511090.828886219170.61206842.365154
22tanhHLA-A0201328760.03287.281114[4]{u'activation': u'tanh', u'embedding_output_di...0.9432900.8234180.5918200.9698830.875209219180.65173042.358528
18tanhHLA-A0201328760.5898.564194[64]{u'activation': u'tanh', u'embedding_output_di...0.9419250.8019950.6037240.9446150.803146219180.602052642.347645
21tanhHLA-A0201328760.5896.809296[4]{u'activation': u'tanh', u'embedding_output_di...0.9405890.7963340.5993130.9432110.787957219180.59909642.336237
15tanhHLA-A0201328760.5897.010899[4]{u'activation': u'tanh', u'embedding_output_di...0.9420980.8025420.5895600.9407150.800403219170.60192142.334200
4tanhHLA-A0201328760.03287.175426[4]{u'activation': u'tanh', u'embedding_output_di...0.9415990.7937500.5928330.9689030.851287219170.64910642.328182
1tanhHLA-A0201328760.5896.526969[4]{u'activation': u'tanh', u'embedding_output_di...0.9349450.7949620.5935910.9414330.816062219170.59593042.323498
14tanhHLA-A0201328760.5898.413581[64]{u'activation': u'tanh', u'embedding_output_di...0.9430260.7858590.5929050.9421790.789515219170.606570642.321790
5tanhHLA-A0201328760.5898.160665[64]{u'activation': u'tanh', u'embedding_output_di...0.9379990.7864940.5968940.9434130.809371219170.598930642.321387
20tanhHLA-A0201328760.0886.658918[64]{u'activation': u'tanh', u'embedding_output_di...0.9149510.7715110.5510250.9942550.944039219180.765497642.237487
12tanhHLA-A0201328760.0886.291791[64]{u'activation': u'tanh', u'embedding_output_di...0.9003780.7456020.5196900.9956770.953636219170.785620642.165669
6tanhHLA-A0201328760.0886.410095[64]{u'activation': u'tanh', u'embedding_output_di...0.8973570.7320640.5228770.9954560.955315219170.775955642.152299
23tanhHLA-A0201328760.03287.568679[64]{u'activation': u'tanh', u'embedding_output_di...0.8952660.7430490.5075860.9983150.967556219180.854433642.145901
10tanhHLA-A0201328760.03287.397654[64]{u'activation': u'tanh', u'embedding_output_di...0.8936750.7371980.4967580.9986190.968971219170.862390642.127631
2tanhHLA-A0201328760.03287.635577[64]{u'activation': u'tanh', u'embedding_output_di...0.8937690.7279770.5043350.9983820.969092219170.853621642.126081
\n",
  "
"
    "text/plain": [
  (model_params column: {u'activation': u'tanh', u'embedding_output_di... in every row)
idx  activation  allele     allele_size  dropout_probability  embedding_output_dim  fit_time    layer_sizes  test_auc  test_f1   test_tau  train_auc  train_f1  train_size  train_tau  layer0_size  combined
11   tanh        HLA-A0201  32876        0.0                   8                     84.933377  [4]          0.953694  0.836469  0.607306  0.959798   0.844069  21917       0.641526    4           2.397469
13   tanh        HLA-A0201  32876        0.5                  32                    100.035493  [64]         0.951017  0.837929  0.606459  0.951691   0.830517  21917       0.622023   64           2.395405
9    tanh        HLA-A0201  32876        0.5                  32                     98.788730  [4]          0.949710  0.841608  0.603994  0.949856   0.829387  21917       0.617840    4           2.395312
17   tanh        HLA-A0201  32876        0.5                  32                    100.653926  [64]         0.947980  0.827857  0.613741  0.954082   0.832507  21918       0.619337   64           2.389578
16   tanh        HLA-A0201  32876        0.0                   8                     85.211551  [4]          0.947806  0.824917  0.612441  0.962871   0.857006  21918       0.636318    4           2.385164
7    tanh        HLA-A0201  32876        0.5                  32                    100.420329  [64]         0.947580  0.822068  0.613136  0.952590   0.843105  21917       0.617173   64           2.382784
8    tanh        HLA-A0201  32876        0.0                  32                     86.968521  [4]          0.951407  0.826462  0.599771  0.966108   0.860940  21917       0.658287    4           2.377641
19   tanh        HLA-A0201  32876        0.5                  32                     99.454345  [4]          0.945700  0.824331  0.607017  0.951755   0.833457  21918       0.613908    4           2.377048
0    tanh        HLA-A0201  32876        0.0                   8                     84.926343  [4]          0.947852  0.812253  0.612421  0.961440   0.857345  21917       0.635373    4           2.372526
3    tanh        HLA-A0201  32876        0.5                  32                     99.237046  [4]          0.945726  0.810010  0.609418  0.951109   0.828886  21917       0.612068    4           2.365154
22   tanh        HLA-A0201  32876        0.0                  32                     87.281114  [4]          0.943290  0.823418  0.591820  0.969883   0.875209  21918       0.651730    4           2.358528
18   tanh        HLA-A0201  32876        0.5                   8                     98.564194  [64]         0.941925  0.801995  0.603724  0.944615   0.803146  21918       0.602052   64           2.347645
21   tanh        HLA-A0201  32876        0.5                   8                     96.809296  [4]          0.940589  0.796334  0.599313  0.943211   0.787957  21918       0.599096    4           2.336237
15   tanh        HLA-A0201  32876        0.5                   8                     97.010899  [4]          0.942098  0.802542  0.589560  0.940715   0.800403  21917       0.601921    4           2.334200
4    tanh        HLA-A0201  32876        0.0                  32                     87.175426  [4]          0.941599  0.793750  0.592833  0.968903   0.851287  21917       0.649106    4           2.328182
1    tanh        HLA-A0201  32876        0.5                   8                     96.526969  [4]          0.934945  0.794962  0.593591  0.941433   0.816062  21917       0.595930    4           2.323498
14   tanh        HLA-A0201  32876        0.5                   8                     98.413581  [64]         0.943026  0.785859  0.592905  0.942179   0.789515  21917       0.606570   64           2.321790
5    tanh        HLA-A0201  32876        0.5                   8                     98.160665  [64]         0.937999  0.786494  0.596894  0.943413   0.809371  21917       0.598930   64           2.321387
20   tanh        HLA-A0201  32876        0.0                   8                     86.658918  [64]         0.914951  0.771511  0.551025  0.994255   0.944039  21918       0.765497   64           2.237487
12   tanh        HLA-A0201  32876        0.0                   8                     86.291791  [64]         0.900378  0.745602  0.519690  0.995677   0.953636  21917       0.785620   64           2.165669
6    tanh        HLA-A0201  32876        0.0                   8                     86.410095  [64]         0.897357  0.732064  0.522877  0.995456   0.955315  21917       0.775955   64           2.152299
23   tanh        HLA-A0201  32876        0.0                  32                     87.568679  [64]         0.895266  0.743049  0.507586  0.998315   0.967556  21918       0.854433   64           2.145901
10   tanh        HLA-A0201  32876        0.0                  32                     87.397654  [64]         0.893675  0.737198  0.496758  0.998619   0.968971  21917       0.862390   64           2.127631
2    tanh        HLA-A0201  32876        0.0                  32                     87.635577  [64]         0.893769  0.727977  0.504335  0.998382   0.969092  21917       0.853621   64           2.126081
    ]
  },
  "execution_count": 50,
  "metadata": {},
  "output_type": "execute_result"
}
],
"source": [
  cv_df.sort("combined", ascending=False, inplace=True)
  cv_df
]
},
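The FutureWarnings above come from the long-deprecated `DataFrame.sort`; `sort_values` is its replacement. A sketch of the combined-score and summary steps with only that call modernized (pandas 0.17 or later), otherwise the same logic as the cells above:

```python
# Combined ranking score, as defined in the cell above.
cv_df["combined"] = cv_df.test_auc + cv_df.test_f1 + cv_df.test_tau

cv_df_str = cv_df.copy()
del cv_df_str["model_params"]
del cv_df_str["fit_time"]

# Lists are unhashable, so stringify layer_sizes before grouping.
cv_df_str["layer_sizes"] = [str(x) for x in cv_df_str["layer_sizes"]]

# Average the three CV folds for each hyperparameter configuration.
summary = cv_df_str.groupby(list(cv_df_str.columns[:6])).mean()

# sort(...) is deprecated; sort_values(...) is the replacement.
summary = summary.sort_values("combined", ascending=False)
summary.to_csv("../data/cv_hla0201_summary.csv")
```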
\n",  "\n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " 
\n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  "
activationalleleallele_sizedropout_probabilityembedding_output_dimfit_timelayer_sizesmodel_paramstest_auctest_f1test_tautrain_auctrain_f1train_sizetrain_taulayer0_size
0tanhHLA-A0201328760.0884.926343[4]{u'activation': u'tanh', u'embedding_output_di...0.9478520.8122530.6124210.9614400.857345219170.6353734
1tanhHLA-A0201328760.5896.526969[4]{u'activation': u'tanh', u'embedding_output_di...0.9349450.7949620.5935910.9414330.816062219170.5959304
2tanhHLA-A0201328760.03287.635577[64]{u'activation': u'tanh', u'embedding_output_di...0.8937690.7279770.5043350.9983820.969092219170.85362164
3tanhHLA-A0201328760.53299.237046[4]{u'activation': u'tanh', u'embedding_output_di...0.9457260.8100100.6094180.9511090.828886219170.6120684
4tanhHLA-A0201328760.03287.175426[4]{u'activation': u'tanh', u'embedding_output_di...0.9415990.7937500.5928330.9689030.851287219170.6491064
5tanhHLA-A0201328760.5898.160665[64]{u'activation': u'tanh', u'embedding_output_di...0.9379990.7864940.5968940.9434130.809371219170.59893064
6tanhHLA-A0201328760.0886.410095[64]{u'activation': u'tanh', u'embedding_output_di...0.8973570.7320640.5228770.9954560.955315219170.77595564
7tanhHLA-A0201328760.532100.420329[64]{u'activation': u'tanh', u'embedding_output_di...0.9475800.8220680.6131360.9525900.843105219170.61717364
8tanhHLA-A0201328760.03286.968521[4]{u'activation': u'tanh', u'embedding_output_di...0.9514070.8264620.5997710.9661080.860940219170.6582874
9tanhHLA-A0201328760.53298.788730[4]{u'activation': u'tanh', u'embedding_output_di...0.9497100.8416080.6039940.9498560.829387219170.6178404
10tanhHLA-A0201328760.03287.397654[64]{u'activation': u'tanh', u'embedding_output_di...0.8936750.7371980.4967580.9986190.968971219170.86239064
11tanhHLA-A0201328760.0884.933377[4]{u'activation': u'tanh', u'embedding_output_di...0.9536940.8364690.6073060.9597980.844069219170.6415264
12tanhHLA-A0201328760.0886.291791[64]{u'activation': u'tanh', u'embedding_output_di...0.9003780.7456020.5196900.9956770.953636219170.78562064
13tanhHLA-A0201328760.532100.035493[64]{u'activation': u'tanh', u'embedding_output_di...0.9510170.8379290.6064590.9516910.830517219170.62202364
14tanhHLA-A0201328760.5898.413581[64]{u'activation': u'tanh', u'embedding_output_di...0.9430260.7858590.5929050.9421790.789515219170.60657064
15tanhHLA-A0201328760.5897.010899[4]{u'activation': u'tanh', u'embedding_output_di...0.9420980.8025420.5895600.9407150.800403219170.6019214
16tanhHLA-A0201328760.0885.211551[4]{u'activation': u'tanh', u'embedding_output_di...0.9478060.8249170.6124410.9628710.857006219180.6363184
17tanhHLA-A0201328760.532100.653926[64]{u'activation': u'tanh', u'embedding_output_di...0.9479800.8278570.6137410.9540820.832507219180.61933764
18tanhHLA-A0201328760.5898.564194[64]{u'activation': u'tanh', u'embedding_output_di...0.9419250.8019950.6037240.9446150.803146219180.60205264
19tanhHLA-A0201328760.53299.454345[4]{u'activation': u'tanh', u'embedding_output_di...0.9457000.8243310.6070170.9517550.833457219180.6139084
20tanhHLA-A0201328760.0886.658918[64]{u'activation': u'tanh', u'embedding_output_di...0.9149510.7715110.5510250.9942550.944039219180.76549764
21tanhHLA-A0201328760.5896.809296[4]{u'activation': u'tanh', u'embedding_output_di...0.9405890.7963340.5993130.9432110.787957219180.5990964
22tanhHLA-A0201328760.03287.281114[4]{u'activation': u'tanh', u'embedding_output_di...0.9432900.8234180.5918200.9698830.875209219180.6517304
23tanhHLA-A0201328760.03287.568679[64]{u'activation': u'tanh', u'embedding_output_di...0.8952660.7430490.5075860.9983150.967556219180.85443364
\n",
  "
"
  ],  "text/plain": [  " activation allele allele_size dropout_probability \\\n",  "0 tanh HLA-A0201 32876 0.0 \n",  "1 tanh HLA-A0201 32876 0.5 \n",  "2 tanh HLA-A0201 32876 0.0 \n",  "3 tanh HLA-A0201 32876 0.5 \n",  "4 tanh HLA-A0201 32876 0.0 \n",  "5 tanh HLA-A0201 32876 0.5 \n",  "6 tanh HLA-A0201 32876 0.0 \n",  "7 tanh HLA-A0201 32876 0.5 \n",  "8 tanh HLA-A0201 32876 0.0 \n",  "9 tanh HLA-A0201 32876 0.5 \n",  "10 tanh HLA-A0201 32876 0.0 \n",  "11 tanh HLA-A0201 32876 0.0 \n",  "12 tanh HLA-A0201 32876 0.0 \n",  "13 tanh HLA-A0201 32876 0.5 \n",  "14 tanh HLA-A0201 32876 0.5 \n",  "15 tanh HLA-A0201 32876 0.5 \n",  "16 tanh HLA-A0201 32876 0.0 \n",  "17 tanh HLA-A0201 32876 0.5 \n",  "18 tanh HLA-A0201 32876 0.5 \n",  "19 tanh HLA-A0201 32876 0.5 \n",  "20 tanh HLA-A0201 32876 0.0 \n",  "21 tanh HLA-A0201 32876 0.5 \n",  "22 tanh HLA-A0201 32876 0.0 \n",  "23 tanh HLA-A0201 32876 0.0 \n",  "\n",  " embedding_output_dim fit_time layer_sizes \\\n",  "0 8 84.926343 [4] \n",  "1 8 96.526969 [4] \n",  "2 32 87.635577 [64] \n",  "3 32 99.237046 [4] \n",  "4 32 87.175426 [4] \n",  "5 8 98.160665 [64] \n",  "6 8 86.410095 [64] \n",  "7 32 100.420329 [64] \n",  "8 32 86.968521 [4] \n",  "9 32 98.788730 [4] \n",  "10 32 87.397654 [64] \n",  "11 8 84.933377 [4] \n",  "12 8 86.291791 [64] \n",  "13 32 100.035493 [64] \n",  "14 8 98.413581 [64] \n",  "15 8 97.010899 [4] \n",  "16 8 85.211551 [4] \n",  "17 32 100.653926 [64] \n",  "18 8 98.564194 [64] \n",  "19 32 99.454345 [4] \n",  "20 8 86.658918 [64] \n",  "21 8 96.809296 [4] \n",  "22 32 87.281114 [4] \n",  "23 32 87.568679 [64] \n",  "\n",  " model_params test_auc test_f1 \\\n",  "0 {u'activation': u'tanh', u'embedding_output_di... 0.947852 0.812253 \n",  "1 {u'activation': u'tanh', u'embedding_output_di... 0.934945 0.794962 \n",  "2 {u'activation': u'tanh', u'embedding_output_di... 0.893769 0.727977 \n",  "3 {u'activation': u'tanh', u'embedding_output_di... 0.945726 0.810010 \n",  "4 {u'activation': u'tanh', u'embedding_output_di... 0.941599 0.793750 \n",  "5 {u'activation': u'tanh', u'embedding_output_di... 0.937999 0.786494 \n",  "6 {u'activation': u'tanh', u'embedding_output_di... 0.897357 0.732064 \n",  "7 {u'activation': u'tanh', u'embedding_output_di... 0.947580 0.822068 \n",  "8 {u'activation': u'tanh', u'embedding_output_di... 0.951407 0.826462 \n",  "9 {u'activation': u'tanh', u'embedding_output_di... 0.949710 0.841608 \n",  "10 {u'activation': u'tanh', u'embedding_output_di... 0.893675 0.737198 \n",  "11 {u'activation': u'tanh', u'embedding_output_di... 0.953694 0.836469 \n",  "12 {u'activation': u'tanh', u'embedding_output_di... 0.900378 0.745602 \n",  "13 {u'activation': u'tanh', u'embedding_output_di... 0.951017 0.837929 \n",  "14 {u'activation': u'tanh', u'embedding_output_di... 0.943026 0.785859 \n",  "15 {u'activation': u'tanh', u'embedding_output_di... 0.942098 0.802542 \n",  "16 {u'activation': u'tanh', u'embedding_output_di... 0.947806 0.824917 \n",  "17 {u'activation': u'tanh', u'embedding_output_di... 0.947980 0.827857 \n",  "18 {u'activation': u'tanh', u'embedding_output_di... 0.941925 0.801995 \n",  "19 {u'activation': u'tanh', u'embedding_output_di... 0.945700 0.824331 \n",  "20 {u'activation': u'tanh', u'embedding_output_di... 0.914951 0.771511 \n",  "21 {u'activation': u'tanh', u'embedding_output_di... 0.940589 0.796334 \n",  "22 {u'activation': u'tanh', u'embedding_output_di... 0.943290 0.823418 \n",  "23 {u'activation': u'tanh', u'embedding_output_di... 
0.895266 0.743049 \n",  "\n",  " test_tau train_auc train_f1 train_size train_tau layer0_size \n",  "0 0.612421 0.961440 0.857345 21917 0.635373 4 \n",  "1 0.593591 0.941433 0.816062 21917 0.595930 4 \n",  "2 0.504335 0.998382 0.969092 21917 0.853621 64 \n",  "3 0.609418 0.951109 0.828886 21917 0.612068 4 \n",  "4 0.592833 0.968903 0.851287 21917 0.649106 4 \n",  "5 0.596894 0.943413 0.809371 21917 0.598930 64 \n",  "6 0.522877 0.995456 0.955315 21917 0.775955 64 \n",  "7 0.613136 0.952590 0.843105 21917 0.617173 64 \n",  "8 0.599771 0.966108 0.860940 21917 0.658287 4 \n",  "9 0.603994 0.949856 0.829387 21917 0.617840 4 \n",  "10 0.496758 0.998619 0.968971 21917 0.862390 64 \n",  "11 0.607306 0.959798 0.844069 21917 0.641526 4 \n",  "12 0.519690 0.995677 0.953636 21917 0.785620 64 \n",  "13 0.606459 0.951691 0.830517 21917 0.622023 64 \n",  "14 0.592905 0.942179 0.789515 21917 0.606570 64 \n",  "15 0.589560 0.940715 0.800403 21917 0.601921 4 \n",  "16 0.612441 0.962871 0.857006 21918 0.636318 4 \n",  "17 0.613741 0.954082 0.832507 21918 0.619337 64 \n",  "18 0.603724 0.944615 0.803146 21918 0.602052 64 \n",  "19 0.607017 0.951755 0.833457 21918 0.613908 4 \n",  "20 0.551025 0.994255 0.944039 21918 0.765497 64 \n",  "21 0.599313 0.943211 0.787957 21918 0.599096 4 \n",  "22 0.591820 0.969883 0.875209 21918 0.651730 4 \n",  "23 0.507586 0.998315 0.967556 21918 0.854433 64 "  ]  },  "execution_count": 47,  "metadata": {},  "output_type": "execute_result"  }  ],  "source": [  "cv_df = pandas.DataFrame(cv_df)\n",  "cv_df[\"layer0_size\"] = [x[0] for x in cv_df.layer_sizes]\n", 

},  {  "cell_type": "code",  "execution_count": null, 48,  "metadata": {  "collapsed": false,  "scrolled": false  },  "outputs": [],  "source": [  "cv_df.to_csv(\"cv4.csv\")" "cv_df.to_csv(\"cv5.csv\")"  ]  },  { 

"name": "python",  "nbconvert_exporter": "python",  "pygments_lexer": "ipython2",  "version": "2.7.10" "2.7.11"  }  },  "nbformat": 4,         

"all_validation_data = mhcflurry.data.load_allele_datasets(data_dir + \"bdata.2013.mhci.public.blind.1.txt\")\n"  ]  },  {  "cell_type": "code",  "execution_count": 48,  "metadata": {  "collapsed": false  },  "outputs": [  {  "name": "stdout",  "output_type": "stream",  "text": [  "[MICE] Completing matrix with shape (31539, 106)\n",  "[MICE] Starting imputation round 1/110, elapsed time 0.111\n",  "[MICE] Starting imputation round 2/110, elapsed time 8.241\n",  "[MICE] Starting imputation round 3/110, elapsed time 16.674\n",  "[MICE] Starting imputation round 4/110, elapsed time 26.568\n",  "[MICE] Starting imputation round 5/110, elapsed time 34.593\n",  "[MICE] Starting imputation round 6/110, elapsed time 42.498\n",  "[MICE] Starting imputation round 7/110, elapsed time 51.072\n",  "[MICE] Starting imputation round 8/110, elapsed time 59.647\n",  "[MICE] Starting imputation round 9/110, elapsed time 71.593\n",  "[MICE] Starting imputation round 10/110, elapsed time 81.222\n",  "[MICE] Starting imputation round 11/110, elapsed time 89.827\n",  "[MICE] Starting imputation round 12/110, elapsed time 102.841\n",  "[MICE] Starting imputation round 13/110, elapsed time 115.194\n",  "[MICE] Starting imputation round 14/110, elapsed time 125.657\n",  "[MICE] Starting imputation round 15/110, elapsed time 136.601\n",  "[MICE] Starting imputation round 16/110, elapsed time 150.123\n",  "[MICE] Starting imputation round 17/110, elapsed time 162.037\n",  "[MICE] Starting imputation round 18/110, elapsed time 172.226\n",  "[MICE] Starting imputation round 19/110, elapsed time 184.679\n",  "[MICE] Starting imputation round 20/110, elapsed time 195.810\n",  "[MICE] Starting imputation round 21/110, elapsed time 206.368\n",  "[MICE] Starting imputation round 22/110, elapsed time 216.587\n",  "[MICE] Starting imputation round 23/110, elapsed time 227.364\n",  "[MICE] Starting imputation round 24/110, elapsed time 237.498\n",  "[MICE] Starting imputation round 25/110, elapsed time 248.307\n",  "[MICE] Starting imputation round 26/110, elapsed time 259.084\n",  "[MICE] Starting imputation round 27/110, elapsed time 270.167\n",  "[MICE] Starting imputation round 28/110, elapsed time 283.990\n",  "[MICE] Starting imputation round 29/110, elapsed time 296.113\n",  "[MICE] Starting imputation round 30/110, elapsed time 307.106\n",  "[MICE] Starting imputation round 31/110, elapsed time 316.434\n",  "[MICE] Starting imputation round 32/110, elapsed time 328.301\n",  "[MICE] Starting imputation round 33/110, elapsed time 341.276\n",  "[MICE] Starting imputation round 34/110, elapsed time 356.325\n",  "[MICE] Starting imputation round 35/110, elapsed time 366.570\n",  "[MICE] Starting imputation round 36/110, elapsed time 377.418\n",  "[MICE] Starting imputation round 37/110, elapsed time 387.342\n",  "[MICE] Starting imputation round 38/110, elapsed time 396.340\n",  "[MICE] Starting imputation round 39/110, elapsed time 405.177\n",  "[MICE] Starting imputation round 40/110, elapsed time 415.509\n",  "[MICE] Starting imputation round 41/110, elapsed time 424.908\n",  "[MICE] Starting imputation round 42/110, elapsed time 435.436\n",  "[MICE] Starting imputation round 43/110, elapsed time 445.738\n",  "[MICE] Starting imputation round 44/110, elapsed time 454.812\n",  "[MICE] Starting imputation round 45/110, elapsed time 463.573\n",  "[MICE] Starting imputation round 46/110, elapsed time 472.524\n",  "[MICE] Starting imputation round 47/110, elapsed time 481.888\n",  "[MICE] Starting imputation round 48/110, 
elapsed time 491.173\n",  "[MICE] Starting imputation round 49/110, elapsed time 504.550\n",  "[MICE] Starting imputation round 50/110, elapsed time 516.815\n",  "[MICE] Starting imputation round 51/110, elapsed time 527.519\n",  "[MICE] Starting imputation round 52/110, elapsed time 539.220\n",  "[MICE] Starting imputation round 53/110, elapsed time 548.554\n",  "[MICE] Starting imputation round 54/110, elapsed time 557.358\n",  "[MICE] Starting imputation round 55/110, elapsed time 566.810\n",  "[MICE] Starting imputation round 56/110, elapsed time 575.670\n",  "[MICE] Starting imputation round 57/110, elapsed time 586.083\n",  "[MICE] Starting imputation round 58/110, elapsed time 595.799\n",  "[MICE] Starting imputation round 59/110, elapsed time 605.537\n",  "[MICE] Starting imputation round 60/110, elapsed time 614.768\n",  "[MICE] Starting imputation round 61/110, elapsed time 624.430\n",  "[MICE] Starting imputation round 62/110, elapsed time 635.647\n",  "[MICE] Starting imputation round 63/110, elapsed time 645.239\n",  "[MICE] Starting imputation round 64/110, elapsed time 654.488\n",  "[MICE] Starting imputation round 65/110, elapsed time 663.584\n",  "[MICE] Starting imputation round 66/110, elapsed time 673.521\n",  "[MICE] Starting imputation round 67/110, elapsed time 682.477\n",  "[MICE] Starting imputation round 68/110, elapsed time 691.423\n",  "[MICE] Starting imputation round 69/110, elapsed time 700.841\n",  "[MICE] Starting imputation round 70/110, elapsed time 710.097\n",  "[MICE] Starting imputation round 71/110, elapsed time 718.485\n",  "[MICE] Starting imputation round 72/110, elapsed time 727.323\n",  "[MICE] Starting imputation round 73/110, elapsed time 736.386\n",  "[MICE] Starting imputation round 74/110, elapsed time 745.016\n",  "[MICE] Starting imputation round 75/110, elapsed time 753.102\n",  "[MICE] Starting imputation round 76/110, elapsed time 760.977\n",  "[MICE] Starting imputation round 77/110, elapsed time 769.145\n",  "[MICE] Starting imputation round 78/110, elapsed time 778.712\n",  "[MICE] Starting imputation round 79/110, elapsed time 787.006\n",  "[MICE] Starting imputation round 80/110, elapsed time 795.750\n",  "[MICE] Starting imputation round 81/110, elapsed time 804.539\n",  "[MICE] Starting imputation round 82/110, elapsed time 812.682\n",  "[MICE] Starting imputation round 83/110, elapsed time 821.102\n",  "[MICE] Starting imputation round 84/110, elapsed time 830.003\n",  "[MICE] Starting imputation round 85/110, elapsed time 838.924\n",  "[MICE] Starting imputation round 86/110, elapsed time 847.681\n",  "[MICE] Starting imputation round 87/110, elapsed time 856.701\n",  "[MICE] Starting imputation round 88/110, elapsed time 865.043\n",  "[MICE] Starting imputation round 89/110, elapsed time 874.018\n",  "[MICE] Starting imputation round 90/110, elapsed time 882.666\n",  "[MICE] Starting imputation round 91/110, elapsed time 891.618\n",  "[MICE] Starting imputation round 92/110, elapsed time 899.558\n",  "[MICE] Starting imputation round 93/110, elapsed time 908.443\n",  "[MICE] Starting imputation round 94/110, elapsed time 916.337\n",  "[MICE] Starting imputation round 95/110, elapsed time 924.289\n",  "[MICE] Starting imputation round 96/110, elapsed time 933.124\n",  "[MICE] Starting imputation round 97/110, elapsed time 941.719\n",  "[MICE] Starting imputation round 98/110, elapsed time 951.259\n",  "[MICE] Starting imputation round 99/110, elapsed time 961.569\n",  "[MICE] Starting imputation round 100/110, elapsed time 
970.936\n",  "[MICE] Starting imputation round 101/110, elapsed time 980.006\n",  "[MICE] Starting imputation round 102/110, elapsed time 989.664\n",  "[MICE] Starting imputation round 103/110, elapsed time 999.131\n",  "[MICE] Starting imputation round 104/110, elapsed time 1008.392\n",  "[MICE] Starting imputation round 105/110, elapsed time 1016.863\n",  "[MICE] Starting imputation round 106/110, elapsed time 1026.306\n",  "[MICE] Starting imputation round 107/110, elapsed time 1035.933\n",  "[MICE] Starting imputation round 108/110, elapsed time 1044.824\n",  "[MICE] Starting imputation round 109/110, elapsed time 1054.162\n",  "[MICE] Starting imputation round 110/110, elapsed time 1063.809\n"  ]  }  ],  "source": [  "imputed_train_data = mhcflurry.imputation.create_imputed_datasets(all_train_data, fancyimpute.MICE())\n"  ]  },  {  "cell_type": "code",  "execution_count": 17, 

"scores_df.sort(\"test_size\", ascending=False, inplace=True)\n",  "scores_df"  ]  },  {  "cell_type": "code",  "execution_count": null,  "metadata": {  "collapsed": true  },  "outputs": [],  "source": [  "# train models\n",  "def make_and_fit_model(allele, original_params):\n",  " params = dict(original_params)\n",  " impute = params[\"impute\"]\n",  " del params[\"impute\"]\n",  " training = (imputed_train_data if impute else all_train_data)[\"allele\"]\n",  " model = mhcflurry.Class1BindingPredictor.from_hyperparameters(max_ic50=max_ic50, **params)\n",  " print(\"Fitting model for allele %s (%d): %s\" % (allele, len(training.Y), str(original_params)))\n",  " \n",  "\n",  "models = dict((allele, [make_model(allele, params) for params in models_params_list]) for allele in alleles)\n"  ]  },  {  "cell_type": "code",  "execution_count": 44,  "metadata": {  "collapsed": false  },  "outputs": [  {  "data": {  "text/html": [  "
\n",  "\n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  " \n",  "
allelenetmhc_aucnetmhc_f1netmhc_taunetmhcpan_aucnetmhcpan_f1netmhcpan_tausmmpmbec_cpp_aucsmmpmbec_cpp_f1smmpmbec_cpp_tautest_size
4HLA-A02010.9322340.8843360.6354980.9304790.8809630.6373380.9273580.8851210.6262242126
36HLA-B27050.9484570.2857140.4305610.9438600.4000000.3772080.9416820.3043480.416884314
\n",
  "
"
  ],  "text/plain": [  " allele netmhc_auc netmhc_f1 netmhc_tau netmhcpan_auc netmhcpan_f1 \\\n",  "4 HLA-A0201 0.932234 0.884336 0.635498 0.930479 0.880963 \n",  "36 HLA-B2705 0.948457 0.285714 0.430561 0.943860 0.400000 \n",  "\n",  " netmhcpan_tau smmpmbec_cpp_auc smmpmbec_cpp_f1 smmpmbec_cpp_tau \\\n",  "4 0.637338 0.927358 0.885121 0.626224 \n",  "36 0.377208 0.941682 0.304348 0.416884 \n",  "\n",  " test_size \n",  "4 2126 \n",  "36 314 "  ]  },  "execution_count": 44,  "metadata": {},  "output_type": "execute_result"  }  ],  "source": [  "summary_df = scores_df[scores_df.allele.isin(alleles)]\n",  "summary_df"  ]  }  ],  "metadata": {         

\section{Introduction}

The detection and elimination of both infection and cancer is the central task of the vertebrate adaptive immune system, which generates diverse populations of immune cells that recognize infected and cancerous cells. In most vertebrates, adaptive immunity can be roughly divided into the realm of antibodies (B-cells) and cell-mediated responses (T-cells)~\cite{Anderson_2004}.

\begin{itemize}
\item Overview of MHC binding
\item Allele-specific vs.\ pan-allele prediction; here we focus on allele-specific predictors
\end{itemize}

\section{Datasets}

Two datasets were used from a recent paper studying the relationship between training data and pMHC predictor accuracy~\cite{Kim_2014}. The training dataset (BD2009) contained entries from IEDB~\cite{Salimi_2012} up to 2009, and the test dataset (BLIND) contained IEDB entries from 2010 through 2013 that did not overlap with BD2009 (Table~\ref{tab:datasets}).

\begin{table}[h!]
\centering
\begin{tabular}{l||cccc}
\toprule
{} & Alleles & Alleles w/ 10+ measurements & IC50 Measurements & Expanded 9mers \\
\midrule
BD2009 & 106 & 98 & 137,654 & 470,170 \\
BLIND  & 53  & 53 & 27,680  & 83,752  \\
\bottomrule
\end{tabular}
\caption{Train (BD2009) and test (BLIND) dataset sizes.}
\label{tab:datasets}
\end{table}
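For orientation, here is a minimal sketch of loading these two datasets with the helper used in the accompanying analysis notebook; the BLIND filename appears verbatim in that notebook, while the BD2009 filename and the data directory are assumptions based on the Kim et al. benchmark's naming convention:

\begin{verbatim}
# Sketch: loading BD2009 (train) and BLIND (test). The BLIND filename
# is taken from the accompanying notebook; the BD2009 filename and
# data_dir are assumptions.
import mhcflurry

data_dir = "/path/to/benchmark/"  # hypothetical download location
all_train_data = mhcflurry.data.load_allele_datasets(
    data_dir + "bdata.2009.mhci.public.1.txt")
all_validation_data = mhcflurry.data.load_allele_datasets(
    data_dir + "bdata.2013.mhci.public.blind.1.txt")
\end{verbatim}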

\section{Evaluating the performance of a binding predictor}

Throughout this paper we will evaluate a pMHC binding predictor using three different metrics:

\begin{itemize}
\item AUC: the area under the ROC curve for distinguishing binders from non-binders,
\item F1 score: for binary binder classification,
\item Kendall's $\tau$: the rank correlation between predicted and measured affinities.
\end{itemize}
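As a concrete illustration, all three metrics can be computed from measured and predicted IC50 values as sketched below; the 500 nM binder cutoff is a conventional choice assumed here for illustration, not a value fixed by this section.

\begin{verbatim}
# Sketch: computing AUC, F1, and Kendall's tau from measured vs.
# predicted IC50 values (nM). The 500 nM binder cutoff is an
# assumption made for illustration.
import numpy as np
from scipy.stats import kendalltau
from sklearn.metrics import f1_score, roc_auc_score

def evaluate(measured_ic50, predicted_ic50, binder_cutoff=500.0):
    measured_ic50 = np.asarray(measured_ic50, dtype=float)
    predicted_ic50 = np.asarray(predicted_ic50, dtype=float)
    true_binder = measured_ic50 <= binder_cutoff
    # Lower IC50 means stronger binding, so negate predictions to get
    # a score where larger values indicate likelier binders.
    auc = roc_auc_score(true_binder, -predicted_ic50)
    f1 = f1_score(true_binder, predicted_ic50 <= binder_cutoff)
    tau, _ = kendalltau(measured_ic50, predicted_ic50)
    return auc, f1, tau
\end{verbatim}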

\centering
\includegraphics[scale=0.5]{figures/mhcflurry-gliffy-network.png}
\caption{Neural network architecture for predicting peptide-MHC affinities from fixed-length amino acid sequences.}
\end{figure}

\section{Predicting affinities for multiple peptide lengths using a 9mer encoding}

Reduction from multiple peptide lengths to a 9mer encoding was done using a scheme inspired by NetMHC~\cite{lundegaard2008accurate}. Peptides with only 8 amino acids were extended by inserting a special wildcard residue ``X'' at each possible position in the sequence, yielding nine distinct 9mers. Peptides longer than 9 amino acids were shortened by deleting a consecutive stretch of residues at each possible position. These lengthened or shortened samples were included in the training set with a sample weight inversely proportional to the number of samples derived from a single measurement, as illustrated in the sketch below.
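A minimal sketch of this reduction (illustrative function name and structure; this is not mhcflurry's exact implementation):

\begin{verbatim}
# Sketch of the 9mer reduction described above (illustrative; not
# mhcflurry's exact implementation).
def encode_to_9mers(peptide):
    # Return the derived 9mers and the per-sample training weight.
    n = len(peptide)
    if n == 9:
        kmers = [peptide]
    elif n == 8:
        # Insert the wildcard 'X' at every position: nine 9mers.
        kmers = [peptide[:i] + "X" + peptide[i:] for i in range(n + 1)]
    elif n > 9:
        # Delete a consecutive stretch of (n - 9) residues starting at
        # every possible position.
        d = n - 9
        kmers = [peptide[:i] + peptide[i + d:] for i in range(n - d + 1)]
    else:
        raise ValueError("peptide too short: %s" % peptide)
    # Weight inversely proportional to the number of derived samples,
    # so each measurement contributes one unit of total weight.
    return kmers, 1.0 / len(kmers)
\end{verbatim}

For example, an 8mer expands to nine 9mers with weight $1/9$ each, while an 11mer yields ten 9mers with weight $1/10$ each.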