deletions | additions
diff --git a/bibliography/biblio.bib b/bibliography/biblio.bib
index 16bd695..be98844 100644
--- a/bibliography/biblio.bib
+++ b/bibliography/biblio.bib
...
title = {{Multiple imputation by chained equations: what is it and how does it work?}},
journal = {International Journal of Methods in Psychiatric Research},
}
@article{lundegaard2008accurate,
  title     = {Accurate approximation method for prediction of class {I} {MHC} affinities for peptides of length 8, 10 and 11 using prediction tools trained on 9mers},
  author    = {Lundegaard, Claus and Lund, Ole and Nielsen, Morten},
  journal   = {Bioinformatics},
  volume    = {24},
  number    = {11},
  pages     = {1397--1398},
  year      = {2008},
  publisher = {Oxford University Press},
}
@article{Anderson_2004,
  doi       = {10.4049/jimmunol.172.10.5851},
  year      = {2004},
  month     = may,
  publisher = {The American Association of Immunologists},
  volume    = {172},
  number    = {10},
  pages     = {5851--5860},
  author    = {Anderson, M. K. and Pant, R. and Miracle, A. L. and Sun, X. and Luer, C. A. and Walsh, C. J. and Telfer, J. C. and Litman, G. W. and Rothenberg, E. V.},
  title     = {Evolutionary Origins of Lymphocytes: Ensembles of {T} Cell and {B} Cell Transcriptional Regulators in a Cartilaginous Fish},
  journal   = {The Journal of Immunology},
}
diff --git a/data/cv_hla0201_summary.csv b/data/cv_hla0201_summary.csv
new file mode 100644
index 0000000..46578ea
--- /dev/null
+++ b/data/cv_hla0201_summary.csv
...
activation,allele,allele_size,dropout_probability,embedding_output_dim,layer_sizes,test_auc,test_f1,test_tau,train_auc,train_f1,train_size,train_tau,layer0_size,combined
tanh,HLA-A0201,32876,0.5,32,[64],0.948859224635,0.829284584403,0.611111788519,0.952787573501,0.835376335904,21917.3333333,0.619511045449,64.0,2.38925559756
tanh,HLA-A0201,32876,0.0,8,[4],0.949783907887,0.824546421325,0.610722720647,0.961369550594,0.852806489037,21917.3333333,0.637738834691,4.0,2.38505304986
tanh,HLA-A0201,32876,0.5,32,[4],0.947045743882,0.825316226587,0.606809392519,0.95090676537,0.830576863532,21917.3333333,0.614604905044,4.0,2.37917136299
tanh,HLA-A0201,32876,0.0,32,[4],0.945431851684,0.814543482548,0.594808046713,0.968297946562,0.862478716967,21917.3333333,0.653041195915,4.0,2.35478338095
tanh,HLA-A0201,32876,0.5,8,[4],0.939210925562,0.797945924441,0.594154537435,0.941786515167,0.80147407161,21917.3333333,0.598982352368,4.0,2.33131138744
tanh,HLA-A0201,32876,0.5,8,[64],0.940983539736,0.79144903494,0.597841012914,0.943402333279,0.800677140665,21917.3333333,0.602517362108,64.0,2.33027358759
tanh,HLA-A0201,32876,0.0,8,[64],0.904228551551,0.749725486519,0.531197414445,0.995129449276,0.950996665072,21917.3333333,0.775690619396,64.0,2.18515145252
tanh,HLA-A0201,32876,0.0,32,[64],0.894236762069,0.736074577248,0.502893177031,0.99843884762,0.968539688086,21917.3333333,0.856814617992,64.0,2.13320451635
diff --git a/layout.md b/layout.md
index 846d2d5..51e2bca 100644
--- a/layout.md
+++ b/layout.md
...
sectionContent_Text_.tex
sectionSection_title.tex
section_Network_architecture_begin_figure__.tex
section_Evaluating_a_predictor_Throughout__.tex section_Evaluating_the_performance_of__.tex
section_Matrix_completion_and_imputation__.tex
section_Pretraining_an_artificial_neural__.tex
sectionAcknowledgeme.tex
diff --git a/notebooks/cv.ipynb b/notebooks/cv.ipynb
index 38c814f..8493e69 100644
--- a/notebooks/cv.ipynb
+++ b/notebooks/cv.ipynb
...
"cells": [
{
"cell_type": "code",
"execution_count":
28, 25,
"metadata": {
"collapsed": false
},
...
},
{
"cell_type": "code",
"execution_count":
29, 26,
"metadata": {
"collapsed": true
},
...
},
{
"cell_type": "code",
"execution_count":
30, 27,
"metadata": {
"collapsed": false
},
...
},
{
"cell_type": "code",
"execution_count":
31, 28,
"metadata": {
"collapsed": false
},
...
},
{
"cell_type": "code",
"execution_count":
32, 29,
"metadata": {
"collapsed": false
},
...
"0.36749263596306014"
]
},
"execution_count":
32, 29,
"metadata": {},
"output_type": "execute_result"
}
...
},
{
"cell_type": "code",
"execution_count":
33, 30,
"metadata": {
"collapsed": false
},
...
"{8, 9, 10, 11, 12, 13, 14, 15}"
]
},
"execution_count":
33, 30,
"metadata": {},
"output_type": "execute_result"
}
...
},
{
"cell_type": "code",
"execution_count":
34, 31,
"metadata": {
"collapsed": false
},
...
"{9}"
]
},
"execution_count":
34, 31,
"metadata": {},
"output_type": "execute_result"
}
...
},
{
"cell_type": "code",
"execution_count":
35, 32,
"metadata": {
"collapsed": false
},
...
},
{
"cell_type": "code",
"execution_count":
36, 33,
"metadata": {
"collapsed": false
},
...
},
{
"cell_type": "code",
"execution_count":
37, 45,
"metadata": {
"collapsed": false
},
...
"name": "stdout",
"output_type": "stream",
"text": [
"8 models\n"
]
},
{
...
"{'activation', 'dropout_probability', 'embedding_output_dim', 'layer_sizes'}"
]
},
"execution_count":
37, 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dropout_probabilities = [0.0,
0.1, 0.5]\n",
"\n",
"embedding_output_dims =
[16, 32, 64, 128]\n", [8, 32]\n",
"#embedding_output_dims = [4, 32]\n",
"\n",
"#layer_sizes = [[4], [8], [16], [64], [128]]\n",
"layer_sizes_list =
[[16], [64], [100], [128]]\n", [[4], [64]]\n",
"\n",
"activations = [\"tanh\"]\n",
"\n",
...
},
{
"cell_type": "code",
"execution_count":
38, 41,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": false,
"scrolled": false
...
"text": [
"Allele: HLA-A0201\n",
"-- fold #1/3\n",
" HLA-A0201 fold 0 [ 0 /
48] 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim':
16, 8, 'dropout_probability': 0.0, 'layer_sizes':
[16]}\n", [4]}\n",
"-- # unique peptides = 6377\n",
"-- # unique peptides = 3188\n",
"train tau: 0.635373\n",
"train auc: 0.961440\n",
"train f1: 0.857345\n",
"test tau: 0.612421\n",
"test auc: 0.947852\n",
"test f1: 0.812253\n",
" HLA-A0201 fold 0 [ 1 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 8, 'dropout_probability': 0.5, 'layer_sizes': [4]}\n",
"-- # unique peptides = 6377\n",
"-- # unique peptides = 3188\n",
"train tau: 0.595930\n",
"train auc: 0.941433\n",
"train f1: 0.816062\n",
"test tau: 0.593591\n",
"test auc: 0.934945\n",
"test f1: 0.794962\n",
" HLA-A0201 fold 0 [ 2 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.0, 'layer_sizes': [64]}\n",
"-- # unique peptides = 6377\n",
"-- # unique peptides = 3188\n",
"train tau: 0.853621\n",
"train auc: 0.998382\n",
"train f1: 0.969092\n",
"test tau: 0.504335\n",
"test auc: 0.893769\n",
"test f1: 0.727977\n",
" HLA-A0201 fold 0 [ 3 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.5, 'layer_sizes': [4]}\n",
"-- # unique peptides = 6377\n",
"-- # unique peptides = 3188\n",
"train tau: 0.612068\n",
"train auc: 0.951109\n",
"train f1: 0.828886\n",
"test tau: 0.609418\n",
"test auc: 0.945726\n",
"test f1: 0.810010\n",
" HLA-A0201 fold 0 [ 4 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.0, 'layer_sizes': [4]}\n",
"-- # unique peptides = 6377\n",
"-- # unique peptides = 3188\n",
"train tau: 0.649106\n",
"train auc: 0.968903\n",
"train f1: 0.851287\n",
"test tau: 0.592833\n",
"test auc: 0.941599\n",
"test f1: 0.793750\n",
" HLA-A0201 fold 0 [ 5 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 8, 'dropout_probability': 0.5, 'layer_sizes': [64]}\n",
"-- # unique peptides = 6377\n",
"-- # unique peptides = 3188\n",
"train tau: 0.598930\n",
"train auc: 0.943413\n",
"train f1: 0.809371\n",
"test tau: 0.596894\n",
"test auc: 0.937999\n",
"test f1: 0.786494\n",
" HLA-A0201 fold 0 [ 6 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 8, 'dropout_probability': 0.0, 'layer_sizes': [64]}\n",
"-- # unique peptides = 6377\n",
"-- # unique peptides = 3188\n",
"train tau: 0.775955\n",
"train auc: 0.995456\n",
"train f1: 0.955315\n",
"test tau: 0.522877\n",
"test auc: 0.897357\n",
"test f1: 0.732064\n",
" HLA-A0201 fold 0 [ 7 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.5, 'layer_sizes': [64]}\n",
"-- # unique peptides = 6377\n",
"-- # unique peptides = 3188\n",
"train tau:
0.721963\n", 0.617173\n",
"train auc:
0.987146\n", 0.952590\n",
"train f1:
0.917336\n", 0.843105\n",
"test tau:
0.559493\n", 0.613136\n",
"test auc:
0.915950\n", 0.947580\n",
"test f1:
0.770642\n", 0.822068\n",
"-- fold #2/3\n",
" HLA-A0201 fold
1 [ 0
/ 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.0, 'layer_sizes': [4]}\n",
"-- # unique peptides = 6376\n",
"-- # unique peptides = 3189\n",
"train tau: 0.658287\n",
"train auc: 0.966108\n",
"train f1: 0.860940\n",
"test tau: 0.599771\n",
"test auc: 0.951407\n",
"test f1: 0.826462\n",
" HLA-A0201 fold 1 [ 1 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.5, 'layer_sizes': [4]}\n",
"-- # unique peptides = 6376\n",
"-- # unique peptides = 3189\n",
"train tau: 0.617840\n",
"train auc: 0.949856\n",
"train f1: 0.829387\n",
"test tau: 0.603994\n",
"test auc: 0.949710\n",
"test f1: 0.841608\n",
" HLA-A0201 fold 1 [
2 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.0, 'layer_sizes': [64]}\n",
"-- # unique peptides = 6376\n",
"-- # unique peptides = 3189\n",
"train tau: 0.862390\n",
"train auc: 0.998619\n",
"train f1: 0.968971\n",
"test tau: 0.496758\n",
"test auc: 0.893675\n",
"test f1: 0.737198\n",
" HLA-A0201 fold 1
[ 3 /
48] 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim':
16, 8, 'dropout_probability': 0.0, 'layer_sizes': [4]}\n",
"-- # unique peptides = 6376\n",
"-- # unique peptides = 3189\n",
"train tau: 0.641526\n",
"train auc: 0.959798\n",
"train f1: 0.844069\n",
"test tau: 0.607306\n",
"test auc: 0.953694\n",
"test f1: 0.836469\n",
" HLA-A0201 fold 1 [ 4 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 8, 'dropout_probability': 0.0, 'layer_sizes': [64]}\n",
"-- # unique peptides = 6376\n",
"-- # unique peptides = 3189\n",
"train tau: 0.785620\n",
"train auc: 0.995677\n",
"train f1: 0.953636\n",
"test tau: 0.519690\n",
"test auc: 0.900378\n",
"test f1: 0.745602\n",
" HLA-A0201 fold 1 [ 5 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.5, 'layer_sizes': [64]}\n",
"-- # unique peptides = 6376\n",
"-- # unique peptides = 3189\n",
"train tau: 0.622023\n",
"train auc: 0.951691\n",
"train f1: 0.830517\n",
"test tau: 0.606459\n",
"test auc: 0.951017\n",
"test f1: 0.837929\n",
" HLA-A0201 fold 1 [ 6 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 8, 'dropout_probability': 0.5, 'layer_sizes': [64]}\n",
"-- # unique peptides = 6376\n",
"-- # unique peptides = 3189\n",
"train tau: 0.606570\n",
"train auc: 0.942179\n",
"train f1: 0.789515\n",
"test tau: 0.592905\n",
"test auc: 0.943026\n",
"test f1: 0.785859\n",
" HLA-A0201 fold 1 [ 7 / 8] train_size=21917 test_size=10959 impute=False model={'activation': 'tanh', 'embedding_output_dim': 8, 'dropout_probability': 0.5, 'layer_sizes': [4]}\n",
"-- # unique peptides = 6376\n",
"-- # unique peptides = 3189\n",
"train tau: 0.601921\n",
"train auc: 0.940715\n",
"train f1: 0.800403\n",
"test tau: 0.589560\n",
"test auc: 0.942098\n",
"test f1: 0.802542\n",
"-- fold #3/3\n",
" HLA-A0201 fold 2 [ 0 / 8] train_size=21918 test_size=10958 impute=False model={'activation': 'tanh', 'embedding_output_dim': 8, 'dropout_probability': 0.0, 'layer_sizes':
[64]}\n" [4]}\n",
"-- # unique peptides = 6377\n",
"-- # unique peptides = 3188\n",
"train tau: 0.636318\n",
"train auc: 0.962871\n",
"train f1: 0.857006\n",
"test tau: 0.612441\n",
"test auc: 0.947806\n",
"test f1: 0.824917\n",
" HLA-A0201 fold 2 [ 1 / 8] train_size=21918 test_size=10958 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.5, 'layer_sizes': [64]}\n",
"-- # unique peptides = 6377\n",
"-- # unique peptides = 3188\n",
"train tau: 0.619337\n",
"train auc: 0.954082\n",
"train f1: 0.832507\n",
"test tau: 0.613741\n",
"test auc: 0.947980\n",
"test f1: 0.827857\n",
" HLA-A0201 fold 2 [ 2 / 8] train_size=21918 test_size=10958 impute=False model={'activation': 'tanh', 'embedding_output_dim': 8, 'dropout_probability': 0.5, 'layer_sizes': [64]}\n",
"-- # unique peptides = 6377\n",
"-- # unique peptides = 3188\n",
"train tau: 0.602052\n",
"train auc: 0.944615\n",
"train f1: 0.803146\n",
"test tau: 0.603724\n",
"test auc: 0.941925\n",
"test f1: 0.801995\n",
" HLA-A0201 fold 2 [ 3 / 8] train_size=21918 test_size=10958 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.5, 'layer_sizes': [4]}\n",
"-- # unique peptides = 6377\n",
"-- # unique peptides = 3188\n",
"train tau: 0.613908\n",
"train auc: 0.951755\n",
"train f1: 0.833457\n",
"test tau: 0.607017\n",
"test auc: 0.945700\n",
"test f1: 0.824331\n",
" HLA-A0201 fold 2 [ 4 / 8] train_size=21918 test_size=10958 impute=False model={'activation': 'tanh', 'embedding_output_dim': 8, 'dropout_probability': 0.0, 'layer_sizes': [64]}\n",
"-- # unique peptides = 6377\n",
"-- # unique peptides = 3188\n",
"train tau: 0.765497\n",
"train auc: 0.994255\n",
"train f1: 0.944039\n",
"test tau: 0.551025\n",
"test auc: 0.914951\n",
"test f1: 0.771511\n",
" HLA-A0201 fold 2 [ 5 / 8] train_size=21918 test_size=10958 impute=False model={'activation': 'tanh', 'embedding_output_dim': 8, 'dropout_probability': 0.5, 'layer_sizes': [4]}\n",
"-- # unique peptides = 6377\n",
"-- # unique peptides = 3188\n",
"train tau: 0.599096\n",
"train auc: 0.943211\n",
"train f1: 0.787957\n",
"test tau: 0.599313\n",
"test auc: 0.940589\n",
"test f1: 0.796334\n",
" HLA-A0201 fold 2 [ 6 / 8] train_size=21918 test_size=10958 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.0, 'layer_sizes': [4]}\n",
"-- # unique peptides = 6377\n",
"-- # unique peptides = 3188\n",
"train tau: 0.651730\n",
"train auc: 0.969883\n",
"train f1: 0.875209\n",
"test tau: 0.591820\n",
"test auc: 0.943290\n",
"test f1: 0.823418\n",
" HLA-A0201 fold 2 [ 7 / 8] train_size=21918 test_size=10958 impute=False model={'activation': 'tanh', 'embedding_output_dim': 32, 'dropout_probability': 0.0, 'layer_sizes': [64]}\n",
"-- # unique peptides = 6377\n",
"-- # unique peptides = 3188\n",
"train tau: 0.854433\n",
"train auc: 0.998315\n",
"train f1: 0.967556\n",
"test tau: 0.507586\n",
"test auc: 0.895266\n",
"test f1: 0.743049\n",
"2240.77424288\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": "data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" | activation | allele | allele_size | dropout_probability | embedding_output_dim | fit_time | layer_sizes | model_params | test_auc | test_f1 | test_tau | train_auc | train_f1 | train_size | train_tau | layer0_size |
---|
0 | tanh | HLA-A0201 | 32876 | 0.0 | 8 | 84.926343 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.947852 | 0.812253 | 0.612421 | 0.961440 | 0.857345 | 21917 | 0.635373 | 4 |
---|
1 | tanh | HLA-A0201 | 32876 | 0.5 | 8 | 96.526969 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.934945 | 0.794962 | 0.593591 | 0.941433 | 0.816062 | 21917 | 0.595930 | 4 |
---|
2 | tanh | HLA-A0201 | 32876 | 0.0 | 32 | 87.635577 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.893769 | 0.727977 | 0.504335 | 0.998382 | 0.969092 | 21917 | 0.853621 | 64 |
---|
3 | tanh | HLA-A0201 | 32876 | 0.5 | 32 | 99.237046 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.945726 | 0.810010 | 0.609418 | 0.951109 | 0.828886 | 21917 | 0.612068 | 4 |
---|
4 | tanh | HLA-A0201 | 32876 | 0.0 | 32 | 87.175426 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.941599 | 0.793750 | 0.592833 | 0.968903 | 0.851287 | 21917 | 0.649106 | 4 |
---|
5 | tanh | HLA-A0201 | 32876 | 0.5 | 8 | 98.160665 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.937999 | 0.786494 | 0.596894 | 0.943413 | 0.809371 | 21917 | 0.598930 | 64 |
---|
6 | tanh | HLA-A0201 | 32876 | 0.0 | 8 | 86.410095 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.897357 | 0.732064 | 0.522877 | 0.995456 | 0.955315 | 21917 | 0.775955 | 64 |
---|
7 | tanh | HLA-A0201 | 32876 | 0.5 | 32 | 100.420329 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.947580 | 0.822068 | 0.613136 | 0.952590 | 0.843105 | 21917 | 0.617173 | 64 |
---|
8 | tanh | HLA-A0201 | 32876 | 0.0 | 32 | 86.968521 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.951407 | 0.826462 | 0.599771 | 0.966108 | 0.860940 | 21917 | 0.658287 | 4 |
---|
9 | tanh | HLA-A0201 | 32876 | 0.5 | 32 | 98.788730 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.949710 | 0.841608 | 0.603994 | 0.949856 | 0.829387 | 21917 | 0.617840 | 4 |
---|
10 | tanh | HLA-A0201 | 32876 | 0.0 | 32 | 87.397654 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.893675 | 0.737198 | 0.496758 | 0.998619 | 0.968971 | 21917 | 0.862390 | 64 |
---|
11 | tanh | HLA-A0201 | 32876 | 0.0 | 8 | 84.933377 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.953694 | 0.836469 | 0.607306 | 0.959798 | 0.844069 | 21917 | 0.641526 | 4 |
---|
12 | tanh | HLA-A0201 | 32876 | 0.0 | 8 | 86.291791 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.900378 | 0.745602 | 0.519690 | 0.995677 | 0.953636 | 21917 | 0.785620 | 64 |
---|
13 | tanh | HLA-A0201 | 32876 | 0.5 | 32 | 100.035493 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.951017 | 0.837929 | 0.606459 | 0.951691 | 0.830517 | 21917 | 0.622023 | 64 |
---|
14 | tanh | HLA-A0201 | 32876 | 0.5 | 8 | 98.413581 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.943026 | 0.785859 | 0.592905 | 0.942179 | 0.789515 | 21917 | 0.606570 | 64 |
---|
15 | tanh | HLA-A0201 | 32876 | 0.5 | 8 | 97.010899 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.942098 | 0.802542 | 0.589560 | 0.940715 | 0.800403 | 21917 | 0.601921 | 4 |
---|
16 | tanh | HLA-A0201 | 32876 | 0.0 | 8 | 85.211551 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.947806 | 0.824917 | 0.612441 | 0.962871 | 0.857006 | 21918 | 0.636318 | 4 |
---|
17 | tanh | HLA-A0201 | 32876 | 0.5 | 32 | 100.653926 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.947980 | 0.827857 | 0.613741 | 0.954082 | 0.832507 | 21918 | 0.619337 | 64 |
---|
18 | tanh | HLA-A0201 | 32876 | 0.5 | 8 | 98.564194 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.941925 | 0.801995 | 0.603724 | 0.944615 | 0.803146 | 21918 | 0.602052 | 64 |
---|
19 | tanh | HLA-A0201 | 32876 | 0.5 | 32 | 99.454345 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.945700 | 0.824331 | 0.607017 | 0.951755 | 0.833457 | 21918 | 0.613908 | 4 |
---|
20 | tanh | HLA-A0201 | 32876 | 0.0 | 8 | 86.658918 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.914951 | 0.771511 | 0.551025 | 0.994255 | 0.944039 | 21918 | 0.765497 | 64 |
---|
21 | tanh | HLA-A0201 | 32876 | 0.5 | 8 | 96.809296 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.940589 | 0.796334 | 0.599313 | 0.943211 | 0.787957 | 21918 | 0.599096 | 4 |
---|
22 | tanh | HLA-A0201 | 32876 | 0.0 | 32 | 87.281114 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.943290 | 0.823418 | 0.591820 | 0.969883 | 0.875209 | 21918 | 0.651730 | 4 |
---|
23 | tanh | HLA-A0201 | 32876 | 0.0 | 32 | 87.568679 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.895266 | 0.743049 | 0.507586 | 0.998315 | 0.967556 | 21918 | 0.854433 | 64 |
---|
\n",
" "
],
"text/plain": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0msample_weights\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mweights_cv_train\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 41\u001b[0;31m n_training_epochs=200)\n\u001b[0m\u001b[1;32m 42\u001b[0m \u001b[0mfit_time\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/tim/sinai/git/mhcflurry/mhcflurry/class1_binding_predictor.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, Y, sample_weights, X_pretrain, Y_pretrain, sample_weights_pretrain, n_training_epochs, verbose, batch_size)\u001b[0m\n\u001b[1;32m 331\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 332\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbatch_size\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 333\u001b[0;31m shuffle=True)\n\u001b[0m\u001b[1;32m 334\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 335\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mclassmethod\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/tim/venvs/analysis-venv-2.7/lib/python2.7/site-packages/keras/models.pyc\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, nb_epoch, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, **kwargs)\u001b[0m\n\u001b[1;32m 403\u001b[0m \u001b[0mshuffle\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mshuffle\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 404\u001b[0m \u001b[0mclass_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mclass_weight\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 405\u001b[0;31m sample_weight=sample_weight)\n\u001b[0m\u001b[1;32m 406\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 407\u001b[0m def evaluate(self, x, y, batch_size=32, verbose=1,\n",
"\u001b[0;32m/Users/tim/venvs/analysis-venv-2.7/lib/python2.7/site-packages/keras/engine/training.pyc\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, nb_epoch, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight)\u001b[0m\n\u001b[1;32m 1044\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mverbose\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcallbacks\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1045\u001b[0m \u001b[0mval_f\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mval_f\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mval_ins\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mval_ins\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshuffle\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mshuffle\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1046\u001b[0;31m callback_metrics=callback_metrics)\n\u001b[0m\u001b[1;32m 1047\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1048\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mevaluate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m32\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/tim/venvs/analysis-venv-2.7/lib/python2.7/site-packages/keras/engine/training.pyc\u001b[0m in \u001b[0;36m_fit_loop\u001b[0;34m(self, f, ins, out_labels, batch_size, nb_epoch, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics)\u001b[0m\n\u001b[1;32m 788\u001b[0m \u001b[0mbatch_logs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0ml\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mo\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 789\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 790\u001b[0;31m \u001b[0mcallbacks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_batch_end\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch_index\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_logs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 791\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 792\u001b[0m \u001b[0mepoch_logs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/tim/venvs/analysis-venv-2.7/lib/python2.7/site-packages/keras/callbacks.pyc\u001b[0m in \u001b[0;36mon_batch_end\u001b[0;34m(self, batch, logs)\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[0mt_before_callbacks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mcallback\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcallbacks\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 60\u001b[0;31m \u001b[0mcallback\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_batch_end\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlogs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 61\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_delta_ts_batch_end\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mt_before_callbacks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0mdelta_t_median\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmedian\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_delta_ts_batch_end\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/tim/venvs/analysis-venv-2.7/lib/python2.7/site-packages/keras/callbacks.pyc\u001b[0m in \u001b[0;36mon_batch_end\u001b[0;34m(self, batch, logs)\u001b[0m\n\u001b[1;32m 146\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mlogs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 147\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mk\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtotals\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 148\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtotals\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mv\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 149\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtotals\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mv\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: " activation allele allele_size dropout_probability \\\n",
"0 tanh HLA-A0201 32876 0.0 \n",
"1 tanh HLA-A0201 32876 0.5 \n",
"2 tanh HLA-A0201 32876 0.0 \n",
"3 tanh HLA-A0201 32876 0.5 \n",
"4 tanh HLA-A0201 32876 0.0 \n",
"5 tanh HLA-A0201 32876 0.5 \n",
"6 tanh HLA-A0201 32876 0.0 \n",
"7 tanh HLA-A0201 32876 0.5 \n",
"8 tanh HLA-A0201 32876 0.0 \n",
"9 tanh HLA-A0201 32876 0.5 \n",
"10 tanh HLA-A0201 32876 0.0 \n",
"11 tanh HLA-A0201 32876 0.0 \n",
"12 tanh HLA-A0201 32876 0.0 \n",
"13 tanh HLA-A0201 32876 0.5 \n",
"14 tanh HLA-A0201 32876 0.5 \n",
"15 tanh HLA-A0201 32876 0.5 \n",
"16 tanh HLA-A0201 32876 0.0 \n",
"17 tanh HLA-A0201 32876 0.5 \n",
"18 tanh HLA-A0201 32876 0.5 \n",
"19 tanh HLA-A0201 32876 0.5 \n",
"20 tanh HLA-A0201 32876 0.0 \n",
"21 tanh HLA-A0201 32876 0.5 \n",
"22 tanh HLA-A0201 32876 0.0 \n",
"23 tanh HLA-A0201 32876 0.0 \n",
"\n",
" embedding_output_dim fit_time layer_sizes \\\n",
"0 8 84.926343 [4] \n",
"1 8 96.526969 [4] \n",
"2 32 87.635577 [64] \n",
"3 32 99.237046 [4] \n",
"4 32 87.175426 [4] \n",
"5 8 98.160665 [64] \n",
"6 8 86.410095 [64] \n",
"7 32 100.420329 [64] \n",
"8 32 86.968521 [4] \n",
"9 32 98.788730 [4] \n",
"10 32 87.397654 [64] \n",
"11 8 84.933377 [4] \n",
"12 8 86.291791 [64] \n",
"13 32 100.035493 [64] \n",
"14 8 98.413581 [64] \n",
"15 8 97.010899 [4] \n",
"16 8 85.211551 [4] \n",
"17 32 100.653926 [64] \n",
"18 8 98.564194 [64] \n",
"19 32 99.454345 [4] \n",
"20 8 86.658918 [64] \n",
"21 8 96.809296 [4] \n",
"22 32 87.281114 [4] \n",
"23 32 87.568679 [64] \n",
"\n",
" model_params test_auc test_f1 \\\n",
"0 {u'activation': u'tanh', u'embedding_output_di... 0.947852 0.812253 \n",
"1 {u'activation': u'tanh', u'embedding_output_di... 0.934945 0.794962 \n",
"2 {u'activation': u'tanh', u'embedding_output_di... 0.893769 0.727977 \n",
"3 {u'activation': u'tanh', u'embedding_output_di... 0.945726 0.810010 \n",
"4 {u'activation': u'tanh', u'embedding_output_di... 0.941599 0.793750 \n",
"5 {u'activation': u'tanh', u'embedding_output_di... 0.937999 0.786494 \n",
"6 {u'activation': u'tanh', u'embedding_output_di... 0.897357 0.732064 \n",
"7 {u'activation': u'tanh', u'embedding_output_di... 0.947580 0.822068 \n",
"8 {u'activation': u'tanh', u'embedding_output_di... 0.951407 0.826462 \n",
"9 {u'activation': u'tanh', u'embedding_output_di... 0.949710 0.841608 \n",
"10 {u'activation': u'tanh', u'embedding_output_di... 0.893675 0.737198 \n",
"11 {u'activation': u'tanh', u'embedding_output_di... 0.953694 0.836469 \n",
"12 {u'activation': u'tanh', u'embedding_output_di... 0.900378 0.745602 \n",
"13 {u'activation': u'tanh', u'embedding_output_di... 0.951017 0.837929 \n",
"14 {u'activation': u'tanh', u'embedding_output_di... 0.943026 0.785859 \n",
"15 {u'activation': u'tanh', u'embedding_output_di... 0.942098 0.802542 \n",
"16 {u'activation': u'tanh', u'embedding_output_di... 0.947806 0.824917 \n",
"17 {u'activation': u'tanh', u'embedding_output_di... 0.947980 0.827857 \n",
"18 {u'activation': u'tanh', u'embedding_output_di... 0.941925 0.801995 \n",
"19 {u'activation': u'tanh', u'embedding_output_di... 0.945700 0.824331 \n",
"20 {u'activation': u'tanh', u'embedding_output_di... 0.914951 0.771511 \n",
"21 {u'activation': u'tanh', u'embedding_output_di... 0.940589 0.796334 \n",
"22 {u'activation': u'tanh', u'embedding_output_di... 0.943290 0.823418 \n",
"23 {u'activation': u'tanh', u'embedding_output_di... 0.895266 0.743049 \n",
"\n",
" test_tau train_auc train_f1 train_size train_tau layer0_size \n",
"0 0.612421 0.961440 0.857345 21917 0.635373 4 \n",
"1 0.593591 0.941433 0.816062 21917 0.595930 4 \n",
"2 0.504335 0.998382 0.969092 21917 0.853621 64 \n",
"3 0.609418 0.951109 0.828886 21917 0.612068 4 \n",
"4 0.592833 0.968903 0.851287 21917 0.649106 4 \n",
"5 0.596894 0.943413 0.809371 21917 0.598930 64 \n",
"6 0.522877 0.995456 0.955315 21917 0.775955 64 \n",
"7 0.613136 0.952590 0.843105 21917 0.617173 64 \n",
"8 0.599771 0.966108 0.860940 21917 0.658287 4 \n",
"9 0.603994 0.949856 0.829387 21917 0.617840 4 \n",
"10 0.496758 0.998619 0.968971 21917 0.862390 64 \n",
"11 0.607306 0.959798 0.844069 21917 0.641526 4 \n",
"12 0.519690 0.995677 0.953636 21917 0.785620 64 \n",
"13 0.606459 0.951691 0.830517 21917 0.622023 64 \n",
"14 0.592905 0.942179 0.789515 21917 0.606570 64 \n",
"15 0.589560 0.940715 0.800403 21917 0.601921 4 \n",
"16 0.612441 0.962871 0.857006 21918 0.636318 4 \n",
"17 0.613741 0.954082 0.832507 21918 0.619337 64 \n",
"18 0.603724 0.944615 0.803146 21918 0.602052 64 \n",
"19 0.607017 0.951755 0.833457 21918 0.613908 4 \n",
"20 0.551025 0.994255 0.944039 21918 0.765497 64 \n",
"21 0.599313 0.943211 0.787957 21918 0.599096 4 \n",
"22 0.591820 0.969883 0.875209 21918 0.651730 4 \n",
"23 0.507586 0.998315 0.967556 21918 0.854433 64 "
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
...
" \n",
" original_peptides_train = original_peptides[train_indices]\n",
" original_peptides_test = original_peptides[test_indices]\n",
" impute = False\n",
" \n",
" np.random.shuffle(models_params_list)\n",
" for (i, model_params) in enumerate(models_params_list):\n",
" print(\"%10s fold %3d [%3d / %3d] train_size=%d test_size=%d impute=%s model=%s\" %\n",
" (allele, fold_num, i, len(models_params_list), len(train_indices), len(test_indices), impute, model_params))\n",
...
" Y_cv_train,\n",
" sample_weights=weights_cv_train,\n",
" verbose=False,\n",
"
n_training_epochs=200)\n", n_training_epochs=250)\n",
" fit_time += time.time()\n",
" \n",
" Y_cv_train_9mer_predictions = predictor.predict(X_cv_train)\n",
...
},
{
"cell_type": "code",
"execution_count":
null, 51,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"(24, 17)"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cv_df.shape"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index([u'activation', u'allele', u'allele_size', u'dropout_probability',\n",
" u'embedding_output_dim', u'fit_time', u'layer_sizes', u'model_params',\n",
" u'test_auc', u'test_f1', u'test_tau', u'train_auc', u'train_f1',\n",
" u'train_size', u'train_tau', u'layer0_size', u'combined'],\n",
" dtype='object')\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/tim/anaconda2/envs/standard-2.7/lib/python2.7/site-packages/ipykernel/__main__.py:9: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)\n"
]
}
],
"source": [
"cv_df_str = cv_df.copy()\n",
"print(cv_df_str.columns)\n",
"del cv_df_str['model_params']\n",
"del cv_df_str['fit_time']\n",
"\n",
"for col in [\"layer_sizes\"]:\n",
" cv_df_str[col] = [str(x) for x in cv_df_str[col]]\n",
"summary = cv_df_str.groupby(list(cv_df_str.columns[:6])).mean() #.reset_index()\n",
"summary.sort(\"combined\", ascending=False, inplace=True)\n",
"summary.to_csv(\"../data/cv_hla0201_summary.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"train_data[\"HLA-A0201\"].X_index.shape" "cv_df[\"combined\"] = cv_df.test_auc + cv_df.test_f1 + cv_df.test_tau"
]
},
{
"cell_type": "code",
"execution_count":
null, 50,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/tim/anaconda2/envs/standard-2.7/lib/python2.7/site-packages/ipykernel/__main__.py:1: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)\n",
" if __name__ == '__main__':\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" | activation | allele | allele_size | dropout_probability | embedding_output_dim | fit_time | layer_sizes | model_params | test_auc | test_f1 | test_tau | train_auc | train_f1 | train_size | train_tau | layer0_size | combined |
---|
11 | tanh | HLA-A0201 | 32876 | 0.0 | 8 | 84.933377 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.953694 | 0.836469 | 0.607306 | 0.959798 | 0.844069 | 21917 | 0.641526 | 4 | 2.397469 |
---|
13 | tanh | HLA-A0201 | 32876 | 0.5 | 32 | 100.035493 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.951017 | 0.837929 | 0.606459 | 0.951691 | 0.830517 | 21917 | 0.622023 | 64 | 2.395405 |
---|
9 | tanh | HLA-A0201 | 32876 | 0.5 | 32 | 98.788730 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.949710 | 0.841608 | 0.603994 | 0.949856 | 0.829387 | 21917 | 0.617840 | 4 | 2.395312 |
---|
17 | tanh | HLA-A0201 | 32876 | 0.5 | 32 | 100.653926 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.947980 | 0.827857 | 0.613741 | 0.954082 | 0.832507 | 21918 | 0.619337 | 64 | 2.389578 |
---|
16 | tanh | HLA-A0201 | 32876 | 0.0 | 8 | 85.211551 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.947806 | 0.824917 | 0.612441 | 0.962871 | 0.857006 | 21918 | 0.636318 | 4 | 2.385164 |
---|
7 | tanh | HLA-A0201 | 32876 | 0.5 | 32 | 100.420329 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.947580 | 0.822068 | 0.613136 | 0.952590 | 0.843105 | 21917 | 0.617173 | 64 | 2.382784 |
---|
8 | tanh | HLA-A0201 | 32876 | 0.0 | 32 | 86.968521 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.951407 | 0.826462 | 0.599771 | 0.966108 | 0.860940 | 21917 | 0.658287 | 4 | 2.377641 |
---|
19 | tanh | HLA-A0201 | 32876 | 0.5 | 32 | 99.454345 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.945700 | 0.824331 | 0.607017 | 0.951755 | 0.833457 | 21918 | 0.613908 | 4 | 2.377048 |
---|
0 | tanh | HLA-A0201 | 32876 | 0.0 | 8 | 84.926343 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.947852 | 0.812253 | 0.612421 | 0.961440 | 0.857345 | 21917 | 0.635373 | 4 | 2.372526 |
---|
3 | tanh | HLA-A0201 | 32876 | 0.5 | 32 | 99.237046 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.945726 | 0.810010 | 0.609418 | 0.951109 | 0.828886 | 21917 | 0.612068 | 4 | 2.365154 |
---|
22 | tanh | HLA-A0201 | 32876 | 0.0 | 32 | 87.281114 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.943290 | 0.823418 | 0.591820 | 0.969883 | 0.875209 | 21918 | 0.651730 | 4 | 2.358528 |
---|
18 | tanh | HLA-A0201 | 32876 | 0.5 | 8 | 98.564194 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.941925 | 0.801995 | 0.603724 | 0.944615 | 0.803146 | 21918 | 0.602052 | 64 | 2.347645 |
---|
21 | tanh | HLA-A0201 | 32876 | 0.5 | 8 | 96.809296 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.940589 | 0.796334 | 0.599313 | 0.943211 | 0.787957 | 21918 | 0.599096 | 4 | 2.336237 |
---|
15 | tanh | HLA-A0201 | 32876 | 0.5 | 8 | 97.010899 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.942098 | 0.802542 | 0.589560 | 0.940715 | 0.800403 | 21917 | 0.601921 | 4 | 2.334200 |
---|
4 | tanh | HLA-A0201 | 32876 | 0.0 | 32 | 87.175426 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.941599 | 0.793750 | 0.592833 | 0.968903 | 0.851287 | 21917 | 0.649106 | 4 | 2.328182 |
---|
1 | tanh | HLA-A0201 | 32876 | 0.5 | 8 | 96.526969 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.934945 | 0.794962 | 0.593591 | 0.941433 | 0.816062 | 21917 | 0.595930 | 4 | 2.323498 |
---|
14 | tanh | HLA-A0201 | 32876 | 0.5 | 8 | 98.413581 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.943026 | 0.785859 | 0.592905 | 0.942179 | 0.789515 | 21917 | 0.606570 | 64 | 2.321790 |
---|
5 | tanh | HLA-A0201 | 32876 | 0.5 | 8 | 98.160665 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.937999 | 0.786494 | 0.596894 | 0.943413 | 0.809371 | 21917 | 0.598930 | 64 | 2.321387 |
---|
20 | tanh | HLA-A0201 | 32876 | 0.0 | 8 | 86.658918 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.914951 | 0.771511 | 0.551025 | 0.994255 | 0.944039 | 21918 | 0.765497 | 64 | 2.237487 |
---|
12 | tanh | HLA-A0201 | 32876 | 0.0 | 8 | 86.291791 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.900378 | 0.745602 | 0.519690 | 0.995677 | 0.953636 | 21917 | 0.785620 | 64 | 2.165669 |
---|
6 | tanh | HLA-A0201 | 32876 | 0.0 | 8 | 86.410095 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.897357 | 0.732064 | 0.522877 | 0.995456 | 0.955315 | 21917 | 0.775955 | 64 | 2.152299 |
---|
23 | tanh | HLA-A0201 | 32876 | 0.0 | 32 | 87.568679 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.895266 | 0.743049 | 0.507586 | 0.998315 | 0.967556 | 21918 | 0.854433 | 64 | 2.145901 |
---|
10 | tanh | HLA-A0201 | 32876 | 0.0 | 32 | 87.397654 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.893675 | 0.737198 | 0.496758 | 0.998619 | 0.968971 | 21917 | 0.862390 | 64 | 2.127631 |
---|
2 | tanh | HLA-A0201 | 32876 | 0.0 | 32 | 87.635577 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.893769 | 0.727977 | 0.504335 | 0.998382 | 0.969092 | 21917 | 0.853621 | 64 | 2.126081 |
---|
\n",
" "
],
"text/plain": [
" activation allele allele_size dropout_probability \\\n",
"11 tanh HLA-A0201 32876 0.0 \n",
"13 tanh HLA-A0201 32876 0.5 \n",
"9 tanh HLA-A0201 32876 0.5 \n",
"17 tanh HLA-A0201 32876 0.5 \n",
"16 tanh HLA-A0201 32876 0.0 \n",
"7 tanh HLA-A0201 32876 0.5 \n",
"8 tanh HLA-A0201 32876 0.0 \n",
"19 tanh HLA-A0201 32876 0.5 \n",
"0 tanh HLA-A0201 32876 0.0 \n",
"3 tanh HLA-A0201 32876 0.5 \n",
"22 tanh HLA-A0201 32876 0.0 \n",
"18 tanh HLA-A0201 32876 0.5 \n",
"21 tanh HLA-A0201 32876 0.5 \n",
"15 tanh HLA-A0201 32876 0.5 \n",
"4 tanh HLA-A0201 32876 0.0 \n",
"1 tanh HLA-A0201 32876 0.5 \n",
"14 tanh HLA-A0201 32876 0.5 \n",
"5 tanh HLA-A0201 32876 0.5 \n",
"20 tanh HLA-A0201 32876 0.0 \n",
"12 tanh HLA-A0201 32876 0.0 \n",
"6 tanh HLA-A0201 32876 0.0 \n",
"23 tanh HLA-A0201 32876 0.0 \n",
"10 tanh HLA-A0201 32876 0.0 \n",
"2 tanh HLA-A0201 32876 0.0 \n",
"\n",
" embedding_output_dim fit_time layer_sizes \\\n",
"11 8 84.933377 [4] \n",
"13 32 100.035493 [64] \n",
"9 32 98.788730 [4] \n",
"17 32 100.653926 [64] \n",
"16 8 85.211551 [4] \n",
"7 32 100.420329 [64] \n",
"8 32 86.968521 [4] \n",
"19 32 99.454345 [4] \n",
"0 8 84.926343 [4] \n",
"3 32 99.237046 [4] \n",
"22 32 87.281114 [4] \n",
"18 8 98.564194 [64] \n",
"21 8 96.809296 [4] \n",
"15 8 97.010899 [4] \n",
"4 32 87.175426 [4] \n",
"1 8 96.526969 [4] \n",
"14 8 98.413581 [64] \n",
"5 8 98.160665 [64] \n",
"20 8 86.658918 [64] \n",
"12 8 86.291791 [64] \n",
"6 8 86.410095 [64] \n",
"23 32 87.568679 [64] \n",
"10 32 87.397654 [64] \n",
"2 32 87.635577 [64] \n",
"\n",
" model_params test_auc test_f1 \\\n",
"11 {u'activation': u'tanh', u'embedding_output_di... 0.953694 0.836469 \n",
"13 {u'activation': u'tanh', u'embedding_output_di... 0.951017 0.837929 \n",
"9 {u'activation': u'tanh', u'embedding_output_di... 0.949710 0.841608 \n",
"17 {u'activation': u'tanh', u'embedding_output_di... 0.947980 0.827857 \n",
"16 {u'activation': u'tanh', u'embedding_output_di... 0.947806 0.824917 \n",
"7 {u'activation': u'tanh', u'embedding_output_di... 0.947580 0.822068 \n",
"8 {u'activation': u'tanh', u'embedding_output_di... 0.951407 0.826462 \n",
"19 {u'activation': u'tanh', u'embedding_output_di... 0.945700 0.824331 \n",
"0 {u'activation': u'tanh', u'embedding_output_di... 0.947852 0.812253 \n",
"3 {u'activation': u'tanh', u'embedding_output_di... 0.945726 0.810010 \n",
"22 {u'activation': u'tanh', u'embedding_output_di... 0.943290 0.823418 \n",
"18 {u'activation': u'tanh', u'embedding_output_di... 0.941925 0.801995 \n",
"21 {u'activation': u'tanh', u'embedding_output_di... 0.940589 0.796334 \n",
"15 {u'activation': u'tanh', u'embedding_output_di... 0.942098 0.802542 \n",
"4 {u'activation': u'tanh', u'embedding_output_di... 0.941599 0.793750 \n",
"1 {u'activation': u'tanh', u'embedding_output_di... 0.934945 0.794962 \n",
"14 {u'activation': u'tanh', u'embedding_output_di... 0.943026 0.785859 \n",
"5 {u'activation': u'tanh', u'embedding_output_di... 0.937999 0.786494 \n",
"20 {u'activation': u'tanh', u'embedding_output_di... 0.914951 0.771511 \n",
"12 {u'activation': u'tanh', u'embedding_output_di... 0.900378 0.745602 \n",
"6 {u'activation': u'tanh', u'embedding_output_di... 0.897357 0.732064 \n",
"23 {u'activation': u'tanh', u'embedding_output_di... 0.895266 0.743049 \n",
"10 {u'activation': u'tanh', u'embedding_output_di... 0.893675 0.737198 \n",
"2 {u'activation': u'tanh', u'embedding_output_di... 0.893769 0.727977 \n",
"\n",
" test_tau train_auc train_f1 train_size train_tau layer0_size \\\n",
"11 0.607306 0.959798 0.844069 21917 0.641526 4 \n",
"13 0.606459 0.951691 0.830517 21917 0.622023 64 \n",
"9 0.603994 0.949856 0.829387 21917 0.617840 4 \n",
"17 0.613741 0.954082 0.832507 21918 0.619337 64 \n",
"16 0.612441 0.962871 0.857006 21918 0.636318 4 \n",
"7 0.613136 0.952590 0.843105 21917 0.617173 64 \n",
"8 0.599771 0.966108 0.860940 21917 0.658287 4 \n",
"19 0.607017 0.951755 0.833457 21918 0.613908 4 \n",
"0 0.612421 0.961440 0.857345 21917 0.635373 4 \n",
"3 0.609418 0.951109 0.828886 21917 0.612068 4 \n",
"22 0.591820 0.969883 0.875209 21918 0.651730 4 \n",
"18 0.603724 0.944615 0.803146 21918 0.602052 64 \n",
"21 0.599313 0.943211 0.787957 21918 0.599096 4 \n",
"15 0.589560 0.940715 0.800403 21917 0.601921 4 \n",
"4 0.592833 0.968903 0.851287 21917 0.649106 4 \n",
"1 0.593591 0.941433 0.816062 21917 0.595930 4 \n",
"14 0.592905 0.942179 0.789515 21917 0.606570 64 \n",
"5 0.596894 0.943413 0.809371 21917 0.598930 64 \n",
"20 0.551025 0.994255 0.944039 21918 0.765497 64 \n",
"12 0.519690 0.995677 0.953636 21917 0.785620 64 \n",
"6 0.522877 0.995456 0.955315 21917 0.775955 64 \n",
"23 0.507586 0.998315 0.967556 21918 0.854433 64 \n",
"10 0.496758 0.998619 0.968971 21917 0.862390 64 \n",
"2 0.504335 0.998382 0.969092 21917 0.853621 64 \n",
"\n",
" combined \n",
"11 2.397469 \n",
"13 2.395405 \n",
"9 2.395312 \n",
"17 2.389578 \n",
"16 2.385164 \n",
"7 2.382784 \n",
"8 2.377641 \n",
"19 2.377048 \n",
"0 2.372526 \n",
"3 2.365154 \n",
"22 2.358528 \n",
"18 2.347645 \n",
"21 2.336237 \n",
"15 2.334200 \n",
"4 2.328182 \n",
"1 2.323498 \n",
"14 2.321790 \n",
"5 2.321387 \n",
"20 2.237487 \n",
"12 2.165669 \n",
"6 2.152299 \n",
"23 2.145901 \n",
"10 2.127631 \n",
"2 2.126081 "
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cv_df.sort(\"combined\", ascending=False, inplace=True)\n",
"cv_df"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs":
[], [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" | activation | allele | allele_size | dropout_probability | embedding_output_dim | fit_time | layer_sizes | model_params | test_auc | test_f1 | test_tau | train_auc | train_f1 | train_size | train_tau | layer0_size |
---|
0 | tanh | HLA-A0201 | 32876 | 0.0 | 8 | 84.926343 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.947852 | 0.812253 | 0.612421 | 0.961440 | 0.857345 | 21917 | 0.635373 | 4 |
---|
1 | tanh | HLA-A0201 | 32876 | 0.5 | 8 | 96.526969 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.934945 | 0.794962 | 0.593591 | 0.941433 | 0.816062 | 21917 | 0.595930 | 4 |
---|
2 | tanh | HLA-A0201 | 32876 | 0.0 | 32 | 87.635577 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.893769 | 0.727977 | 0.504335 | 0.998382 | 0.969092 | 21917 | 0.853621 | 64 |
---|
3 | tanh | HLA-A0201 | 32876 | 0.5 | 32 | 99.237046 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.945726 | 0.810010 | 0.609418 | 0.951109 | 0.828886 | 21917 | 0.612068 | 4 |
---|
4 | tanh | HLA-A0201 | 32876 | 0.0 | 32 | 87.175426 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.941599 | 0.793750 | 0.592833 | 0.968903 | 0.851287 | 21917 | 0.649106 | 4 |
---|
5 | tanh | HLA-A0201 | 32876 | 0.5 | 8 | 98.160665 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.937999 | 0.786494 | 0.596894 | 0.943413 | 0.809371 | 21917 | 0.598930 | 64 |
---|
6 | tanh | HLA-A0201 | 32876 | 0.0 | 8 | 86.410095 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.897357 | 0.732064 | 0.522877 | 0.995456 | 0.955315 | 21917 | 0.775955 | 64 |
---|
7 | tanh | HLA-A0201 | 32876 | 0.5 | 32 | 100.420329 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.947580 | 0.822068 | 0.613136 | 0.952590 | 0.843105 | 21917 | 0.617173 | 64 |
---|
8 | tanh | HLA-A0201 | 32876 | 0.0 | 32 | 86.968521 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.951407 | 0.826462 | 0.599771 | 0.966108 | 0.860940 | 21917 | 0.658287 | 4 |
---|
9 | tanh | HLA-A0201 | 32876 | 0.5 | 32 | 98.788730 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.949710 | 0.841608 | 0.603994 | 0.949856 | 0.829387 | 21917 | 0.617840 | 4 |
---|
10 | tanh | HLA-A0201 | 32876 | 0.0 | 32 | 87.397654 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.893675 | 0.737198 | 0.496758 | 0.998619 | 0.968971 | 21917 | 0.862390 | 64 |
---|
11 | tanh | HLA-A0201 | 32876 | 0.0 | 8 | 84.933377 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.953694 | 0.836469 | 0.607306 | 0.959798 | 0.844069 | 21917 | 0.641526 | 4 |
---|
12 | tanh | HLA-A0201 | 32876 | 0.0 | 8 | 86.291791 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.900378 | 0.745602 | 0.519690 | 0.995677 | 0.953636 | 21917 | 0.785620 | 64 |
---|
13 | tanh | HLA-A0201 | 32876 | 0.5 | 32 | 100.035493 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.951017 | 0.837929 | 0.606459 | 0.951691 | 0.830517 | 21917 | 0.622023 | 64 |
---|
14 | tanh | HLA-A0201 | 32876 | 0.5 | 8 | 98.413581 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.943026 | 0.785859 | 0.592905 | 0.942179 | 0.789515 | 21917 | 0.606570 | 64 |
---|
15 | tanh | HLA-A0201 | 32876 | 0.5 | 8 | 97.010899 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.942098 | 0.802542 | 0.589560 | 0.940715 | 0.800403 | 21917 | 0.601921 | 4 |
---|
16 | tanh | HLA-A0201 | 32876 | 0.0 | 8 | 85.211551 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.947806 | 0.824917 | 0.612441 | 0.962871 | 0.857006 | 21918 | 0.636318 | 4 |
---|
17 | tanh | HLA-A0201 | 32876 | 0.5 | 32 | 100.653926 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.947980 | 0.827857 | 0.613741 | 0.954082 | 0.832507 | 21918 | 0.619337 | 64 |
---|
18 | tanh | HLA-A0201 | 32876 | 0.5 | 8 | 98.564194 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.941925 | 0.801995 | 0.603724 | 0.944615 | 0.803146 | 21918 | 0.602052 | 64 |
---|
19 | tanh | HLA-A0201 | 32876 | 0.5 | 32 | 99.454345 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.945700 | 0.824331 | 0.607017 | 0.951755 | 0.833457 | 21918 | 0.613908 | 4 |
---|
20 | tanh | HLA-A0201 | 32876 | 0.0 | 8 | 86.658918 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.914951 | 0.771511 | 0.551025 | 0.994255 | 0.944039 | 21918 | 0.765497 | 64 |
---|
21 | tanh | HLA-A0201 | 32876 | 0.5 | 8 | 96.809296 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.940589 | 0.796334 | 0.599313 | 0.943211 | 0.787957 | 21918 | 0.599096 | 4 |
---|
22 | tanh | HLA-A0201 | 32876 | 0.0 | 32 | 87.281114 | [4] | {u'activation': u'tanh', u'embedding_output_di... | 0.943290 | 0.823418 | 0.591820 | 0.969883 | 0.875209 | 21918 | 0.651730 | 4 |
---|
23 | tanh | HLA-A0201 | 32876 | 0.0 | 32 | 87.568679 | [64] | {u'activation': u'tanh', u'embedding_output_di... | 0.895266 | 0.743049 | 0.507586 | 0.998315 | 0.967556 | 21918 | 0.854433 | 64 |
---|
\n",
" "
],
"text/plain": [
" activation allele allele_size dropout_probability \\\n",
"0 tanh HLA-A0201 32876 0.0 \n",
"1 tanh HLA-A0201 32876 0.5 \n",
"2 tanh HLA-A0201 32876 0.0 \n",
"3 tanh HLA-A0201 32876 0.5 \n",
"4 tanh HLA-A0201 32876 0.0 \n",
"5 tanh HLA-A0201 32876 0.5 \n",
"6 tanh HLA-A0201 32876 0.0 \n",
"7 tanh HLA-A0201 32876 0.5 \n",
"8 tanh HLA-A0201 32876 0.0 \n",
"9 tanh HLA-A0201 32876 0.5 \n",
"10 tanh HLA-A0201 32876 0.0 \n",
"11 tanh HLA-A0201 32876 0.0 \n",
"12 tanh HLA-A0201 32876 0.0 \n",
"13 tanh HLA-A0201 32876 0.5 \n",
"14 tanh HLA-A0201 32876 0.5 \n",
"15 tanh HLA-A0201 32876 0.5 \n",
"16 tanh HLA-A0201 32876 0.0 \n",
"17 tanh HLA-A0201 32876 0.5 \n",
"18 tanh HLA-A0201 32876 0.5 \n",
"19 tanh HLA-A0201 32876 0.5 \n",
"20 tanh HLA-A0201 32876 0.0 \n",
"21 tanh HLA-A0201 32876 0.5 \n",
"22 tanh HLA-A0201 32876 0.0 \n",
"23 tanh HLA-A0201 32876 0.0 \n",
"\n",
" embedding_output_dim fit_time layer_sizes \\\n",
"0 8 84.926343 [4] \n",
"1 8 96.526969 [4] \n",
"2 32 87.635577 [64] \n",
"3 32 99.237046 [4] \n",
"4 32 87.175426 [4] \n",
"5 8 98.160665 [64] \n",
"6 8 86.410095 [64] \n",
"7 32 100.420329 [64] \n",
"8 32 86.968521 [4] \n",
"9 32 98.788730 [4] \n",
"10 32 87.397654 [64] \n",
"11 8 84.933377 [4] \n",
"12 8 86.291791 [64] \n",
"13 32 100.035493 [64] \n",
"14 8 98.413581 [64] \n",
"15 8 97.010899 [4] \n",
"16 8 85.211551 [4] \n",
"17 32 100.653926 [64] \n",
"18 8 98.564194 [64] \n",
"19 32 99.454345 [4] \n",
"20 8 86.658918 [64] \n",
"21 8 96.809296 [4] \n",
"22 32 87.281114 [4] \n",
"23 32 87.568679 [64] \n",
"\n",
" model_params test_auc test_f1 \\\n",
"0 {u'activation': u'tanh', u'embedding_output_di... 0.947852 0.812253 \n",
"1 {u'activation': u'tanh', u'embedding_output_di... 0.934945 0.794962 \n",
"2 {u'activation': u'tanh', u'embedding_output_di... 0.893769 0.727977 \n",
"3 {u'activation': u'tanh', u'embedding_output_di... 0.945726 0.810010 \n",
"4 {u'activation': u'tanh', u'embedding_output_di... 0.941599 0.793750 \n",
"5 {u'activation': u'tanh', u'embedding_output_di... 0.937999 0.786494 \n",
"6 {u'activation': u'tanh', u'embedding_output_di... 0.897357 0.732064 \n",
"7 {u'activation': u'tanh', u'embedding_output_di... 0.947580 0.822068 \n",
"8 {u'activation': u'tanh', u'embedding_output_di... 0.951407 0.826462 \n",
"9 {u'activation': u'tanh', u'embedding_output_di... 0.949710 0.841608 \n",
"10 {u'activation': u'tanh', u'embedding_output_di... 0.893675 0.737198 \n",
"11 {u'activation': u'tanh', u'embedding_output_di... 0.953694 0.836469 \n",
"12 {u'activation': u'tanh', u'embedding_output_di... 0.900378 0.745602 \n",
"13 {u'activation': u'tanh', u'embedding_output_di... 0.951017 0.837929 \n",
"14 {u'activation': u'tanh', u'embedding_output_di... 0.943026 0.785859 \n",
"15 {u'activation': u'tanh', u'embedding_output_di... 0.942098 0.802542 \n",
"16 {u'activation': u'tanh', u'embedding_output_di... 0.947806 0.824917 \n",
"17 {u'activation': u'tanh', u'embedding_output_di... 0.947980 0.827857 \n",
"18 {u'activation': u'tanh', u'embedding_output_di... 0.941925 0.801995 \n",
"19 {u'activation': u'tanh', u'embedding_output_di... 0.945700 0.824331 \n",
"20 {u'activation': u'tanh', u'embedding_output_di... 0.914951 0.771511 \n",
"21 {u'activation': u'tanh', u'embedding_output_di... 0.940589 0.796334 \n",
"22 {u'activation': u'tanh', u'embedding_output_di... 0.943290 0.823418 \n",
"23 {u'activation': u'tanh', u'embedding_output_di... 0.895266 0.743049 \n",
"\n",
" test_tau train_auc train_f1 train_size train_tau layer0_size \n",
"0 0.612421 0.961440 0.857345 21917 0.635373 4 \n",
"1 0.593591 0.941433 0.816062 21917 0.595930 4 \n",
"2 0.504335 0.998382 0.969092 21917 0.853621 64 \n",
"3 0.609418 0.951109 0.828886 21917 0.612068 4 \n",
"4 0.592833 0.968903 0.851287 21917 0.649106 4 \n",
"5 0.596894 0.943413 0.809371 21917 0.598930 64 \n",
"6 0.522877 0.995456 0.955315 21917 0.775955 64 \n",
"7 0.613136 0.952590 0.843105 21917 0.617173 64 \n",
"8 0.599771 0.966108 0.860940 21917 0.658287 4 \n",
"9 0.603994 0.949856 0.829387 21917 0.617840 4 \n",
"10 0.496758 0.998619 0.968971 21917 0.862390 64 \n",
"11 0.607306 0.959798 0.844069 21917 0.641526 4 \n",
"12 0.519690 0.995677 0.953636 21917 0.785620 64 \n",
"13 0.606459 0.951691 0.830517 21917 0.622023 64 \n",
"14 0.592905 0.942179 0.789515 21917 0.606570 64 \n",
"15 0.589560 0.940715 0.800403 21917 0.601921 4 \n",
"16 0.612441 0.962871 0.857006 21918 0.636318 4 \n",
"17 0.613741 0.954082 0.832507 21918 0.619337 64 \n",
"18 0.603724 0.944615 0.803146 21918 0.602052 64 \n",
"19 0.607017 0.951755 0.833457 21918 0.613908 4 \n",
"20 0.551025 0.994255 0.944039 21918 0.765497 64 \n",
"21 0.599313 0.943211 0.787957 21918 0.599096 4 \n",
"22 0.591820 0.969883 0.875209 21918 0.651730 4 \n",
"23 0.507586 0.998315 0.967556 21918 0.854433 64 "
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cv_df = pandas.DataFrame(cv_df)\n",
"cv_df[\"layer0_size\"] = [x[0] for x in cv_df.layer_sizes]\n",
...
},
{
"cell_type": "code",
"execution_count":
null, 48,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [],
"source": [
"cv_df.to_csv(\"cv4.csv\")" "cv_df.to_csv(\"cv5.csv\")"
]
},
{
...
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version":
"2.7.10" "2.7.11"
}
},
"nbformat": 4,
diff --git a/notebooks/validation.ipynb b/notebooks/validation.ipynb
index d96cf9d..b8bc963 100644
--- a/notebooks/validation.ipynb
+++ b/notebooks/validation.ipynb
...
"all_validation_data = mhcflurry.data.load_allele_datasets(data_dir + \"bdata.2013.mhci.public.blind.1.txt\")\n"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[MICE] Completing matrix with shape (31539, 106)\n",
"[MICE] Starting imputation round 1/110, elapsed time 0.111\n",
"[MICE] Starting imputation round 2/110, elapsed time 8.241\n",
"[MICE] Starting imputation round 3/110, elapsed time 16.674\n",
"[MICE] Starting imputation round 4/110, elapsed time 26.568\n",
"[MICE] Starting imputation round 5/110, elapsed time 34.593\n",
"[MICE] Starting imputation round 6/110, elapsed time 42.498\n",
"[MICE] Starting imputation round 7/110, elapsed time 51.072\n",
"[MICE] Starting imputation round 8/110, elapsed time 59.647\n",
"[MICE] Starting imputation round 9/110, elapsed time 71.593\n",
"[MICE] Starting imputation round 10/110, elapsed time 81.222\n",
"[MICE] Starting imputation round 11/110, elapsed time 89.827\n",
"[MICE] Starting imputation round 12/110, elapsed time 102.841\n",
"[MICE] Starting imputation round 13/110, elapsed time 115.194\n",
"[MICE] Starting imputation round 14/110, elapsed time 125.657\n",
"[MICE] Starting imputation round 15/110, elapsed time 136.601\n",
"[MICE] Starting imputation round 16/110, elapsed time 150.123\n",
"[MICE] Starting imputation round 17/110, elapsed time 162.037\n",
"[MICE] Starting imputation round 18/110, elapsed time 172.226\n",
"[MICE] Starting imputation round 19/110, elapsed time 184.679\n",
"[MICE] Starting imputation round 20/110, elapsed time 195.810\n",
"[MICE] Starting imputation round 21/110, elapsed time 206.368\n",
"[MICE] Starting imputation round 22/110, elapsed time 216.587\n",
"[MICE] Starting imputation round 23/110, elapsed time 227.364\n",
"[MICE] Starting imputation round 24/110, elapsed time 237.498\n",
"[MICE] Starting imputation round 25/110, elapsed time 248.307\n",
"[MICE] Starting imputation round 26/110, elapsed time 259.084\n",
"[MICE] Starting imputation round 27/110, elapsed time 270.167\n",
"[MICE] Starting imputation round 28/110, elapsed time 283.990\n",
"[MICE] Starting imputation round 29/110, elapsed time 296.113\n",
"[MICE] Starting imputation round 30/110, elapsed time 307.106\n",
"[MICE] Starting imputation round 31/110, elapsed time 316.434\n",
"[MICE] Starting imputation round 32/110, elapsed time 328.301\n",
"[MICE] Starting imputation round 33/110, elapsed time 341.276\n",
"[MICE] Starting imputation round 34/110, elapsed time 356.325\n",
"[MICE] Starting imputation round 35/110, elapsed time 366.570\n",
"[MICE] Starting imputation round 36/110, elapsed time 377.418\n",
"[MICE] Starting imputation round 37/110, elapsed time 387.342\n",
"[MICE] Starting imputation round 38/110, elapsed time 396.340\n",
"[MICE] Starting imputation round 39/110, elapsed time 405.177\n",
"[MICE] Starting imputation round 40/110, elapsed time 415.509\n",
"[MICE] Starting imputation round 41/110, elapsed time 424.908\n",
"[MICE] Starting imputation round 42/110, elapsed time 435.436\n",
"[MICE] Starting imputation round 43/110, elapsed time 445.738\n",
"[MICE] Starting imputation round 44/110, elapsed time 454.812\n",
"[MICE] Starting imputation round 45/110, elapsed time 463.573\n",
"[MICE] Starting imputation round 46/110, elapsed time 472.524\n",
"[MICE] Starting imputation round 47/110, elapsed time 481.888\n",
"[MICE] Starting imputation round 48/110, elapsed time 491.173\n",
"[MICE] Starting imputation round 49/110, elapsed time 504.550\n",
"[MICE] Starting imputation round 50/110, elapsed time 516.815\n",
"[MICE] Starting imputation round 51/110, elapsed time 527.519\n",
"[MICE] Starting imputation round 52/110, elapsed time 539.220\n",
"[MICE] Starting imputation round 53/110, elapsed time 548.554\n",
"[MICE] Starting imputation round 54/110, elapsed time 557.358\n",
"[MICE] Starting imputation round 55/110, elapsed time 566.810\n",
"[MICE] Starting imputation round 56/110, elapsed time 575.670\n",
"[MICE] Starting imputation round 57/110, elapsed time 586.083\n",
"[MICE] Starting imputation round 58/110, elapsed time 595.799\n",
"[MICE] Starting imputation round 59/110, elapsed time 605.537\n",
"[MICE] Starting imputation round 60/110, elapsed time 614.768\n",
"[MICE] Starting imputation round 61/110, elapsed time 624.430\n",
"[MICE] Starting imputation round 62/110, elapsed time 635.647\n",
"[MICE] Starting imputation round 63/110, elapsed time 645.239\n",
"[MICE] Starting imputation round 64/110, elapsed time 654.488\n",
"[MICE] Starting imputation round 65/110, elapsed time 663.584\n",
"[MICE] Starting imputation round 66/110, elapsed time 673.521\n",
"[MICE] Starting imputation round 67/110, elapsed time 682.477\n",
"[MICE] Starting imputation round 68/110, elapsed time 691.423\n",
"[MICE] Starting imputation round 69/110, elapsed time 700.841\n",
"[MICE] Starting imputation round 70/110, elapsed time 710.097\n",
"[MICE] Starting imputation round 71/110, elapsed time 718.485\n",
"[MICE] Starting imputation round 72/110, elapsed time 727.323\n",
"[MICE] Starting imputation round 73/110, elapsed time 736.386\n",
"[MICE] Starting imputation round 74/110, elapsed time 745.016\n",
"[MICE] Starting imputation round 75/110, elapsed time 753.102\n",
"[MICE] Starting imputation round 76/110, elapsed time 760.977\n",
"[MICE] Starting imputation round 77/110, elapsed time 769.145\n",
"[MICE] Starting imputation round 78/110, elapsed time 778.712\n",
"[MICE] Starting imputation round 79/110, elapsed time 787.006\n",
"[MICE] Starting imputation round 80/110, elapsed time 795.750\n",
"[MICE] Starting imputation round 81/110, elapsed time 804.539\n",
"[MICE] Starting imputation round 82/110, elapsed time 812.682\n",
"[MICE] Starting imputation round 83/110, elapsed time 821.102\n",
"[MICE] Starting imputation round 84/110, elapsed time 830.003\n",
"[MICE] Starting imputation round 85/110, elapsed time 838.924\n",
"[MICE] Starting imputation round 86/110, elapsed time 847.681\n",
"[MICE] Starting imputation round 87/110, elapsed time 856.701\n",
"[MICE] Starting imputation round 88/110, elapsed time 865.043\n",
"[MICE] Starting imputation round 89/110, elapsed time 874.018\n",
"[MICE] Starting imputation round 90/110, elapsed time 882.666\n",
"[MICE] Starting imputation round 91/110, elapsed time 891.618\n",
"[MICE] Starting imputation round 92/110, elapsed time 899.558\n",
"[MICE] Starting imputation round 93/110, elapsed time 908.443\n",
"[MICE] Starting imputation round 94/110, elapsed time 916.337\n",
"[MICE] Starting imputation round 95/110, elapsed time 924.289\n",
"[MICE] Starting imputation round 96/110, elapsed time 933.124\n",
"[MICE] Starting imputation round 97/110, elapsed time 941.719\n",
"[MICE] Starting imputation round 98/110, elapsed time 951.259\n",
"[MICE] Starting imputation round 99/110, elapsed time 961.569\n",
"[MICE] Starting imputation round 100/110, elapsed time 970.936\n",
"[MICE] Starting imputation round 101/110, elapsed time 980.006\n",
"[MICE] Starting imputation round 102/110, elapsed time 989.664\n",
"[MICE] Starting imputation round 103/110, elapsed time 999.131\n",
"[MICE] Starting imputation round 104/110, elapsed time 1008.392\n",
"[MICE] Starting imputation round 105/110, elapsed time 1016.863\n",
"[MICE] Starting imputation round 106/110, elapsed time 1026.306\n",
"[MICE] Starting imputation round 107/110, elapsed time 1035.933\n",
"[MICE] Starting imputation round 108/110, elapsed time 1044.824\n",
"[MICE] Starting imputation round 109/110, elapsed time 1054.162\n",
"[MICE] Starting imputation round 110/110, elapsed time 1063.809\n"
]
}
],
"source": [
"imputed_train_data = mhcflurry.imputation.create_imputed_datasets(all_train_data, fancyimpute.MICE())\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
...
"scores_df.sort(\"test_size\", ascending=False, inplace=True)\n",
"scores_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# train models\n",
"def make_and_fit_model(allele, original_params):\n",
" params = dict(original_params)\n",
" impute = params[\"impute\"]\n",
" del params[\"impute\"]\n",
" training = (imputed_train_data if impute else all_train_data)[\"allele\"]\n",
" model = mhcflurry.Class1BindingPredictor.from_hyperparameters(max_ic50=max_ic50, **params)\n",
" print(\"Fitting model for allele %s (%d): %s\" % (allele, len(training.Y), str(original_params)))\n",
" \n",
"\n",
"models = dict((allele, [make_model(allele, params) for params in models_params_list]) for allele in alleles)\n"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" | allele | netmhc_auc | netmhc_f1 | netmhc_tau | netmhcpan_auc | netmhcpan_f1 | netmhcpan_tau | smmpmbec_cpp_auc | smmpmbec_cpp_f1 | smmpmbec_cpp_tau | test_size |
---|
4 | HLA-A0201 | 0.932234 | 0.884336 | 0.635498 | 0.930479 | 0.880963 | 0.637338 | 0.927358 | 0.885121 | 0.626224 | 2126 |
---|
36 | HLA-B2705 | 0.948457 | 0.285714 | 0.430561 | 0.943860 | 0.400000 | 0.377208 | 0.941682 | 0.304348 | 0.416884 | 314 |
---|
\n",
" "
],
"text/plain": [
" allele netmhc_auc netmhc_f1 netmhc_tau netmhcpan_auc netmhcpan_f1 \\\n",
"4 HLA-A0201 0.932234 0.884336 0.635498 0.930479 0.880963 \n",
"36 HLA-B2705 0.948457 0.285714 0.430561 0.943860 0.400000 \n",
"\n",
" netmhcpan_tau smmpmbec_cpp_auc smmpmbec_cpp_f1 smmpmbec_cpp_tau \\\n",
"4 0.637338 0.927358 0.885121 0.626224 \n",
"36 0.377208 0.941682 0.304348 0.416884 \n",
"\n",
" test_size \n",
"4 2126 \n",
"36 314 "
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"summary_df = scores_df[scores_df.allele.isin(alleles)]\n",
"summary_df"
]
}
],
"metadata": {
diff --git a/sectionContent_Text_.tex b/sectionContent_Text_.tex
index bde7ef6..f806a77 100644
--- a/sectionContent_Text_.tex
+++ b/sectionContent_Text_.tex
...
\section*{Introduction} \section{Introduction}
Most vertebrates are capable of generating diverse populations of adaptive immune cells which detect and eliminate infected and cancerous cells.
The detection and elimination of both infection and cancer is the central task of the vertebrate adaptive immune system.
In most vertebrates \cite{Anderson_2004}
Adaptive immunity in most vertebrates can be roughly divided into the realm of antibodies (B-cells) and
\begin{itemize}
\item Overview of MHC binding
\item Allele specific vs. pan allele and that here we're focusing on allele specific
diff --git a/sectionSection_title.tex b/sectionSection_title.tex
index 75171a9..4fd77e0 100644
--- a/sectionSection_title.tex
+++ b/sectionSection_title.tex
...
\section*{Datasets} \section{Datasets}
Two datasets were used from a recent paper studying the relationship between training data and pMHC predictor accuracy\cite{Kim_2014}. The training dataset (BD2009) contained entries from IEDB\cite{Salimi_2012} up to 2009 and the test dataset (BLIND) contained IEDB entries from between 2010 and 2013 which did not overlap with BD2009 (Table~\ref{tab:datasets}).
\begin{table}[h!]
\centering
\begin{tabular}{l||cccc}
\toprule
{} & Alleles &
Alleles w/ 10+ measurements & IC50 Measurements & Expanded 9mers \\
\midrule
BD2009 & 106 &
98 & 137,654 & 470,170 \\
BLIND & 53 &
53 & 27,680 & 83,752 \\
\bottomrule
\end{tabular}
\caption{Train (BD2009) and test (BLIND) dataset sizes.}
\label{tab:datasets}
\end{table}
diff --git a/section_Evaluating_a_predictor_Throughout__.tex b/section_Evaluating_the_performance_of__.tex
similarity index 90%
rename from section_Evaluating_a_predictor_Throughout__.tex
rename to section_Evaluating_the_performance_of__.tex
index 5d795e8..4253f88 100644
--- a/section_Evaluating_a_predictor_Throughout__.tex
+++ b/section_Evaluating_the_performance_of__.tex
...
\section{Evaluating
the performance of a
binding predictor}
Throughout this paper we will evaluate a pMHC binding predictor using three different metrics:
diff --git a/section_Network_architecture_begin_figure__.tex b/section_Network_architecture_begin_figure__.tex
index f21d099..9947dd1 100644
--- a/section_Network_architecture_begin_figure__.tex
+++ b/section_Network_architecture_begin_figure__.tex
...
\centering
\includegraphics[scale=0.5]{figures/mhcflurry-gliffy-network.png}
\caption{Neural network architecture for predicting peptide-MHC affinities from fixed length amino acid sequences}
\end{figure}
\section{Predicting affinities for multiple peptide lengths using a 9mer encoding}
Reduction from multiple peptide lengths to a 9mer encoding was done using a scheme inspired by NetMHC\cite{lundegaard2008accurate}. Peptides with only 8 amino acids were extended with the insertion of a special wildcard ``X'' at every position in the sequence. Peptides longer than 9 amino acids were shortened by removing consecutive stretches of residues at every position. These lengthened or shortened samples were included in the training set with a sample weight inversely proportional to the number of samples created from a single measurement.