this is for holding javascript data
Tim O'Donnell update validation results analysis
almost 8 years ago
Commit id: 5653e4e7f41ae9eea634a54fe2d0d2894962d268
deletions | additions
diff --git a/notebooks/validation results analysis.ipynb b/notebooks/validation results analysis.ipynb
index fe23e83..5ec42b9 100644
--- a/notebooks/validation results analysis.ipynb
+++ b/notebooks/validation results analysis.ipynb
...
" \n",
"panel"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"netmhc\n",
" auc f1 tau\n",
"allele \n",
"overall 0.929947 0.800909 0.582258\n",
"H-2-DB 0.896152 0.621212 0.600337\n",
"H-2-KB 0.891675 0.813675 0.573199\n",
"H-2-KD 0.815331 0.657718 0.403275\n",
"HLA-A0101 0.915334 0.619565 0.524866\n",
"HLA-A0201 0.932234 0.884336 0.635498\n",
"HLA-A0202 0.890382 0.755556 0.627143\n",
"HLA-A0203 0.975879 0.948626 0.586911\n",
"HLA-A0206 0.913636 0.872902 0.543184\n",
"HLA-A0301 0.935154 0.900621 0.629236\n",
"HLA-A1101 0.949252 0.883402 0.632199\n",
"HLA-A2301 0.881451 0.773842 0.605598\n",
"HLA-A2402 0.864200 0.632911 0.575710\n",
"HLA-A2501 0.991727 0.666667 0.151836\n",
"HLA-A2601 0.930840 0.541176 0.436646\n",
"HLA-A2602 0.931577 0.766839 0.525365\n",
"HLA-A2603 0.890172 0.542373 0.366317\n",
"HLA-A2902 0.868946 0.644068 0.619830\n",
"HLA-A3001 0.866899 0.731959 0.456482\n",
"HLA-A3002 0.749659 0.663438 0.342161\n",
"HLA-A3101 0.862606 0.833958 0.525026\n",
"HLA-A3201 0.893070 0.772881 0.448284\n",
"HLA-A3301 0.914593 0.868327 0.587112\n",
"HLA-A6801 0.948038 0.924791 0.571403\n",
"HLA-A6802 0.971961 0.922636 0.652332\n",
"HLA-A6901 0.960066 0.698413 0.335339\n",
"HLA-A8001 0.983487 0.434783 0.182374\n",
"HLA-B0702 0.916126 0.869383 0.606014\n",
"HLA-B0801 0.946187 0.776053 0.629519\n",
"HLA-B0802 0.989930 0.190476 0.393833\n",
"HLA-B0803 0.968395 0.000000 0.318249\n",
"HLA-B1501 0.934416 0.846512 0.591735\n",
"HLA-B1503 0.864784 0.592593 0.468922\n",
"HLA-B1509 0.901247 0.187500 0.354311\n",
"HLA-B1517 0.902687 0.637931 0.422497\n",
"HLA-B1801 0.794549 0.358974 0.237118\n",
"HLA-B2703 NaN 0.000000 0.053658\n",
"HLA-B2705 0.948457 0.285714 0.430561\n",
"HLA-B3501 0.828690 0.712000 0.515205\n",
"HLA-B3801 0.925684 0.000000 0.513269\n",
"HLA-B3901 0.965195 0.750000 0.305138\n",
"HLA-B4001 0.928725 0.829787 0.635308\n",
"HLA-B4002 0.920635 0.758621 0.456242\n",
"HLA-B4402 0.912335 0.568807 0.574916\n",
"HLA-B4403 0.869501 0.766304 0.541066\n",
"HLA-B4501 1.000000 1.000000 0.263574\n",
"HLA-B4601 NaN 0.000000 NaN\n",
"HLA-B5101 0.919175 0.428571 0.365307\n",
"HLA-B5301 0.841415 0.733813 0.515585\n",
"HLA-B5401 0.816216 0.800000 0.300252\n",
"HLA-B5701 0.899092 0.741840 0.484975\n",
"HLA-B5801 0.891654 0.827411 0.545132\n",
"Mamu-A01 0.916721 0.694915 0.569423\n",
"Mamu-A02 0.929688 0.767123 0.598964\n",
"\n",
"netmhcpan\n",
" auc f1 tau\n",
"allele \n",
"overall 0.932924 0.793170 0.579586\n",
"H-2-DB 0.874574 0.577236 0.574262\n",
"H-2-KB 0.825565 0.665354 0.486836\n",
"H-2-KD 0.819189 0.645570 0.390333\n",
"HLA-A0101 0.894895 0.594286 0.498767\n",
"HLA-A0201 0.930479 0.880963 0.637338\n",
"HLA-A0202 0.898697 0.769231 0.624280\n",
"HLA-A0203 0.974158 0.944578 0.591463\n",
"HLA-A0206 0.910796 0.866258 0.535067\n",
"HLA-A0301 0.927287 0.885106 0.611240\n",
"HLA-A1101 0.945065 0.887671 0.625794\n",
"HLA-A2301 0.895229 0.788406 0.636944\n",
"HLA-A2402 0.892876 0.693069 0.600686\n",
"HLA-A2501 0.998540 0.888889 0.153968\n",
"HLA-A2601 0.942279 0.640000 0.451666\n",
"HLA-A2602 0.957811 0.839378 0.561093\n",
"HLA-A2603 0.934300 0.581818 0.426438\n",
"HLA-A2902 0.882377 0.633333 0.641257\n",
"HLA-A3001 0.871974 0.746114 0.459857\n",
"HLA-A3002 0.732749 0.611260 0.320577\n",
"HLA-A3101 0.859261 0.822335 0.528941\n",
"HLA-A3201 0.907161 0.766423 0.480669\n",
"HLA-A3301 0.931156 0.829175 0.612147\n",
"HLA-A6801 0.948907 0.923944 0.599562\n",
"HLA-A6802 0.964197 0.900293 0.653216\n",
"HLA-A6901 0.944257 0.680851 0.327432\n",
"HLA-A8001 0.980799 0.434783 0.186524\n",
"HLA-B0702 0.913527 0.855011 0.604720\n",
"HLA-B0801 0.942822 0.701671 0.606762\n",
"HLA-B0802 0.989590 0.571429 0.410579\n",
"HLA-B0803 0.952346 0.000000 0.315978\n",
"HLA-B1501 0.935596 0.823245 0.589097\n",
"HLA-B1503 0.870064 0.588235 0.522577\n",
"HLA-B1509 0.922907 0.176471 0.448099\n",
"HLA-B1517 0.934516 0.710280 0.448575\n",
"HLA-B1801 0.789549 0.380952 0.234201\n",
"HLA-B2703 NaN 0.000000 0.067451\n",
"HLA-B2705 0.943860 0.400000 0.377208\n",
"HLA-B3501 0.836281 0.711027 0.514413\n",
"HLA-B3801 0.980074 0.000000 0.652173\n",
"HLA-B3901 0.980065 0.744186 0.316739\n",
"HLA-B4001 0.920678 0.845570 0.619418\n",
"HLA-B4002 0.918651 0.909091 0.518720\n",
"HLA-B4402 0.934554 0.607143 0.598960\n",
"HLA-B4403 0.891047 0.775956 0.587824\n",
"HLA-B4501 0.993333 0.800000 0.246426\n",
"HLA-B4601 NaN 0.000000 NaN\n",
"HLA-B5101 0.948695 0.610169 0.406973\n",
"HLA-B5301 0.885722 0.753623 0.559543\n",
"HLA-B5401 0.845946 0.727273 0.349826\n",
"HLA-B5701 0.883077 0.642623 0.460133\n",
"HLA-B5801 0.882016 0.805195 0.531508\n",
"Mamu-A01 0.912755 0.706897 0.539013\n",
"Mamu-A02 0.883087 0.748092 0.524622\n",
"\n",
"mhcflurry big\n"
]
},
{
"ename": "AttributeError",
"evalue": "'module' object has no attribute 'panel_mhcflurry'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mmodel_name\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmodel_groups\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"mhcflurry \"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mmodel_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0;32mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscipy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstats\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmstats\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpanel_mhcflurry\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmodel_groups\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmodel_name\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0;32mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mAttributeError\u001b[0m: 'module' object has no attribute 'panel_mhcflurry'"
]
}
],
"source": [
"# Reference predictors: print the per-allele metric table stored in `panel`\n",
"# for each one.\n",
"for model in [\"netmhc\", \"netmhcpan\"]:\n",
"    print(model)\n",
"    print(panel[model])\n",
"    print(\"\")\n",
"\n",
"# mhcflurry: for each model group, select the group's entries from\n",
"# `panel_mhcflurry` and average them along axis 0.\n",
"# `panel_mhcflurry` is indexed directly as a notebook variable; it is not an\n",
"# attribute of scipy.stats.mstats (looking it up there raises AttributeError).\n",
"# NOTE(review): assumes `panel_mhcflurry` is defined in an earlier cell and that\n",
"# a plain arithmetic mean is intended -- if a scipy.stats.mstats function\n",
"# (e.g. gmean) was meant, apply it to this selection instead. TODO confirm.\n",
"for model_name in model_groups.index:\n",
"    print(\"mhcflurry \" + model_name)\n",
"    print(panel_mhcflurry[model_groups[model_name]].mean(0))\n",
"    print(\"\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Training-size cutoff and the two model identifiers that appear in the\n",
"# `mhcflurry <model>_<metric>` column names read below.\n",
"threshold = 1000\n",
"big_model = 6\n",
"small_model = 5\n",
"\n",
"# For every row of df, copy the auc/f1/tau values of the selected model into\n",
"# new `mhcflurry standard_<metric>` columns: big_model when the row's\n",
"# train_size is at least `threshold`, otherwise small_model.\n",
"extra_cols = collections.defaultdict(list)\n",
"for (allele, row) in df.iterrows():\n",
"    # The model choice depends only on the row, so make it once per row\n",
"    # rather than once per metric.\n",
"    model = big_model if row.train_size >= threshold else small_model\n",
"    for metric in [\"auc\", \"f1\", \"tau\"]:\n",
"        source_col = \"mhcflurry %s_%s\" % (model, metric)\n",
"        extra_cols[\"mhcflurry standard_%s\" % metric].append(row[source_col])\n",
"\n",
"# NOTE: this adds the collected columns to df in place; values are in the\n",
"# same row order that iterrows() produced.\n",
"for (col, values) in extra_cols.items():\n",
"    df[col] = values\n",
"\n",
"df"
]
}
],
"metadata": {