Tim O'Donnell update validation results analysis  almost 8 years ago

Commit id: 5653e4e7f41ae9eea634a54fe2d0d2894962d268

deletions | additions      

       

" \n",  "panel"  ]  },  {  "cell_type": "code",  "execution_count": 24,  "metadata": {  "collapsed": false  },  "outputs": [  {  "name": "stdout",  "output_type": "stream",  "text": [  "netmhc\n",  " auc f1 tau\n",  "allele \n",  "overall 0.929947 0.800909 0.582258\n",  "H-2-DB 0.896152 0.621212 0.600337\n",  "H-2-KB 0.891675 0.813675 0.573199\n",  "H-2-KD 0.815331 0.657718 0.403275\n",  "HLA-A0101 0.915334 0.619565 0.524866\n",  "HLA-A0201 0.932234 0.884336 0.635498\n",  "HLA-A0202 0.890382 0.755556 0.627143\n",  "HLA-A0203 0.975879 0.948626 0.586911\n",  "HLA-A0206 0.913636 0.872902 0.543184\n",  "HLA-A0301 0.935154 0.900621 0.629236\n",  "HLA-A1101 0.949252 0.883402 0.632199\n",  "HLA-A2301 0.881451 0.773842 0.605598\n",  "HLA-A2402 0.864200 0.632911 0.575710\n",  "HLA-A2501 0.991727 0.666667 0.151836\n",  "HLA-A2601 0.930840 0.541176 0.436646\n",  "HLA-A2602 0.931577 0.766839 0.525365\n",  "HLA-A2603 0.890172 0.542373 0.366317\n",  "HLA-A2902 0.868946 0.644068 0.619830\n",  "HLA-A3001 0.866899 0.731959 0.456482\n",  "HLA-A3002 0.749659 0.663438 0.342161\n",  "HLA-A3101 0.862606 0.833958 0.525026\n",  "HLA-A3201 0.893070 0.772881 0.448284\n",  "HLA-A3301 0.914593 0.868327 0.587112\n",  "HLA-A6801 0.948038 0.924791 0.571403\n",  "HLA-A6802 0.971961 0.922636 0.652332\n",  "HLA-A6901 0.960066 0.698413 0.335339\n",  "HLA-A8001 0.983487 0.434783 0.182374\n",  "HLA-B0702 0.916126 0.869383 0.606014\n",  "HLA-B0801 0.946187 0.776053 0.629519\n",  "HLA-B0802 0.989930 0.190476 0.393833\n",  "HLA-B0803 0.968395 0.000000 0.318249\n",  "HLA-B1501 0.934416 0.846512 0.591735\n",  "HLA-B1503 0.864784 0.592593 0.468922\n",  "HLA-B1509 0.901247 0.187500 0.354311\n",  "HLA-B1517 0.902687 0.637931 0.422497\n",  "HLA-B1801 0.794549 0.358974 0.237118\n",  "HLA-B2703 NaN 0.000000 0.053658\n",  "HLA-B2705 0.948457 0.285714 0.430561\n",  "HLA-B3501 0.828690 0.712000 0.515205\n",  "HLA-B3801 0.925684 0.000000 0.513269\n",  "HLA-B3901 0.965195 0.750000 0.305138\n",  "HLA-B4001 0.928725 
0.829787 0.635308\n",  "HLA-B4002 0.920635 0.758621 0.456242\n",  "HLA-B4402 0.912335 0.568807 0.574916\n",  "HLA-B4403 0.869501 0.766304 0.541066\n",  "HLA-B4501 1.000000 1.000000 0.263574\n",  "HLA-B4601 NaN 0.000000 NaN\n",  "HLA-B5101 0.919175 0.428571 0.365307\n",  "HLA-B5301 0.841415 0.733813 0.515585\n",  "HLA-B5401 0.816216 0.800000 0.300252\n",  "HLA-B5701 0.899092 0.741840 0.484975\n",  "HLA-B5801 0.891654 0.827411 0.545132\n",  "Mamu-A01 0.916721 0.694915 0.569423\n",  "Mamu-A02 0.929688 0.767123 0.598964\n",  "\n",  "netmhcpan\n",  " auc f1 tau\n",  "allele \n",  "overall 0.932924 0.793170 0.579586\n",  "H-2-DB 0.874574 0.577236 0.574262\n",  "H-2-KB 0.825565 0.665354 0.486836\n",  "H-2-KD 0.819189 0.645570 0.390333\n",  "HLA-A0101 0.894895 0.594286 0.498767\n",  "HLA-A0201 0.930479 0.880963 0.637338\n",  "HLA-A0202 0.898697 0.769231 0.624280\n",  "HLA-A0203 0.974158 0.944578 0.591463\n",  "HLA-A0206 0.910796 0.866258 0.535067\n",  "HLA-A0301 0.927287 0.885106 0.611240\n",  "HLA-A1101 0.945065 0.887671 0.625794\n",  "HLA-A2301 0.895229 0.788406 0.636944\n",  "HLA-A2402 0.892876 0.693069 0.600686\n",  "HLA-A2501 0.998540 0.888889 0.153968\n",  "HLA-A2601 0.942279 0.640000 0.451666\n",  "HLA-A2602 0.957811 0.839378 0.561093\n",  "HLA-A2603 0.934300 0.581818 0.426438\n",  "HLA-A2902 0.882377 0.633333 0.641257\n",  "HLA-A3001 0.871974 0.746114 0.459857\n",  "HLA-A3002 0.732749 0.611260 0.320577\n",  "HLA-A3101 0.859261 0.822335 0.528941\n",  "HLA-A3201 0.907161 0.766423 0.480669\n",  "HLA-A3301 0.931156 0.829175 0.612147\n",  "HLA-A6801 0.948907 0.923944 0.599562\n",  "HLA-A6802 0.964197 0.900293 0.653216\n",  "HLA-A6901 0.944257 0.680851 0.327432\n",  "HLA-A8001 0.980799 0.434783 0.186524\n",  "HLA-B0702 0.913527 0.855011 0.604720\n",  "HLA-B0801 0.942822 0.701671 0.606762\n",  "HLA-B0802 0.989590 0.571429 0.410579\n",  "HLA-B0803 0.952346 0.000000 0.315978\n",  "HLA-B1501 0.935596 0.823245 0.589097\n",  "HLA-B1503 0.870064 0.588235 0.522577\n",  
"HLA-B1509 0.922907 0.176471 0.448099\n",  "HLA-B1517 0.934516 0.710280 0.448575\n",  "HLA-B1801 0.789549 0.380952 0.234201\n",  "HLA-B2703 NaN 0.000000 0.067451\n",  "HLA-B2705 0.943860 0.400000 0.377208\n",  "HLA-B3501 0.836281 0.711027 0.514413\n",  "HLA-B3801 0.980074 0.000000 0.652173\n",  "HLA-B3901 0.980065 0.744186 0.316739\n",  "HLA-B4001 0.920678 0.845570 0.619418\n",  "HLA-B4002 0.918651 0.909091 0.518720\n",  "HLA-B4402 0.934554 0.607143 0.598960\n",  "HLA-B4403 0.891047 0.775956 0.587824\n",  "HLA-B4501 0.993333 0.800000 0.246426\n",  "HLA-B4601 NaN 0.000000 NaN\n",  "HLA-B5101 0.948695 0.610169 0.406973\n",  "HLA-B5301 0.885722 0.753623 0.559543\n",  "HLA-B5401 0.845946 0.727273 0.349826\n",  "HLA-B5701 0.883077 0.642623 0.460133\n",  "HLA-B5801 0.882016 0.805195 0.531508\n",  "Mamu-A01 0.912755 0.706897 0.539013\n",  "Mamu-A02 0.883087 0.748092 0.524622\n",  "\n",  "mhcflurry big\n"  ]  },  {  "ename": "AttributeError",  "evalue": "'module' object has no attribute 'panel_mhcflurry'",  "output_type": "error",  "traceback": [  "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",  "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",  "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mmodel_name\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmodel_groups\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"mhcflurry \"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mmodel_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m 
\u001b[0;32mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscipy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstats\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmstats\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpanel_mhcflurry\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmodel_groups\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmodel_name\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0;32mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",  "\u001b[0;31mAttributeError\u001b[0m: 'module' object has no attribute 'panel_mhcflurry'"  ]  }  ],  "source": [  "# Print the stored metric table for each reference predictor.\n",  "for model in [\"netmhc\", \"netmhcpan\"]:\n",  "    print(model)\n",  "    print(panel[model])\n",  "    print(\"\")\n",  "\n",  "# Print the axis-0 mean of the mhcflurry panel entries selected by each model group.\n",  "# panel_mhcflurry is indexed directly: a stray `scipy.stats.mstats.` prefix in the\n",  "# previous version made this line raise AttributeError.\n",  "for model_name in model_groups.index:\n",  "    print(\"mhcflurry \" + model_name)\n",  "    print(panel_mhcflurry[model_groups[model_name]].mean(0))\n",  "    print(\"\")"  ]  },  {  "cell_type": "code",  "execution_count": null,  "metadata": {  "collapsed": false  },  "outputs": [],  "source": [  "# Add 'mhcflurry standard_<metric>' columns to df: for each row, copy the metrics of\n",  "# big_model when train_size >= threshold, otherwise those of small_model.\n",  "threshold = 1000\n",  "big_model = 6\n",  "small_model = 5\n",  "extra_cols = collections.defaultdict(list)\n",  "for (allele, row) in df.iterrows():\n",  "    # The model choice depends only on the row, so make it once per row.\n",  "    model = big_model if row.train_size >= threshold else small_model\n",  "    for metric in [\"auc\", \"f1\", \"tau\"]:\n",  "        extra_cols[\"mhcflurry standard_%s\" % metric].append(row[\"mhcflurry %s_%s\" % (model, metric)])\n",  "\n",  "for (col, values) in extra_cols.items():\n",  "    df[col] = values\n",  "\n",  "df"  ]  }  ],  "metadata": {