Ino de Bruijn Start add of reapr validation  over 9 years ago

Commit id: 852a4070d3802265d5a28981d76420b6a3e0b560

deletions | additions      

       

{  "metadata": {  "name": "",  "signature": "sha256:7d72c0deb4d46ef6a43fe8b440c208982f2c712cb7fc36e3e24f862aeb726762" "sha256:65079c5a799c29611e195a622030e25cceccfccde9dbb672755e6def48764004"  },  "nbformat": 3,  "nbformat_minor": 0, 

],  "prompt_number": 184  },  {  "cell_type": "heading",  "level": 4,  "metadata": {},  "source": [  "Extracting pure contigs with REAPR"  ]  },  {  "cell_type": "code",  "collapsed": false,  "input": [  "#rns31_path = \"/media/milou/glob/projects/masmvali-partdeux/reassembly-filtered-reads/Sample_1ng_even/metassemble/assemblies/ray/noscaf/noscaf_31/\"\n",  "rns31_path = \"/media/milou/proj/b2010008/nobackup/projects/metassemble_validation/LIMS1884_even/REAPR/assemblies/ray/noscaf/noscaf_31/\"\n",  "# The converter changes the renamed contigs back to the original name\n",  "creapr = pd.read_table(os.path.join(rns31_path, \"reapr_out/05.summary.stats.tsv\"), \n",  " converters = {0: lambda x: \"-\".join(x.split('_')[0:2])}).sort(\"contig\")\n",  "creapr.head()"  ],  "language": "python",  "metadata": {},  "outputs": [  {  "ename": "KeyError",  "evalue": "u'no item named contig'",  "output_type": "pyerr",  "traceback": [  "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",  "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;31m# The converter changes the renamed contings back to the original name\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m creapr = pd.read_table(os.path.join(rns31_path, \"reapr_out/05.summary.stats.tsv\"), \n\u001b[1;32m----> 5\u001b[1;33m converters = {0: lambda x: \"-\".join(x.split('_')[0:2])}).sort(\"contig\")\n\u001b[0m\u001b[0;32m 6\u001b[0m \u001b[0mcreapr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",  "\u001b[1;32m/home/idb/virtual-environments/py27-env/local/lib/python2.7/site-packages/pandas/core/frame.pyc\u001b[0m in \u001b[0;36msort\u001b[1;34m(self, columns, column, axis, ascending, inplace)\u001b[0m\n\u001b[0;32m 2534\u001b[0m \u001b[0mcolumns\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcolumn\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2535\u001b[0m return self.sort_index(by=columns, axis=axis, ascending=ascending,\n\u001b[1;32m-> 2536\u001b[1;33m inplace=inplace)\n\u001b[0m\u001b[0;32m 2537\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2538\u001b[0m def sort_index(self, axis=0, by=None, ascending=True, inplace=False,\n",  "\u001b[1;32m/home/idb/virtual-environments/py27-env/local/lib/python2.7/site-packages/pandas/core/frame.pyc\u001b[0m in \u001b[0;36msort_index\u001b[1;34m(self, axis, by, ascending, inplace, kind)\u001b[0m\n\u001b[0;32m 2600\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2601\u001b[0m \u001b[0mby\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mby\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2602\u001b[1;33m \u001b[0mk\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mby\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2603\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mk\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2604\u001b[0m raise ValueError('Cannot sort by duplicate column %s'\n",  "\u001b[1;32m/home/idb/virtual-environments/py27-env/local/lib/python2.7/site-packages/pandas/core/frame.pyc\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 1656\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1657\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1658\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getitem_column\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1659\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1660\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_getitem_column\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",  "\u001b[1;32m/home/idb/virtual-environments/py27-env/local/lib/python2.7/site-packages/pandas/core/frame.pyc\u001b[0m in \u001b[0;36m_getitem_column\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 1663\u001b[0m \u001b[1;31m# get column\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1664\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mis_unique\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1665\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_get_item_cache\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1666\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1667\u001b[0m \u001b[1;31m# duplicate columns & possible reduce dimensionaility\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",  "\u001b[1;32m/home/idb/virtual-environments/py27-env/local/lib/python2.7/site-packages/pandas/core/generic.pyc\u001b[0m in \u001b[0;36m_get_item_cache\u001b[1;34m(self, item)\u001b[0m\n\u001b[0;32m 1003\u001b[0m \u001b[0mres\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcache\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1004\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mres\u001b[0m \u001b[1;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1005\u001b[1;33m \u001b[0mvalues\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1006\u001b[0m \u001b[0mres\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_box_item_values\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1007\u001b[0m \u001b[0mcache\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mitem\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mres\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",  "\u001b[1;32m/home/idb/virtual-environments/py27-env/local/lib/python2.7/site-packages/pandas/core/internals.pyc\u001b[0m in \u001b[0;36mget\u001b[1;34m(self, item)\u001b[0m\n\u001b[0;32m 2871\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_for_nan_indexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2872\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2873\u001b[1;33m \u001b[0m_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mblock\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_find_block\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2874\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mblock\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2875\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",  "\u001b[1;32m/home/idb/virtual-environments/py27-env/local/lib/python2.7/site-packages/pandas/core/internals.pyc\u001b[0m in \u001b[0;36m_find_block\u001b[1;34m(self, item)\u001b[0m\n\u001b[0;32m 3183\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3184\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_find_block\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mitem\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3185\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_check_have\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3186\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mblock\u001b[0m \u001b[1;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mblocks\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3187\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mitem\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mblock\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",  "\u001b[1;32m/home/idb/virtual-environments/py27-env/local/lib/python2.7/site-packages/pandas/core/internals.pyc\u001b[0m in \u001b[0;36m_check_have\u001b[1;34m(self, item)\u001b[0m\n\u001b[0;32m 3190\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_check_have\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mitem\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3191\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mitem\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3192\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'no item named %s'\u001b[0m \u001b[1;33m%\u001b[0m \u001b[0mcom\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpprint_thing\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3193\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3194\u001b[0m def reindex_axis(self, new_axis, indexer=None, method=None, axis=0,\n",  "\u001b[1;31mKeyError\u001b[0m: u'no item named contig'"  ]  }  ],  "prompt_number": 4  },  {  "cell_type": "heading",  "level": 2,