Steven Roberts ck  about 9 years ago

Commit id: d910806d3eb462724b7e191861df21194779cb0c

deletions | additions      

       

{  "metadata": {  "name": "",  "signature": "sha256:73fb80bb863e1f9740209284d96835d3b1ec04c670a411cf5a64513ea8e4389b" "sha256:58520ef3de968be9d9be080cce36c7cb50bbcac375e0941b7d838be79fa72e50"  },  "nbformat": 3,  "nbformat_minor": 0, 

],  "prompt_number": 25  },  {  "cell_type": "code",  "collapsed": false,  "input": [  "!cp /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/Cuffdiff2_heat-b-2014-12-20-22-27-15.4/rebuilt.gtf \\\n",  "/Users/sr320/data-genomic/tentacle/"  ],  "language": "python",  "metadata": {},  "outputs": [],  "prompt_number": 31  },  {  "cell_type": "code",  "collapsed": false,  "input": [  "!tail /Users/sr320/data-genomic/tentacle/rebuilt.gtf"  ],  "language": "python",  "metadata": {},  "outputs": [  {  "output_type": "stream",  "stream": "stdout",  "text": [  "scaffold999\tCufflinks\texon\t122398\t122535\t.\t+\t.\tgene_id CGI_10006973; tss_id \"TSS54790\"; nearest_ref \"EKC31257\"; exon_number \"7\"; class_code \"=\"; p_id \"P26083\"; gene_name \"CGI_10006973\"; transcript_id \"EKC31257\"\r\n",  "scaffold999\tCufflinks\tCDS\t122398\t122535\t.\t+\t.\tgene_id CGI_10006973; tss_id \"TSS54790\"; nearest_ref \"EKC31257\"; exon_number \"7\"; class_code \"=\"; p_id \"P26083\"; gene_name \"CGI_10006973\"; transcript_id \"EKC31257\"\r\n",  "scaffold999\tCufflinks\texon\t123424\t123555\t.\t+\t.\tgene_id CGI_10006973; tss_id \"TSS54790\"; nearest_ref \"EKC31257\"; exon_number \"8\"; class_code \"=\"; p_id \"P26083\"; gene_name \"CGI_10006973\"; transcript_id \"EKC31257\"\r\n",  "scaffold999\tCufflinks\tCDS\t123424\t123555\t.\t+\t.\tgene_id CGI_10006973; tss_id \"TSS54790\"; nearest_ref \"EKC31257\"; exon_number \"8\"; class_code \"=\"; p_id \"P26083\"; gene_name \"CGI_10006973\"; transcript_id \"EKC31257\"\r\n",  "scaffold999\tCufflinks\texon\t124859\t124996\t.\t+\t.\tgene_id CGI_10006973; tss_id \"TSS54790\"; nearest_ref \"EKC31257\"; exon_number \"9\"; class_code \"=\"; p_id \"P26083\"; gene_name \"CGI_10006973\"; transcript_id \"EKC31257\"\r\n",  "scaffold999\tCufflinks\tCDS\t124859\t124996\t.\t+\t.\tgene_id CGI_10006973; tss_id \"TSS54790\"; nearest_ref \"EKC31257\"; exon_number \"9\"; class_code \"=\"; p_id \"P26083\"; gene_name \"CGI_10006973\"; transcript_id \"EKC31257\"\r\n",  "scaffold999\tCufflinks\texon\t126012\t126143\t.\t+\t.\tgene_id CGI_10006973; tss_id \"TSS54790\"; nearest_ref \"EKC31257\"; exon_number \"10\"; class_code \"=\"; p_id \"P26083\"; gene_name \"CGI_10006973\"; transcript_id \"EKC31257\"\r\n",  "scaffold999\tCufflinks\tCDS\t126012\t126143\t.\t+\t.\tgene_id CGI_10006973; tss_id \"TSS54790\"; nearest_ref \"EKC31257\"; exon_number \"10\"; class_code \"=\"; p_id \"P26083\"; gene_name \"CGI_10006973\"; transcript_id \"EKC31257\"\r\n",  "scaffold999\tCufflinks\texon\t126617\t126675\t.\t+\t.\tgene_id CGI_10006973; tss_id \"TSS54790\"; nearest_ref \"EKC31257\"; exon_number \"11\"; class_code \"=\"; p_id \"P26083\"; gene_name \"CGI_10006973\"; transcript_id \"EKC31257\"\r\n",  "scaffold999\tCufflinks\tCDS\t126617\t126675\t.\t+\t.\tgene_id CGI_10006973; tss_id \"TSS54790\"; nearest_ref \"EKC31257\"; exon_number \"11\"; class_code \"=\"; p_id \"P26083\"; gene_name \"CGI_10006973\"; transcript_id \"EKC31257\"\r\n"  ]  }  ],  "prompt_number": 40  },  {  "cell_type": "code",  "collapsed": false, 

"cell_type": "code",  "collapsed": false,  "input": [  "!awk '{print $1,$2}' " $10}' /Users/sr320/data-genomic/tentacle/rebuilt.gtf | rev | cut -c 2- | rev > \\\n",  "/Users/sr320/data-genomic/tentacle/rebuilt.gtf.geneID\n",  "!head /Users/sr320/data-genomic/tentacle/rebuilt.gtf.geneID\n",  "!wc -l /Users/sr320/data-genomic/tentacle/rebuilt.gtf.geneID"  ],  "language": "python",  "metadata": {},  "outputs": [  {  "output_type": "stream",  "stream": "stdout",  "text": [  "XLOC_000001\r\n",  "XLOC_000001\r\n",  "XLOC_000002\r\n",  "XLOC_000002\r\n",  "XLOC_000003\r\n",  "XLOC_000003\r\n",  "XLOC_000004\r\n",  "XLOC_000004\r\n",  "XLOC_000005\r\n",  "XLOC_000005\r\n"  ]  },  {  "output_type": "stream",  "stream": "stdout",  "text": [  " 1347244 /Users/sr320/data-genomic/tentacle/rebuilt.gtf.geneID\r\n"  ]  }  ],  "prompt_number": 57  },  {  "cell_type": "code",  "collapsed": false,  "input": [  "!wc -l /Users/sr320/data-genomic/tentacle/rebuilt.gtf"  ],  "language": "python",  "metadata": {},  "outputs": [  {  "output_type": "stream",  "stream": "stdout",  "text": [  " 1347244 /Users/sr320/data-genomic/tentacle/rebuilt.gtf\r\n"  ]  }  ],  "prompt_number": 58  },  {  "cell_type": "code",  "collapsed": false,  "input": [  "!paste /Users/sr320/data-genomic/tentacle/rebuilt.gtf \\\n",  "/Users/sr320/data-genomic/tentacle/rebuilt.gtf.geneID \\\n",  "> /Users/sr320/data-genomic/tentacle/rebuilt.gtf.geneIDend"  ],  "language": "python",  "metadata": {},  "outputs": [],  "prompt_number": 60  },  {  "cell_type": "code",  "collapsed": false,  "input": [  "!head /Users/sr320/data-genomic/tentacle/rebuilt.gtf.geneIDend"  ],  "language": "python",  "metadata": {},  "outputs": [  {  "output_type": "stream",  "stream": "stdout",  "text": [  "C12764\tCufflinks\texon\t28\t201\t.\t.\t.\tgene_id XLOC_000001; tss_id \"TSS1\"; oId \"CUFF.1.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000001\"\tXLOC_000001\r\n",  "C12764\tCufflinks\tCDS\t28\t201\t.\t.\t.\tgene_id XLOC_000001; tss_id \"TSS1\"; oId \"CUFF.1.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000001\"\tXLOC_000001\r\n",  "C12768\tCufflinks\texon\t4\t189\t.\t.\t.\tgene_id XLOC_000002; tss_id \"TSS2\"; oId \"CUFF.2.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000002\"\tXLOC_000002\r\n",  "C12768\tCufflinks\tCDS\t4\t189\t.\t.\t.\tgene_id XLOC_000002; tss_id \"TSS2\"; oId \"CUFF.2.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000002\"\tXLOC_000002\r\n",  "C12830\tCufflinks\texon\t1\t198\t.\t.\t.\tgene_id XLOC_000003; tss_id \"TSS3\"; oId \"CUFF.3.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000003\"\tXLOC_000003\r\n",  "C12830\tCufflinks\tCDS\t1\t198\t.\t.\t.\tgene_id XLOC_000003; tss_id \"TSS3\"; oId \"CUFF.3.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000003\"\tXLOC_000003\r\n",  "C13102\tCufflinks\texon\t1\t209\t.\t.\t.\tgene_id XLOC_000004; tss_id \"TSS4\"; oId \"CUFF.4.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000004\"\tXLOC_000004\r\n",  "C13102\tCufflinks\tCDS\t1\t209\t.\t.\t.\tgene_id XLOC_000004; tss_id \"TSS4\"; oId \"CUFF.4.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000004\"\tXLOC_000004\r\n",  "C13868\tCufflinks\texon\t3\t203\t.\t.\t.\tgene_id XLOC_000005; tss_id \"TSS5\"; oId \"CUFF.5.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000005\"\tXLOC_000005\r\n",  "C13868\tCufflinks\tCDS\t3\t203\t.\t.\t.\tgene_id XLOC_000005; tss_id \"TSS5\"; oId \"CUFF.5.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000005\"\tXLOC_000005\r\n"  ]  }  ],  "prompt_number": 61  },  {  "cell_type": "code",  "collapsed": false,  "input": [  "sqls=\"/Applications/bioinfo/sqlshare-pythonclient/tools/\""  ],  "language": "python",  "metadata": {},  "outputs": [],  "prompt_number": 64  },  {  "cell_type": "code",  "collapsed": false,  "input": [  "!python {sqls}singleupload.py \\\n",  "-d _rebuilt.gtf.geneIDend \\\n",  "/Users/sr320/data-genomic/tentacle/rebuilt.gtf.geneIDend "  ],  "language": "python",  "metadata": {},  "outputs": [  {  "output_type": "stream",  "stream": "stdout",  "text": [  "processing chunk line 0 to 486015 (0.167377948761 s elapsed)\r\n",  "pushing /Users/sr320/data-genomic/tentacle/rebuilt.gtf.geneIDend...\r\n"  ]  },  {  "output_type": "stream",  "stream": "stdout",  "text": [  "parsing 06912255...\r\n"  ]  },  {  "output_type": "stream",  "stream": "stdout",  "text": [  "processing chunk line 486015 to 969459 (98.0686910152 s elapsed)\r\n",  "pushing /Users/sr320/data-genomic/tentacle/rebuilt.gtf.geneIDend...\r\n"  ]  },  {  "output_type": "stream",  "stream": "stdout",  "text": [  "parsing 149D1ED9...\r\n"  ]  },  {  "output_type": "stream",  "stream": "stdout",  "text": [  "processing chunk line 969459 to 1347244 (209.124155998 s elapsed)\r\n",  "pushing /Users/sr320/data-genomic/tentacle/rebuilt.gtf.geneIDend...\r\n"  ]  },  {  "output_type": "stream",  "stream": "stdout",  "text": [  "parsing EDC0423B...\r\n"  ]  },  {  "output_type": "stream",  "stream": "stdout",  "text": [  "finished _rebuilt.gtf.geneIDend\r\n"  ]  }  ],  "prompt_number": 65  },  {  "cell_type": "code",  "collapsed": false,  "input": [  "!python {sqls}singleupload.py \\\n",  "-d _cuffdiffgenes.sorted_by_expression.sig.txt \\\n",  "./data/Cuffdiff2_heat-b-2014-12-20-22-27-15.4/sorted_data/genes.sorted_by_expression.sig.txt"  ],  "language": "python",  "metadata": {},  "outputs": [  {  "output_type": "stream",  "stream": "stdout",  "text": [  "processing chunk line 0 to 1551 (0.000243902206421 s elapsed)\r\n",  "pushing ./data/Cuffdiff2_heat-b-2014-12-20-22-27-15.4/sorted_data/genes.sorted_by_expression.sig.txt...\r\n"  ]  },  {  "output_type": "stream",  "stream": "stdout",  "text": [  "parsing 9504F7E0...\r\n"  ]  },  {  "output_type": "stream",  "stream": "stdout",  "text": [  "finished _cuffdiffgenes.sorted_by_expression.sig.txt\r\n"  ]  }  ],  "prompt_number": 66  },  {  "cell_type": "code",  "collapsed": false,  "input": [  "!python {sqls}fetchdata.py \\\n",  "-s \"SELECT Column1, Column2, Column3, Column4, Column5, Column6, Column7, Column8, Column9 \\\n",  "FROM [[email protected]].[_cuffdiffgenes.sorted_by_expression.sig.txt]sig \\\n",  "left join \\\n",  "[[email protected]].[_rebuilt.gtf.geneIDend]id \\\n",  "on \\\n",  "sig.gene_ID=id.Column10\" \\\n",  "-f tsv \\\n",  "-o /Users/sr320/data-genomic/tentacle/diffgene.gtf\n",  "!head /Users/sr320/data-genomic/tentacle/diffgene.gtf "  ],  "language": "python",  "metadata": {},  "outputs": [  {  "output_type": "stream",  "stream": "stdout",  "text": [  "Column1\tColumn2\tColumn3\tColumn4\tColumn5\tColumn6\tColumn7\tColumn8\tColumn9\r",  "\r\n",  "C17036\tCufflinks\texon\t245\t442\t.\t.\t.\tgene_id XLOC_000059; tss_id \"TSS59\"; oId \"CUFF.56.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000059\"\r",  "\r\n",  "C17036\tCufflinks\tCDS\t245\t442\t.\t.\t.\tgene_id XLOC_000059; tss_id \"TSS59\"; oId \"CUFF.56.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000059\"\r",  "\r\n",  "C18346\tCufflinks\texon\t174\t551\t.\t+\t.\tgene_id CGI_10000009; tss_id \"TSS114\"; nearest_ref \"EKC17950\"; exon_number \"1\"; class_code \"=\"; p_id \"P6\"; gene_name \"CGI_10000009\"; transcript_id \"EKC17950\"\r",  "\r\n",  "C18346\tCufflinks\tCDS\t174\t551\t.\t+\t.\tgene_id CGI_10000009; tss_id \"TSS114\"; nearest_ref \"EKC17950\"; exon_number \"1\"; class_code \"=\"; p_id \"P6\"; gene_name \"CGI_10000009\"; transcript_id \"EKC17950\"\r",  "\r\n",  "C18548\tCufflinks\texon\t1\t33\t.\t+\t.\tgene_id XLOC_000124; tss_id \"TSS124\"; oId \"CUFF.116.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000124\"\r",  "\r\n",  "C18548\tCufflinks\tCDS\t1\t33\t.\t+\t.\tgene_id XLOC_000124; tss_id \"TSS124\"; oId \"CUFF.116.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000124\"\r",  "\r\n",  "C18548\tCufflinks\texon\t300\t629\t.\t+\t.\tgene_id XLOC_000124; tss_id \"TSS124\"; oId \"CUFF.116.1\"; exon_number \"2\"; class_code \"u\"; transcript_id \"TCONS_00000124\"\r",  "\r\n",  "C18548\tCufflinks\tCDS\t300\t629\t.\t+\t.\tgene_id XLOC_000124; tss_id \"TSS124\"; oId \"CUFF.116.1\"; exon_number \"2\"; class_code \"u\"; transcript_id \"TCONS_00000124\"\r",  "\r\n",  "C18752\tCufflinks\texon\t2\t631\t.\t.\t.\tgene_id XLOC_000133; tss_id \"TSS133\"; oId \"CUFF.125.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000133\"\r",  "\r\n"  ]  }  ],  "prompt_number": 71  },  {  "cell_type": "code",  "collapsed": false,  "input": [  "!wc -l /Users/sr320/data-genomic/tentacle/diffgene.gtf "  ],  "language": "python",  "metadata": {},  "outputs": [  {  "output_type": "stream",  "stream": "stdout",  "text": [  " 122039 /Users/sr320/data-genomic/tentacle/diffgene.gtf\r\n"  ]  }  ],  "prompt_number": 72  },  {  "cell_type": "code",  "collapsed": false,  "input": [  "!tail -n +2 /Users/sr320/data-genomic/tentacle/diffgene.gtf > /Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf "  ],  "language": "python",  "metadata": {},  "outputs": [],  "prompt_number": 73  },  {  "cell_type": "heading",  "level": 1,  "metadata": {},  "source": [  "Product GTF of ONLY Diffexp genes"  ]  },  {  "cell_type": "code",  "collapsed": false,  "input": [],  "language": "python",  "metadata": {},  "outputs": []  }  ],