this is for holding javascript data
Steven Roberts ck
about 9 years ago
Commit id: d910806d3eb462724b7e191861df21194779cb0c
deletions | additions
diff --git a/ipynb/.ipynb_checkpoints/RNA-seq-Gene-ID-checkpoint.ipynb b/ipynb/.ipynb_checkpoints/RNA-seq-Gene-ID-checkpoint.ipynb
index e40ef82..76201ae 100644
--- a/ipynb/.ipynb_checkpoints/RNA-seq-Gene-ID-checkpoint.ipynb
+++ b/ipynb/.ipynb_checkpoints/RNA-seq-Gene-ID-checkpoint.ipynb
...
{
"metadata": {
"name": "",
"signature":
"sha256:73fb80bb863e1f9740209284d96835d3b1ec04c670a411cf5a64513ea8e4389b" "sha256:58520ef3de968be9d9be080cce36c7cb50bbcac375e0941b7d838be79fa72e50"
},
"nbformat": 3,
"nbformat_minor": 0,
...
],
"prompt_number": 25
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!cp /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/Cuffdiff2_heat-b-2014-12-20-22-27-15.4/rebuilt.gtf \\\n",
"/Users/sr320/data-genomic/tentacle/"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 31
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!tail /Users/sr320/data-genomic/tentacle/rebuilt.gtf"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"scaffold999\tCufflinks\texon\t122398\t122535\t.\t+\t.\tgene_id CGI_10006973; tss_id \"TSS54790\"; nearest_ref \"EKC31257\"; exon_number \"7\"; class_code \"=\"; p_id \"P26083\"; gene_name \"CGI_10006973\"; transcript_id \"EKC31257\"\r\n",
"scaffold999\tCufflinks\tCDS\t122398\t122535\t.\t+\t.\tgene_id CGI_10006973; tss_id \"TSS54790\"; nearest_ref \"EKC31257\"; exon_number \"7\"; class_code \"=\"; p_id \"P26083\"; gene_name \"CGI_10006973\"; transcript_id \"EKC31257\"\r\n",
"scaffold999\tCufflinks\texon\t123424\t123555\t.\t+\t.\tgene_id CGI_10006973; tss_id \"TSS54790\"; nearest_ref \"EKC31257\"; exon_number \"8\"; class_code \"=\"; p_id \"P26083\"; gene_name \"CGI_10006973\"; transcript_id \"EKC31257\"\r\n",
"scaffold999\tCufflinks\tCDS\t123424\t123555\t.\t+\t.\tgene_id CGI_10006973; tss_id \"TSS54790\"; nearest_ref \"EKC31257\"; exon_number \"8\"; class_code \"=\"; p_id \"P26083\"; gene_name \"CGI_10006973\"; transcript_id \"EKC31257\"\r\n",
"scaffold999\tCufflinks\texon\t124859\t124996\t.\t+\t.\tgene_id CGI_10006973; tss_id \"TSS54790\"; nearest_ref \"EKC31257\"; exon_number \"9\"; class_code \"=\"; p_id \"P26083\"; gene_name \"CGI_10006973\"; transcript_id \"EKC31257\"\r\n",
"scaffold999\tCufflinks\tCDS\t124859\t124996\t.\t+\t.\tgene_id CGI_10006973; tss_id \"TSS54790\"; nearest_ref \"EKC31257\"; exon_number \"9\"; class_code \"=\"; p_id \"P26083\"; gene_name \"CGI_10006973\"; transcript_id \"EKC31257\"\r\n",
"scaffold999\tCufflinks\texon\t126012\t126143\t.\t+\t.\tgene_id CGI_10006973; tss_id \"TSS54790\"; nearest_ref \"EKC31257\"; exon_number \"10\"; class_code \"=\"; p_id \"P26083\"; gene_name \"CGI_10006973\"; transcript_id \"EKC31257\"\r\n",
"scaffold999\tCufflinks\tCDS\t126012\t126143\t.\t+\t.\tgene_id CGI_10006973; tss_id \"TSS54790\"; nearest_ref \"EKC31257\"; exon_number \"10\"; class_code \"=\"; p_id \"P26083\"; gene_name \"CGI_10006973\"; transcript_id \"EKC31257\"\r\n",
"scaffold999\tCufflinks\texon\t126617\t126675\t.\t+\t.\tgene_id CGI_10006973; tss_id \"TSS54790\"; nearest_ref \"EKC31257\"; exon_number \"11\"; class_code \"=\"; p_id \"P26083\"; gene_name \"CGI_10006973\"; transcript_id \"EKC31257\"\r\n",
"scaffold999\tCufflinks\tCDS\t126617\t126675\t.\t+\t.\tgene_id CGI_10006973; tss_id \"TSS54790\"; nearest_ref \"EKC31257\"; exon_number \"11\"; class_code \"=\"; p_id \"P26083\"; gene_name \"CGI_10006973\"; transcript_id \"EKC31257\"\r\n"
]
}
],
"prompt_number": 40
},
{
"cell_type": "code",
"collapsed": false,
...
"cell_type": "code",
"collapsed": false,
"input": [
"!awk '{print
$1,$2}' " $10}' /Users/sr320/data-genomic/tentacle/rebuilt.gtf | rev | cut -c 2- | rev > \\\n",
"/Users/sr320/data-genomic/tentacle/rebuilt.gtf.geneID\n",
"!head /Users/sr320/data-genomic/tentacle/rebuilt.gtf.geneID\n",
"!wc -l /Users/sr320/data-genomic/tentacle/rebuilt.gtf.geneID"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"XLOC_000001\r\n",
"XLOC_000001\r\n",
"XLOC_000002\r\n",
"XLOC_000002\r\n",
"XLOC_000003\r\n",
"XLOC_000003\r\n",
"XLOC_000004\r\n",
"XLOC_000004\r\n",
"XLOC_000005\r\n",
"XLOC_000005\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 1347244 /Users/sr320/data-genomic/tentacle/rebuilt.gtf.geneID\r\n"
]
}
],
"prompt_number": 57
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!wc -l /Users/sr320/data-genomic/tentacle/rebuilt.gtf"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 1347244 /Users/sr320/data-genomic/tentacle/rebuilt.gtf\r\n"
]
}
],
"prompt_number": 58
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!paste /Users/sr320/data-genomic/tentacle/rebuilt.gtf \\\n",
"/Users/sr320/data-genomic/tentacle/rebuilt.gtf.geneID \\\n",
"> /Users/sr320/data-genomic/tentacle/rebuilt.gtf.geneIDend"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 60
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!head /Users/sr320/data-genomic/tentacle/rebuilt.gtf.geneIDend"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"C12764\tCufflinks\texon\t28\t201\t.\t.\t.\tgene_id XLOC_000001; tss_id \"TSS1\"; oId \"CUFF.1.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000001\"\tXLOC_000001\r\n",
"C12764\tCufflinks\tCDS\t28\t201\t.\t.\t.\tgene_id XLOC_000001; tss_id \"TSS1\"; oId \"CUFF.1.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000001\"\tXLOC_000001\r\n",
"C12768\tCufflinks\texon\t4\t189\t.\t.\t.\tgene_id XLOC_000002; tss_id \"TSS2\"; oId \"CUFF.2.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000002\"\tXLOC_000002\r\n",
"C12768\tCufflinks\tCDS\t4\t189\t.\t.\t.\tgene_id XLOC_000002; tss_id \"TSS2\"; oId \"CUFF.2.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000002\"\tXLOC_000002\r\n",
"C12830\tCufflinks\texon\t1\t198\t.\t.\t.\tgene_id XLOC_000003; tss_id \"TSS3\"; oId \"CUFF.3.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000003\"\tXLOC_000003\r\n",
"C12830\tCufflinks\tCDS\t1\t198\t.\t.\t.\tgene_id XLOC_000003; tss_id \"TSS3\"; oId \"CUFF.3.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000003\"\tXLOC_000003\r\n",
"C13102\tCufflinks\texon\t1\t209\t.\t.\t.\tgene_id XLOC_000004; tss_id \"TSS4\"; oId \"CUFF.4.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000004\"\tXLOC_000004\r\n",
"C13102\tCufflinks\tCDS\t1\t209\t.\t.\t.\tgene_id XLOC_000004; tss_id \"TSS4\"; oId \"CUFF.4.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000004\"\tXLOC_000004\r\n",
"C13868\tCufflinks\texon\t3\t203\t.\t.\t.\tgene_id XLOC_000005; tss_id \"TSS5\"; oId \"CUFF.5.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000005\"\tXLOC_000005\r\n",
"C13868\tCufflinks\tCDS\t3\t203\t.\t.\t.\tgene_id XLOC_000005; tss_id \"TSS5\"; oId \"CUFF.5.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000005\"\tXLOC_000005\r\n"
]
}
],
"prompt_number": 61
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"sqls=\"/Applications/bioinfo/sqlshare-pythonclient/tools/\""
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 64
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!python {sqls}singleupload.py \\\n",
"-d _rebuilt.gtf.geneIDend \\\n",
"/Users/sr320/data-genomic/tentacle/rebuilt.gtf.geneIDend "
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"processing chunk line 0 to 486015 (0.167377948761 s elapsed)\r\n",
"pushing /Users/sr320/data-genomic/tentacle/rebuilt.gtf.geneIDend...\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"parsing 06912255...\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"processing chunk line 486015 to 969459 (98.0686910152 s elapsed)\r\n",
"pushing /Users/sr320/data-genomic/tentacle/rebuilt.gtf.geneIDend...\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"parsing 149D1ED9...\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"processing chunk line 969459 to 1347244 (209.124155998 s elapsed)\r\n",
"pushing /Users/sr320/data-genomic/tentacle/rebuilt.gtf.geneIDend...\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"parsing EDC0423B...\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"finished _rebuilt.gtf.geneIDend\r\n"
]
}
],
"prompt_number": 65
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!python {sqls}singleupload.py \\\n",
"-d _cuffdiffgenes.sorted_by_expression.sig.txt \\\n",
"./data/Cuffdiff2_heat-b-2014-12-20-22-27-15.4/sorted_data/genes.sorted_by_expression.sig.txt"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"processing chunk line 0 to 1551 (0.000243902206421 s elapsed)\r\n",
"pushing ./data/Cuffdiff2_heat-b-2014-12-20-22-27-15.4/sorted_data/genes.sorted_by_expression.sig.txt...\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"parsing 9504F7E0...\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"finished _cuffdiffgenes.sorted_by_expression.sig.txt\r\n"
]
}
],
"prompt_number": 66
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!python {sqls}fetchdata.py \\\n",
"-s \"SELECT Column1, Column2, Column3, Column4, Column5, Column6, Column7, Column8, Column9 \\\n",
"FROM [[email protected]].[_cuffdiffgenes.sorted_by_expression.sig.txt]sig \\\n",
"left join \\\n",
"[[email protected]].[_rebuilt.gtf.geneIDend]id \\\n",
"on \\\n",
"sig.gene_ID=id.Column10\" \\\n",
"-f tsv \\\n",
"-o /Users/sr320/data-genomic/tentacle/diffgene.gtf\n",
"!head /Users/sr320/data-genomic/tentacle/diffgene.gtf "
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Column1\tColumn2\tColumn3\tColumn4\tColumn5\tColumn6\tColumn7\tColumn8\tColumn9\r",
"\r\n",
"C17036\tCufflinks\texon\t245\t442\t.\t.\t.\tgene_id XLOC_000059; tss_id \"TSS59\"; oId \"CUFF.56.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000059\"\r",
"\r\n",
"C17036\tCufflinks\tCDS\t245\t442\t.\t.\t.\tgene_id XLOC_000059; tss_id \"TSS59\"; oId \"CUFF.56.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000059\"\r",
"\r\n",
"C18346\tCufflinks\texon\t174\t551\t.\t+\t.\tgene_id CGI_10000009; tss_id \"TSS114\"; nearest_ref \"EKC17950\"; exon_number \"1\"; class_code \"=\"; p_id \"P6\"; gene_name \"CGI_10000009\"; transcript_id \"EKC17950\"\r",
"\r\n",
"C18346\tCufflinks\tCDS\t174\t551\t.\t+\t.\tgene_id CGI_10000009; tss_id \"TSS114\"; nearest_ref \"EKC17950\"; exon_number \"1\"; class_code \"=\"; p_id \"P6\"; gene_name \"CGI_10000009\"; transcript_id \"EKC17950\"\r",
"\r\n",
"C18548\tCufflinks\texon\t1\t33\t.\t+\t.\tgene_id XLOC_000124; tss_id \"TSS124\"; oId \"CUFF.116.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000124\"\r",
"\r\n",
"C18548\tCufflinks\tCDS\t1\t33\t.\t+\t.\tgene_id XLOC_000124; tss_id \"TSS124\"; oId \"CUFF.116.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000124\"\r",
"\r\n",
"C18548\tCufflinks\texon\t300\t629\t.\t+\t.\tgene_id XLOC_000124; tss_id \"TSS124\"; oId \"CUFF.116.1\"; exon_number \"2\"; class_code \"u\"; transcript_id \"TCONS_00000124\"\r",
"\r\n",
"C18548\tCufflinks\tCDS\t300\t629\t.\t+\t.\tgene_id XLOC_000124; tss_id \"TSS124\"; oId \"CUFF.116.1\"; exon_number \"2\"; class_code \"u\"; transcript_id \"TCONS_00000124\"\r",
"\r\n",
"C18752\tCufflinks\texon\t2\t631\t.\t.\t.\tgene_id XLOC_000133; tss_id \"TSS133\"; oId \"CUFF.125.1\"; exon_number \"1\"; class_code \"u\"; transcript_id \"TCONS_00000133\"\r",
"\r\n"
]
}
],
"prompt_number": 71
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!wc -l /Users/sr320/data-genomic/tentacle/diffgene.gtf "
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 122039 /Users/sr320/data-genomic/tentacle/diffgene.gtf\r\n"
]
}
],
"prompt_number": 72
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!tail -n +2 /Users/sr320/data-genomic/tentacle/diffgene.gtf > /Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf "
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 73
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Product GTF of ONLY Diffexp genes"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],