deletions | additions
diff --git a/ipynb/.ipynb_checkpoints/Array-feature-overlap-02-checkpoint.ipynb b/ipynb/.ipynb_checkpoints/Array-feature-overlap-02-checkpoint.ipynb
index 154bf5e..223ff3d 100644
--- a/ipynb/.ipynb_checkpoints/Array-feature-overlap-02-checkpoint.ipynb
+++ b/ipynb/.ipynb_checkpoints/Array-feature-overlap-02-checkpoint.ipynb
...
{
"metadata": {
"name": "",
"signature": "sha256:5d7639992d2c094e51f32dfdb067917953da07822fc898a50b49306fe5f986d3"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": "cells": [
{
"cells": [
{ "cell_type":
"heading",
"level": 1, "markdown",
"metadata": {},
"source": [
"Re-defining "# Re-defining canonical C gigas Genome Tracks"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"via Ensembl"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"scaffold44098\tdust\trepeat_region\t518076\t518099\t.\t.\t.\tName=dust;class=dust;type=Dust\r\n",
"scaffold44098\tdust\trepeat_region\t519261\t519281\t.\t.\t.\tName=dust;class=dust;type=Dust\r\n",
"scaffold44098\ttrf\trepeat_region\t519261\t519281\t.\t.\t.\tName=trf;class=trf;repeat_consensus=AT;type=Tandem repeats\r\n"
]
}
],
"source": [
"!tail -3 /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3"
]
},
{
"cell_type":
"markdown", "code",
"execution_count": 8,
"metadata":
{},
"source": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"via Ensembl" "186890 CDS\r\n",
" 5 RNA\r\n",
"189468 exon\r\n",
"26114 gene\r\n",
" 28 miRNA\r\n",
" 28 miRNA_gene\r\n",
"1410 pseudogenic_tRNA\r\n",
" 13 rRNA\r\n",
" 13 rRNA_gene\r\n",
"875275 repeat_region\r\n",
" 47 snRNA\r\n",
" 47 snRNA_gene\r\n",
" 20 snoRNA\r\n",
" 20 snoRNA_gene\r\n",
" 994 tRNA_gene\r\n",
"28523 transcript\r\n"
]
}
],
"source": [
"!cut -f 3 \\\n",
"/Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n",
"| sort | uniq -c | sed '/#/d'"
]
},
{
"cell_type":
"markdown", "code",
"execution_count": 10,
"metadata":
{},
"source": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"" " 5 EnsemblGenomes\tRNA\r\n",
"2530 EnsemblGenomes\texon\r\n",
" 13 EnsemblGenomes\tgene\r\n",
" 28 EnsemblGenomes\tmiRNA\r\n",
" 28 EnsemblGenomes\tmiRNA_gene\r\n",
"1410 EnsemblGenomes\tpseudogenic_tRNA\r\n",
" 13 EnsemblGenomes\trRNA\r\n",
" 13 EnsemblGenomes\trRNA_gene\r\n",
" 47 EnsemblGenomes\tsnRNA\r\n",
" 47 EnsemblGenomes\tsnRNA_gene\r\n",
" 20 EnsemblGenomes\tsnoRNA\r\n",
" 20 EnsemblGenomes\tsnoRNA_gene\r\n",
" 994 EnsemblGenomes\ttRNA_gene\r\n",
"2422 EnsemblGenomes\ttranscript\r\n",
"186890 GigaDB\tCDS\r\n",
"186938 GigaDB\texon\r\n",
"26101 GigaDB\tgene\r\n",
"26101 GigaDB\ttranscript\r\n",
"650376 dust\trepeat_region\r\n",
"224899 trf\trepeat_region\r\n"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!tail -3 /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"scaffold44098\tdust\trepeat_region\t518076\t518099\t.\t.\t.\tName=dust;class=dust;type=Dust\r\n",
"scaffold44098\tdust\trepeat_region\t519261\t519281\t.\t.\t.\tName=dust;class=dust;type=Dust\r\n",
"scaffold44098\ttrf\trepeat_region\t519261\t519281\t.\t.\t.\tName=trf;class=trf;repeat_consensus=AT;type=Tandem repeats\r\n"
] }
],
"prompt_number": 19
},
{
"cell_type": "code",
"collapsed": false,
"input": "source": [
"!cut -f
3 2,3 \\\n",
"/Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"186890 CDS\r\n",
" 5 RNA\r\n",
"189468 exon\r\n",
"26114 gene\r\n",
" 28 miRNA\r\n",
" 28 miRNA_gene\r\n",
"1410 pseudogenic_tRNA\r\n",
" 13 rRNA\r\n",
" 13 rRNA_gene\r\n",
"875275 repeat_region\r\n",
" 47 snRNA\r\n",
" 47 snRNA_gene\r\n",
" 20 snoRNA\r\n",
" 20 snoRNA_gene\r\n",
" 994 tRNA_gene\r\n",
"28523 transcript\r\n"
]
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!cut -f 2,3 \\\n",
"/Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python", "execution_count": 3,
"metadata":
{},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 5 EnsemblGenomes\tRNA\r\n",
"2530 EnsemblGenomes\texon\r\n",
" 13 EnsemblGenomes\tgene\r\n",
" 28 EnsemblGenomes\tmiRNA\r\n",
" 28 EnsemblGenomes\tmiRNA_gene\r\n",
"1410 EnsemblGenomes\tpseudogenic_tRNA\r\n",
" 13 EnsemblGenomes\trRNA\r\n",
" 13 EnsemblGenomes\trRNA_gene\r\n",
" 47 EnsemblGenomes\tsnRNA\r\n",
" 47 EnsemblGenomes\tsnRNA_gene\r\n",
" 20 EnsemblGenomes\tsnoRNA\r\n",
" 20 EnsemblGenomes\tsnoRNA_gene\r\n",
" 994 EnsemblGenomes\ttRNA_gene\r\n",
"2422 EnsemblGenomes\ttranscript\r\n",
"186890 GigaDB\tCDS\r\n",
"186938 GigaDB\texon\r\n",
"26101 GigaDB\tgene\r\n",
"26101 GigaDB\ttranscript\r\n",
"650376 dust\trepeat_region\r\n",
"224899 trf\trepeat_region\r\n"
]
}
],
"prompt_number": 10
}, {
"cell_type": "code",
"collapsed":
false,
"input": [
"!tail /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gtf"
],
"language": "python",
"metadata": {}, false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"stream": "stdout",
"text": [
"scaffold44098\tprotein_coding\tCDS\t509746\t510288\t.\t-\t0\t gene_id \"CGI_10017729\"; gene_version \"1\"; transcript_id \"EKC17988\"; transcript_version \"1\"; exon_number \"2\"; protein_id \"EKC17988\";\r\n",
"scaffold44098\tprotein_coding\texon\t514550\t514690\t.\t-\t.\t gene_id \"CGI_10017729\"; gene_version \"1\"; transcript_id \"EKC17988\"; transcript_version \"1\"; exon_number \"1\"; seqedit \"false\";\r\n",
"scaffold44098\tprotein_coding\tCDS\t514550\t514690\t.\t-\t0\t gene_id \"CGI_10017729\"; gene_version \"1\"; transcript_id \"EKC17988\"; transcript_version \"1\"; exon_number \"1\"; protein_id \"EKC17988\";\r\n",
"scaffold44098\tprotein_coding\tstart_codon\t514688\t514690\t.\t-\t0\t gene_id \"CGI_10017729\"; gene_version \"1\"; transcript_id \"EKC17988\"; transcript_version \"1\"; exon_number \"1\";\r\n",
"scaffold44098\tprotein_coding\texon\t514859\t515511\t.\t-\t.\t gene_id \"CGI_10017730\"; gene_version \"1\"; transcript_id \"EKC17989\"; transcript_version \"1\"; exon_number \"2\"; seqedit \"false\";\r\n",
"scaffold44098\tprotein_coding\tstop_codon\t514859\t514861\t.\t-\t0\t gene_id \"CGI_10017730\"; gene_version \"1\"; transcript_id \"EKC17989\"; transcript_version \"1\"; exon_number \"2\";\r\n",
"scaffold44098\tprotein_coding\tCDS\t514862\t515511\t.\t-\t2\t gene_id \"CGI_10017730\"; gene_version \"1\"; transcript_id \"EKC17989\"; transcript_version \"1\"; exon_number \"2\"; protein_id \"EKC17989\";\r\n",
"scaffold44098\tprotein_coding\texon\t515871\t515877\t.\t-\t.\t gene_id \"CGI_10017730\"; gene_version \"1\"; transcript_id \"EKC17989\"; transcript_version \"1\"; exon_number \"1\"; seqedit \"false\";\r\n",
"scaffold44098\tprotein_coding\tCDS\t515871\t515877\t.\t-\t0\t gene_id \"CGI_10017730\"; gene_version \"1\"; transcript_id \"EKC17989\"; transcript_version \"1\"; exon_number \"1\"; protein_id \"EKC17989\";\r\n",
"scaffold44098\tprotein_coding\tstart_codon\t515875\t515877\t.\t-\t0\t gene_id \"CGI_10017730\"; gene_version \"1\"; transcript_id \"EKC17989\"; transcript_version \"1\"; exon_number \"1\";\r\n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!cut -f 2,3 \\\n",
"/Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gtf \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 2 RNase_MRP_RNA\texon\r\n",
" 1 RNase_P_RNA\texon\r\n",
" 10 SRP_RNA\texon\r\n",
" 28 miRNA\texon\r\n",
" 5 misc_RNA\texon\r\n",
" 48 nontranslating_CDS\texon\r\n",
"186890 protein_coding\tCDS\r\n",
"186890 protein_coding\texon\r\n",
"25587 protein_coding\tstart_codon\r\n",
"26087 protein_coding\tstop_codon\r\n",
" 13 rRNA\texon\r\n",
" 47 snRNA\texon\r\n",
" 20 snoRNA\texon\r\n",
" 994 tRNA\texon\r\n",
"1410 tRNA_pseudogene\texon\r\n"
]
}
],
"prompt_number": 12
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"via GigaDB aka version9"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!tail -2 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_*.gff" }
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "source": [
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff <==\r\n",
"scaffold999\tflankbed\tpromoter\t99703\t100702\t.\t-\t.\tID=CGI_10006972;\r",
"\r\n",
"scaffold999\tflankbed\tpromoter\t106744\t107743\t.\t+\t.\tID=CGI_10006973;\r",
"\r\n",
"\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_CG.gff <==\r\n",
"scaffold38980\tfuzznuc\tnucleotide_motif\t63903\t63904\t2\t+\t.\tID=scaffold38980.744;note=*pat pattern:CG\r\n",
"scaffold38980\tfuzznuc\tnucleotide_motif\t64051\t64052\t2\t+\t.\tID=scaffold38980.745;note=*pat pattern:CG\r\n",
"\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-TANDEMREPEAT.gff <==\r\n",
"scaffold999\tTRF\tTandem_Repeat\t153009\t153196\t189\t+\t.\t.\r\n",
"scaffold999\tTRF\tTandem_Repeat\t166754\t166792\t69\t+\t.\t.\r\n",
"\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff <==\r\n",
"scaffold1009\tWUBlastX\tDNA_TcMar-Tc2\t1790325\t1790603\t20\t+\t.\t.\r\n",
"scaffold983\tWUBlastX\tDNA_TcMar-Tc1\t369636\t369770\t26\t-\t.\t.\r\n",
"\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE.gff <==\r\n",
"scaffold1009\tWUBlastX\tDNA_TcMar-Tc2\t1790325\t1790603\t20\t+\t.\t.\r\n",
"scaffold983\tWUBlastX\tDNA_TcMar-Tc1\t369636\t369770\t26\t-\t.\t.\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TEx.gff <==\r\n",
"scaffold1009\tWUBlastX\tDNA_TcMar-Tc2\t1790325\t1790603\t20\t+\t.\t.\r\n",
"scaffold983\tWUBlastX\tDNA_TcMar-Tc1\t369636\t369770\t26\t-\t.\t.\r\n",
"\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_exon.gff <==\r\n",
"scaffold22\tGLEAN\tCDS\t1870289\t1870360\t.\t-\t0\tParent=CGI_10028939;\r\n",
"scaffold22\tGLEAN\tCDS\t1869336\t1869428\t.\t-\t0\tParent=CGI_10028939;\r\n",
"\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_gene.gff <==\r\n",
"scaffold22\tGLEAN\tmRNA\t1863760\t1864161\t0.544455\t+\t.\tID=CGI_10028938;\r\n",
"scaffold22\tGLEAN\tmRNA\t1869336\t1885890\t0.999933\t-\t.\tID=CGI_10028939;\r\n",
"\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_intron.gff <==\r\n",
"scaffold999\tsubtractBed\tintrn\t124997\t126011\t.\t+\t.\tParent=CGI_10006973;\r",
"\r\n",
"scaffold999\tsubtractBed\tintrn\t126144\t126616\t.\t+\t.\tParent=CGI_10006973;\r",
"\r\n" "!tail /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gtf"
]
}
],
"prompt_number": 18
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!wc -l /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_*.gff"
],
"language": "python", "execution_count": 12,
"metadata":
{}, {
"collapsed": false
},
"outputs": [
{
"output_type": "stream",
"stream": "name": "stdout",
"text": [
" 28023 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"
10035701 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_CG.gff\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [ 2 RNase_MRP_RNA\texon\r\n",
"
61319 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-TANDEMREPEAT.gff\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [ 1 RNase_P_RNA\texon\r\n",
"
58468 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [ 10 SRP_RNA\texon\r\n",
"
119786 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE.gff\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [ 28 miRNA\texon\r\n",
"
58468 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TEx.gff\r\n", 5 misc_RNA\texon\r\n",
"
196691 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_exon.gff\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [ 48 nontranslating_CDS\texon\r\n",
"186890 protein_coding\tCDS\r\n",
"186890 protein_coding\texon\r\n",
"25587 protein_coding\tstart_codon\r\n",
"26087 protein_coding\tstop_codon\r\n",
"
28027 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_gene.gff\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [ 13 rRNA\texon\r\n",
"
176049 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_intron.gff\r\n", 47 snRNA\texon\r\n",
"
10762532 total\r\n"
]
}
],
"prompt_number": 15
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"" 20 snoRNA\texon\r\n",
" 994 tRNA\texon\r\n",
"1410 tRNA_pseudogene\texon\r\n"
]
}
],
"source": [
"!cut -f 2,3 \\\n",
"/Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gtf \\\n",
"| sort | uniq -c | sed '/#/d'"
]
},
{
"cell_type":
"heading",
"level": 1, "markdown",
"metadata": {},
"source": [
"Comparison" "# via GigaDB aka version9"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff <==\r\n",
"scaffold999\tflankbed\tpromoter\t99703\t100702\t.\t-\t.\tID=CGI_10006972;\r",
"\r\n",
"scaffold999\tflankbed\tpromoter\t106744\t107743\t.\t+\t.\tID=CGI_10006973;\r",
"\r\n",
"\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_CG.gff <==\r\n",
"scaffold38980\tfuzznuc\tnucleotide_motif\t63903\t63904\t2\t+\t.\tID=scaffold38980.744;note=*pat pattern:CG\r\n",
"scaffold38980\tfuzznuc\tnucleotide_motif\t64051\t64052\t2\t+\t.\tID=scaffold38980.745;note=*pat pattern:CG\r\n",
"\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-TANDEMREPEAT.gff <==\r\n",
"scaffold999\tTRF\tTandem_Repeat\t153009\t153196\t189\t+\t.\t.\r\n",
"scaffold999\tTRF\tTandem_Repeat\t166754\t166792\t69\t+\t.\t.\r\n",
"\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff <==\r\n",
"scaffold1009\tWUBlastX\tDNA_TcMar-Tc2\t1790325\t1790603\t20\t+\t.\t.\r\n",
"scaffold983\tWUBlastX\tDNA_TcMar-Tc1\t369636\t369770\t26\t-\t.\t.\r\n",
"\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE.gff <==\r\n",
"scaffold1009\tWUBlastX\tDNA_TcMar-Tc2\t1790325\t1790603\t20\t+\t.\t.\r\n",
"scaffold983\tWUBlastX\tDNA_TcMar-Tc1\t369636\t369770\t26\t-\t.\t.\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TEx.gff <==\r\n",
"scaffold1009\tWUBlastX\tDNA_TcMar-Tc2\t1790325\t1790603\t20\t+\t.\t.\r\n",
"scaffold983\tWUBlastX\tDNA_TcMar-Tc1\t369636\t369770\t26\t-\t.\t.\r\n",
"\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_exon.gff <==\r\n",
"scaffold22\tGLEAN\tCDS\t1870289\t1870360\t.\t-\t0\tParent=CGI_10028939;\r\n",
"scaffold22\tGLEAN\tCDS\t1869336\t1869428\t.\t-\t0\tParent=CGI_10028939;\r\n",
"\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_gene.gff <==\r\n",
"scaffold22\tGLEAN\tmRNA\t1863760\t1864161\t0.544455\t+\t.\tID=CGI_10028938;\r\n",
"scaffold22\tGLEAN\tmRNA\t1869336\t1885890\t0.999933\t-\t.\tID=CGI_10028939;\r\n",
"\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_intron.gff <==\r\n",
"scaffold999\tsubtractBed\tintrn\t124997\t126011\t.\t+\t.\tParent=CGI_10006973;\r",
"\r\n",
"scaffold999\tsubtractBed\tintrn\t126144\t126616\t.\t+\t.\tParent=CGI_10006973;\r",
"\r\n"
]
}
],
"source": [
"!tail -2 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_*.gff"
]
},
{
"cell_type":
"markdown", "code",
"execution_count": 15,
"metadata":
{},
"source": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"" " 28023 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff\n",
" 10035701 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_CG.gff\n",
" 61319 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-TANDEMREPEAT.gff\n",
" 58468 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff\n",
" 119786 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE.gff\n",
" 58468 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TEx.gff\n",
" 196691 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_exon.gff\n",
" 28027 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_gene.gff\n",
" 176049 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_intron.gff\n",
" 10762532 total\n"
]
}
],
"source": [
"!wc -l /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_*.gff"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Comparison"
]
},
{
"cell_type":
"heading",
"level": 1, "markdown",
"metadata": {},
"source": [
"Lets ""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Let's see if we can take the whole array and intersect it with the Ensembl gff"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1373 GigaDB\tCDS\r\n",
"1373 GigaDB\texon\r\n",
"8468 GigaDB\tgene\r\n",
"8468 GigaDB\ttranscript\r\n",
"1240 dust\trepeat_region\r\n",
" 975 trf\trepeat_region\r\n"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": }
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n",
"| cut -f 6,7 \\\n",
"| sort | uniq -c | sed '/#/d'\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1373 GigaDB\tCDS\r\n",
"1373 GigaDB\texon\r\n",
"8468 GigaDB\tgene\r\n",
"8468 GigaDB\ttranscript\r\n",
"1240 dust\trepeat_region\r\n",
" 975 trf\trepeat_region\r\n"
]
}
],
"prompt_number": 27
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n",
"| cut -f 6,7 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python", "execution_count": 28,
"metadata":
{}, {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"stream": "stdout",
"text": [
" 2 EnsemblGenomes\texon\r\n",
" 1 EnsemblGenomes\tpseudogenic_tRNA\r\n",
" 1 EnsemblGenomes\ttRNA_gene\r\n",
" 2 EnsemblGenomes\ttranscript\r\n",
"1177 GigaDB\tCDS\r\n",
"1177 GigaDB\texon\r\n",
"8491 GigaDB\tgene\r\n",
"8491 GigaDB\ttranscript\r\n",
"1320 dust\trepeat_region\r\n",
" 873 trf\trepeat_region\r\n"
]
}
],
"prompt_number": 28
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n",
"| cut -f 6,7 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 1 EnsemblGenomes\texon\r\n",
" 1 EnsemblGenomes\tsnRNA\r\n",
" 1 EnsemblGenomes\tsnRNA_gene\r\n",
" 947 GigaDB\tCDS\r\n",
" 948 GigaDB\texon\r\n",
"9689 GigaDB\tgene\r\n",
"9689 GigaDB\ttranscript\r\n",
"1591 dust\trepeat_region\r\n",
" 864 trf\trepeat_region\r\n"
]
}
],
"prompt_number": 29
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n",
"| cut -f 11,12 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 5 EnsemblGenomes\tRNA\r\n",
" 444 EnsemblGenomes\texon\r\n",
" 6 EnsemblGenomes\tgene\r\n",
" 2 EnsemblGenomes\tmiRNA\r\n",
" 2 EnsemblGenomes\tmiRNA_gene\r\n",
" 259 EnsemblGenomes\tpseudogenic_tRNA\r\n",
" 14 EnsemblGenomes\tsnRNA\r\n",
" 14 EnsemblGenomes\tsnRNA_gene\r\n",
" 6 EnsemblGenomes\tsnoRNA\r\n",
" 6 EnsemblGenomes\tsnoRNA_gene\r\n",
" 152 EnsemblGenomes\ttRNA_gene\r\n",
" 422 EnsemblGenomes\ttranscript\r\n",
"157279 GigaDB\tCDS\r\n",
"157307 GigaDB\texon\r\n",
"600445 GigaDB\tgene\r\n",
"600445 GigaDB\ttranscript\r\n",
"56210 dust\trepeat_region\r\n",
"42390 trf\trepeat_region\r\n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"TEs"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 383 WUBlastX\r\n"
] }
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": "source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph \\\n",
"-b
/Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n",
"| cut -f
6 6,7 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 254 WUBlastX\r\n"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python", "execution_count": 29,
"metadata":
{},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 168 WUBlastX\r\n"
]
}
],
"prompt_number": 8
}, {
"cell_type": "code",
"collapsed":
false,
"input": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff \\\n",
"| cut -f 11 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {}, false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"stream": "stdout",
"text": [
"10322 WUBlastX\r\n"
]
}
],
"prompt_number": 10
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Promoters" " 1 EnsemblGenomes\texon\r\n",
" 1 EnsemblGenomes\tsnRNA\r\n",
" 1 EnsemblGenomes\tsnRNA_gene\r\n",
" 947 GigaDB\tCDS\r\n",
" 948 GigaDB\texon\r\n",
"9689 GigaDB\tgene\r\n",
"9689 GigaDB\ttranscript\r\n",
"1591 dust\trepeat_region\r\n",
" 864 trf\trepeat_region\r\n"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff \\\n",
"| cut -f 6,7 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 976 flankbed\tpromoter\r\n"
] }
],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": "source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a
./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \\\n",
"-b
/Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n",
"| cut -f 6,7 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 992 flankbed\tpromoter\r\n"
]
}
],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff \\\n",
"| cut -f 6,7 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python", "execution_count": 3,
"metadata":
{},
"outputs": [ {
"output_type": "stream",
"stream": "stdout",
"text": [
"1248 flankbed\tpromoter\r\n"
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed":
false,
"input": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff \\\n",
"| cut -f 11 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {}, false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"stream": "stdout",
"text": [
"66368 flankbed\r\n"
]
}
],
"prompt_number": 15
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Plot" " 5 EnsemblGenomes\tRNA\r\n",
" 444 EnsemblGenomes\texon\r\n",
" 6 EnsemblGenomes\tgene\r\n",
" 2 EnsemblGenomes\tmiRNA\r\n",
" 2 EnsemblGenomes\tmiRNA_gene\r\n",
" 259 EnsemblGenomes\tpseudogenic_tRNA\r\n",
" 14 EnsemblGenomes\tsnRNA\r\n",
" 14 EnsemblGenomes\tsnRNA_gene\r\n",
" 6 EnsemblGenomes\tsnoRNA\r\n",
" 6 EnsemblGenomes\tsnoRNA_gene\r\n",
" 152 EnsemblGenomes\ttRNA_gene\r\n",
" 422 EnsemblGenomes\ttranscript\r\n",
"157279 GigaDB\tCDS\r\n",
"157307 GigaDB\texon\r\n",
"600445 GigaDB\tgene\r\n",
"600445 GigaDB\ttranscript\r\n",
"56210 dust\trepeat_region\r\n",
"42390 trf\trepeat_region\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n",
"| cut -f 11,12 \\\n",
"| sort | uniq -c | sed '/#/d'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"" "# TEs"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 383 WUBlastX\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c | sed '/#/d'"
]
},
{
"cell_type":
"markdown", "code",
"execution_count": 9,
"metadata":
{},
"source": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"```\n",
"oys2\toys4\toys6\tProbes\n",
"gene\t8468\t8491\t9689\t600445\n",
"exon\t1373\t1177\t948\t157307\n",
"intron\t7095\t7314\t8741\t443138\n",
"dust repeat\t1240\t1320\t1591\t56210\n",
"trf repeat\t975\t873\t864\t42390\n",
"TE-blast\t383\t254\t168\t10322\n",
"promoter\t976\t992\t1248\t66368\n",
"```" " 254 WUBlastX\r\n"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c | sed '/#/d'"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata":
{} {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 168 WUBlastX\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c | sed '/#/d'"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10322 WUBlastX\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff \\\n",
"| cut -f 11 \\\n",
"| sort | uniq -c | sed '/#/d'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Promoters"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 976 flankbed\tpromoter\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff \\\n",
"| cut -f 6,7 \\\n",
"| sort | uniq -c | sed '/#/d'"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 992 flankbed\tpromoter\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff \\\n",
"| cut -f 6,7 \\\n",
"| sort | uniq -c | sed '/#/d'"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1248 flankbed\tpromoter\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff \\\n",
"| cut -f 6,7 \\\n",
"| sort | uniq -c | sed '/#/d'"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"66368 flankbed\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff \\\n",
"| cut -f 11 \\\n",
"| sort | uniq -c | sed '/#/d'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Plot"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"```\n",
"feature\toys2\toys4\toys6\tProbes\n",
"gene\t8468\t8491\t9689\t600445\n",
"exon\t1373\t1177\t948\t157307\n",
"intron\t7095\t7314\t8741\t443138\n",
"dust repeat\t1240\t1320\t1591\t56210\n",
"trf repeat\t975\t873\t864\t42390\n",
"TE-blast\t383\t254\t168\t10322\n",
"promoter\t976\t992\t1248\t66368\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"# Analysis of one proportion"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"http://nbviewer.ipython.org/github/thomas-haslwanter/statsintro/blob/master/ipynb/70_compGroups.ipynb"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Populating the interactive namespace from numpy and matplotlib\n"
]
}
],
"source": [
"%pylab inline\n",
"import scipy.stats as stats"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ONE PROPORTION\n",
"The confidence interval for the given sample is 0.224 to 0.226\n"
]
}
],
"source": [
"# Get the data: exon row of the Probes column (numPositive) and the total it is a proportion of (numTotal)\n",
"numTotal = 697753\n",
"numPositive = 157307\n",
"\n",
"# Calculate the confidence intervals\n",
"p = float(numPositive)/numTotal\n",
"se = sqrt(p*(1-p)/numTotal)\n",
"td = stats.t(numTotal-1)\n",
"ci = p + array([-1,1])*td.isf(0.025)*se\n",
"\n",
"# Print them\n",
"print('ONE PROPORTION')\n",
"print('The confidence interval for the given sample is {0:5.3f} to {1:5.3f}'.format(\n",
" ci[0], ci[1]))\n",
" "
]
},
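{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Chi-square test to a 2x2 table\n",
"\n",
"NOTE(review): the tables below appear to be entered as `[overlapping, total]` per row, whereas `chi2_contingency` expects mutually exclusive counts (`[overlapping, total - overlapping]`) — TODO confirm before interpreting the p-values."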
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 47.663, with p=0.000\n",
"The uncorrected chi2 value is 47.772, with p=0.000\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 2; probes at intron\n",
"obs = array([[7095, 10028], [443138, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 1.597, with p=0.206\n",
"The uncorrected chi2 value is 1.616, with p=0.204\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 2; probes at gene\n",
"obs = array([[8468, 10028], [600445, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
] },
"nbformat": 4,
"nbformat_minor": 0
}
diff --git a/ipynb/.ipynb_checkpoints/Array-feature-overlap-04-checkpoint.ipynb b/ipynb/.ipynb_checkpoints/Array-feature-overlap-04-checkpoint.ipynb
index 1329c62..2a2ab95 100644
--- a/ipynb/.ipynb_checkpoints/Array-feature-overlap-04-checkpoint.ipynb
+++ b/ipynb/.ipynb_checkpoints/Array-feature-overlap-04-checkpoint.ipynb
...
"!date"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Populating the interactive namespace from numpy and matplotlib\n"
]
}
],
"source": [
"%pylab inline\n",
"import scipy.stats as stats"
]
},
{
"cell_type": "markdown",
"metadata": {},
...
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 352.138, with p=0.000\n",
"The uncorrected chi2 value is 352.654, with p=0.000\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 2 then Probes\n",
"obs = array([[880, 10028], [117460, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 547.532, with p=0.000\n",
"The uncorrected chi2 value is 548.178, with p=0.000\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 4 then Probes\n",
"obs = array([[704, 10148], [117460, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 853.613, with p=0.000\n",
"The uncorrected chi2 value is 854.371, with p=0.000\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 6 then Probes\n",
"obs = array([[632, 11690], [117460, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "markdown",
"metadata": {},
...
"# Rebuilt (new gtf based on RNAseq data)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"8768 Cufflinks\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/rebuilt.gtf \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"7694 Cufflinks\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/rebuilt.gtf \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"6160 Cufflinks\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/rebuilt.gtf \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1197818 Cufflinks\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n",
"-b /Users/sr320/data-genomic/tentacle/rebuilt.gtf \\\n",
"| cut -f 11 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 2184.818, with p=0.000\n",
"The uncorrected chi2 value is 2185.528, with p=0.000\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 2 then Probes\n",
"obs = array([[8768, 10028], [1197818, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 3052.863, with p=0.000\n",
"The uncorrected chi2 value is 3053.724, with p=0.000\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 4 then Probes\n",
"obs = array([[7694, 10148], [1197818, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 6233.645, with p=0.000\n",
"The uncorrected chi2 value is 6234.874, with p=0.000\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 6 then Probes\n",
"obs = array([[6160, 11690], [1197818, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Housekeeping Genes"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3210 GLEAN\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3369 GLEAN\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3819 GLEAN\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"251970 GLEAN\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \\\n",
"| cut -f 11 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 34.806, with p=0.000\n",
"The uncorrected chi2 value is 34.923, with p=0.000\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 2 then Probes\n",
"obs = array([[3210, 10028], [251970, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 17.578, with p=0.000\n",
"The uncorrected chi2 value is 17.661, with p=0.000\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 4 then Probes\n",
"obs = array([[3369, 10148], [251970, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 28.378, with p=0.000\n",
"The uncorrected chi2 value is 28.476, with p=0.000\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 6 then Probes\n",
"obs = array([[3819, 11690], [251970, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Environmental Response Genes"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2809 GLEAN\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2738 GLEAN\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3216 GLEAN\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"190475 GLEAN\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \\\n",
"| cut -f 11 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 1.413, with p=0.235\n",
"The uncorrected chi2 value is 1.439, with p=0.230\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 2 then Probes\n",
"obs = array([[2809, 10028], [190475, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 0.280, with p=0.597\n",
"The uncorrected chi2 value is 0.291, with p=0.589\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 4 then Probes\n",
"obs = array([[2738, 10148], [190475, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 0.141, with p=0.707\n",
"The uncorrected chi2 value is 0.149, with p=0.700\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 6 then Probes\n",
"obs = array([[3216, 11690], [190475, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": null,
diff --git a/ipynb/Array-feature-overlap-02.ipynb b/ipynb/Array-feature-overlap-02.ipynb
deleted file mode 100644
index 154bf5e..0000000
--- a/ipynb/Array-feature-overlap-02.ipynb
+++ /dev/null
...
{
"metadata": {
"name": "",
"signature": "sha256:5d7639992d2c094e51f32dfdb067917953da07822fc898a50b49306fe5f986d3"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Re-defining canonical C gigas Genome Tracks"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"via Ensembl"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!tail -3 /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"scaffold44098\tdust\trepeat_region\t518076\t518099\t.\t.\t.\tName=dust;class=dust;type=Dust\r\n",
"scaffold44098\tdust\trepeat_region\t519261\t519281\t.\t.\t.\tName=dust;class=dust;type=Dust\r\n",
"scaffold44098\ttrf\trepeat_region\t519261\t519281\t.\t.\t.\tName=trf;class=trf;repeat_consensus=AT;type=Tandem repeats\r\n"
]
}
],
"prompt_number": 19
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!cut -f 3 \\\n",
"/Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"186890 CDS\r\n",
" 5 RNA\r\n",
"189468 exon\r\n",
"26114 gene\r\n",
" 28 miRNA\r\n",
" 28 miRNA_gene\r\n",
"1410 pseudogenic_tRNA\r\n",
" 13 rRNA\r\n",
" 13 rRNA_gene\r\n",
"875275 repeat_region\r\n",
" 47 snRNA\r\n",
" 47 snRNA_gene\r\n",
" 20 snoRNA\r\n",
" 20 snoRNA_gene\r\n",
" 994 tRNA_gene\r\n",
"28523 transcript\r\n"
]
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!cut -f 2,3 \\\n",
"/Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 5 EnsemblGenomes\tRNA\r\n",
"2530 EnsemblGenomes\texon\r\n",
" 13 EnsemblGenomes\tgene\r\n",
" 28 EnsemblGenomes\tmiRNA\r\n",
" 28 EnsemblGenomes\tmiRNA_gene\r\n",
"1410 EnsemblGenomes\tpseudogenic_tRNA\r\n",
" 13 EnsemblGenomes\trRNA\r\n",
" 13 EnsemblGenomes\trRNA_gene\r\n",
" 47 EnsemblGenomes\tsnRNA\r\n",
" 47 EnsemblGenomes\tsnRNA_gene\r\n",
" 20 EnsemblGenomes\tsnoRNA\r\n",
" 20 EnsemblGenomes\tsnoRNA_gene\r\n",
" 994 EnsemblGenomes\ttRNA_gene\r\n",
"2422 EnsemblGenomes\ttranscript\r\n",
"186890 GigaDB\tCDS\r\n",
"186938 GigaDB\texon\r\n",
"26101 GigaDB\tgene\r\n",
"26101 GigaDB\ttranscript\r\n",
"650376 dust\trepeat_region\r\n",
"224899 trf\trepeat_region\r\n"
]
}
],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!tail /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gtf"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"scaffold44098\tprotein_coding\tCDS\t509746\t510288\t.\t-\t0\t gene_id \"CGI_10017729\"; gene_version \"1\"; transcript_id \"EKC17988\"; transcript_version \"1\"; exon_number \"2\"; protein_id \"EKC17988\";\r\n",
"scaffold44098\tprotein_coding\texon\t514550\t514690\t.\t-\t.\t gene_id \"CGI_10017729\"; gene_version \"1\"; transcript_id \"EKC17988\"; transcript_version \"1\"; exon_number \"1\"; seqedit \"false\";\r\n",
"scaffold44098\tprotein_coding\tCDS\t514550\t514690\t.\t-\t0\t gene_id \"CGI_10017729\"; gene_version \"1\"; transcript_id \"EKC17988\"; transcript_version \"1\"; exon_number \"1\"; protein_id \"EKC17988\";\r\n",
"scaffold44098\tprotein_coding\tstart_codon\t514688\t514690\t.\t-\t0\t gene_id \"CGI_10017729\"; gene_version \"1\"; transcript_id \"EKC17988\"; transcript_version \"1\"; exon_number \"1\";\r\n",
"scaffold44098\tprotein_coding\texon\t514859\t515511\t.\t-\t.\t gene_id \"CGI_10017730\"; gene_version \"1\"; transcript_id \"EKC17989\"; transcript_version \"1\"; exon_number \"2\"; seqedit \"false\";\r\n",
"scaffold44098\tprotein_coding\tstop_codon\t514859\t514861\t.\t-\t0\t gene_id \"CGI_10017730\"; gene_version \"1\"; transcript_id \"EKC17989\"; transcript_version \"1\"; exon_number \"2\";\r\n",
"scaffold44098\tprotein_coding\tCDS\t514862\t515511\t.\t-\t2\t gene_id \"CGI_10017730\"; gene_version \"1\"; transcript_id \"EKC17989\"; transcript_version \"1\"; exon_number \"2\"; protein_id \"EKC17989\";\r\n",
"scaffold44098\tprotein_coding\texon\t515871\t515877\t.\t-\t.\t gene_id \"CGI_10017730\"; gene_version \"1\"; transcript_id \"EKC17989\"; transcript_version \"1\"; exon_number \"1\"; seqedit \"false\";\r\n",
"scaffold44098\tprotein_coding\tCDS\t515871\t515877\t.\t-\t0\t gene_id \"CGI_10017730\"; gene_version \"1\"; transcript_id \"EKC17989\"; transcript_version \"1\"; exon_number \"1\"; protein_id \"EKC17989\";\r\n",
"scaffold44098\tprotein_coding\tstart_codon\t515875\t515877\t.\t-\t0\t gene_id \"CGI_10017730\"; gene_version \"1\"; transcript_id \"EKC17989\"; transcript_version \"1\"; exon_number \"1\";\r\n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!cut -f 2,3 \\\n",
"/Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gtf \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 2 RNase_MRP_RNA\texon\r\n",
" 1 RNase_P_RNA\texon\r\n",
" 10 SRP_RNA\texon\r\n",
" 28 miRNA\texon\r\n",
" 5 misc_RNA\texon\r\n",
" 48 nontranslating_CDS\texon\r\n",
"186890 protein_coding\tCDS\r\n",
"186890 protein_coding\texon\r\n",
"25587 protein_coding\tstart_codon\r\n",
"26087 protein_coding\tstop_codon\r\n",
" 13 rRNA\texon\r\n",
" 47 snRNA\texon\r\n",
" 20 snoRNA\texon\r\n",
" 994 tRNA\texon\r\n",
"1410 tRNA_pseudogene\texon\r\n"
]
}
],
"prompt_number": 12
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"via GigaDB aka version9"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!tail -2 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_*.gff"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff <==\r\n",
"scaffold999\tflankbed\tpromoter\t99703\t100702\t.\t-\t.\tID=CGI_10006972;\r",
"\r\n",
"scaffold999\tflankbed\tpromoter\t106744\t107743\t.\t+\t.\tID=CGI_10006973;\r",
"\r\n",
"\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_CG.gff <==\r\n",
"scaffold38980\tfuzznuc\tnucleotide_motif\t63903\t63904\t2\t+\t.\tID=scaffold38980.744;note=*pat pattern:CG\r\n",
"scaffold38980\tfuzznuc\tnucleotide_motif\t64051\t64052\t2\t+\t.\tID=scaffold38980.745;note=*pat pattern:CG\r\n",
"\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-TANDEMREPEAT.gff <==\r\n",
"scaffold999\tTRF\tTandem_Repeat\t153009\t153196\t189\t+\t.\t.\r\n",
"scaffold999\tTRF\tTandem_Repeat\t166754\t166792\t69\t+\t.\t.\r\n",
"\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff <==\r\n",
"scaffold1009\tWUBlastX\tDNA_TcMar-Tc2\t1790325\t1790603\t20\t+\t.\t.\r\n",
"scaffold983\tWUBlastX\tDNA_TcMar-Tc1\t369636\t369770\t26\t-\t.\t.\r\n",
"\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE.gff <==\r\n",
"scaffold1009\tWUBlastX\tDNA_TcMar-Tc2\t1790325\t1790603\t20\t+\t.\t.\r\n",
"scaffold983\tWUBlastX\tDNA_TcMar-Tc1\t369636\t369770\t26\t-\t.\t.\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TEx.gff <==\r\n",
"scaffold1009\tWUBlastX\tDNA_TcMar-Tc2\t1790325\t1790603\t20\t+\t.\t.\r\n",
"scaffold983\tWUBlastX\tDNA_TcMar-Tc1\t369636\t369770\t26\t-\t.\t.\r\n",
"\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_exon.gff <==\r\n",
"scaffold22\tGLEAN\tCDS\t1870289\t1870360\t.\t-\t0\tParent=CGI_10028939;\r\n",
"scaffold22\tGLEAN\tCDS\t1869336\t1869428\t.\t-\t0\tParent=CGI_10028939;\r\n",
"\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_gene.gff <==\r\n",
"scaffold22\tGLEAN\tmRNA\t1863760\t1864161\t0.544455\t+\t.\tID=CGI_10028938;\r\n",
"scaffold22\tGLEAN\tmRNA\t1869336\t1885890\t0.999933\t-\t.\tID=CGI_10028939;\r\n",
"\r\n",
"==> /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_intron.gff <==\r\n",
"scaffold999\tsubtractBed\tintrn\t124997\t126011\t.\t+\t.\tParent=CGI_10006973;\r",
"\r\n",
"scaffold999\tsubtractBed\tintrn\t126144\t126616\t.\t+\t.\tParent=CGI_10006973;\r",
"\r\n"
]
}
],
"prompt_number": 18
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!wc -l /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_*.gff"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 28023 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 10035701 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_CG.gff\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 61319 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-TANDEMREPEAT.gff\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 58468 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 119786 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE.gff\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 58468 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TEx.gff\r\n",
" 196691 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_exon.gff\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 28027 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_gene.gff\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 176049 /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_intron.gff\r\n",
" 10762532 total\r\n"
]
}
],
"prompt_number": 15
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Comparison"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Let's see if we can take the whole array and intersect it with the Ensembl gff"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n",
"| cut -f 6,7 \\\n",
"| sort | uniq -c | sed '/#/d'\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1373 GigaDB\tCDS\r\n",
"1373 GigaDB\texon\r\n",
"8468 GigaDB\tgene\r\n",
"8468 GigaDB\ttranscript\r\n",
"1240 dust\trepeat_region\r\n",
" 975 trf\trepeat_region\r\n"
]
}
],
"prompt_number": 27
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n",
"| cut -f 6,7 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 2 EnsemblGenomes\texon\r\n",
" 1 EnsemblGenomes\tpseudogenic_tRNA\r\n",
" 1 EnsemblGenomes\ttRNA_gene\r\n",
" 2 EnsemblGenomes\ttranscript\r\n",
"1177 GigaDB\tCDS\r\n",
"1177 GigaDB\texon\r\n",
"8491 GigaDB\tgene\r\n",
"8491 GigaDB\ttranscript\r\n",
"1320 dust\trepeat_region\r\n",
" 873 trf\trepeat_region\r\n"
]
}
],
"prompt_number": 28
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n",
"| cut -f 6,7 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 1 EnsemblGenomes\texon\r\n",
" 1 EnsemblGenomes\tsnRNA\r\n",
" 1 EnsemblGenomes\tsnRNA_gene\r\n",
" 947 GigaDB\tCDS\r\n",
" 948 GigaDB\texon\r\n",
"9689 GigaDB\tgene\r\n",
"9689 GigaDB\ttranscript\r\n",
"1591 dust\trepeat_region\r\n",
" 864 trf\trepeat_region\r\n"
]
}
],
"prompt_number": 29
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n",
"| cut -f 11,12 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 5 EnsemblGenomes\tRNA\r\n",
" 444 EnsemblGenomes\texon\r\n",
" 6 EnsemblGenomes\tgene\r\n",
" 2 EnsemblGenomes\tmiRNA\r\n",
" 2 EnsemblGenomes\tmiRNA_gene\r\n",
" 259 EnsemblGenomes\tpseudogenic_tRNA\r\n",
" 14 EnsemblGenomes\tsnRNA\r\n",
" 14 EnsemblGenomes\tsnRNA_gene\r\n",
" 6 EnsemblGenomes\tsnoRNA\r\n",
" 6 EnsemblGenomes\tsnoRNA_gene\r\n",
" 152 EnsemblGenomes\ttRNA_gene\r\n",
" 422 EnsemblGenomes\ttranscript\r\n",
"157279 GigaDB\tCDS\r\n",
"157307 GigaDB\texon\r\n",
"600445 GigaDB\tgene\r\n",
"600445 GigaDB\ttranscript\r\n",
"56210 dust\trepeat_region\r\n",
"42390 trf\trepeat_region\r\n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"TEs"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 383 WUBlastX\r\n"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 254 WUBlastX\r\n"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 168 WUBlastX\r\n"
]
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff \\\n",
"| cut -f 11 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"10322 WUBlastX\r\n"
]
}
],
"prompt_number": 10
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Promoters"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff \\\n",
"| cut -f 6,7 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 976 flankbed\tpromoter\r\n"
]
}
],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff \\\n",
"| cut -f 6,7 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 992 flankbed\tpromoter\r\n"
]
}
],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff \\\n",
"| cut -f 6,7 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1248 flankbed\tpromoter\r\n"
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff \\\n",
"| cut -f 11 \\\n",
"| sort | uniq -c | sed '/#/d'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"66368 flankbed\r\n"
]
}
],
"prompt_number": 15
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Plot"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"```\n",
"oys2\toys4\toys6\tProbes\n",
"gene\t8468\t8491\t9689\t600445\n",
"exon\t1373\t1177\t948\t157307\n",
"intron\t7095\t7314\t8741\t443138\n",
"dust repeat\t1240\t1320\t1591\t56210\n",
"trf repeat\t975\t873\t864\t42390\n",
"TE-blast\t383\t254\t168\t10322\n",
"promoter\t976\t992\t1248\t66368\n",
"```"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
diff --git a/ipynb/Array-feature-overlap-04.ipynb b/ipynb/Array-feature-overlap-04.ipynb
index 1329c62..2a2ab95 100644
--- a/ipynb/Array-feature-overlap-04.ipynb
+++ b/ipynb/Array-feature-overlap-04.ipynb
...
"!date"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Populating the interactive namespace from numpy and matplotlib\n"
]
}
],
"source": [
"%pylab inline\n",
"import scipy.stats as stats"
]
},
{
"cell_type": "markdown",
"metadata": {},
...
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 352.138, with p=0.000\n",
"The uncorrected chi2 value is 352.654, with p=0.000\n"
]
}
],
"source": [
"# Observed 2x2 counts: row 1 = Oyster 2, row 2 = Probes\n",
"obs = array([[880, 10028], [117460, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 547.532, with p=0.000\n",
"The uncorrected chi2 value is 548.178, with p=0.000\n"
]
}
],
"source": [
"# Observed 2x2 counts: row 1 = Oyster 4, row 2 = Probes\n",
"obs = array([[704, 10148], [117460, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 853.613, with p=0.000\n",
"The uncorrected chi2 value is 854.371, with p=0.000\n"
]
}
],
"source": [
"# Observed 2x2 counts: row 1 = Oyster 6, row 2 = Probes\n",
"obs = array([[632, 11690], [117460, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "markdown",
"metadata": {},
...
"# Rebuilt (new gtf based on RNAseq data)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"8768 Cufflinks\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/rebuilt.gtf \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"7694 Cufflinks\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/rebuilt.gtf \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"6160 Cufflinks\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/rebuilt.gtf \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1197818 Cufflinks\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n",
"-b /Users/sr320/data-genomic/tentacle/rebuilt.gtf \\\n",
"| cut -f 11 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 2184.818, with p=0.000\n",
"The uncorrected chi2 value is 2185.528, with p=0.000\n"
]
}
],
"source": [
"# Observed 2x2 counts: row 1 = Oyster 2, row 2 = Probes; column 1 = overlaps with rebuilt.gtf (Cufflinks counts above)\n",
"obs = array([[8768, 10028], [1197818, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 3052.863, with p=0.000\n",
"The uncorrected chi2 value is 3053.724, with p=0.000\n"
]
}
],
"source": [
"# Observed 2x2 counts: row 1 = Oyster 4, row 2 = Probes; column 1 = overlaps with rebuilt.gtf (Cufflinks counts above)\n",
"obs = array([[7694, 10148], [1197818, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 6233.645, with p=0.000\n",
"The uncorrected chi2 value is 6234.874, with p=0.000\n"
]
}
],
"source": [
"# Observed 2x2 counts: row 1 = Oyster 6, row 2 = Probes; column 1 = overlaps with rebuilt.gtf (Cufflinks counts above)\n",
"obs = array([[6160, 11690], [1197818, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Housekeeping Genes"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3210 GLEAN\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3369 GLEAN\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3819 GLEAN\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"251970 GLEAN\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \\\n",
"| cut -f 11 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 34.806, with p=0.000\n",
"The uncorrected chi2 value is 34.923, with p=0.000\n"
]
}
],
"source": [
"# Observed 2x2 counts: row 1 = Oyster 2, row 2 = Probes; column 1 = overlaps with housekeeping genes (GLEAN counts above)\n",
"obs = array([[3210, 10028], [251970, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 17.578, with p=0.000\n",
"The uncorrected chi2 value is 17.661, with p=0.000\n"
]
}
],
"source": [
"# Observed 2x2 counts: row 1 = Oyster 4, row 2 = Probes; column 1 = overlaps with housekeeping genes (GLEAN counts above)\n",
"obs = array([[3369, 10148], [251970, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 28.378, with p=0.000\n",
"The uncorrected chi2 value is 28.476, with p=0.000\n"
]
}
],
"source": [
"# Observed 2x2 counts: row 1 = Oyster 6, row 2 = Probes; column 1 = overlaps with housekeeping genes (GLEAN counts above)\n",
"obs = array([[3819, 11690], [251970, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Environmental Response Genes"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2809 GLEAN\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2738 GLEAN\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3216 GLEAN\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"190475 GLEAN\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \\\n",
"| cut -f 11 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 1.413, with p=0.235\n",
"The uncorrected chi2 value is 1.439, with p=0.230\n"
]
}
],
"source": [
"# Observed 2x2 counts: row 1 = Oyster 2, row 2 = Probes; column 1 = overlaps with environmental response genes (GLEAN counts above)\n",
"obs = array([[2809, 10028], [190475, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 0.280, with p=0.597\n",
"The uncorrected chi2 value is 0.291, with p=0.589\n"
]
}
],
"source": [
"# Observed 2x2 counts: row 1 = Oyster 4, row 2 = Probes; column 1 = overlaps with environmental response genes (GLEAN counts above)\n",
"obs = array([[2738, 10148], [190475, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 0.141, with p=0.707\n",
"The uncorrected chi2 value is 0.149, with p=0.700\n"
]
}
],
"source": [
"# Observed 2x2 counts: row 1 = Oyster 6, row 2 = Probes; column 1 = overlaps with environmental response genes (GLEAN counts above)\n",
"obs = array([[3216, 11690], [190475, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": null,