this is for holding javascript data
Ino de Bruijn Start counting kmers for chimerism detection
almost 10 years ago
Commit id: c89407e09226d4aee5d04528cd21f50517e48fef
deletions | additions
diff --git a/notebooks/kmer-counting.ipynb b/notebooks/kmer-counting.ipynb
new file mode 100644
index 0000000..d290f19
--- /dev/null
+++ b/notebooks/kmer-counting.ipynb
...
{
"metadata": {
"name": "",
"signature": "sha256:77258f57f1da947062880cb2958d8967b5b7a24e6b75358781855bf4f6a4caec"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from Bio import SeqIO\n",
"\n",
"with open(\"/media/milou/glob/projects/masmvali-partdeux/reassembly-filtered-reads/Sample_1ng_even/Mircea_07102013_selected_refs.fasta\") as ffile:\n",
" record_dict = SeqIO.to_dict(SeqIO.parse(ffile, 'fasta'))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!echo \"`cd /media/milou/glob/github/masm-vali/masmvali/ && git log -1`\""
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"commit 5e4c182ff5f75d603319cffc11533b810840d2ed\r\n",
"Author: Ino de Bruijn \r\n",
"Date: Tue Jul 8 18:07:20 2014 +0200\r\n",
"\r\n",
" Add contig-metagenome-cover property for backward compatibility\r\n"
]
}
],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import sys\n",
"import os\n",
"import re\n",
"sys.path.append('/media/milou/glob/github/masm-vali/masmvali/')\n",
"import assembly\n",
"reload(assembly)\n",
"import nucmer\n",
"reload(nucmer)\n",
"import refgenome\n",
"reload(refgenome)\n",
"import validation\n",
"reload(validation)\n",
"import metassemble\n",
"reload(metassemble)\n",
"import jellyfish\n",
"reload(jellyfish)\n",
"\n",
"import utils\n",
"reload(utils)\n",
"\n",
"import warnings\n",
"import numpy as np\n",
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 27
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"jellyfish.Jellyfish(\"/media/milou/glob/projects/masmvali-partdeux/reassembly-filtered-reads/Sample_1ng_even/ref-jellyfish/Bacteroides_thetaiotaomicron_VPI-5482-19.jf\").count_kmer(\"GTCCCATGGAATCAATAGC\")"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 26,
"text": [
"0"
]
}
],
"prompt_number": 26
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"jfr = jellyfish.JellyfishRef(\"/media/milou/glob/projects/masmvali-partdeux/reassembly-filtered-reads/Sample_1ng_even/Mircea_07102013_selected_refs.fasta\", \n",
" \"/media/milou/glob/projects/masmvali-partdeux/reassembly-filtered-reads/Sample_1ng_even/ref-jellyfish/\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 28
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"jfr.get_refs_with_kmer_count_of_seq(record_dict[\"Acidobacterium_capsulatum_ATCC_51196\"].seq, 19)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mjfr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_refs_with_kmer_count_of_seq\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrecord_dict\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"Acidobacterium_capsulatum_ATCC_51196\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mseq\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m19\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32m/media/milou/glob/github/masm-vali/masmvali/jellyfish.py\u001b[0m in \u001b[0;36mget_refs_with_kmer_count_of_seq\u001b[1;34m(self, seq, kmer_size)\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mos\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexists\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfn\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 52\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_run_kmer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkmer_size\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 53\u001b[1;33m \u001b[0mrefs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mr\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mJellyfish\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfn\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcount_kmer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mseq\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 54\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 55\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mrefs\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/media/milou/glob/github/masm-vali/masmvali/jellyfish.py\u001b[0m in \u001b[0;36mcount_kmer\u001b[1;34m(self, seq)\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 13\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mcount_kmer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mseq\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 14\u001b[1;33m \u001b[0mawk_count\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msh\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mawk\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msh\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjellyfish\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"query\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"-s\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"/dev/fd/0\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_in\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\">seq\\n%s\"\u001b[0m \u001b[1;33m%\u001b[0m \u001b[0mseq\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"{rv+=$2} END {print rv}\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 15\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mawk_count\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstdout\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 16\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/idb/virtual-environments/py27-env/local/lib/python2.7/site-packages/sh.pyc\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 767\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 768\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 769\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mRunningCommand\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcmd\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcall_args\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstdin\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstdout\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstderr\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 770\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 771\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/idb/virtual-environments/py27-env/local/lib/python2.7/site-packages/sh.pyc\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, cmd, call_args, stdin, stdout, stderr)\u001b[0m\n\u001b[0;32m 328\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 329\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshould_wait\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 330\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 331\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 332\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/idb/virtual-environments/py27-env/local/lib/python2.7/site-packages/sh.pyc\u001b[0m in \u001b[0;36mwait\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 332\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 333\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mwait\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 334\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_handle_exit_code\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprocess\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 335\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 336\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/idb/virtual-environments/py27-env/local/lib/python2.7/site-packages/sh.pyc\u001b[0m in \u001b[0;36mwait\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1150\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexit_code\u001b[0m \u001b[1;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1151\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlog\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdebug\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"exit code not set, waiting on pid\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1152\u001b[1;33m \u001b[0mpid\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mexit_code\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mos\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwaitpid\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpid\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1153\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexit_code\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_handle_exit_code\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mexit_code\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1154\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"prompt_number": 30
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a = set([\"ola\"])\n",
"import sh"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"b = set([\"hej\",'ola'] + list(a))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 65
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"sh.awk(sh.echo(\"-e\", \"5 10\\n5 10\"), \"{rv+=$2} END {print rv}\")"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 6,
"text": [
"20"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}