Skip to content

Instantly share code, notes, and snippets.

@olgabot
Created May 31, 2019 01:49
Show Gist options
  • Save olgabot/1c84f112cd1c3167eba5f33d55b77dd5 to your computer and use it in GitHub Desktop.
Save olgabot/1c84f112cd1c3167eba5f33d55b77dd5 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"from Bio import SeqIO\n",
"from Bio.Seq import Seq\n",
"from Bio.SeqRecord import SeqRecord\n",
"import pandas as pd\n",
"\n",
"import itertools\n",
"import re\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"mv: /Users/olgabot/Downloads/antisense_library.xlsx: No such file or directory\r\n"
]
}
],
"source": [
"# Fetch the spreadsheet from the Downloads folder only if it is not already here,\n",
"# so re-running the notebook does not fail once the file has been moved.\n",
"! [ -f antisense_library.xlsx ] || mv ~/Downloads/antisense_library.xlsx ."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 ACCACTTCCAGCACCGGTTCCANNANNGGTGCAGTGCCTGGCCACG...\n",
"1 ACCACTTCCAGCACCGGTTCCANNCNNGGTGCAGTGCCTGGCCACG...\n",
"2 ACCACTTCCAGCACCGGTTCCANNGCCANNGCAGTGCCTGGCCACG...\n",
"3 ACCACTTCCAGCACCGGTTCCANNGCCCNNGCAGTGCCTGGCCACG...\n",
"4 ACCACTTCCAGCACCGGTTCCANNGCCGGTANNGTGCCTGGCCACG...\n",
"Name: 0, dtype: object"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the header-less, single-column sheet of template sequences as a Series.\n",
"# `read_excel(squeeze=True)` was removed in pandas 2.0, so squeeze the frame explicitly.\n",
"template_sequences = pd.read_excel(\"antisense_library.xlsx\", header=None).squeeze(\"columns\")\n",
"template_sequences.head()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'ACCACTTCCAGCACCGGTTCCANNANNGGTGCAGTGCCTGGCCACGCTCTTCTCGTACTGCTCCACCACGGTGTAGCCACTAGTCCCACCCGATCC'"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"template_sequence = template_sequences[0]\n",
"template_sequence"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-1"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"SDF\".find(\"N\")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# Position of the first degenerate base; str.find returns -1 when there is none.\n",
"template_sequence.find(\"N\")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"22"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"template_sequence.index('N')"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'A'"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"template_sequence[21]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[<_sre.SRE_Match object; span=(22, 23), match='N'>,\n",
" <_sre.SRE_Match object; span=(23, 24), match='N'>,\n",
" <_sre.SRE_Match object; span=(25, 26), match='N'>,\n",
" <_sre.SRE_Match object; span=(26, 27), match='N'>]"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(re.finditer(\"N\", template_sequence))"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"256"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(list(itertools.product(\"ACGT\", repeat=4)))"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"256"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"4**4"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"156672"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Expand every template into all 4**4 = 256 concrete sequences by filling its\n",
"# degenerate 'N' positions with each A/C/G/T combination.\n",
"records = []\n",
"\n",
"# Loop variable is `template` (not `template_sequence`) so the value inspected\n",
"# in earlier cells is not overwritten by this loop.\n",
"for template in template_sequences:\n",
"    # The N positions do not depend on the bases being filled in, so locate\n",
"    # them once per template instead of once per combination.\n",
"    n_positions = [match.start() for match in re.finditer(\"N\", template)]\n",
"\n",
"    for bases in itertools.product(\"ACGT\", repeat=4):\n",
"        base_id = '{0}{1}-{2}{3}'.format(*bases)\n",
"\n",
"        # zip pairs the i-th base with the i-th N. Each swap is one character\n",
"        # for one character, so the recorded positions stay valid.\n",
"        filled = list(str(template))\n",
"        for base, position in zip(bases, n_positions):\n",
"            filled[position] = base\n",
"\n",
"        # The record id is the unfilled template followed by the chosen bases.\n",
"        seq_id = f\"{template}_{base_id}\"\n",
"        record = SeqRecord(Seq(\"\".join(filled)), seq_id)\n",
"        records.append(record)\n",
"\n",
"len(records)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"156672"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Write every expanded record to one FASTA file; SeqIO.write returns the record count.\n",
"fasta_path = \"laura_antisense_library_genome.fasta\"\n",
"SeqIO.write(records, fasta_path, \"fasta\")"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-rw-r--r-- 1 olgabot staff 33M May 30 18:47 laura_antisense_library_genome.fasta\r\n"
]
}
],
"source": [
"# Show the size of the FASTA file written above.\n",
"!ls -lha laura_antisense_library_genome.fasta"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:root] *",
"language": "python",
"name": "conda-root-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment