Last active
January 15, 2016 22:45
-
-
Save vishnubob/36e9181e2c4ff89ae73b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from Bio.Blast import NCBIWWW | |
from Bio.Blast import NCBIXML | |
import random | |
def random_sequence_generator(length): | |
return str.join('', [random.choice('AGTC') for x in range(length)]) | |
def gc_good(sequence, ideal=.5, spread=.05): | |
val = (sequence.count('G') + sequence.count('C')) / float(len(sequence)) | |
return (ideal - spread) < val < (ideal + spread) | |
def is_unique(sequence, cutoff=.05): | |
blast_results = NCBIWWW.qblast("blastn", "nt", sequence, megablast=True) | |
blast_record = NCBIXML.read(blast_results) | |
if len(blast_record.alignments) == 0: | |
return True | |
scores = [hsp.expect for hsp in al for al in blast_record.alignments] | |
return (not scores or min(scores) > cutoff) | |
def random_sequence_screener(count, length): | |
random_sequences = [] | |
while count: | |
seq = random_sequence_generator(length) | |
if gc_good(seq) and is_unique(seq): | |
random_sequences.append(seq) | |
count -= 1 | |
else: | |
print("Rejecting " + seq) | |
return random_sequences | |
if __name__ == "__main__": | |
random_sequences = random_sequence_screener(100, 50) | |
for i in random_sequences: | |
print(i) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment