Last active
January 25, 2018 17:48
-
-
Save srividya22/d2a7277f2e304391e43918ca31aa18d9 to your computer and use it in GitHub Desktop.
Get gap positions in a fasta file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# Script to identify gap regions in an assembly
# input : fasta
# output : bed
# usage : get_gap_postions.py fasta bed
# Import necessary packages
import argparse
import re

from Bio import SeqIO
# A gap is a contiguous run of uppercase N; compiled once, outside the loops.
GAP_PATTERN = re.compile(r"N+")


def gap_intervals(sequence):
    """Return the (start, end) span of every run of ``N`` in *sequence*.

    Spans are the regex match offsets: 0-based start, exclusive end.
    An empty list is returned when the sequence contains no ``N``.
    """
    return [(m.start(), m.end()) for m in GAP_PATTERN.finditer(sequence)]


def main():
    """Read the FASTA given on the command line and write its gaps as BED3.

    Positional arguments: ``fasta`` (input path) and ``bed`` (output path).
    One tab-separated line ``<record id> <start> <end>`` is written per gap.
    """
    # Parse command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("fasta")
    parser.add_argument("bed")
    args = parser.parse_args()

    # The original mode 'wa' is rejected by Python 3 (a mode must contain
    # exactly one of r/w/a/x); plain 'w' is used here.  The
    # ``with`` statement closes both files even if parsing fails midway.
    with open(args.fasta) as handle, open(args.bed, "w") as bed:
        # Open FASTA, search for gap regions, print in BED3 format
        for record in SeqIO.parse(handle, "fasta"):
            for start, end in gap_intervals(str(record.seq)):
                bed.write("{0}\t{1}\t{2}\n".format(record.id, start, end))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment