heliy · December 31, 2015 00:38
diff --git a/seqcutloc.py b/seqcutloc.py
 #coding:UTF-8
 #!/usr/bin/python

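 # Usage:
 #   python seqcutloc.py <fasta file> <target sequence> <output file>
 #   ./seqcutloc.py <fasta file> <target sequence> <output file>

 # Algorithm: split the long sequence on the target bases, then walk the
 # pieces in order to rebuild each cut position in the original sequence.
 # e.g. seq: ATAATCGGCGATTAATCGAAT  cut: AAT
 # Splitting yields the sub-sequences AT, CGGCGATT and CG (lengths 2, 8, 2).
 # Re-joining gives: first site 2+1=3, second site 2+3+8+1=14,
 # third site 2+3+8+3+2+1=19.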

 import string
 import sys

 # One FASTA record.
 class fasta(object):
     """Plain container for a single FASTA record.

     Attributes:
         head: header text of the record, stored exactly as given.
         seq: sequence text of the record, stored exactly as given.
     """

     def __init__(self, head, seq):
         self.head = head
         self.seq = seq

 # Locate every occurrence of the target inside one FASTA record.
 def sinfasta(sfas, target):
     """Return the position of each non-overlapping `target` match in a record.

     The sequence is split on `target`; adding up the lengths of the pieces and
     of the cuts already passed gives the 1-based start of every match.

     Args:
         sfas: object exposing `head` and `seq` string attributes.
         target: non-empty subsequence to search for.

     Returns:
         A list of [name, index, start, end] rows whose items are all strings.
         `index` counts matches from 1, `start` is the 1-based position of the
         first base of the match and `end` is start + len(target), i.e. one
         past the last base of the match. The list is empty when there is no
         match.

     Raises:
         ValueError: if `target` is empty (raised by str.split).
     """
     fields = sfas.head.split('|')
     # The name is the fourth '|'-separated header field (presumably the
     # accession of "gi|..|ref|ACCESSION|.." style headers -- confirm).
     # Headers with fewer fields fall back to the whole header instead of
     # raising IndexError.
     name = fields[3] if len(fields) > 3 else sfas.head

     # str.split always returns at least one piece, so "no match" simply
     # leaves nothing to iterate below; no separate early return is needed.
     pieces = sfas.seq.split(target)
     tarlen = len(target)

     targloc = []
     start = 1  # 1-based position just after the previous cut
     # The last piece follows the final cut, so it never starts a match.
     for index, piece in enumerate(pieces[:-1], 1):
         start += len(piece)  # first base of this match
         targloc.append([name, str(index), str(start), str(start + tarlen)])
         start += tarlen  # step over the match itself
     return targloc
 	
 # Split a multi-record FASTA (.fna) file into individual records.
 def file2singles(filename):
     """Read `filename` and return its records as a list of `fasta` objects.

     The file content is split on '>'. For every non-blank chunk the first
     line becomes the record header and the remaining lines, with line breaks
     removed, become the sequence.

     Args:
         filename: path of the FASTA file to read.

     Returns:
         A list of `fasta` objects in file order.
     """
     # The context manager closes the file even if reading fails.
     with open(filename, 'r') as handle:
         chunks = handle.read().split('>')

     records = []
     for chunk in chunks:
         # Skip only genuinely empty chunks (e.g. the text before the first
         # '>'); a length threshold would silently drop short valid records.
         if not chunk.strip():
             continue
         # Cut at the first newline rather than str.replace(head, ''), which
         # would also delete any copy of the header text inside the sequence.
         head, _, body = chunk.partition('\n')
         head = head.rstrip('\r')
         seq = body.replace('\r', '').replace('\n', '')
         records.append(fasta(head, seq))
     return records

 # Main processing routine.
 def process(filename, target, tofile):
     """Write the positions of `target` in every record of `filename` to `tofile`.

     Each match becomes one tab-separated line built from the row returned by
     sinfasta; a blank line is written after each record's block of matches,
     including records without any match.

     Args:
         filename: path of the input FASTA file.
         target: subsequence to search for.
         tofile: path of the output file (overwritten).

     Returns:
         None.
     """
     # Read the input first so a missing or unreadable input file does not
     # leave behind a freshly truncated output file.
     records = file2singles(filename)

     with open(tofile, 'w') as out:
         for record in records:
             # Treat a None result as "no matches" instead of failing.
             rows = sinfasta(record, target) or []
             # str.join works on both Python 2 and 3; string.join is
             # Python 2 only.
             out.writelines(['\t'.join(row) + '\n' for row in rows])
             out.write('\n')

 if __name__ == "__main__":
     # Exactly three arguments are required: FASTA file, target sequence and
     # output file. Report a usage line instead of an unpacking ValueError.
     if len(sys.argv) != 4:
         sys.exit("usage: python seqcutloc.py <fasta file> <target sequence> <output file>")
     filename, target, tofile = sys.argv[1:]
     process(filename, target, tofile)
	#coding:UTF-8
	#!/usr/bin/python

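	# Usage:
	#   python seqcutloc.py <fasta file> <target sequence> <output file>
	#   ./seqcutloc.py <fasta file> <target sequence> <output file>

	# Algorithm: split the long sequence on the target bases, then walk the
	# pieces in order to rebuild each cut position in the original sequence.
	# e.g. seq: ATAATCGGCGATTAATCGAAT  cut: AAT
	# Splitting yields the sub-sequences AT, CGGCGATT and CG (lengths 2, 8, 2).
	# Re-joining gives: first site 2+1=3, second site 2+3+8+1=14,
	# third site 2+3+8+3+2+1=19.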

	import string
	import sys

	# One FASTA record.
	class fasta(object):
	    """Plain container for a single FASTA record.

	    Attributes:
	        head: header text of the record, stored exactly as given.
	        seq: sequence text of the record, stored exactly as given.
	    """

	    def __init__(self, head, seq):
	        self.head = head
	        self.seq = seq

	# Locate every occurrence of the target inside one FASTA record.
	def sinfasta(sfas, target):
	    """Return the position of each non-overlapping `target` match in a record.

	    The sequence is split on `target`; adding up the lengths of the pieces and
	    of the cuts already passed gives the 1-based start of every match.

	    Args:
	        sfas: object exposing `head` and `seq` string attributes.
	        target: non-empty subsequence to search for.

	    Returns:
	        A list of [name, index, start, end] rows whose items are all strings.
	        `index` counts matches from 1, `start` is the 1-based position of the
	        first base of the match and `end` is start + len(target), i.e. one
	        past the last base of the match. The list is empty when there is no
	        match.

	    Raises:
	        ValueError: if `target` is empty (raised by str.split).
	    """
	    # Split on a plain '|': str.split takes a literal separator, so an
	    # escaped '\|' would only match a backslash followed by a pipe.
	    fields = sfas.head.split('|')
	    # The name is the fourth '|'-separated header field (presumably the
	    # accession of "gi|..|ref|ACCESSION|.." style headers -- confirm).
	    # Headers with fewer fields fall back to the whole header instead of
	    # raising IndexError.
	    name = fields[3] if len(fields) > 3 else sfas.head

	    # str.split always returns at least one piece, so "no match" simply
	    # leaves nothing to iterate below; no separate early return is needed.
	    pieces = sfas.seq.split(target)
	    tarlen = len(target)

	    targloc = []
	    start = 1  # 1-based position just after the previous cut
	    # The last piece follows the final cut, so it never starts a match.
	    for index, piece in enumerate(pieces[:-1], 1):
	        start += len(piece)  # first base of this match
	        targloc.append([name, str(index), str(start), str(start + tarlen)])
	        start += tarlen  # step over the match itself
	    return targloc

	# Split a multi-record FASTA (.fna) file into individual records.
	def file2singles(filename):
	    """Read `filename` and return its records as a list of `fasta` objects.

	    The file content is split on '>'. For every non-blank chunk the first
	    line becomes the record header and the remaining lines, with line breaks
	    removed, become the sequence.

	    Args:
	        filename: path of the FASTA file to read.

	    Returns:
	        A list of `fasta` objects in file order.
	    """
	    # The context manager closes the file even if reading fails.
	    with open(filename, 'r') as handle:
	        chunks = handle.read().split('>')

	    records = []
	    for chunk in chunks:
	        # Skip only genuinely empty chunks (e.g. the text before the first
	        # '>'); a length threshold would silently drop short valid records.
	        if not chunk.strip():
	            continue
	        # Cut at the first newline rather than str.replace(head, ''), which
	        # would also delete any copy of the header text inside the sequence.
	        head, _, body = chunk.partition('\n')
	        head = head.rstrip('\r')
	        seq = body.replace('\r', '').replace('\n', '')
	        records.append(fasta(head, seq))
	    return records

	# Main processing routine.
	def process(filename, target, tofile):
	    """Write the positions of `target` in every record of `filename` to `tofile`.

	    Each match becomes one tab-separated line built from the row returned by
	    sinfasta; a blank line is written after each record's block of matches,
	    including records without any match.

	    Args:
	        filename: path of the input FASTA file.
	        target: subsequence to search for.
	        tofile: path of the output file (overwritten).

	    Returns:
	        None.
	    """
	    # Read the input first so a missing or unreadable input file does not
	    # leave behind a freshly truncated output file.
	    records = file2singles(filename)

	    with open(tofile, 'w') as out:
	        for record in records:
	            # Treat a None result as "no matches" instead of failing.
	            rows = sinfasta(record, target) or []
	            # str.join works on both Python 2 and 3; string.join is
	            # Python 2 only.
	            out.writelines(['\t'.join(row) + '\n' for row in rows])
	            out.write('\n')

	if __name__ == "__main__":
	    # Exactly three arguments are required: FASTA file, target sequence and
	    # output file. Report a usage line instead of an unpacking ValueError.
	    if len(sys.argv) != 4:
	        sys.exit("usage: python seqcutloc.py <fasta file> <target sequence> <output file>")
	    filename, target, tofile = sys.argv[1:]
	    process(filename, target, tofile)