Created
May 8, 2014 11:56
-
-
Save heliy/2a0bd0d2a181c85bf520 to your computer and use it in GitHub Desktop.
最大简约法进化系统发育树 固定模型
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding:utf-8 | |
""" | |
构建最大简约法进化系统发育树 | |
使用 sh: | |
python sysevatree data > eva-tree.txt | |
""" | |
import sys | |
keys=['00','01','10','11'] | |
""" | |
判断该位点是否有信息 | |
""" | |
def is_sig(blas): | |
elems={} | |
for key in keys: | |
elems[key]=0 | |
for i in blas: | |
elems[i]+=1 | |
count=elems.values() | |
count.sort() | |
if 2<=count[2] : #有两个2以上的位点 | |
return True | |
else: | |
return False | |
""" | |
从文件里得到显著的位点 | |
返回{id:[blas,rs],} | |
""" | |
def getsnps(f): | |
snps={} | |
for line in open(f,'r').read().replace("|","").splitlines(): | |
cons=line.split("\t") | |
id=cons[0] | |
rs=cons[3] | |
blas=cons[6:] | |
if is_sig(blas): | |
snps[id]=[blas,rs] | |
return snps | |
""" | |
字典里最小value对应的key | |
""" | |
def min_key(dic): | |
min_value=min(dic.values()) | |
for key in dic.keys(): | |
if dic[key]==min_value: | |
return key | |
""" | |
树节点类 | |
""" | |
class Node(object): | |
#初始化 | |
def __init__(self,data,left,right): | |
self.data=data | |
self.left=left | |
self.right=right | |
# 方便print | |
def __str__(self,level=0): | |
s=("[--"*level)+str(self.data)+"\n" | |
if self.left!=None: | |
s=s+self.left.__str__(level+1) | |
if self.right!=None: | |
s=s+self.right.__str__(level+1) | |
return s | |
# 从子节点推断可能的取值和相应的得分 | |
def simi_data(self): | |
if(isinstance(self.data,str)): | |
return {self.data:0} | |
else: | |
l_simi=self.left.simi_data() | |
r_simi=self.right.simi_data() | |
self.data={} | |
for key in keys: | |
if key in l_simi.keys() and key in r_simi.keys() : #共有 | |
self.data[key]=l_simi[key]+r_simi[key] | |
elif key in l_simi.keys(): #left有right没有 | |
self.data[key]=l_simi[key]+min(r_simi.values())+1 | |
elif key in r_simi.keys(): | |
self.data[key]=r_simi[key]+min(l_simi.values())+1 | |
else: | |
continue | |
return self.data | |
# 从父节点推断自己的取值 | |
def eva_data(self,up): | |
if(isinstance(self.data,str)): | |
return | |
else: | |
if up.keys()[0] in self.data.keys(): # 与父节点相同 | |
key=up.keys()[0] | |
else: # 最优节点 | |
key=min_key(self.data) | |
self.data={key:self.data[key]} | |
self.left.eva_data(self.data) | |
self.right.eva_data(self.data) | |
""" | |
N0 | |
/ \ | |
/ \ | |
/ N1 | |
/ / \ | |
/ / \ | |
/ N2 \ | |
/ / \ \ | |
N3 / N4 N5 | |
/ \ / / \ / \ | |
L0 L1 L2 L3 L4 L5 L6 | |
""" | |
""" | |
发生树类 | |
""" | |
class Tree(object): | |
def __init__(self,blas): | |
self.leaves=[Node(data=ls,left=None,right=None) for ls in blas] | |
node3=Node(data=None,left=self.leaves[0],right=self.leaves[1]) | |
node4=Node(data=None,left=self.leaves[3],right=self.leaves[4]) | |
node5=Node(data=None,left=self.leaves[5],right=self.leaves[6]) | |
node2=Node(data=None,left=self.leaves[2],right=node4) | |
node1=Node(data=None,left=node2,right=node5) | |
node0=Node(data=None,left=node3,right=node1) | |
self.nodes=[node0,node1,node2,node3,node4,node5] | |
self.root=node0 | |
def __str__(self): | |
return self.root.__str__() | |
def tree_print(self): | |
format=""" | |
%s | |
/ \\ | |
/ \\ | |
/ %s | |
/ / \\ | |
/ / \\ | |
/ %s \\ | |
/ / \\ \\ | |
%s / %s %s | |
/ \\ / / \\ / \\ | |
%s %s %s %s %s %s %s | |
""" | |
strs=tuple([str(node.data.keys()[0]) for node in self.nodes]+[str(leaf.data) for leaf in self.leaves]) | |
print format % strs | |
# 最优情况时的根节点 | |
def __get_best_root(self): | |
simi_root=self.root.simi_data() | |
key=min_key(simi_root) | |
return {key:simi_root[key]} | |
# 将最优情况推行至全树 | |
def take_eva(self): | |
best_root=self.__get_best_root() | |
self.root.eva_data(best_root) | |
if __name__=="__main__": | |
f=sys.argv[1] | |
snps=getsnps(f) | |
for id in snps.keys(): | |
[blas,rs]=snps[id] | |
tree=Tree(blas) | |
tree.take_eva() | |
print rs | |
tree.tree_print() | |
print "=========================" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment