sowmith1999 · February 5, 2025 23:51
diff --git a/global_alignment_with_path_counting.py b/global_alignment_with_path_counting.py
 #!/usr/bin/env python3
 # Global Alignment with simple penalties and not affine function penalties
 # We count the number the paths along with the score in the table
 class Cell:
    def __init__(self):
        self.score =  0
        self.path_count = 0

    def __repr__(self):
        return f"({self.score}, {self.path_count})"

 class DPTable:
    def __init__(self,match_penalty, mismatch_penalty, gap_penalty, row_str, col_str):
        self.alpha = match_penalty # if the values are equal
        self.beta = mismatch_penalty # if we take the L for mismatch
        self.gamma = gap_penalty # if mismatch and instead we insert a gap
        self.row_string = row_str # rows
        self.num_rows = len(self.row_string) + 1
        self.col_string = col_str # cols
        self.num_cols = len(self.col_string) + 1
        # one extra column and row for keeping the intial values, i.e., if matched till only with the gap.
        self.table = [] 
        for i in range(self.num_rows):
            self.table.append([])
            for j in range(self.num_cols):
                self.table[i].append(Cell())
        self.initialize_table()

    def initialize_table(self):
        # This is just the initial values for the 0th row and col, 
        # and is initialized to the product of gap penalty multiplied by
        # the length of the string till matched with gap
        # first row init
        for i in range(self.num_cols):
            self.table[0][i].score = i * self.gamma
            self.table[0][i].path_count = 1
        # first col init
        for i in range(self.num_rows):
            self.table[i][0].score = i * self.gamma
            self.table[i][0].path_count = 1

    def sigma(self, row_num, col_num):
        # comparison function
        # self.alpha if x == y
        # self.beta if x != y
        char1 = self.row_string[row_num]
        char2 = self.col_string[col_num]
        if(char1 == char2):
            return self.alpha # match
        else:
            return self.beta # mismatch
    
    # addl work for keeping track of number of paths
    def forward_pass(self):
        for row_num in range(1, self.num_rows):
        # for each row we can skip the column 1 
            for col_num in range(1, self.num_cols):
                diag_cell = self.table[row_num-1][col_num-1]
                hori_cell = self.table[row_num][col_num -1]
                vert_cell = self.table[row_num-1][col_num]
                diag_score = diag_cell.score +  self.sigma(row_num -1, col_num -1)
                hori_score = hori_cell.score + self.gamma
                vert_score = vert_cell.score + self.gamma
                max_score = max(diag_score, hori_score, vert_score)
                self.table[row_num][col_num].score = max_score
                for neighbor in [(diag_cell, diag_score), (hori_cell, hori_score), (vert_cell, vert_score)]:
                    if neighbor[1] == max_score:
                        self.table[row_num][col_num].path_count += neighbor[0].path_count

    # we start at the [row_num - 1][col_num - 1] and look at the three neighbord
    # to get the next move of the cell, and append to the string either a gap,match, mismatch
    def backward_pass(self):
        cur_row = self.num_rows - 1
        cur_col = self.num_cols - 1
        backtrace_cells = []
        while(cur_row > 0 or cur_col > 0):
            assert cur_row >= 0 and cur_col >= 0
            backtrace_cells.append((cur_row, cur_col))
            cur_val = self.table[cur_row][cur_col].score
            diag_val = self.table[cur_row-1][cur_col-1].score + self.sigma(cur_row - 1, cur_col -1)
            hori_val = self.table[cur_row][cur_col -1].score + self.gamma
            vert_val = self.table[cur_row-1][cur_col].score + self.gamma
            # print(f"cur_val {cur_val} diag_val {diag_val} hori_val {hori_val} vert_val {vert_val}")
            if(diag_val == cur_val):
                cur_row -= 1
                cur_col -= 1
                continue
            elif(hori_val == cur_val):
                cur_col -= 1
                continue
            elif(vert_val == cur_val):
                cur_row -= 1
                continue
            assert False
        return backtrace_cells

    def __repr__(self):
        f_str = "\t"
        f_str += f"col_string is {self.col_string}\n\t"
        f_str += f"row_string is {self.row_string}\n\t"
        f_str += f"Num Rows is {self.num_rows} and Num Cols is {self.num_cols}\n\t"
        f_str += f"Match Penalty {self.alpha}\n\tMisMatch Penalty {self.beta}\n\tGap Penatlty {self.gamma}\n\t"
        # attach the strings to the table when printing
        # nstring goes on top
        f_str += 2 * "\t " +  "\t".join(self.col_string)
        f_str += "\n\t"
        for i,row in enumerate(self.table):
            if(i):
                f_str += "\t".join([self.row_string[i - 1]] + [str(val) for val in row])
            else:
                f_str += "\t".join([" "] + [str(val) for val in row])
            f_str += "\n\t"
        return f_str

 if __name__ == "__main__":
    match_penalty = 2
    mismatch_penalty = -1
    gap_penalty = -1
    str1 = "agcttg"
    str2 = "aggctga"
    dp_table = DPTable(match_penalty, mismatch_penalty, gap_penalty, str1, str2)
    print("Initial DP Table is:")
    print(dp_table)
    dp_table.forward_pass()
    print("DP Table after the forward pass")
    print(dp_table)
    print(f"Optimal Score is {dp_table.table[-1][-1].score}")
    print(f"Number of Optimal Paths is {dp_table.table[-1][-1].path_count}")
    print("Path with preference for diagonal movement")
    backtrace_cells = dp_table.backward_pass()
    print(backtrace_cells)
	#!/usr/bin/env python3
	# Global Alignment with simple penalties and not affine function penalties
	# We count the number the paths along with the score in the table
	class Cell:
	def __init__(self):
	self.score = 0
	self.path_count = 0

	def __repr__(self):
	return f"({self.score}, {self.path_count})"

	class DPTable:
	def __init__(self,match_penalty, mismatch_penalty, gap_penalty, row_str, col_str):
	self.alpha = match_penalty # if the values are equal
	self.beta = mismatch_penalty # if we take the L for mismatch
	self.gamma = gap_penalty # if mismatch and instead we insert a gap
	self.row_string = row_str # rows
	self.num_rows = len(self.row_string) + 1
	self.col_string = col_str # cols
	self.num_cols = len(self.col_string) + 1
	# one extra column and row for keeping the intial values, i.e., if matched till only with the gap.
	self.table = []
	for i in range(self.num_rows):
	self.table.append([])
	for j in range(self.num_cols):
	self.table[i].append(Cell())
	self.initialize_table()

	def initialize_table(self):
	# This is just the initial values for the 0th row and col,
	# and is initialized to the product of gap penalty multiplied by
	# the length of the string till matched with gap
	# first row init
	for i in range(self.num_cols):
	self.table[0][i].score = i * self.gamma
	self.table[0][i].path_count = 1
	# first col init
	for i in range(self.num_rows):
	self.table[i][0].score = i * self.gamma
	self.table[i][0].path_count = 1

	def sigma(self, row_num, col_num):
	# comparison function
	# self.alpha if x == y
	# self.beta if x != y
	char1 = self.row_string[row_num]
	char2 = self.col_string[col_num]
	if(char1 == char2):
	return self.alpha # match
	else:
	return self.beta # mismatch

	# addl work for keeping track of number of paths
	def forward_pass(self):
	for row_num in range(1, self.num_rows):
	# for each row we can skip the column 1
	for col_num in range(1, self.num_cols):
	diag_cell = self.table[row_num-1][col_num-1]
	hori_cell = self.table[row_num][col_num -1]
	vert_cell = self.table[row_num-1][col_num]
	diag_score = diag_cell.score + self.sigma(row_num -1, col_num -1)
	hori_score = hori_cell.score + self.gamma
	vert_score = vert_cell.score + self.gamma
	max_score = max(diag_score, hori_score, vert_score)
	self.table[row_num][col_num].score = max_score
	for neighbor in [(diag_cell, diag_score), (hori_cell, hori_score), (vert_cell, vert_score)]:
	if neighbor[1] == max_score:
	self.table[row_num][col_num].path_count += neighbor[0].path_count

	# we start at the [row_num - 1][col_num - 1] and look at the three neighbord
	# to get the next move of the cell, and append to the string either a gap,match, mismatch
	def backward_pass(self):
	cur_row = self.num_rows - 1
	cur_col = self.num_cols - 1
	backtrace_cells = []
	while(cur_row > 0 or cur_col > 0):
	assert cur_row >= 0 and cur_col >= 0
	backtrace_cells.append((cur_row, cur_col))
	cur_val = self.table[cur_row][cur_col].score
	diag_val = self.table[cur_row-1][cur_col-1].score + self.sigma(cur_row - 1, cur_col -1)
	hori_val = self.table[cur_row][cur_col -1].score + self.gamma
	vert_val = self.table[cur_row-1][cur_col].score + self.gamma
	# print(f"cur_val {cur_val} diag_val {diag_val} hori_val {hori_val} vert_val {vert_val}")
	if(diag_val == cur_val):
	cur_row -= 1
	cur_col -= 1
	continue
	elif(hori_val == cur_val):
	cur_col -= 1
	continue
	elif(vert_val == cur_val):
	cur_row -= 1
	continue
	assert False
	return backtrace_cells

	def __repr__(self):
	f_str = "\t"
	f_str += f"col_string is {self.col_string}\n\t"
	f_str += f"row_string is {self.row_string}\n\t"
	f_str += f"Num Rows is {self.num_rows} and Num Cols is {self.num_cols}\n\t"
	f_str += f"Match Penalty {self.alpha}\n\tMisMatch Penalty {self.beta}\n\tGap Penatlty {self.gamma}\n\t"
	# attach the strings to the table when printing
	# nstring goes on top
	f_str += 2 * "\t " + "\t".join(self.col_string)
	f_str += "\n\t"
	for i,row in enumerate(self.table):
	if(i):
	f_str += "\t".join([self.row_string[i - 1]] + [str(val) for val in row])
	else:
	f_str += "\t".join([" "] + [str(val) for val in row])
	f_str += "\n\t"
	return f_str

	if __name__ == "__main__":
	match_penalty = 2
	mismatch_penalty = -1
	gap_penalty = -1
	str1 = "agcttg"
	str2 = "aggctga"
	dp_table = DPTable(match_penalty, mismatch_penalty, gap_penalty, str1, str2)
	print("Initial DP Table is:")
	print(dp_table)
	dp_table.forward_pass()
	print("DP Table after the forward pass")
	print(dp_table)
	print(f"Optimal Score is {dp_table.table[-1][-1].score}")
	print(f"Number of Optimal Paths is {dp_table.table[-1][-1].path_count}")
	print("Path with preference for diagonal movement")
	backtrace_cells = dp_table.backward_pass()
	print(backtrace_cells)