Skip to content

Instantly share code, notes, and snippets.

@sowmith1999
Created February 5, 2025 23:51
Show Gist options
  • Save sowmith1999/af07b52fa4fc9488dd98a637a2f284f9 to your computer and use it in GitHub Desktop.
Save sowmith1999/af07b52fa4fc9488dd98a637a2f284f9 to your computer and use it in GitHub Desktop.
Global Alignment with Path Counting
#!/usr/bin/env python3
# Global Alignment with simple penalties and not affine function penalties
# We count the number the paths along with the score in the table
class Cell:
def __init__(self):
self.score = 0
self.path_count = 0
def __repr__(self):
return f"({self.score}, {self.path_count})"
class DPTable:
def __init__(self,match_penalty, mismatch_penalty, gap_penalty, row_str, col_str):
self.alpha = match_penalty # if the values are equal
self.beta = mismatch_penalty # if we take the L for mismatch
self.gamma = gap_penalty # if mismatch and instead we insert a gap
self.row_string = row_str # rows
self.num_rows = len(self.row_string) + 1
self.col_string = col_str # cols
self.num_cols = len(self.col_string) + 1
# one extra column and row for keeping the intial values, i.e., if matched till only with the gap.
self.table = []
for i in range(self.num_rows):
self.table.append([])
for j in range(self.num_cols):
self.table[i].append(Cell())
self.initialize_table()
def initialize_table(self):
# This is just the initial values for the 0th row and col,
# and is initialized to the product of gap penalty multiplied by
# the length of the string till matched with gap
# first row init
for i in range(self.num_cols):
self.table[0][i].score = i * self.gamma
self.table[0][i].path_count = 1
# first col init
for i in range(self.num_rows):
self.table[i][0].score = i * self.gamma
self.table[i][0].path_count = 1
def sigma(self, row_num, col_num):
# comparison function
# self.alpha if x == y
# self.beta if x != y
char1 = self.row_string[row_num]
char2 = self.col_string[col_num]
if(char1 == char2):
return self.alpha # match
else:
return self.beta # mismatch
# addl work for keeping track of number of paths
def forward_pass(self):
for row_num in range(1, self.num_rows):
# for each row we can skip the column 1
for col_num in range(1, self.num_cols):
diag_cell = self.table[row_num-1][col_num-1]
hori_cell = self.table[row_num][col_num -1]
vert_cell = self.table[row_num-1][col_num]
diag_score = diag_cell.score + self.sigma(row_num -1, col_num -1)
hori_score = hori_cell.score + self.gamma
vert_score = vert_cell.score + self.gamma
max_score = max(diag_score, hori_score, vert_score)
self.table[row_num][col_num].score = max_score
for neighbor in [(diag_cell, diag_score), (hori_cell, hori_score), (vert_cell, vert_score)]:
if neighbor[1] == max_score:
self.table[row_num][col_num].path_count += neighbor[0].path_count
# we start at the [row_num - 1][col_num - 1] and look at the three neighbord
# to get the next move of the cell, and append to the string either a gap,match, mismatch
def backward_pass(self):
cur_row = self.num_rows - 1
cur_col = self.num_cols - 1
backtrace_cells = []
while(cur_row > 0 or cur_col > 0):
assert cur_row >= 0 and cur_col >= 0
backtrace_cells.append((cur_row, cur_col))
cur_val = self.table[cur_row][cur_col].score
diag_val = self.table[cur_row-1][cur_col-1].score + self.sigma(cur_row - 1, cur_col -1)
hori_val = self.table[cur_row][cur_col -1].score + self.gamma
vert_val = self.table[cur_row-1][cur_col].score + self.gamma
# print(f"cur_val {cur_val} diag_val {diag_val} hori_val {hori_val} vert_val {vert_val}")
if(diag_val == cur_val):
cur_row -= 1
cur_col -= 1
continue
elif(hori_val == cur_val):
cur_col -= 1
continue
elif(vert_val == cur_val):
cur_row -= 1
continue
assert False
return backtrace_cells
def __repr__(self):
f_str = "\t"
f_str += f"col_string is {self.col_string}\n\t"
f_str += f"row_string is {self.row_string}\n\t"
f_str += f"Num Rows is {self.num_rows} and Num Cols is {self.num_cols}\n\t"
f_str += f"Match Penalty {self.alpha}\n\tMisMatch Penalty {self.beta}\n\tGap Penatlty {self.gamma}\n\t"
# attach the strings to the table when printing
# nstring goes on top
f_str += 2 * "\t " + "\t".join(self.col_string)
f_str += "\n\t"
for i,row in enumerate(self.table):
if(i):
f_str += "\t".join([self.row_string[i - 1]] + [str(val) for val in row])
else:
f_str += "\t".join([" "] + [str(val) for val in row])
f_str += "\n\t"
return f_str
if __name__ == "__main__":
match_penalty = 2
mismatch_penalty = -1
gap_penalty = -1
str1 = "agcttg"
str2 = "aggctga"
dp_table = DPTable(match_penalty, mismatch_penalty, gap_penalty, str1, str2)
print("Initial DP Table is:")
print(dp_table)
dp_table.forward_pass()
print("DP Table after the forward pass")
print(dp_table)
print(f"Optimal Score is {dp_table.table[-1][-1].score}")
print(f"Number of Optimal Paths is {dp_table.table[-1][-1].path_count}")
print("Path with preference for diagonal movement")
backtrace_cells = dp_table.backward_pass()
print(backtrace_cells)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment