Created
February 5, 2025 23:51
-
-
Save sowmith1999/af07b52fa4fc9488dd98a637a2f284f9 to your computer and use it in GitHub Desktop.
Global Alignment with Path Counting
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # Global Alignment with simple penalties and not affine function penalties | |
| # We count the number the paths along with the score in the table | |
| class Cell: | |
| def __init__(self): | |
| self.score = 0 | |
| self.path_count = 0 | |
| def __repr__(self): | |
| return f"({self.score}, {self.path_count})" | |
| class DPTable: | |
| def __init__(self,match_penalty, mismatch_penalty, gap_penalty, row_str, col_str): | |
| self.alpha = match_penalty # if the values are equal | |
| self.beta = mismatch_penalty # if we take the L for mismatch | |
| self.gamma = gap_penalty # if mismatch and instead we insert a gap | |
| self.row_string = row_str # rows | |
| self.num_rows = len(self.row_string) + 1 | |
| self.col_string = col_str # cols | |
| self.num_cols = len(self.col_string) + 1 | |
| # one extra column and row for keeping the intial values, i.e., if matched till only with the gap. | |
| self.table = [] | |
| for i in range(self.num_rows): | |
| self.table.append([]) | |
| for j in range(self.num_cols): | |
| self.table[i].append(Cell()) | |
| self.initialize_table() | |
| def initialize_table(self): | |
| # This is just the initial values for the 0th row and col, | |
| # and is initialized to the product of gap penalty multiplied by | |
| # the length of the string till matched with gap | |
| # first row init | |
| for i in range(self.num_cols): | |
| self.table[0][i].score = i * self.gamma | |
| self.table[0][i].path_count = 1 | |
| # first col init | |
| for i in range(self.num_rows): | |
| self.table[i][0].score = i * self.gamma | |
| self.table[i][0].path_count = 1 | |
| def sigma(self, row_num, col_num): | |
| # comparison function | |
| # self.alpha if x == y | |
| # self.beta if x != y | |
| char1 = self.row_string[row_num] | |
| char2 = self.col_string[col_num] | |
| if(char1 == char2): | |
| return self.alpha # match | |
| else: | |
| return self.beta # mismatch | |
| # addl work for keeping track of number of paths | |
| def forward_pass(self): | |
| for row_num in range(1, self.num_rows): | |
| # for each row we can skip the column 1 | |
| for col_num in range(1, self.num_cols): | |
| diag_cell = self.table[row_num-1][col_num-1] | |
| hori_cell = self.table[row_num][col_num -1] | |
| vert_cell = self.table[row_num-1][col_num] | |
| diag_score = diag_cell.score + self.sigma(row_num -1, col_num -1) | |
| hori_score = hori_cell.score + self.gamma | |
| vert_score = vert_cell.score + self.gamma | |
| max_score = max(diag_score, hori_score, vert_score) | |
| self.table[row_num][col_num].score = max_score | |
| for neighbor in [(diag_cell, diag_score), (hori_cell, hori_score), (vert_cell, vert_score)]: | |
| if neighbor[1] == max_score: | |
| self.table[row_num][col_num].path_count += neighbor[0].path_count | |
| # we start at the [row_num - 1][col_num - 1] and look at the three neighbord | |
| # to get the next move of the cell, and append to the string either a gap,match, mismatch | |
| def backward_pass(self): | |
| cur_row = self.num_rows - 1 | |
| cur_col = self.num_cols - 1 | |
| backtrace_cells = [] | |
| while(cur_row > 0 or cur_col > 0): | |
| assert cur_row >= 0 and cur_col >= 0 | |
| backtrace_cells.append((cur_row, cur_col)) | |
| cur_val = self.table[cur_row][cur_col].score | |
| diag_val = self.table[cur_row-1][cur_col-1].score + self.sigma(cur_row - 1, cur_col -1) | |
| hori_val = self.table[cur_row][cur_col -1].score + self.gamma | |
| vert_val = self.table[cur_row-1][cur_col].score + self.gamma | |
| # print(f"cur_val {cur_val} diag_val {diag_val} hori_val {hori_val} vert_val {vert_val}") | |
| if(diag_val == cur_val): | |
| cur_row -= 1 | |
| cur_col -= 1 | |
| continue | |
| elif(hori_val == cur_val): | |
| cur_col -= 1 | |
| continue | |
| elif(vert_val == cur_val): | |
| cur_row -= 1 | |
| continue | |
| assert False | |
| return backtrace_cells | |
| def __repr__(self): | |
| f_str = "\t" | |
| f_str += f"col_string is {self.col_string}\n\t" | |
| f_str += f"row_string is {self.row_string}\n\t" | |
| f_str += f"Num Rows is {self.num_rows} and Num Cols is {self.num_cols}\n\t" | |
| f_str += f"Match Penalty {self.alpha}\n\tMisMatch Penalty {self.beta}\n\tGap Penatlty {self.gamma}\n\t" | |
| # attach the strings to the table when printing | |
| # nstring goes on top | |
| f_str += 2 * "\t " + "\t".join(self.col_string) | |
| f_str += "\n\t" | |
| for i,row in enumerate(self.table): | |
| if(i): | |
| f_str += "\t".join([self.row_string[i - 1]] + [str(val) for val in row]) | |
| else: | |
| f_str += "\t".join([" "] + [str(val) for val in row]) | |
| f_str += "\n\t" | |
| return f_str | |
| if __name__ == "__main__": | |
| match_penalty = 2 | |
| mismatch_penalty = -1 | |
| gap_penalty = -1 | |
| str1 = "agcttg" | |
| str2 = "aggctga" | |
| dp_table = DPTable(match_penalty, mismatch_penalty, gap_penalty, str1, str2) | |
| print("Initial DP Table is:") | |
| print(dp_table) | |
| dp_table.forward_pass() | |
| print("DP Table after the forward pass") | |
| print(dp_table) | |
| print(f"Optimal Score is {dp_table.table[-1][-1].score}") | |
| print(f"Number of Optimal Paths is {dp_table.table[-1][-1].path_count}") | |
| print("Path with preference for diagonal movement") | |
| backtrace_cells = dp_table.backward_pass() | |
| print(backtrace_cells) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment