# Problem setup: globally align sequence1 = "AGWGAHEA" with
# sequence2 = "PAWHEAEAG" using a linear gap penalty of -8 and the
# BLOSUM50 substitution matrix.
import numpy as np
import pandas as pd
import os
# Inputs for the alignment: the two sequences and the linear gap penalty.
sequence1, sequence2 = "AGWGAHEA", "PAWHEAEAG"
gap = -8

# Accumulators for the two aligned strings; the traceback appends to them
# back-to-front and they are reversed when printed.
s1 = s2 = ''

# Load the substitution scores from the spreadsheet.
# NOTE(review): the working directory is switched to a hard-coded,
# machine-specific path before the relative file name is resolved.
os.chdir(r"C:/Users/16926/Desktop/")
score_matrix = pd.read_excel("sample.xlsx")

# Dynamic-programming table: one row per prefix of sequence2 and one column
# per prefix of sequence1 (including the empty prefix). np.empty leaves the
# cells uninitialised; the fill loop assigns every one of them.
best_matrix = np.empty((len(sequence2) + 1, len(sequence1) + 1), dtype=int)
def get_match_score(s1, s2):
    """Return the substitution score for the residue pair (s1, s2).

    The module-level ``score_matrix`` DataFrame is indexed column-first:
    ``score_matrix[s1]`` selects the column labelled ``s1`` and ``[s2]``
    then selects the row labelled ``s2``.

    The table printed with this script has only its upper triangle filled
    (NaN elsewhere), so a direct lookup yields NaN for half of all pairs.
    When that happens the mirrored cell is used instead.
    NOTE(review): this assumes the matrix is symmetric, which holds for
    substitution matrices such as BLOSUM50 — confirm for other inputs.

    Args:
        s1: Residue used as the column label.
        s2: Residue used as the row label.

    Returns:
        The score stored for the pair; NaN only if both orientations of
        the pair are missing from the matrix.

    Raises:
        KeyError: If either residue is not a label of ``score_matrix``.
    """
    score = score_matrix[s1][s2]
    if pd.isna(score):
        # Only one triangle of the table is populated; read the mirror cell.
        score = score_matrix[s2][s1]
    return score
# Fill the dynamic-programming table. Rows follow sequence2, columns follow
# sequence1; cell (row, col) holds the best score for aligning the first
# `row` residues of sequence2 with the first `col` residues of sequence1.
n_rows = len(sequence2) + 1
n_cols = len(sequence1) + 1

# Borders: aligning a prefix against nothing costs one gap per residue.
best_matrix[0, :] = gap * np.arange(n_cols)
best_matrix[:, 0] = gap * np.arange(n_rows)

for row in range(1, n_rows):
    for col in range(1, n_cols):
        substitution = get_match_score(sequence2[row - 1], sequence1[col - 1])
        from_above = best_matrix[row - 1][col] + gap
        from_left = best_matrix[row][col - 1] + gap
        from_diagonal = best_matrix[row - 1][col - 1] + substitution
        # The diagonal candidate is deliberately passed last: if
        # get_match_score returns NaN, every comparison against it is False
        # and max() keeps the better of the two gap candidates.
        best_matrix[row][col] = max(from_above, from_left, from_diagonal)

print(best_matrix)
# Trace back from the bottom-right cell of best_matrix to the origin,
# emitting one alignment column per step. Columns are collected
# back-to-front, so the strings are reversed when printed.
aligned1 = []  # residues of sequence1, or '-' for a gap
aligned2 = []  # residues of sequence2, or '-' for a gap
i, j = len(sequence2), len(sequence1)
while i > 0 or j > 0:
    # Only consult the substitution score when a diagonal move is possible.
    # On a border, index -1 would wrap around to the last residue (or raise
    # IndexError for an empty sequence).
    came_from_diagonal = False
    if i > 0 and j > 0:
        match = get_match_score(sequence2[i - 1], sequence1[j - 1])
        came_from_diagonal = (
            best_matrix[i][j] == best_matrix[i - 1][j - 1] + match
        )

    if came_from_diagonal:
        # Residue aligned with residue.
        aligned1.append(sequence1[j - 1])
        aligned2.append(sequence2[i - 1])
        i -= 1
        j -= 1
    elif i > 0 and (j == 0 or best_matrix[i, j] == best_matrix[i - 1, j] + gap):
        # Gap in sequence1. In the first column this is the only legal move,
        # which also guarantees that j never drops below zero.
        aligned1.append('-')
        aligned2.append(sequence2[i - 1])
        i -= 1
    else:
        # Gap in sequence2.
        aligned1.append(sequence1[j - 1])
        aligned2.append('-')
        j -= 1

# Keep s1/s2 holding the back-to-front strings, as before.
s1 += ''.join(aligned1)
s2 += ''.join(aligned2)
print(s1[::-1]+'\n'+s2[::-1])
[[ 0 -8 -16 -24 -32 -40 -48 -56 -64]
[ -8 -1 -9 -17 -25 -33 -41 -49 -57]
[-16 -3 -11 -19 -27 -20 -28 -36 -44]
[-24 -11 -6 4 -4 -12 -20 -28 -36]
[-32 -19 -13 -4 2 -6 -2 -10 -18]
[-40 -27 -21 -12 -6 1 -7 4 -4]
[-48 -35 -29 -20 -14 -1 -9 -4 9]
[-56 -43 -37 -28 -22 -9 -17 -3 1]
[-64 -51 -45 -36 -30 -17 -25 -11 2]
[-72 -59 -43 -44 -28 -25 -33 -19 -6]]
-AGWG-AHEA-
PA-WHEA-EAG
score_matrix
A E G H P W
A 5.0 -1.0 0.0 -2.0 -1.0 -3
E NaN 6.0 -3.0 0.0 -1.0 -3
G NaN NaN 8.0 -2.0 -2.0 -3
H NaN NaN NaN 10.0 -2.0 -3
P NaN NaN NaN NaN 10.0 -4
W NaN NaN NaN NaN NaN 15