Needleman algorithm#
Define a score function and two input sequences for alignment#
score=c(1,-1,0)
seq1="AGTC"
seq2="ACGTC"
Create matrix M#
ncol = nchar(seq1)+1
nrow = nchar(seq2)+1
M = matrix(0, ncol = ncol, nrow = nrow)
colnames(M) = c("",unlist(strsplit(seq1,split="")))
rownames(M) = c("",unlist(strsplit(seq2,split="")))
coln = c("",unlist(strsplit(seq1,split="")))
rown = c("",unlist(strsplit(seq2,split="")))
move = M
Initialize matrix M#
for(i in 2:ncol) M[1,i] = M[1,i-1] + score[3]
for(i in 2:nrow) M[i,1] = M[i-1,1] + score[3]
for(i in 2:ncol) move[1,i] = 1
for(i in 2:nrow) move[i,1] = 2
M
| A | G | T | C | ||
|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 0 | |
| A | 0 | 0 | 0 | 0 | 0 |
| C | 0 | 0 | 0 | 0 | 0 |
| G | 0 | 0 | 0 | 0 | 0 |
| T | 0 | 0 | 0 | 0 | 0 |
| C | 0 | 0 | 0 | 0 | 0 |
Fill in matrix M#
for(i in 2:nrow){
for(j in 2:ncol){
h_score = M[i,j-1] + score[3]
v_score = M[i-1,j] + score[3]
if(coln[j] == rown[i]) d_score = M[i-1,j-1] + score[1]
else d_score = M[i-1,j-1] + score[2]
x = c(h_score,v_score,d_score)
M[i,j] = max(x)
move[i,j] = which(x == M[i,j])[1]
}
}
M
| A | G | T | C | ||
|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 0 | |
| A | 0 | 1 | 1 | 1 | 1 |
| C | 0 | 1 | 1 | 1 | 2 |
| G | 0 | 1 | 2 | 2 | 2 |
| T | 0 | 1 | 2 | 3 | 3 |
| C | 0 | 1 | 2 | 3 | 4 |
Tracing back to find the alignment#
alignment = matrix("",nrow=2,ncol=nrow+ncol)
i = nrow
j = ncol
x = move[i,j]
index = nrow+ncol
while(x != 0){
if(x == 3) {
alignment[1,index] = coln[j]
alignment[2,index] = rown[i]
i = i-1
j = j-1
index = index-1
}else if(x == 2){
alignment[1,index] = "-"
alignment[2,index] = rown[i]
i = i-1
index = index-1
}else{
alignment[1,index] = coln[j]
alignment[2,index] = "-"
j = j-1
index = index-1
}
x = move[i,j]
}
Show the alignment#
alg=rep("",2)
alg[1]=paste(alignment[1,], collapse="")
alg[2]=paste(alignment[2,], collapse="")
result = list(M=as.matrix, score = as.numeric, alignment=as.character)
result$M = M
result$score = M[nrow,ncol]
result$alignment = alg
result
- $M
A matrix: 6 × 5 of type dbl A G T C 0 0 0 0 0 A 0 1 1 1 1 C 0 1 1 1 2 G 0 1 2 2 2 T 0 1 2 3 3 C 0 1 2 3 4 - $score
- 4
- $alignment
-
- 'A-GTC'
- 'ACGTC'
A wrapper function for Needleman-Wunch algorithm#
Needleman_Wunch <- function(seq1, seq2, score){
ncol = nchar(seq1)+1
nrow = nchar(seq2)+1
#create matrix M
M = matrix(0, ncol = ncol, nrow = nrow)
colnames(M) = c("",unlist(strsplit(seq1,split="")))
rownames(M) = c("",unlist(strsplit(seq2,split="")))
coln = c("",unlist(strsplit(seq1,split="")))
rown = c("",unlist(strsplit(seq2,split="")))
move = M
#fill in the first row and first column of M,N
for(i in 2:ncol) M[1,i] = M[1,i-1] + score[3]
for(i in 2:nrow) M[i,1] = M[i-1,1] + score[3]
for(i in 2:ncol) move[1,i] = 1
for(i in 2:nrow) move[i,1] = 2
#fill in M
for(i in 2:nrow){
for(j in 2:ncol){
h_score = M[i,j-1] + score[3]
v_score = M[i-1,j] + score[3]
if(coln[j] == rown[i]) d_score = M[i-1,j-1] + score[1]
else d_score = M[i-1,j-1] + score[2]
x = c(h_score,v_score,d_score)
M[i,j] = max(x)
move[i,j] = which(x == M[i,j])[1]
}
}
alignment = matrix("",nrow=2,ncol=nrow+ncol)
i = nrow
j = ncol
x = move[i,j]
index = nrow+ncol
while(x != 0){
if(x == 3) {
alignment[1,index] = coln[j]
alignment[2,index] = rown[i]
i = i-1
j = j-1
index = index-1
}else if(x == 2){
alignment[1,index] = "-"
alignment[2,index] = rown[i]
i = i-1
index = index-1
}else{
alignment[1,index] = coln[j]
alignment[2,index] = "-"
j = j-1
index = index-1
}
x = move[i,j]
}
alg=rep("",2)
alg[1]=paste(alignment[1,], collapse="")
alg[2]=paste(alignment[2,], collapse="")
result = list(M=as.matrix, score = as.numeric, alignment=as.character)
result$M = M
result$score = M[nrow,ncol]
result$alignment = alg
result
}
seq1="ATTCCTGTTCCCGTC"
seq2="ATCCTGCGTTCGTC"
Needleman_Wunch(seq1,seq2,score=c(1,-1,-1))
- $M
A matrix: 15 × 16 of type dbl A T T C C T G T T C C C G T C 0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -13 -14 -15 A -1 1 0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -13 T -2 0 2 1 0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 C -3 -1 1 1 2 1 0 -1 -2 -3 -4 -5 -6 -7 -8 -9 C -4 -2 0 0 2 3 2 1 0 -1 -2 -3 -4 -5 -6 -7 T -5 -3 -1 1 1 2 4 3 2 1 0 -1 -2 -3 -4 -5 G -6 -4 -2 0 0 1 3 5 4 3 2 1 0 -1 -2 -3 C -7 -5 -3 -1 1 1 2 4 4 3 4 3 2 1 0 -1 G -8 -6 -4 -2 0 0 1 3 3 3 3 3 2 3 2 1 T -9 -7 -5 -3 -1 -1 1 2 4 4 3 2 2 2 4 3 T -10 -8 -6 -4 -2 -2 0 1 3 5 4 3 2 1 3 3 C -11 -9 -7 -5 -3 -1 -1 0 2 4 6 5 4 3 2 4 G -12 -10 -8 -6 -4 -2 -2 0 1 3 5 5 4 5 4 3 T -13 -11 -9 -7 -5 -3 -1 -1 1 2 4 4 4 4 6 5 C -14 -12 -10 -8 -6 -4 -2 -2 0 1 3 5 5 4 5 7 - $score
- 7
- $alignment
-
- 'ATTCCTG--TTCCCGTC'
- 'AT-CCTGCGTTC--GTC'