Needleman algorithm

Needleman algorithm#

Define a score function and two input sequences for alignment#

score=c(1,-1,0)
seq1="AGTC"
seq2="ACGTC"

Create matrix M#

ncol = nchar(seq1)+1
nrow = nchar(seq2)+1
M = matrix(0, ncol = ncol, nrow = nrow)
colnames(M) = c("",unlist(strsplit(seq1,split="")))
rownames(M) = c("",unlist(strsplit(seq2,split="")))

coln = c("",unlist(strsplit(seq1,split="")))
rown = c("",unlist(strsplit(seq2,split="")))

move = M

Initialize matrix M#

for(i in 2:ncol) M[1,i] = M[1,i-1] + score[3]
for(i in 2:nrow) M[i,1] = M[i-1,1] + score[3]

for(i in 2:ncol) move[1,i] = 1
for(i in 2:nrow) move[i,1] = 2

M

A matrix: 6 × 5 of type dbl
		A	G	T	C
	0	0	0	0	0
A	0	0	0	0	0
C	0	0	0	0	0
G	0	0	0	0	0
T	0	0	0	0	0
C	0	0	0	0	0

Fill in matrix M#

for(i in 2:nrow){
  for(j in 2:ncol){
    h_score = M[i,j-1] + score[3]
    v_score = M[i-1,j] + score[3]
    if(coln[j] == rown[i]) d_score = M[i-1,j-1] + score[1]
    else d_score = M[i-1,j-1] + score[2]
    x = c(h_score,v_score,d_score)
    M[i,j] = max(x)
    move[i,j] = which(x == M[i,j])[1]
  }
}
M

A matrix: 6 × 5 of type dbl
	A	G	T	C
	0	0	0	0
A	1	1	1	1
C	1	1	1	2
G	1	2	2	2
T	1	2	3	3
C	1	2	3	4

Tracing back to find the alignment#

alignment = matrix("",nrow=2,ncol=nrow+ncol)
i = nrow
j = ncol
x = move[i,j]
index = nrow+ncol
while(x != 0){
  if(x == 3) {
    alignment[1,index] = coln[j]
    alignment[2,index] = rown[i]
    i = i-1
    j = j-1
    index = index-1
  }else if(x == 2){
    alignment[1,index] = "-"
    alignment[2,index] = rown[i]
    i = i-1
    index = index-1
  }else{
    alignment[1,index] = coln[j]
    alignment[2,index] = "-"
    j = j-1
    index = index-1
  }
  x = move[i,j]
}

Show the alignment#

alg=rep("",2)
alg[1]=paste(alignment[1,], collapse="")
alg[2]=paste(alignment[2,], collapse="")

result = list(M=as.matrix, score = as.numeric, alignment=as.character)

result$M = M
result$score = M[nrow,ncol]
result$alignment = alg

result

$M

A matrix: 6 × 5 of type dbl
	A	G	T	C
	0	0	0	0
A	1	1	1	1
C	1	1	1	2
G	1	2	2	2
T	1	2	3	3
C	1	2	3	4

$score

4

$alignment

'A-GTC'
'ACGTC'

A wrapper function for Needleman-Wunch algorithm#

Needleman_Wunch <- function(seq1, seq2, score){
  ncol = nchar(seq1)+1
  nrow = nchar(seq2)+1
  
  
  #create matrix M
  M = matrix(0, ncol = ncol, nrow = nrow)
  colnames(M) = c("",unlist(strsplit(seq1,split="")))
  rownames(M) = c("",unlist(strsplit(seq2,split="")))
  
  coln = c("",unlist(strsplit(seq1,split="")))
  rown = c("",unlist(strsplit(seq2,split="")))
  
  move = M
  
  #fill in the first row and first column of M,N
  for(i in 2:ncol) M[1,i] = M[1,i-1] + score[3]
  for(i in 2:nrow) M[i,1] = M[i-1,1] + score[3]
  
  for(i in 2:ncol) move[1,i] = 1
  for(i in 2:nrow) move[i,1] = 2
  
  #fill in M
  for(i in 2:nrow){
    for(j in 2:ncol){
      h_score = M[i,j-1] + score[3]
      v_score = M[i-1,j] + score[3]
      if(coln[j] == rown[i]) d_score = M[i-1,j-1] + score[1]
      else d_score = M[i-1,j-1] + score[2]
      x = c(h_score,v_score,d_score)
      M[i,j] = max(x)
      move[i,j] = which(x == M[i,j])[1]
    }
  }
  
  alignment = matrix("",nrow=2,ncol=nrow+ncol)
  i = nrow
  j = ncol
  x = move[i,j]
  index = nrow+ncol
  while(x != 0){
    if(x == 3) {
      alignment[1,index] = coln[j]
      alignment[2,index] = rown[i]
      i = i-1
      j = j-1
      index = index-1
    }else if(x == 2){
      alignment[1,index] = "-"
      alignment[2,index] = rown[i]
      i = i-1
      index = index-1
    }else{
      alignment[1,index] = coln[j]
      alignment[2,index] = "-"
      j = j-1
      index = index-1
    }
    x = move[i,j]
  }
  
  alg=rep("",2)
  alg[1]=paste(alignment[1,], collapse="")
  alg[2]=paste(alignment[2,], collapse="")
  
  result = list(M=as.matrix, score = as.numeric, alignment=as.character)
  
  result$M = M
  result$score = M[nrow,ncol]
  result$alignment = alg
  
  
  result

}

seq1="ATTCCTGTTCCCGTC"
seq2="ATCCTGCGTTCGTC"
Needleman_Wunch(seq1,seq2,score=c(1,-1,-1))

$M

A matrix: 15 × 16 of type dbl
		A	T	T	C	C	T	G	T	T	C	C	C	G	T	C
	0	-1	-2	-3	-4	-5	-6	-7	-8	-9	-10	-11	-12	-13	-14	-15
A	-1	1	0	-1	-2	-3	-4	-5	-6	-7	-8	-9	-10	-11	-12	-13
T	-2	0	2	1	0	-1	-2	-3	-4	-5	-6	-7	-8	-9	-10	-11
C	-3	-1	1	1	2	1	0	-1	-2	-3	-4	-5	-6	-7	-8	-9
C	-4	-2	0	0	2	3	2	1	0	-1	-2	-3	-4	-5	-6	-7
T	-5	-3	-1	1	1	2	4	3	2	1	0	-1	-2	-3	-4	-5
G	-6	-4	-2	0	0	1	3	5	4	3	2	1	0	-1	-2	-3
C	-7	-5	-3	-1	1	1	2	4	4	3	4	3	2	1	0	-1
G	-8	-6	-4	-2	0	0	1	3	3	3	3	3	2	3	2	1
T	-9	-7	-5	-3	-1	-1	1	2	4	4	3	2	2	2	4	3
T	-10	-8	-6	-4	-2	-2	0	1	3	5	4	3	2	1	3	3
C	-11	-9	-7	-5	-3	-1	-1	0	2	4	6	5	4	3	2	4
G	-12	-10	-8	-6	-4	-2	-2	0	1	3	5	5	4	5	4	3
T	-13	-11	-9	-7	-5	-3	-1	-1	1	2	4	4	4	4	6	5
C	-14	-12	-10	-8	-6	-4	-2	-2	0	1	3	5	5	4	5	7

$score

7

$alignment

'ATTCCTG--TTCCCGTC'
'AT-CCTGCGTTC--GTC'

		A	T	T	C	C	T	G	T	T	C	C	C	G	T	C
	0	-1	-2	-3	-4	-5	-6	-7	-8	-9	-10	-11	-12	-13	-14	-15
A	-1	1	0	-1	-2	-3	-4	-5	-6	-7	-8	-9	-10	-11	-12	-13
T	-2	0	2	1	0	-1	-2	-3	-4	-5	-6	-7	-8	-9	-10	-11
C	-3	-1	1	1	2	1	0	-1	-2	-3	-4	-5	-6	-7	-8	-9
C	-4	-2	0	0	2	3	2	1	0	-1	-2	-3	-4	-5	-6	-7
T	-5	-3	-1	1	1	2	4	3	2	1	0	-1	-2	-3	-4	-5
G	-6	-4	-2	0	0	1	3	5	4	3	2	1	0	-1	-2	-3
C	-7	-5	-3	-1	1	1	2	4	4	3	4	3	2	1	0	-1
G	-8	-6	-4	-2	0	0	1	3	3	3	3	3	2	3	2	1
T	-9	-7	-5	-3	-1	-1	1	2	4	4	3	2	2	2	4	3
T	-10	-8	-6	-4	-2	-2	0	1	3	5	4	3	2	1	3	3
C	-11	-9	-7	-5	-3	-1	-1	0	2	4	6	5	4	3	2	4
G	-12	-10	-8	-6	-4	-2	-2	0	1	3	5	5	4	5	4	3
T	-13	-11	-9	-7	-5	-3	-1	-1	1	2	4	4	4	4	6	5
C	-14	-12	-10	-8	-6	-4	-2	-2	0	1	3	5	5	4	5	7

		A	T	T	C	C	T	G	T	T	C	C	C	G	T	C
	0	-1	-2	-3	-4	-5	-6	-7	-8	-9	-10	-11	-12	-13	-14	-15
A	-1	1	0	-1	-2	-3	-4	-5	-6	-7	-8	-9	-10	-11	-12	-13
T	-2	0	2	1	0	-1	-2	-3	-4	-5	-6	-7	-8	-9	-10	-11
C	-3	-1	1	1	2	1	0	-1	-2	-3	-4	-5	-6	-7	-8	-9
C	-4	-2	0	0	2	3	2	1	0	-1	-2	-3	-4	-5	-6	-7
T	-5	-3	-1	1	1	2	4	3	2	1	0	-1	-2	-3	-4	-5
G	-6	-4	-2	0	0	1	3	5	4	3	2	1	0	-1	-2	-3
C	-7	-5	-3	-1	1	1	2	4	4	3	4	3	2	1	0	-1
G	-8	-6	-4	-2	0	0	1	3	3	3	3	3	2	3	2	1
T	-9	-7	-5	-3	-1	-1	1	2	4	4	3	2	2	2	4	3
T	-10	-8	-6	-4	-2	-2	0	1	3	5	4	3	2	1	3	3
C	-11	-9	-7	-5	-3	-1	-1	0	2	4	6	5	4	3	2	4
G	-12	-10	-8	-6	-4	-2	-2	0	1	3	5	5	4	5	4	3
T	-13	-11	-9	-7	-5	-3	-1	-1	1	2	4	4	4	4	6	5
C	-14	-12	-10	-8	-6	-4	-2	-2	0	1	3	5	5	4	5	7

Needleman algorithm

Contents

Needleman algorithm#

Define a score function and two input sequences for alignment#

Create matrix M#

Initialize matrix M#

Fill in matrix M#

Tracing back to find the alignment#

Show the alignment#

A wrapper function for Needleman-Wunch algorithm#

		A	T	T	C	C	T	G	T	T	C	C	C	G	T	C
	0	-1	-2	-3	-4	-5	-6	-7	-8	-9	-10	-11	-12	-13	-14	-15
A	-1	1	0	-1	-2	-3	-4	-5	-6	-7	-8	-9	-10	-11	-12	-13
T	-2	0	2	1	0	-1	-2	-3	-4	-5	-6	-7	-8	-9	-10	-11
C	-3	-1	1	1	2	1	0	-1	-2	-3	-4	-5	-6	-7	-8	-9
C	-4	-2	0	0	2	3	2	1	0	-1	-2	-3	-4	-5	-6	-7
T	-5	-3	-1	1	1	2	4	3	2	1	0	-1	-2	-3	-4	-5
G	-6	-4	-2	0	0	1	3	5	4	3	2	1	0	-1	-2	-3
C	-7	-5	-3	-1	1	1	2	4	4	3	4	3	2	1	0	-1
G	-8	-6	-4	-2	0	0	1	3	3	3	3	3	2	3	2	1
T	-9	-7	-5	-3	-1	-1	1	2	4	4	3	2	2	2	4	3
T	-10	-8	-6	-4	-2	-2	0	1	3	5	4	3	2	1	3	3
C	-11	-9	-7	-5	-3	-1	-1	0	2	4	6	5	4	3	2	4
G	-12	-10	-8	-6	-4	-2	-2	0	1	3	5	5	4	5	4	3
T	-13	-11	-9	-7	-5	-3	-1	-1	1	2	4	4	4	4	6	5
C	-14	-12	-10	-8	-6	-4	-2	-2	0	1	3	5	5	4	5	7