www.gusucode.com > DNA序列检测源码程序 > DNA序列检测源码程序/SVD/main.m

    % ------------------------------------------------------------------ %
% -------------- Prepared by : Ismail M. El-Badawy ----------------- %
% ------------------------------------------------------------------ %

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This code reads the F56F11.4 dna sequence  and detect the period-3 %
% -------- behavior using singular value decomposition with -------- %
% ---------- frame size 'frmsz' to predict the locations ----------- %
% --------------- of protein-coding regions (exons) ---------------- %  
% ---------------------- in the dna sequence ----------------------- %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% This code is a simulation of the method proposed in the following paper:

% M. Akhtar, E. Ambikairajah, and J. Epps, 揇etection of period-3 
% behavior in genomic sequences using singular value decomposition, 
% in Proc. IEEE Int. Conf. Emerging Technologies, 2005, pp. 13?7.

clear all;close all;clc

% Read dna sequence from FASTA format file downloaded from http://www.ncbi.nlm.nih.gov/ %
% ------------------------------------------------------------------------------------- %
sample_dna = fastaread('sequence_AF099922.fasta');  % Caenorhabditis elegans (Wormbase)

% Convert the sequence from structure array to string %
% --------------------------------------------------- %
sample_dna = struct2cell(sample_dna);
sample_dna = cell2mat(sample_dna(2));

% Take only the F56F11.4 sequence (positions from 7021 to 15020) %
% -------------------------------------------------------------- %
sample_dna = sample_dna(7021:15020);    

% Actual locations of coding sequences (cds or exons) %
% --------------------------------------------------- %
cds = [928 1038 2527 2856 4113 4376 5464 5643 7254 7604];

for ctr = 1:2:length(cds)                       
    actual_exons(cds(ctr):cds(ctr+1)) = 1;
end
actual_exons(cds(length(cds))+1:length(sample_dna)) = 0;

% Define the frame size for singular value decomposition (svd) %
% ------------------------------------------------------------ %
frmsz = 81;

if mod(frmsz,3) ~= 0
    error('Frame size should be a multiple of 3')
end

% Convert string to numerical values (Binary Indicator Sequences) %
% --------------------------------------------------------------- %
[xA xT xC xG] = dna_binary(sample_dna);

xA = [zeros(1,floor(frmsz/2)) xA zeros(1,ceil(frmsz/2))];  % Zero padding from both sides
xT = [zeros(1,floor(frmsz/2)) xT zeros(1,ceil(frmsz/2))];
xC = [zeros(1,floor(frmsz/2)) xC zeros(1,ceil(frmsz/2))];
xG = [zeros(1,floor(frmsz/2)) xG zeros(1,ceil(frmsz/2))];

% Anti-notch Filter centered at angular frequency 2pi/3 %
% ----------------------------------------------------- %
R = 0.992;
theta = (2*pi)/3;
    
    % Filter Coefficients %
    % ------------------- %
    b = [1 0 -1];
    a = [1 -2*R*cos(theta) R^2];
    
% Filtering the signal (DNA Sequence) %
% ----------------------------------- %
uA = filter(b,a,xA);
uT = filter(b,a,xT);
uC = filter(b,a,xC);
uG = filter(b,a,xG);

% Detection of Period-3 Behavior using Singular Value Decomposition %
% ----------------------------------------------------------------- %

for n = 1:length(sample_dna)
    
    sa = reshape(uA(n:n+frmsz-1),3,frmsz/3);
    XA(n) = max(svd(sa));
    
    st = reshape(uT(n:n+frmsz-1),3,frmsz/3);
    XT(n) = max(svd(st));
    
    sc = reshape(uC(n:n+frmsz-1),3,frmsz/3);
    XC(n) = max(svd(sc));
    
    sg = reshape(uG(n:n+frmsz-1),3,frmsz/3);
    XG(n) = max(svd(sg));
    
end

X = XA + XT + XC + XG;

% Plot X  Vs nucleotide positions %
% ------------------------------- %
plot(X/max(X))
hold on;
plot(actual_exons,':k','linewidth',1)      % Show actual exonic regions
axis([0 8000 0 1.05])
xlabel('DNA Sequence (Nucleotide Position)')
ylabel('SVD')
title('Detection of Period-3 Behavior using SVD')