www.gusucode.com > bioinfo 案例源码程序 matlab代码 > bioinfo/CountTheNumberOfReadsMappedToGenomicFeaturesExample.m

    %% Count the number of reads mapped to genomic features
% 
%%
% Count reads from a sample SAM file that map to the features included in a GTF
% file. By default, |featurecount| maps the reads to exons, and summarizes
% the total number of reads at the gene level.
[t,s] = featurecount('Dmel_BDGP5_nohc.gtf','rnaseq_sample1.sam');

%%
% Display the first 10 rows of count data. 
t(1:10,:) 

%%
% The |ID| column contains the names of features (genes in this example).
% The |Reference| column lists the names of reference sequences for the
% features. The third column contains the total number of reads mapped to
% each feature for a given SAM file, that is, rnaseq_sample1.sam. By
% default, the table shows only those features (rows) and SAM files
% (columns) with non-zero read counts. Set 'ShowZeroCounts' to true to
% include those rows and columns with all zero counts in the output table.

%%
% |s| contains the summary statistics of assigned and unassigned reads from
% each SAM file. For instance, the |TotalEntries| row indicates the total
% number of alignment records from the given SAM file, and the |Assigned|
% row includes the number of reads that are assigned to features in the GTF
% file. For details about each row, refer to the Output Arguments section
% of the reference page.
s

%%
% Count reads without any summarization and disable displaying the progress
% messages.
[t2,s2] = featurecount('Dmel_BDGP5_nohc.gtf','rnaseq_sample1.sam', ...
                        'Summarization',false,'Verbose',false);

%%
% Notice the ID column of the output table now reports the feature
% attribute followed by the start and stop positions of each feature,
% separated by underscores.
t2(1:10,:)

%%
% You can choose how to assign a read to a particular feature when the read
% overlaps with multiple features by setting the 'OverlapMethod' option.
% For instance, if you want to count a read only if it fully overlaps
% a feature, use the 'full' option.
[tFull, sFull] = featurecount('Dmel_BDGP5_nohc.gtf','rnaseq_sample1.sam', ...
                              'OverlapMethod','full','Verbose',false);

%%
% If you have paired-end data, you can count reads as fragments. 
[tFrag,sFrag] = featurecount('Dmel_BDGP5_nohc.gtf','rnaseq_sample1.sam', ...
                             'CountFragments',true,'Verbose',false);
%%
% You can also count fragments from multiple SAM files.
[t2,s2] = featurecount('Dmel_BDGP5_nohc.gtf',...
          {'rnaseq_sample1.sam','rnaseq_sample2.sam'},'CountFragments',true, ...
          'Verbose',false);
%%
% Use the following options to count paired-end reads where at least one of
% the read mates are above a certain mapping quality threshold.
[t3,s3] = featurecount('Dmel_BDGP5_nohc.gtf',...
          'rnaseq_sample1.sam','CountFragments',true,'MinMappingQuality',20, ...
          'Verbose',false);
 
%%
% If the reads come from any strand-specific assay, you can specify such
% strand specificity during counting. For instance, if the protocol is
% stranded, the strand of the feature is compared with the strand of the
% read. Then only those reads that have the same strand as the overlapped
% feature are counted.
[t4,s4] = featurecount('Dmel_BDGP5_nohc.gtf',...
          'rnaseq_sample1.sam','StrandSpecificity','stranded','Verbose',false);