www.gusucode.com > stats 源码程序 matlab案例代码 > stats/FeatureSelectionUsingkfoldMarginsECOCExample.m

    %% Feature Selection Using _k_-fold Margins
% One way to perform feature selection is to compare _k_-fold margins from
% multiple models.  Based solely on this criterion, the classifier with the
% larger margins is the better classifier.
%%
% Load the NLP data set.  Preprocess the data as in
% <docid:stats_ug.bu624sp>, and orient the predictor data so that
% observations correspond to columns.
load nlpdata
% Keep the three classes of interest; collapse every other label into 'others'.
classesToKeep = {'simulink','dsp','comm'};
Y(~ismember(Y,classesToKeep)) = 'others';
% Orient the predictor data so that observations correspond to columns.
X = X';
%%
% Create these two data sets:
%
% * |fullX| contains all predictors.
% * |partX| contains 1/2 of the predictors chosen at random.
%
rng(1); % Seed the generator so the random predictor subset is reproducible
numPredictors = size(X,1); % predictors are rows after the earlier transpose
% Sample half of the predictor indices at random, without replacement.
sampledRows = randsample(numPredictors,ceil(0.5*numPredictors));
fullX = X;                 % data set with every predictor
partX = X(sampledRows,:);  % data set with the randomly chosen half
%%
% Create a linear classification model template that specifies to solve the
% objective function using SpaRSA.
t = templateLinear('Solver','sparsa');
%%
% Cross-validate two ECOC models composed of binary, linear classification
% models: one that uses all of the predictors and one that uses half of
% the predictors. Indicate that observations correspond to columns.
% Both cross-validated ECOC fits share the same name-value options:
% linear learners, 10-fold cross-validation, observations in columns.
commonArgs = {'Learners',t,'CrossVal','on','ObservationsIn','columns'};
CVMdl  = fitcecoc(fullX,Y,commonArgs{:}); % all predictors
PCVMdl = fitcecoc(partX,Y,commonArgs{:}); % half of the predictors
%%
% |CVMdl| and |PCVMdl| are |ClassificationPartitionedLinearECOC| models. 
%%
% Estimate the _k_-fold margins for each classifier. Plot the distribution
% of the _k_-fold margins sets using box plots.
% k-fold margins for each cross-validated classifier.
marginsFull = kfoldMargin(CVMdl);
marginsPart = kfoldMargin(PCVMdl);

% Compare the two margin distributions side by side with box plots.
figure;
boxplot([marginsFull, marginsPart],'Labels',...
    {'All Predictors','Half of the Predictors'});
ax = gca;
ax.YLim = [-1 1]; % fix the vertical axis so both boxes share one scale
title('Distribution of Cross-Validated Margins')
%%  
% The distributions of the _k_-fold margins of the two classifiers are
% similar.