%% Feature Selection Using _k_-fold Margins
% One way to perform feature selection is to compare _k_-fold margins from
% multiple models.  Based solely on this criterion, the classifier with the
% larger margins is the better classifier.
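%
% For a binary linear model, the margin of an observation is the difference
% between the classification score for the true class and the score for the
% other class. Under one common convention, the margin is $m_j = y_jf(x_j)$,
% where $y_j \in \{-1,1\}$ is the true label and $f$ is the classification
% score; scaling conventions vary, but under any of them larger margins
% indicate more confident, correct classifications.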
%%
% Load the NLP data set.  Preprocess the data as in
% <docid:stats_ug.bu622gg>.
load nlpdata
Ystats = Y == 'stats'; % Binary response: pages from Statistics and Machine Learning Toolbox documentation
X = X'; % Orient observations in columns for faster training
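%%
% As a quick sanity check (not part of the preprocessing itself), you can
% confirm that predictors now correspond to rows and observations to
% columns, and inspect the class balance.
size(X)      % Rows are predictors, columns are observations
mean(Ystats) % Fraction of observations labeled 'stats'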
%%
% Create these two data sets:
%
% * |fullX| contains all predictors.
% * |partX| contains half of the predictors, chosen at random.
%
rng(1); % For reproducibility
p = size(X,1); % Number of predictors
halfPredIdx = randsample(p,ceil(0.5*p));
fullX = X;
partX = X(halfPredIdx,:);
%%
% Cross-validate two binary, linear classification models: one that uses
% all of the predictors and one that uses half of the predictors. Solve
% the objective function using SpaRSA, and indicate that observations
% correspond to columns.
CVMdl = fitclinear(fullX,Ystats,'CrossVal','on','Solver','sparsa',...
    'ObservationsIn','columns');
PCVMdl = fitclinear(partX,Ystats,'CrossVal','on','Solver','sparsa',...
    'ObservationsIn','columns');
%%
% |CVMdl| and |PCVMdl| are |ClassificationPartitionedLinear| models. 
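% By default, |fitclinear| with |'CrossVal','on'| performs 10-fold
% cross-validation, so each cross-validated model stores one compact
% |ClassificationLinear| model per fold. You can confirm the number of
% folds:
CVMdl.KFold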
%%
% Estimate the _k_-fold margins for each classifier. Plot the distributions
% of the _k_-fold margins using box plots.
fullMargins = kfoldMargin(CVMdl);
partMargins = kfoldMargin(PCVMdl);

figure;
boxplot([fullMargins partMargins],'Labels',...
    {'All Predictors','Half of the Predictors'});
h = gca;
h.YLim = [-30 60];
title('Distribution of Cross-Validated Margins')
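%%
% To supplement the visual comparison, summarize each set of margins with a
% single statistic, for example the median (one reasonable choice of
% summary statistic, not the only one).
median(fullMargins)
median(partMargins)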
%%  
% The distributions of the margins of the two classifiers are similar. This
% suggests that the model trained on half of the predictors does not
% sacrifice classification quality, so you can prefer the simpler model.