% Source: www.gusucode.com > stats source code, MATLAB example code > stats/FeatureSelectionUsingTestSampleMarginsExample.m

    %% Feature Selection Using Test-Sample Margins
% Comparing the test-sample margins of several models is one way to perform
% feature selection.  Judged solely by this criterion, the classifier with
% the larger margins is the better classifier.
%%
% Load the NLP data set and preprocess the data as in
% <docid:stats_ug.bu620gc>.
load nlpdata
Ystats = (Y == 'stats');  % Binary response: true where the label is 'stats'
X = X';                   % Transpose so that observations correspond to columns
rng(1); % For reproducibility
%%
% Create a data partition that holds out 30% of the observations for
% testing.
Partition = cvpartition(Ystats,'Holdout',0.30);
testIdx = test(Partition); % Test-set indices
YTest = Ystats(testIdx);
XTest = X(:,testIdx);
%%
% |Partition| is a |cvpartition| object that defines the data set
% partition.
%%
% Randomly choose 10% of the predictor variables.
p = size(X,1); % Number of predictors
idxPart = randsample(p,ceil(0.1*p));
%%
% Train two binary, linear classification models: one that uses all of the
% predictors and one that uses the random 10%.  Solve the objective
% function using SpaRSA, and indicate that observations correspond to
% columns.
CVMdl = fitclinear(X,Ystats,'ObservationsIn','columns',...
    'Solver','sparsa','CVPartition',Partition);
PCVMdl = fitclinear(X(idxPart,:),Ystats,'ObservationsIn','columns',...
    'Solver','sparsa','CVPartition',Partition);
%%
% |CVMdl| and |PCVMdl| are |ClassificationPartitionedLinear| models.
%%
% Extract the trained |ClassificationLinear| models from the
% cross-validated models.
CMdl = CVMdl.Trained{1};
PCMdl = PCVMdl.Trained{1};
%%
% Estimate the test-sample margins for each classifier.  Plot the
% distributions of the margin sets using box plots.
fullMargins = margin(CMdl,XTest,YTest,'ObservationsIn','columns');
partMargins = margin(PCMdl,XTest(idxPart,:),YTest,'ObservationsIn','columns');

figure;
boxplot([fullMargins partMargins],...
    'Labels',{'All Predictors','10% of the Predictors'});
h = gca;
set(h,'YLim',[-20 60]); % Fix the y-axis range of the box plot
title('Test-Sample Margins')
%%
% The margin distribution of |CMdl| is situated higher than the margin
% distribution of |PCMdl|.