%% Select Features Using Statistical Accuracy Comparison
% Reduce classification model complexity by selecting a subset of
% predictor variables (features) from a larger set. Then, statistically
% compare the out-of-sample accuracies of the full and reduced models.
%%
% Load the |ionosphere| data set.

% Copyright 2015 The MathWorks, Inc.

load ionosphere;
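%%
% As a quick, optional check (not part of the original example), inspect
% the size of the predictor matrix and the class balance of the response
% before partitioning the data:
size(X)        % Number of observations and predictors in the ionosphere data
tabulate(Y)    % Class counts for the two radar-return classes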
%%
% Create a partition that evenly splits the data into training and testing
% sets. 
rng(1);                             % For reproducibility
CVP = cvpartition(Y,'holdout',0.5);
idxTrain = training(CVP);           % Training-set indices 
idxTest = test(CVP);                % Test-set indices
%%
% |CVP| is a cross-validation partition object that specifies the training
% and test sets.
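%%
% As an optional sanity check (not in the original example), confirm that
% the holdout partition assigns roughly half of the observations to each
% set:
fprintf('Training observations: %d\n',sum(idxTrain));
fprintf('Test observations: %d\n',sum(idxTest));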
%%
% Train an ensemble of 100 boosted classification trees using AdaBoostM1
% and the entire set of predictors. Inspect the importance measure for each
% predictor.
nTrees = 100;
C2 = fitensemble(X(idxTrain,:),Y(idxTrain),'AdaBoostM1',nTrees,'Tree');
predImp = predictorImportance(C2);

figure;
bar(predImp);
ax = gca;
ax.XTick = 1:2:ax.XLim(2);
title('Predictor Importances');
xlabel('Predictor');
ylabel('Importance measure');
%%
% Identify the top five predictors in terms of their importance.
[~,idxSort] = sort(predImp,'descend');
idx5 = idxSort(1:5);
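%%
% For reference (an optional step, not in the original example), list the
% indices and importance values of the five selected predictors:
table(idx5(:),predImp(idx5)','VariableNames',{'Predictor','Importance'})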
%%
% Train another ensemble of 100 boosted classification trees using AdaBoostM1
% and the five most important predictors.
C1 = fitensemble(X(idxTrain,idx5),Y(idxTrain),'AdaBoostM1',nTrees,...
    'Tree');
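%%
% As an informal comparison (not a substitute for the statistical test
% below), you can inspect each ensemble's resubstitution loss on the
% training set:
resubLoss(C1)   % Training-set loss of the reduced ensemble
resubLoss(C2)   % Training-set loss of the full ensemble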
%%
% Test whether the two models have equal predictive accuracies.  Specify
% the reduced test-set predictor data for |C1| and the full test-set
% predictor data for |C2|.
[h,p,e1,e2] = compareHoldout(C1,C2,X(idxTest,idx5),X(idxTest,:),Y(idxTest))
%%
% |h = 0| indicates that the test does not reject the null hypothesis that
% the two models have equal predictive accuracy. This result favors the
% simpler ensemble, |C1|.
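%%
% As a supplementary check (not part of the hypothesis test), convert the
% classification losses returned by |compareHoldout| into test-set
% accuracies for each ensemble:
acc1 = 1 - e1;    % Accuracy of the reduced ensemble C1 (5 predictors)
acc2 = 1 - e2;    % Accuracy of the full ensemble C2 (all predictors)
fprintf('Test accuracy, C1: %.4f\nTest accuracy, C2: %.4f\n',acc1,acc2);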