% Source: www.gusucode.com > stats source code, MATLAB example code > stats/SelectSVMClassifierFeaturesUsingMarginExample.m

    %% Select SVM Classifier Features by Examining Test Sample Margins
% The classifier margins measure, for each observation, the difference
% between the true class observed score and the maximal false class score
% for a particular class. One way to perform feature selection is to
% compare test sample margins from multiple models.  Based solely on this
% criterion, the model with the highest margins is the best model.
%%
% Load the |ionosphere| data set.

% Copyright 2015 The MathWorks, Inc.

load ionosphere
rng(1); % Seed the random number generator so the split is reproducible
%%
% Hold out 15% of the observations as a test sample; the remaining
% observations form the training set.
holdoutFraction = 0.15;
Partition = cvpartition(Y,'Holdout',holdoutFraction);
testInds = test(Partition); % Indices for the test set
[XTest,YTest] = deal(X(testInds,:),Y(testInds,:));
%%
% |Partition| is the |cvpartition| object that defines the split of the
% data set into training and test observations.
%%
% Define these two data sets:
%
% * |fullX| contains all predictors in |X| (this script removes no columns).
% * |partX| contains the last 21 predictors (columns |end-20| through |end|).
%
% The reduced column selection is stored once in |partCols| so that the
% training data and the test data for the reduced model always use the same
% predictors.
fullX = X;
partCols = size(X,2)-20:size(X,2); % Same columns as X(:,end-20:end)
partX = X(:,partCols);
%%
% Train SVM classifiers for each predictor set. Specify the partition
% definition.
FullCVSVMModel = fitcsvm(fullX,Y,'CVPartition',Partition);
PartCVSVMModel = fitcsvm(partX,Y,'CVPartition',Partition);
FCSVMModel = FullCVSVMModel.Trained{1};
PCSVMModel = PartCVSVMModel.Trained{1};
%%
% |FullCVSVMModel| and |PartCVSVMModel| are
% |ClassificationPartitionedModel| classifiers. They contain the property
% |Trained|, which is a 1-by-1 cell array holding a
% |CompactClassificationSVM| classifier that the software trained using the
% training set.
%%
% Estimate the test sample margins for each classifier. The reduced model
% must be evaluated on the same predictor columns it was trained on, so
% the test data is indexed with |partCols|.
fullM = margin(FCSVMModel,XTest,YTest);
partM = margin(PCSVMModel,XTest(:,partCols),YTest);
% Proportion of test observations whose full-model margin is smaller than
% the reduced-model margin (left unterminated so the value is displayed).
p = mean(fullM < partM)
%%
% Approximately 25% of the margins from the full model are less than those
% from the model with fewer predictors.  This suggests that the model
% trained using all of the predictors is better.