% Source: www.gusucode.com > stats source code, MATLAB example code > stats/AssessWhetherOneModelClassifiesBetterThanAnotherExample.m

    %% Assess Whether One Classification Model Classifies Better Than Another
% Train two classification models using the same algorithm, but adjust a
% hyperparameter to make the algorithm more complex.  Conduct a statistical
% test to assess whether the simpler model has better accuracy in held-out
% data than the more complex model.
%%
% Load the |ionosphere| data set.

% Copyright 2015 The MathWorks, Inc.

% Bring the data set into the workspace; the sections below use the
% variables X (predictors) and Y (class labels) that it supplies.
load('ionosphere');
%%
% Create a partition that evenly splits the data into training and testing
% sets. 
rng(1);                                 % Fixed seed, so the split is reproducible
CVP = cvpartition(Y, 'holdout', 0.5);   % Hold out a 0.5 fraction for testing
idxTest  = test(CVP);                   % Test-set indices
idxTrain = training(CVP);               % Training-set indices
%%
% |CVP| is a cross-validation partition object that specifies the training
% and test sets.
%%
% Train two SVM models: one that uses a linear kernel (the default for
% binary classification) and one that uses the radial basis function
% kernel. Use the default kernel scale of 1.
% Fit both SVMs to the same training rows with predictor standardization
% turned on; the two calls differ only in the kernel.
MdlLinear = fitcsvm(X(idxTrain,:), Y(idxTrain), ...
    'Standardize', true);                           % kernel left at its default
MdlRBF = fitcsvm(X(idxTrain,:), Y(idxTrain), ...
    'KernelFunction', 'RBF', 'Standardize', true);  % radial basis function kernel
%%
% |MdlLinear| and |MdlRBF| are trained |ClassificationSVM| models.
%% 
% Label the test-set observations using the trained models.
% Classify the held-out rows with each trained model.
YhatLinear = MdlLinear.predict(X(idxTest,:));
YhatRBF = MdlRBF.predict(X(idxTest,:));
%%
% |YhatLinear| and |YhatRBF| are vectors containing the predicted
% class labels of the respective models.
%%
% Test the null hypothesis that the simpler model (|MdlLinear|) is at most
% as accurate as the more complex model (|MdlRBF|). Because the test-set
% size is large, conduct the asymptotic McNemar test, and compare the
% results with the mid- _p_-value test (the cost-insensitive testing
% default). Request to return _p_-values and misclassification rates.
% Column vectors that receive the four outputs (h, p, e1, e2) of each test.
Asymp = zeros(4,1);
MidP = zeros(size(Asymp));

% Asymptotic test with the one-sided 'greater' alternative.
[Asymp(1),Asymp(2),Asymp(3),Asymp(4)] = testcholdout( ...
    YhatLinear, YhatRBF, Y(idxTest), ...
    'Test','asymptotic', 'Alternative','greater');

% Same alternative, with 'Test' left at its default.
[MidP(1),MidP(2),MidP(3),MidP(4)] = testcholdout( ...
    YhatLinear, YhatRBF, Y(idxTest), ...
    'Alternative','greater');

% No trailing semicolon: the comparison table is displayed.
table(Asymp, MidP, 'RowNames', {'h','p','e1','e2'})
%%
% The _p_-value is close to zero for both tests, which indicates strong
% evidence to reject the null hypothesis that the simpler model is at most
% as accurate as the more complex model.  No matter what test you specify,
% |testcholdout| returns the same type of misclassification measure for
% both models.