% www.gusucode.com > stats source code > MATLAB example code > stats/SurrogateSplitsExample.m

    %% Surrogate Splits
% When you have missing data, trees and ensembles of trees give better
% predictions when they include surrogate splits. Furthermore, estimates of
% predictor importance are often different with surrogate splits.
% Eliminating unimportant predictors can save time and memory for
% predictions, and can make predictions easier to understand.
%%
% This example shows the effect of surrogate splits on predictions when
% the test set contains missing entries.
%%
% Load sample data. Partition it into a training and test set.

% Copyright 2015 The MathWorks, Inc.

% Bring the ionosphere sample data (predictors X, labels Y) into the workspace.
load('ionosphere');

% Seed the generator before partitioning so the holdout split is reproducible.
rng(10)

% Hold out 30% of the observations for testing.
cv = cvpartition(Y,'Holdout',0.3);
isTrain = training(cv);   % logical row mask: training observations
isTest  = test(cv);       % logical row mask: held-out observations

% Slice predictors and labels with the two masks.
Xtrain = X(isTrain,:);
Xtest  = X(isTest,:);
Ytrain = Y(isTrain);
Ytest  = Y(isTest);
%%
% Train two bagged classification ensembles on the training set: one built
% from default trees and one built from trees that use surrogate splits.
nLearners  = 50;                % number of trees in each ensemble
commonOpts = {'Type','Class'};  % options shared by both fits

% Baseline ensemble: default decision trees.
b = fitensemble(Xtrain,Ytrain,'Bag',nLearners,'Tree',commonOpts{:});

% Comparison ensemble: tree template with surrogate splits switched on.
templS = templateTree('Surrogate','On');
bs = fitensemble(Xtrain,Ytrain,'Bag',nLearners,templS,commonOpts{:});
%%
% Simulate missing data: mark each test-set entry as missing (NaN) whenever
% an independent uniform draw exceeds 0.5, i.e. roughly half of the values.
missingMask = rand(size(Xtest)) > 0.5;
Xtest(missingMask) = NaN;
%%
% Compare test error as a function of ensemble size. With 'Mode','Cumulative'
% loss returns one value per number of trees used, so each curve shows how
% the error evolves as trees are added.
errRegular   = loss(b,Xtest,Ytest,'Mode','Cumulative');
errSurrogate = loss(bs,Xtest,Ytest,'Mode','Cumulative');

figure;
plot(errRegular);
hold('on');                      % keep the first curve when drawing the second
plot(errSurrogate,'r--');
xlabel('Number of trees');
ylabel('Test classification error');
legend('Regular trees','Trees with surrogate splits');
%%
% Check the statistical significance of the difference in results with the
% McNemar test. Convert the labels to a |nominal| data type to make it
% easier to check for equality.
Ytrue = nominal(Ytest);                 % convert the true labels once
Yfit  = nominal(predict(b,Xtest));      % predictions without surrogate splits
YfitS = nominal(predict(bs,Xtest));     % predictions with surrogate splits

% Discordant pairs: observations that exactly one of the ensembles
% classifies correctly.
N10 = sum(Yfit==Ytrue & YfitS~=Ytrue);  % regular right, surrogate wrong
N01 = sum(Yfit~=Ytrue & YfitS==Ytrue);  % regular wrong, surrogate right

if N10 + N01 == 0
    % No discordant pairs: the statistic would be 1/0 = Inf and the p-value
    % 0, wrongly signalling a significant difference. With no disagreement
    % there is no evidence that the ensembles differ.
    mcnemar = 0;
    pval = 1
else
    % Chi-square statistic with continuity correction, 1 degree of freedom.
    mcnemar = (abs(N10-N01) - 1)^2/(N10+N01);
    % Evaluate the upper tail directly; 1 - chi2cdf(...) cancels to 0 for
    % very small p-values.
    pval = chi2cdf(mcnemar,1,'upper')
end
%%
% The extremely low _p_-value indicates that the ensemble with surrogate
% splits is better in a statistically significant manner.