% Source: www.gusucode.com > stats source code, MATLAB example code > stats/UnbiasedPredictorImportanceEstimatesTBExample.m

    %% Unbiased Predictor Importance Estimates
%%
% Load the |carsmall| data set.  Consider a model that predicts the mean 
% fuel economy of a car given its acceleration, number of cylinders, engine
% displacement, horsepower, manufacturer, model year, and weight.  Consider
% |Cylinders|, |Mfg|, and |Model_Year| as categorical variables.
rng('default'); % Fix the random seed so the results are reproducible
load('carsmall'); % Bring the carsmall sample variables into the workspace
% Recast the discrete-valued variables as categorical. cellstr converts Mfg
% to a cell array of character vectors before the conversion.
Model_Year = categorical(Model_Year);
Mfg = categorical(cellstr(Mfg));
Cylinders = categorical(Cylinders);
% Collect the predictors and MPG (last column) into a single table; the
% table variable names are taken from the workspace variable names.
X = table(Acceleration, Cylinders, Displacement, Horsepower, ...
    Mfg, Model_Year, Weight, MPG);
%%
% Display the number of categories represented in the categorical
% variables.
numCylinders = length(categories(Cylinders)) % no semicolon: echo the count
numMfg = length(categories(Mfg))
numModelYear = length(categories(Model_Year))
%%
% Because |Cylinders| and |Model_Year| each have only 3 categories, the
% standard CART predictor-splitting algorithm prefers splitting a
% continuous predictor over these two variables.
%%
% Train a random forest of 200 regression trees using the entire data set.
% To grow unbiased trees, specify usage of the curvature test for splitting
% predictors. Because there are missing values in the data, specify usage
% of surrogate splits.  Store the out-of-bag information for predictor
% importance estimation.
Mdl = TreeBagger(200,X,'MPG', ...
    'Method','regression', ...
    'PredictorSelection','curvature', ...
    'Surrogate','on', ...
    'OOBPredictorImportance','on');
%%
% |TreeBagger| stores predictor importance estimates in the property
% |OOBPermutedPredictorDeltaError|. Compare the estimates using a bar
% graph.
imp = Mdl.OOBPermutedPredictorDeltaError;

% Draw one bar per predictor in a new figure window.
figure
bar(imp)
xlabel('Predictors')
ylabel('Predictor importance estimates')
title('Curvature Test')
% Label the bars with the predictor names of the plotted model. The 'none'
% interpreter shows the names verbatim (no TeX markup interpretation).
h = gca;
set(h, 'XTickLabel',Mdl.PredictorNames, ...
    'XTickLabelRotation',45, ...
    'TickLabelInterpreter','none');
%%
% In this case, |Model_Year| is the most important predictor, followed by
% |Weight|.
%%
% Compare |imp| to predictor importance estimates computed from a
% random forest that grows trees using standard CART.
MdlCART = TreeBagger(200,X,'MPG','Method','regression','Surrogate','on',...
    'OOBPredictorImportance','on');
% No 'PredictorSelection' option is passed here, so TreeBagger uses its
% default split-selection method for this forest.

% Out-of-bag permuted predictor importance estimates of the CART forest.
impCART = MdlCART.OOBPermutedPredictorDeltaError;

figure;
bar(impCART);
title('Standard CART');
ylabel('Predictor importance estimates');
xlabel('Predictors');
h = gca;
% Take the tick labels from the model being plotted (MdlCART), not from
% the curvature-test model, so this plot does not depend on another model.
h.XTickLabel = MdlCART.PredictorNames;
h.XTickLabelRotation = 45;
% Show the names verbatim (no TeX markup interpretation).
h.TickLabelInterpreter = 'none';
%%
% In this case, |Weight|, a continuous predictor, is the most important.
% The next two most important predictors are |Model_Year|, followed
% closely by |Horsepower|, which is a continuous predictor.