% www.gusucode.com > stats 源码程序 matlab案例代码 > stats/UnbiasedPredictorImportanceEstimatesRegExample.m

    %% Unbiased Predictor Importance Estimates
%%
% Load the |carsmall| data set.  Consider a model that predicts the mean 
% fuel economy of a car given its acceleration, number of cylinders, engine
% displacement, horsepower, manufacturer, model year, and weight.  Consider
% |Cylinders|, |Mfg|, and |Model_Year| as categorical variables.
load carsmall
Cylinders = categorical(Cylinders);
Mfg = categorical(cellstr(Mfg));
Model_Year = categorical(Model_Year);
X = table(Acceleration,Cylinders,Displacement,Horsepower,Mfg,...
    Model_Year,Weight,MPG);
%%
% Display the number of categories represented in the categorical
% variables.
numCylinders = numel(categories(Cylinders))
numMfg = numel(categories(Mfg))
numModelYear = numel(categories(Model_Year))
%%
% Because there are 3 categories only in |Cylinders| and |Model_Year|, the
% standard CART, predictor-splitting algorithm prefers splitting a 
% continuous predictor over these two variables.
%%
% Train a regression tree using the entire data set. To grow unbiased
% trees, specify usage of the curvature test for splitting predictors.
% Because there are missing values in the data, specify usage of surrogate
% splits.
Mdl = fitrtree(X,'MPG','PredictorSelection','curvature','Surrogate','on');
%%
% Estimate predictor importance values by summing changes in the risk due
% to splits on every predictor and dividing the sum by the number of branch
% nodes.  Compare the estimates using a bar graph.
imp = predictorImportance(Mdl);

figure;
bar(imp);
title('Predictor Importance Estimates');
ylabel('Estimates');
xlabel('Predictors');
h = gca;
h.XTickLabel = Mdl.PredictorNames;
h.XTickLabelRotation = 45;
h.TickLabelInterpreter = 'none';
%%
% In this case, |Displacement| is the most important predictor, followed by
% |Horsepower|.