% Source: www.gusucode.com > stats source code, MATLAB example code > stats/ControlTreeDepthExample.m

    %% Control Regression Tree Depth
% You can control the depth of trees using the |MaxNumSplits|,
% |MinLeafSize|, or |MinParentSize| name-value pair parameters.  |fitrtree|
% grows deep decision trees by default.  You can grow shallower
% trees to reduce model complexity or computation time.
%%
% Load the |carsmall| data set. Consider |Displacement|, |Horsepower|, and
% |Weight| as predictors of the response |MPG|.
load('carsmall')
X = [Displacement, Horsepower, Weight];
%%
% When growing regression trees, the tree-depth controllers default to:
%
% * |n - 1| for |MaxNumSplits|, where |n| is the training sample size.
% * |1| for |MinLeafSize|.
% * |10| for |MinParentSize|.
%
% With a large training sample, these defaults tend to produce deep trees.
%%
% Grow a regression tree with the default tree-depth settings, and
% cross-validate it using 10-fold cross-validation.
rng(1); % Seed the random number generator for reproducibility
MdlDefault = fitrtree(X, MPG, 'CrossVal', 'on');
%%
% Plot a histogram of the number of imposed splits in each of the
% cross-validated trees.  A tree's number of imposed splits equals its
% number of branch nodes, which is one less than its number of leaves.
% Then display the tree trained on the first fold.
numBranches = @(tree) nnz(tree.IsBranch);
mdlDefaultNumSplits = cellfun(numBranches, MdlDefault.Trained);

figure
histogram(mdlDefaultNumSplits)

view(MdlDefault.Trained{1}, 'Mode', 'graph')
%%
% The average number of splits is between 14 and 15.
%%
% Suppose that you want a regression tree that is less complex (shallower)
% than the trees grown with the default number of splits.  Train another
% regression tree, this time capping the number of splits at 7, which is
% roughly half the mean number of splits of the default trees.  Again,
% cross-validate the model using 10-fold cross-validation, and display the
% tree trained on the first fold.
Mdl7 = fitrtree(X, MPG, 'CrossVal', 'on', 'MaxNumSplits', 7);
view(Mdl7.Trained{1}, 'Mode', 'graph')
%%
% Compare the cross-validation MSEs of the two models.  The trailing
% semicolons are omitted on purpose so that both values are displayed.
mseDefault = kfoldLoss(MdlDefault)
mse7 = kfoldLoss(Mdl7)
%%
% |Mdl7| is much less complex than |MdlDefault| and performs only slightly
% worse.