%% Select Appropriate Tree Depth
% This example shows how to control the depth of a decision tree, and how
% to choose an appropriate depth.
%%
% Load the |ionosphere| data.
load ionosphere
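%%
% |X| is a 351-by-34 numeric matrix of radar measurements, and |Y| is a
% 351-by-1 cell array of class labels (|'g'| for good radar returns and
% |'b'| for bad ones). A quick workspace check confirms the sizes:
whos X Y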
%%
% Generate an exponentially spaced set of values from |10| through |100|
% that represent the minimum number of observations per leaf node.
leafs = logspace(1,2,10);
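%%
% For reference, display the ten candidate leaf sizes. Because the values
% are logarithmically spaced, most of them are not integers.
disp(leafs)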
%%
% Create cross-validated classification trees for the |ionosphere| data.
% Specify to grow each tree using a minimum leaf size in |leafs|. 
rng('default') % For reproducibility
N = numel(leafs);
err = zeros(N,1);
for n=1:N
    % Grow a 10-fold cross-validated tree with the current minimum leaf
    % size, and record its cross-validated misclassification error.
    t = fitctree(X,Y,'CrossVal','On',...
        'MinLeafSize',leafs(n));
    err(n) = kfoldLoss(t);
end
plot(leafs,err);
xlabel('Min Leaf Size');
ylabel('Cross-Validated Error');
%%
% The best leaf size is between about |20| and |50| observations per leaf.
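%%
% One way to pick a leaf size programmatically (a minimal sketch; it
% simply selects the candidate in |leafs| with the smallest
% cross-validated error):
[~,minIdx] = min(err);
bestLeafSize = leafs(minIdx)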
%%
% Compare the near-optimal tree, grown with at least |40| observations per
% leaf, with the default tree, which requires a minimum of |10|
% observations per parent node and |1| observation per leaf.
DefaultTree = fitctree(X,Y);
view(DefaultTree,'Mode','Graph')

OptimalTree = fitctree(X,Y,'MinLeafSize',40);
view(OptimalTree,'Mode','Graph')
%%
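% Compute the resubstitution error and the cross-validation error of each
% tree.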
resubOpt = resubLoss(OptimalTree);
lossOpt = kfoldLoss(crossval(OptimalTree));
resubDefault = resubLoss(DefaultTree);
lossDefault = kfoldLoss(crossval(DefaultTree));
resubOpt,resubDefault,lossOpt,lossDefault
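%%
% Optionally, collect the four error estimates in a table for easier
% side-by-side comparison (a small convenience; it reuses the values
% computed above):
results = table([resubOpt; resubDefault],[lossOpt; lossDefault],...
    'VariableNames',{'Resubstitution','CrossValidated'},...
    'RowNames',{'OptimalTree','DefaultTree'})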
%%
% The near-optimal tree is much smaller and has a much higher
% resubstitution error, yet it provides comparable accuracy for
% cross-validated data.
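%%
% To quantify how much smaller the near-optimal tree is, count the leaf
% nodes of each tree (a sketch using the |IsBranchNode| property, which
% is |true| for branch nodes and |false| for leaves):
numLeavesOpt = sum(~OptimalTree.IsBranchNode)
numLeavesDefault = sum(~DefaultTree.IsBranchNode)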