%% Select Appropriate Tree Depth
% This example shows how to control the depth of a decision tree, and how
% to choose an appropriate depth.
%%
% Load the |ionosphere| data.
load ionosphere
%%
% Generate an exponentially spaced set of values from |10| through |100|
% that represent the minimum number of observations per leaf node.
leafs = logspace(1,2,10);
%%
% Create cross-validated classification trees for the |ionosphere| data.
% Specify to grow each tree using a minimum leaf size in |leafs|.
rng('default')
N = numel(leafs);
err = zeros(N,1);
for n=1:N
    t = fitctree(X,Y,'CrossVal','On',...
        'MinLeafSize',leafs(n));
    err(n) = kfoldLoss(t);
end
plot(leafs,err);
xlabel('Min Leaf Size');
ylabel('cross-validated error');
%%
% The best leaf size is between about |20| and |50| observations per leaf.
%%
% Compare the near-optimal tree with at least |40| observations per leaf
% with the default tree, which uses |10| observations per parent node and
% |1| observation per leaf.
DefaultTree = fitctree(X,Y);
view(DefaultTree,'Mode','graph')

OptimalTree = fitctree(X,Y,'MinLeafSize',40);
view(OptimalTree,'Mode','graph')
%%
resubOpt = resubLoss(OptimalTree);
lossOpt = kfoldLoss(crossval(OptimalTree));
resubDefault = resubLoss(DefaultTree);
lossDefault = kfoldLoss(crossval(DefaultTree));
resubOpt,resubDefault,lossOpt,lossDefault
%%
% The near-optimal tree is much smaller and gives a much higher
% resubstitution error. Yet, it gives similar accuracy for cross-validated
% data.
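%%
% The size difference noted above can also be checked numerically rather
% than by inspecting the two tree plots. This is a minimal sketch that
% assumes the |NumNodes| property of the fitted |ClassificationTree|
% objects; it prints the node count of each tree next to the losses
% computed above.
fprintf('Default tree: %d nodes (resub %.4f, CV %.4f)\n', ...
    DefaultTree.NumNodes,resubDefault,lossDefault);
fprintf('Near-optimal tree: %d nodes (resub %.4f, CV %.4f)\n', ...
    OptimalTree.NumNodes,resubOpt,lossOpt);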