% www.gusucode.com > stats 源码程序 matlab案例代码 > stats/EstimateInSampleResponsesForEachSubtreeExample.m

    %% Estimate In-Sample Responses For Each Subtree
%%
% Load the |carsmall| data set. Consider |Weight| as a predictor of the
% response |MPG|.

% Copyright 2015 The MathWorks, Inc.

load('carsmall')
% NaN propagates through addition, so a single isnan call flags every row
% in which either MPG or Weight is missing.
idxNaN = isnan(MPG + Weight);
X = Weight(~idxNaN);
Y = MPG(~idxNaN);
n = numel(Y); % X and Y share the same mask, so either gives the row count
%%
% Grow a regression tree using all observations.
Mdl = fitrtree(X,Y);
%%
% Compute resubstitution fitted values for the subtrees at several pruning
% levels.
m = max(Mdl.PruneList); % Largest value in the tree's prune list
pruneLevels = 1:4:m; % Pruning levels to consider
z = length(pruneLevels);
Yfit = resubPredict(Mdl,'SubTrees',pruneLevels);
%%
% |Yfit| is an |n|-by-|z| matrix of fitted values in which each row
% corresponds to an observation and each column corresponds to a subtree.
%%
% Plot several columns of |Yfit| and |Y| against |X|.
figure;
% Sort every column by the predictor so each curve is drawn left to right.
sortDat = sortrows([X Y Yfit],1); % Columns: X, observed Y, one per subtree
% plot pairs a vector x with each column of a matrix y, so the x column
% does not need to be replicated. Keep the returned line handles instead of
% recovering them later from the figure by position, which depends on how
% many non-line objects (figure, legend, axes) precede them.
hLines = plot(sortDat(:,1),sortDat(:,2:end));
lev = cellstr(num2str((pruneLevels)','Level %d MPG'));
% Passing the handles makes the label-to-line mapping explicit: the first
% line is the observed response, the rest follow the order of pruneLevels.
legend(hLines,['Observed MPG'; lev])
title('In-Sample Fitted Responses')
xlabel('Weight (lbs)')
ylabel('MPG')
set(hLines,'LineWidth',3) % Widen all lines
%%
% The values of |Yfit| for lower pruning levels tend to follow the data more
% closely than those for higher levels. Fitted values at higher pruning
% levels tend to be flat over large |X| intervals.