% Source: www.gusucode.com > stats source code, MATLAB example code > stats/EstimateOOBCCDFUsingQuantileRegressionExample.m

    %% Estimate Out-of-Bag Conditional Cumulative Distribution Using Quantile Regression
%%
% Load the |carsmall| data set.  Consider a model that predicts the fuel
% economy of a car given its engine displacement.
load('carsmall')
%%
% Grow a bag of 100 regression trees on the full data set, using engine
% displacement as the only predictor of fuel economy. Out-of-bag
% prediction is switched on so that out-of-bag estimates can be computed
% from the trained ensemble afterwards.
rng(1) % Fix the random seed so that the results are reproducible
Mdl = TreeBagger(100,Displacement,MPG,...
    'OOBPrediction','on','Method','regression');
%%
% Request the second output of |oobQuantilePredict| to obtain the
% out-of-bag response weights; the first output is discarded.
[~,YW] = oobQuantilePredict(Mdl);
%%
% |YW| is an |n|-by-|n| sparse matrix containing the response weights. |n|
% is the number of training observations, |numel(Mdl.Y)|.  The response
% weights for the observation in |Mdl.X(j,:)| are in |YW(:,j)|. Response
% weights are independent of any specified quantile probabilities.
%%
% Estimate the out-of-bag, conditional cumulative distribution function
% (C.C.D.F.) of the responses by:
%
% # Sorting the responses in ascending order, and then sorting the response
% weights using the indices induced by sorting the responses.
% # Computing the cumulative sums over each column of the sorted response
% weights.
%
% Order the responses from smallest to largest and permute the rows of the
% weight matrix the same way, so that row k of |cpdf| holds the weights
% attached to the k-th smallest response. |full| converts the sparse
% weights to a dense matrix.
[sortY,sortIdx] = sort(Mdl.Y,'ascend');
cpdf = full(YW(sortIdx,:));
% Accumulate the weights down each column (dimension 1).
ccdf = cumsum(cpdf,1);
%%
% |ccdf(:,j)| is the empirical out-of-bag C.C.D.F. of the response given
% observation |j|.
%%
% Choose a random sample of four training observations. Plot the training
% sample and identify the chosen observations.
% Draw four distinct training observations. |datasample| samples with
% replacement by default, which could select the same observation more
% than once (overlapping markers and labels, duplicate curves later on), so
% sampling without replacement is requested explicitly.
[randX,idx] = datasample(Mdl.X,4,'Replace',false);
figure;
% Scatter plot of the full training sample.
plot(Mdl.X,Mdl.Y,'o');
hold on
% Overlay the chosen observations with larger star markers.
plot(randX,Mdl.Y(idx),'*','MarkerSize',10);
% Offset each label from its marker so the text does not cover it.
text(randX-10,Mdl.Y(idx)+1.5,{'obs. 1' 'obs. 2' 'obs. 3' 'obs. 4'});
legend('Training Data','Chosen Observations');
xlabel('Engine displacement')
ylabel('Fuel economy')
hold off
%%
% Plot the out-of-bag C.C.D.F. for the four chosen responses in the same figure.
% One curve per chosen observation: the columns of |ccdf| selected by
% |idx|, plotted against the sorted responses.
figure;
plot(sortY,ccdf(:,idx));
title('Out-of-Bag Conditional Cumulative Distribution Functions')
xlabel('Fuel economy')
ylabel('Empirical CDF')
% Legend entries follow the order of the columns selected by |idx|.
legend({'C.C.D.F. given obs. 1','C.C.D.F. given obs. 2',...
    'C.C.D.F. given obs. 3','C.C.D.F. given obs. 4'},...
    'Location','SouthEast')