
%% LPBoost and TotalBoost for Small Ensembles
% This example shows how to obtain the benefits of the |LPBoost| and
% |TotalBoost| algorithms. These algorithms share two beneficial
% characteristics:
% 
% * They are self-terminating, so you don't have to guess how many members
% to include.
%
% * They produce ensembles with some very small weights, so you can safely
% remove ensemble members.
% 
% Note that the algorithms in this example require an Optimization
% Toolbox(TM) license.
%% Load the data
% Load the |ionosphere| data set.

% Copyright 2015 The MathWorks, Inc.

load ionosphere
%% Create the classification ensembles
% Create ensembles for classifying the |ionosphere| data using the
% |LPBoost|, |TotalBoost|, and, for comparison, |AdaBoostM1| algorithms. It
% is hard to know how many members to include in an ensemble. For |LPBoost|
% and |TotalBoost|, try using |500|. For comparison, also use |500| for
% |AdaBoostM1|.
rng default % For reproducibility
T = 500;
adaStump = fitensemble(X,Y,'AdaBoostM1',T,'Tree');
totalStump = fitensemble(X,Y,'TotalBoost',T,'Tree');
lpStump = fitensemble(X,Y,'LPBoost',T,'Tree');
figure;
plot(resubLoss(adaStump,'Mode','Cumulative'));
hold on
plot(resubLoss(totalStump,'Mode','Cumulative'),'r');
plot(resubLoss(lpStump,'Mode','Cumulative'),'g');
hold off
xlabel('Number of stumps');
ylabel('Training error');
legend('AdaBoost','TotalBoost','LPBoost','Location','NE');
%% 
% All three algorithms eventually achieve perfect prediction on the
% training data.
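%%
% As a quick check (this step is not part of the original example), you
% can compare the final resubstitution losses directly. All three values
% should be at or near zero.
[resubLoss(adaStump) resubLoss(totalStump) resubLoss(lpStump)]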
%%
% Examine the number of members in all three ensembles.
[adaStump.NTrained totalStump.NTrained lpStump.NTrained]
%%
% |AdaBoostM1| trained all |500| members. The other two algorithms stopped
% training early.
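%%
% Each ensemble also carries |FitInfo| and |FitInfoDescription| properties
% that record per-iteration information. Inspecting the description (an
% optional step, not in the original example) shows what the stored
% per-iteration values mean.
lpStump.FitInfoDescription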
%% Cross validate the ensembles
% Cross validate the ensembles to better determine ensemble accuracy.
cvlp = crossval(lpStump,'KFold',5);
cvtotal = crossval(totalStump,'KFold',5);
cvada = crossval(adaStump,'KFold',5);

figure;
plot(kfoldLoss(cvada,'Mode','Cumulative'));
hold on
plot(kfoldLoss(cvtotal,'Mode','Cumulative'),'r');
plot(kfoldLoss(cvlp,'Mode','Cumulative'),'g');
hold off
xlabel('Ensemble size');
ylabel('Cross-validated error');
legend('AdaBoost','TotalBoost','LPBoost','Location','NE');
%%
% It appears that each boosting algorithm achieves a loss of 10% or lower
% with 50 ensemble members, and that |AdaBoostM1| achieves an error near
% 6% with 150 or more ensemble members.
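%%
% To quantify this observation (an extra step, not part of the original
% example), find the minimum cross-validated loss for each ensemble and
% the ensemble size at which it occurs.
[minada,sizeada] = min(kfoldLoss(cvada,'Mode','Cumulative'));
[mintotal,sizetotal] = min(kfoldLoss(cvtotal,'Mode','Cumulative'));
[minlp,sizelp] = min(kfoldLoss(cvlp,'Mode','Cumulative'));
[minada sizeada; mintotal sizetotal; minlp sizelp]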
%% Compact and remove ensemble members
% To reduce the ensemble sizes, compact them, and then use
% |removeLearners|. The question is, how many learners should you remove?
% The cross-validated loss curves give you one measure. For another,
% examine the learner weights for |LPBoost| and |TotalBoost| after
% compacting.
cada = compact(adaStump);
clp = compact(lpStump);
ctotal = compact(totalStump);

figure
subplot(2,1,1)
plot(clp.TrainedWeights)
title('LPBoost weights')
subplot(2,1,2)
plot(ctotal.TrainedWeights)
title('TotalBoost weights')
%%
% Both |LPBoost| and |TotalBoost| show clear points where the ensemble
% member weights become negligible.
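%%
% One way to locate those cutoff points programmatically (using a
% hypothetical weight threshold of 0.01, which is not part of the original
% example) is to find the first learner whose weight falls below that
% threshold.
[find(clp.TrainedWeights < 0.01,1) find(ctotal.TrainedWeights < 0.01,1)]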
%%
% Remove the unimportant ensemble members.
cada = removeLearners(cada,150:cada.NTrained);
clp = removeLearners(clp,60:clp.NTrained);
ctotal = removeLearners(ctotal,40:ctotal.NTrained);
%%
% Check that removing these learners does not affect ensemble accuracy on
% the training data.
[loss(cada,X,Y) loss(clp,X,Y) loss(ctotal,X,Y)]
%%
% Check the resulting compact ensemble sizes.
s(1) = whos('cada');
s(2) = whos('clp');
s(3) = whos('ctotal');
s.bytes
%%
% The sizes of the compact ensembles are approximately proportional to the
% number of members in each.
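%%
% To confirm that relationship (an additional check, not in the original
% example), divide each ensemble size in bytes by the number of trained
% learners it contains. The per-learner sizes should be roughly equal.
[s.bytes] ./ [cada.NTrained clp.NTrained ctotal.NTrained]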