% Source: www.gusucode.com > stats source code, MATLAB example code > stats/AnalyzeBinaryLearnersOfAnECOCClassifierExample.m

    %% Analyze Binary Learners of an ECOC Classifier
% ECOC models are an ensemble of strong classifiers.  It might be helpful
% to inspect the performance of each binary classifier.
%%
% Load and inspect the |arrhythmia| data set.  Remove observations from the
% training data having labels that have low representation in the data
% (i.e., having at most five observations).

% Copyright 2015 The MathWorks, Inc.

% Bring the predictors X and the labels Y into the workspace and treat the
% labels as categorical so that they can be tabulated by class.
load arrhythmia
Y = categorical(Y);
tabY = tabulate(Y)

% Classes with at most five observations are considered too rare to keep.
classCounts = cell2mat(tabY(:,2));
isRareClass = classCounts <= 5;
rmClass = tabY(isRareClass,1);

% Keep only the observations whose label is not one of the rare classes.
keepObs = ~ismember(Y,rmClass);
X = X(keepObs,:);
Y = Y(keepObs);
classes = unique(Y);

% Number of distinct classes that remain after the rare ones are removed
K = size(tabY,1) - numel(rmClass)
% Training sample size (n) and number of predictors (p)
[n,p] = size(X)
%%
% There are |K| = 9 classes represented in the data set.  Class 1 indicates
% no arrhythmia and class 16 indicates lack of arrhythmia classification.
% Each of the other 7 classes indicates a different heart-beat problem.
% Suppose that classes 2-6 are related (Group 1), and classes 9 and 10 are
% related (Group 2). You want to build an adjusted, one-versus-one
% classifier that uses this hypothesis.
%%
% Construct a coding design matrix for the classifier that distinguishes
% between:
%
% * Group 1 and Group 2
% * The classes within the groups using one-versus-one.
% * Class 1 and each of the two groups, one group at a time.
% * The classified and the unclassified.
%
% One-versus-one designs for the 5 classes of Group 1 and the 2 classes of
% Group 2. Each design has one row per class and one column per learner.
OVOCodingG1 = designecoc(5,'onevsone');
OVOCodingG2 = designecoc(2,'onevsone');
[nClassG1,nLearnG1] = size(OVOCodingG1);
[nClassG2,nLearnG2] = size(OVOCodingG2);

% Every column block below has K rows, stacked in this order: class 1, the
% Group 1 classes, the Group 2 classes, and finally the unclassified class.
% A zero entry leaves that class out of the corresponding binary learner.

% Group 1 versus Group 2
BLG = [0; ones(nClassG1,1); -ones(nClassG2,1); 0];
% One-versus-one within Group 1
BLG1 = [zeros(1,nLearnG1); OVOCodingG1; zeros(nClassG2 + 1,nLearnG1)];
% One-versus-one within Group 2
BLG2 = [zeros(nClassG1 + 1,nLearnG2); OVOCodingG2; zeros(1,nLearnG2)];
% Class 1 versus Group 1
BLC1G1 = [1; -ones(nClassG1,1); zeros(nClassG2 + 1,1)];
% Class 1 versus Group 2
BLC1G2 = [1; zeros(nClassG1,1); -ones(nClassG2,1); 0];
% Classified (first K - 1 rows) versus unclassified (last row)
BLCUnC = [ones(K - 1,1); -1];

% Assemble the coding design; each column defines one binary learner.
Coding = horzcat(BLG,BLG1,BLG2,BLC1G1,BLC1G2,BLCUnC);
L = size(Coding,2); % Number of binary learners
%%
% Fit the ECOC model with the custom coding design.  The binary learners
% are SVMs, which is the |fitcecoc| default and is spelled out here.
Mdl = fitcecoc(X,Y,'Coding',Coding,'Learners','svm');
%%
% Display the retained classes, then compare the in-sample
% (resubstitution) predictions with the true labels in a confusion matrix.
classes'
labels = resubPredict(Mdl);
ConfusionMat = confusionmat(Y,labels)
%%
% According to the confusion matrix, the ECOC classifier does not do a
% good job of distinguishing between the first class and any other class.
%%
% Diagnose badly performing classifiers by comparing the classification
% accuracies across binary learners using in-sample classification error.
% Mdl.BinaryY holds, per learner (column), the binary label assigned to
% each observation; a zero marks an observation that the learner does not
% use, so IdxMat flags the observations each learner is scored on.
IdxMat = Mdl.BinaryY ~= 0;
BLLabels = cell(L,1);   % Predicted binary labels, one cell per learner
misClass = nan(L,1);    % In-sample error per learner (NaN if no learner)
for j = 1:L
    learner = Mdl.BinaryLearners{j};
    if isempty(learner)
        continue % Nothing to evaluate; misClass(j) stays NaN
    end
    usedRows = IdxMat(:,j);
    BLLabels{j} = predict(learner,X(usedRows,:));
    % Fraction of this learner's observations that it labels incorrectly
    misClass(j) = mean(BLLabels{j} ~= Mdl.BinaryY(usedRows,j));
end

% Plot the in-sample misclassification rate of every binary learner with a
% horizontal reference line at the 10% level.  The bars are drawn at
% x = 1..L, so the reference line runs from 0 to L + 1 to extend past both
% the first and the last bar instead of stopping at the center of the last
% bar.
errThreshold = 0.1; % Reference misclassification rate (10%)

figure;
bar(misClass);
hold on;
plot([0, L+1],[errThreshold, errThreshold],'r--','LineWidth',3);
title('Misclassification Error Across Learners');
xlabel('Binary Learner');
ylabel('Proportion of misclassified observations');
hold off;
%%
% A majority of the binary learners are misclassifying more than 10% of
% their respective training observations.
%%
% The poor performance might indicate that:
% 
% * The number of observations for some classes is too small to train well.
% * The hypothesized coding design is not appropriate for the data set.
% The confusion matrix indicates that most of the second group of 6 classes
% are misclassified into the first class.
% You can try a different coding design.  |sparserandom| is a good choice
% for data sets with many classes.
% * SVM is not appropriate.  In fact, several of the variables are
% categorical, and SVM does not account for this.  Ensembles of
% classification trees accommodate categorical predictors.