% www.gusucode.com > stats source code (MATLAB example code) > stats/EstimatePosteriorClassProbabilitiesExample.m

    %% Estimate Posterior Class Probabilities
% Estimate test-sample, posterior class probabilities, and determine the
% quality of the model by plotting a ROC curve. Linear classification
% models return posterior probabilities for logistic regression learners
% only.
%%
% Load the NLP data set and preprocess it as in <docid:stats_ug.bu7u2xa-1>.
load nlpdata
Ystats = Y == 'stats';
X = X';
%%
% Randomly partition the data into training and test sets by specifying a
% 30% holdout sample.  Identify the test-set indices.
cvp = cvpartition(Ystats,'Holdout',0.30);
idxTest = test(cvp);
%%
% Train a binary, linear classification model. Fit logistic regression
% learners using SpaRSA. To holdout the test set, specify the partitioned
% model.
CVMdl = fitclinear(X,Ystats,'ObservationsIn','columns','CVPartition',cvp,...
    'Learner','logistic','Solver','sparsa');
Mdl = CVMdl.Trained{1};
%%
% |Mdl| is a |ClassificationLinear| model trained using the training set
% specified in the partition |cvp| only.
%%
% Predict the test-sample posterior class probabilities.
[~,posterior] = predict(Mdl,X(:,idxTest),'ObservationsIn','columns');
%%
% Because there is one regularization strength in |Mdl|, |posterior| is a
% matrix with 2 columns and rows equal to the number of test-set
% observations. Column _i_ contains posterior probabilities of
% |Mdl.ClassNames(i)| given a particular observation.
%%
% Obtain false and true positive rates, and estimate the AUC. Specify that
% the second class is the positive class.
[fpr,tpr,~,auc] = perfcurve(Ystats(idxTest),posterior(:,2),Mdl.ClassNames(2));
auc
%%
% The AUC is |1|, which indicates a model that predicts well.
%%
% Plot an ROC curve.
figure;
plot(fpr,tpr)
h = gca;
h.XLim(1) = -0.1;
h.YLim(2) = 1.1;
xlabel('False positive rate')
ylabel('True positive rate')
title('ROC Curve')
%%
% The ROC curve and AUC indicate that the model classifies the test-sample
% observations almost perfectly.