www.gusucode.com > stats source code — MATLAB example code > stats/EstimatekfoldCrossValidationPosteriorClassProbabilitiesExample.m

    %% Estimate _k_-fold Cross-Validation Posterior Class Probabilities
% Linear classification models return posterior probabilities for logistic
% regression learners only.
%%
% Load the NLP data set and preprocess it as in <docid:stats_ug.bu7wsed>.
% Create a logical response vector that is |true| for observations whose
% label is |'stats'|. Transpose the predictor data matrix so that
% observations correspond to columns, matching the
% |'ObservationsIn','columns'| option passed to |fitclinear| below.
load nlpdata
Ystats = Y == 'stats';
X = X';
%%
% Cross-validate binary, linear classification models using 5-fold
% cross-validation. Solve the objective function using SpaRSA. Lower the
% tolerance on the gradient of the objective function to |1e-8|.
%
rng(10); % For reproducibility of the cross-validation partition
CVMdl = fitclinear(X,Ystats,'ObservationsIn','columns',...
    'KFold',5,'Learner','logistic','Solver','sparsa',...
    'Regularization','lasso','GradientTolerance',1e-8);
%%
% Predict the posterior class probabilities for observations not used to
% train each fold, and display the class order of the cross-validated
% model.
[~,posterior] = kfoldPredict(CVMdl);
CVMdl.ClassNames
%%
% Because there is one regularization strength in |CVMdl|, |posterior| is a
% matrix with 2 columns and rows equal to the number of observations.
% Column _i_ contains posterior probabilities of |CVMdl.ClassNames(i)| given
% a particular observation.
%%
% Obtain false and true positive rates, and estimate the AUC. Specify that
% the second class (|true|, that is, the |'stats'| class) is the positive
% class.
[fpr,tpr,~,auc] = perfcurve(Ystats,posterior(:,2),CVMdl.ClassNames(2));
auc
%%
% The AUC is |0.9990|, which indicates a model that predicts well.
%%
% Plot an ROC curve. Widen the axis limits slightly so that points lying on
% the axes remain visible.
figure;
plot(fpr,tpr)
h = gca;
h.XLim(1) = -0.1;
h.YLim(2) = 1.1;
xlabel('False positive rate')
ylabel('True positive rate')
title('ROC Curve')
%%
% The ROC curve indicates that the model classifies almost perfectly.