% Source: www.gusucode.com > stats source code, MATLAB example code > stats/PredictkfoldCrossValidationLabelsExample.m

    %% Predict _k_-fold Cross-Validation Labels
%%
% Bring the NLP data set into the workspace.
load('nlpdata');
%%
% The predictor data |X| is stored as a sparse matrix, and the class labels
% |Y| are stored as a categorical vector.  The data contains more than two
% classes.
%%
% The goal is a model that decides whether the word counts in a web page
% come from the Statistics and Machine Learning Toolbox(TM) documentation.
% Start by flagging the labels that belong to the Statistics and Machine
% Learning Toolbox(TM) documentation web pages.
Ystats = (Y == 'stats');
%%
% Using the entire data set, cross-validate a binary, linear classification
% model that can tell whether the word counts in a documentation web page
% come from the Statistics and Machine Learning Toolbox(TM) documentation.
% The random number generator is seeded first for reproducibility, and the
% first element of |CVMdl.Trained| is displayed.
rng(1);
CVMdl = fitclinear(X, Ystats, 'CrossVal', 'on');
Mdl1 = CVMdl.Trained{1}
%%
% |CVMdl| is a |ClassificationPartitionedLinear| model.  The software
% implements 10-fold cross validation by default.  To use a different
% number of folds, specify the |'KFold'| name-value pair argument.
%%
% Predict labels for the observations that |fitclinear| held out, that is,
% did not use in training the folds.
label = kfoldPredict(CVMdl);
%%
% |Mdl1| has one regularization strength, so |label| is a column vector of
% predictions with as many rows as there are observations in |X|.
%%
% Construct a confusion matrix from the true labels and the predicted
% labels.
ConfusionTrain = confusionmat(Ystats, label)
%%
% The model misclassifies 15 |'stats'| documentation pages as being
% outside of the Statistics and Machine Learning Toolbox documentation, and
% misclassifies nine pages as |'stats'| pages.