www.gusucode.com > stats 源码程序 matlab案例代码 > stats/EstimateTestSampleMarginsExample.m

    %% Estimate Test-Sample Margins
%%
% Load the NLP data set.
load nlpdata
%%
% |X| is a sparse matrix of predictor data, and |Y| is a categorical vector
% of class labels.  There are more than two classes in the data.
%%
% The models should identify whether the word counts in a web page are
% from the Statistics and Machine Learning Toolbox(TM) documentation. So,
% identify the labels that correspond to the Statistics and Machine
% Learning Toolbox(TM) documentation web pages.
Ystats = Y == 'stats';
%%
% Train a binary, linear classification model that can
% identify whether the word counts in a documentation web page are from
% the Statistics and Machine Learning Toolbox(TM) documentation. Specify to
% hold out 30% of the observations.  Solve the objective function using
% SpaRSA.
rng(1); % For reproducibility 
CVMdl = fitclinear(X,Ystats,'Solver','sparsa','Holdout',0.30);
CMdl = CVMdl.Trained{1};          
%%
% |CVMdl| is a |ClassificationPartitionedLinear| model. It contains
% the property |Trained|, which is a 1-by-1 cell array holding a
% |ClassificationLinear| model that the software trained using the
% training set.
%%
% Extract the training and test data from the partition definition.
trainIdx = training(CVMdl.Partition);
testIdx = test(CVMdl.Partition);
%%
% Estimate the training- and test-sample margins.
mTrain = margin(CMdl,X(trainIdx,:),Ystats(trainIdx));
mTest = margin(CMdl,X(testIdx,:),Ystats(testIdx));
%%
% Because there is one regularization strength in |CMdl|, |mTrain| and
% |mTest| are column vectors with lengths equal to the number of training
% and test observations, respectively.
%%
% Plot both sets of margins using box plots.
figure;
% Build the grouping vector in the same order as the stacked margins:
% all training margins first (group 0), then all test margins (group 1).
% |trainIdx| cannot be used for grouping because it follows the original
% observation order, not the order of |[mTrain; mTest]|.
setGroup = [zeros(size(mTrain,1),1); ones(size(mTest,1),1)];
boxplot([mTrain; mTest],setGroup,'Labels',{'Training set','Test set'});
h = gca;
h.YLim = [-5 60];
title 'Training- and Test-Set Margins'
%%
% The distributions of the margins between the training and test sets
% appear similar.