www.gusucode.com > stats 源码程序 matlab案例代码 > stats/FindGoodLassoPenaltyUsingKfoldEdgeExample.m

    %% Find Good Lasso Penalty Using _k_-fold Edge
% To determine a good lasso-penalty strength for a linear classification
% model that uses a logistic regression learner, compare k-fold edges.
%%
% Load the NLP data set.  Preprocess the data as in
% <docid:stats_ug.bu6xx1d>.
load nlpdata
X = X';                 % fitclinear is called with 'ObservationsIn','columns' below
Ystats = (Y == 'stats'); % positive class: pages whose label is 'stats'
%%
% Create a set of 11 logarithmically spaced regularization strengths from
% $10^{-8}$ through $10^{1}$.
Lambda = logspace(-8,1,11);  
%%
% Cross-validate a binary, linear classification model using 5-fold
% cross-validation. Specify each of the regularization strengths.
% Solve the objective function using SpaRSA. Lower the tolerance on the
% gradient of the objective function to |1e-8|.
%
rng(10); % For reproducibility of the cross-validation partition
% Fit one cross-validated model per penalty in Lambda; no terminating
% semicolon so the model summary is displayed in the published output.
CVMdl = fitclinear(X,Ystats,'ObservationsIn','columns',...
    'KFold',5,'Learner','logistic','Regularization','lasso',...
    'Solver','sparsa','Lambda',Lambda,'GradientTolerance',1e-8)
%%
% |CVMdl| is a |ClassificationPartitionedLinear| model.  Because |fitclinear|
% implements 5-fold cross-validation, |CVMdl| contains five
% |ClassificationLinear| models, one per fold; the software trains each model
% on the observations outside its fold.
%%
% Estimate the edges for each fold and regularization strength.
eFolds = kfoldEdge(CVMdl,'Mode','individual')
%%
% |eFolds| is a 5-by-11 matrix of edges.  Rows correspond to folds and
% columns correspond to regularization strengths in |Lambda|.  You can use
% |eFolds| to identify ill-performing folds, that is, unusually low edges.
%%
% Estimate the average edge over all folds for each regularization
% strength.
e = kfoldEdge(CVMdl)
%%
% Determine how well the models generalize by plotting the averages of the
% 5-fold edge for each regularization strength.  Identify the
% regularization strength that maximizes the 5-fold edge over the grid.
% Locate the penalty with the largest average edge, then plot the
% edge curve (log-log) and mark that maximum.
[bestEdge,bestIdx] = max(e);
figure;
plot(log10(Lambda),log10(e),'-o')
hold on
plot(log10(Lambda(bestIdx)),log10(bestEdge),'ro');
ylabel('log_{10} 5-fold edge')
xlabel('log_{10} Lambda')
legend('Edge','Max edge')
hold off
%%
% Several values of |Lambda| yield similarly high edges.  Higher values of
% lambda lead to predictor variable sparsity, which is a good quality of a
% classifier.
%%
% Choose the regularization strength that occurs just before
% the edge starts decreasing.
LambdaFinal = Lambda(5);
%%
% Train a linear classification model using the entire data set and specify
% the regularization strength |LambdaFinal|.
% Refit on all observations at the selected penalty (name-value pairs
% are order-insensitive; same settings as the cross-validated fit).
MdlFinal = fitclinear(X,Ystats,...
    'ObservationsIn','columns','Learner','logistic',...
    'Regularization','lasso','Solver','sparsa',...
    'Lambda',LambdaFinal);
%%
% To estimate labels for new observations, pass |MdlFinal| and the new data
% to |predict|.