% Source: www.gusucode.com > stats (MATLAB example code) > stats/FindGoodLassoPenaltyUsingkfoldEdge1Example.m

%% Find Good Lasso Penalty Using _k_-fold Edge
% To determine a good lasso-penalty strength for a linear classification
% model that uses a logistic regression learner, compare k-fold edges.
%%
% Load the NLP data set.  Preprocess the data as in
% <docid:stats_ug.bu6xz2d-1>.
% Load the NLP data, then collapse every label outside the three target
% classes into a single 'others' category.
load nlpdata
isTarget = ismember(Y,{'simulink','dsp','comm'});
Y(~isTarget) = 'others';
X = X'; % orient observations along columns for 'ObservationsIn','columns'
%%
% Create a set of 8 logarithmically-spaced regularization strengths from
% $10^{-8}$ through $10^{1}$.
% Grid of 8 regularization strengths, logarithmically spaced over
% [1e-8, 1e1] (equivalent to logspace(-8,1,8)).
Lambda = 10.^linspace(-8,1,8);
%%
% Create a linear classification model template that specifies to use
% logistic regression with a lasso penalty, use each of the regularization
% strengths, solve the objective function using SpaRSA, and reduce the
% tolerance on the gradient of the objective function to |1e-8|.
% Linear-learner template: logistic regression with a lasso penalty over
% the Lambda grid, solved by SpaRSA with a tightened gradient tolerance.
t = templateLinear('Solver','sparsa','Learner','logistic',...
    'GradientTolerance',1e-8,'Regularization','lasso','Lambda',Lambda);
%%
% Cross-validate an ECOC model composed of binary, linear classification
% models.  Use 5-fold cross-validation and specify that the observations
% are oriented along the columns of the predictor data.
rng(10); % For reproducibility
% No terminating semicolon: display the cross-validated model summary.
CVMdl = fitcecoc(X,Y,'KFold',5,'Learners',t,'ObservationsIn','columns')
%%
% |CVMdl| is a |ClassificationPartitionedLinearECOC| model.
%%
% Estimate the edges for each fold and regularization strength.
% Per-fold edges: one row per fold, one column per Lambda value.
eFolds = kfoldEdge(CVMdl,'Mode','individual')
%%
% |eFolds| is a 5-by-8 matrix of edges.  Rows correspond to folds and
% columns correspond to regularization strengths in |Lambda|.  You can use
% |eFolds| to identify ill-performing folds, that is, unusually low edges.
%%
% Estimate the average edge over all folds for each regularization
% strength.
% Default 'Mode' averages over folds: one mean edge per Lambda value.
e = kfoldEdge(CVMdl)
%%
% Determine how well the models generalize by plotting the averages of the
% 5-fold edge for each regularization strength.  Identify the
% regularization strength that maximizes the 5-fold edge over the grid.
% Locate the regularization strength with the largest mean 5-fold edge,
% then plot every mean edge (log-log) and mark the maximum.
[~, idxBest] = max(e);
bestLambda = Lambda(idxBest);
figure;
plot(log10(Lambda),log10(e),'-o')
hold on
plot(log10(bestLambda),log10(e(idxBest)),'ro');
xlabel('log_{10} Lambda')
ylabel('log_{10} 5-fold edge')
legend('Edge','Max edge')
hold off
%%
% Several values of |Lambda| yield similarly high edges.  Greater
% regularization strength values lead to predictor variable sparsity, which
% is a good quality of a classifier.
%%
% Choose the regularization strength that occurs just before
% the edge starts decreasing.
% Hand-picked from the plot: the largest Lambda on the grid before the
% mean edge begins to decrease.
LambdaFinal = Lambda(4);
%%
% Train an ECOC model composed of linear classification model using the
% entire data set and specify the regularization strength |LambdaFinal|.
% Same template as before, but with the single selected penalty strength;
% retrain on the full data set.
t = templateLinear('Solver','sparsa','Learner','logistic',...
    'Lambda',LambdaFinal,'Regularization','lasso','GradientTolerance',1e-8);
MdlFinal = fitcecoc(X,Y,'ObservationsIn','columns','Learners',t);
%%
% To estimate labels for new observations, pass |MdlFinal| and the new data
% to |predict|.