
%% Find Good Lasso Penalty Using Regression Loss
%%
% Simulate 10000 observations from this model:
%
% $$y = x_{100} + 2x_{200} + e.$$
%
%
% * $X = \{x_1,...,x_{1000}\}$ is a 10000-by-1000 sparse matrix with 10%
% nonzero standard normal elements.
% * _e_ is random normal error with mean 0 and standard deviation
% 0.3.
%
rng(1) % For reproducibility
n = 1e4;
d = 1e3;
nz = 0.1;
X = sprandn(n,d,nz);
Y = X(:,100) + 2*X(:,200) + 0.3*randn(n,1);
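%%
% If you want to verify the simulated data, you can, for example, check
% that the fraction of nonzero elements in |X| is close to the requested
% 10%:
nnz(X)/numel(X)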
%%
% Create a set of 15 logarithmically spaced regularization strengths from
% $10^{-4}$ through $10^{-1}$.
Lambda = logspace(-4,-1,15);  
%%
% Hold out 30% of the data for testing.  Identify the test-sample indices.
cvp = cvpartition(numel(Y),'Holdout',0.30);
idxTest = test(cvp);
%%
% Train a linear regression model using lasso penalties with the strengths
% in |Lambda|. Specify the regularization strengths, the SpaRSA solver for
% the objective function, and the data partition. To increase execution
% speed, transpose the predictor data and specify that the observations are
% in columns.
%
X = X'; 
CVMdl = fitrlinear(X,Y,'ObservationsIn','columns','Lambda',Lambda,...
    'Solver','sparsa','Regularization','lasso','CVPartition',cvp);
Mdl1 = CVMdl.Trained{1};
numel(Mdl1.Lambda)
%%
% |CVMdl| is a |RegressionPartitionedLinear| model, and |Mdl1| is the
% |RegressionLinear| model trained on the training partition. Because
% |Lambda| is a sequence of 15 regularization strengths, you can think of
% |Mdl1| as 15 trained models, one for each regularization strength.
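%
% For example, if you call |predict| on |Mdl1| with the test data, the
% returned matrix of predictions should have one column per regularization
% strength (a quick check; it is not needed for the rest of the example).
YHat = predict(Mdl1,X(:,idxTest),'ObservationsIn','columns');
size(YHat)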
%%
% Estimate the test-sample mean squared error for each regularized
% model.
mse = loss(Mdl1,X(:,idxTest),Y(idxTest),'ObservationsIn','columns');
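%%
% If you want the regularization strength with the lowest test-sample MSE
% on its own, one way (sketched here) is to take the minimum:
[minMSE,idxMinMSE] = min(mse);
Lambda(idxMinMSE)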
%%
% Higher values of |Lambda| lead to predictor variable sparsity, which is a
% good quality of a regression model.  Retrain the model using the entire data
% set and all options used previously, except the data-partition
% specification. Determine the number of nonzero coefficients per model.
Mdl = fitrlinear(X,Y,'ObservationsIn','columns','Lambda',Lambda,...
    'Solver','sparsa','Regularization','lasso');
numNZCoeff = sum(Mdl.Beta~=0);
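%%
% One convenient way to compare the regularization strengths side by side
% (a sketch, not required for the workflow) is to collect the results in a
% table:
resultsTbl = table(Lambda',mse',numNZCoeff',...
    'VariableNames',{'Lambda','MSE','NumNonzeroCoeff'})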
%%
% In the same figure, plot the MSE and frequency of nonzero coefficients
% for each regularization strength. Plot all variables on the log scale.
figure;
[h,hL1,hL2] = plotyy(log10(Lambda),log10(mse),...
    log10(Lambda),log10(numNZCoeff)); 
hL1.Marker = 'o';
hL2.Marker = 'o';
ylabel(h(1),'log_{10} MSE')
ylabel(h(2),'log_{10} nonzero-coefficient frequency')
xlabel('log_{10} Lambda')
hold off
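%%
% |plotyy| is not recommended in recent MATLAB releases. If you prefer, a
% roughly equivalent plot using |yyaxis| might look like this sketch:
figure;
yyaxis left
plot(log10(Lambda),log10(mse),'-o')
ylabel('log_{10} MSE')
yyaxis right
plot(log10(Lambda),log10(numNZCoeff),'-o')
ylabel('log_{10} nonzero-coefficient frequency')
xlabel('log_{10} Lambda')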
%%
% Select the index or indices of |Lambda| that balance minimal MSE and
% predictor-variable sparsity (for example, |Lambda(11)|).
idx = 11;
MdlFinal = selectModels(Mdl,idx);
%%
% |MdlFinal| is a trained |RegressionLinear| model object that uses
% |Lambda(11)| as a regularization strength.
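%%
% To check how the selected model performs and which predictors it keeps,
% you can, for example, compute its test-sample MSE and list the indices of
% its nonzero coefficients. This follow-up is a sketch, not part of the
% selection step itself.
mseFinal = loss(MdlFinal,X(:,idxTest),Y(idxTest),'ObservationsIn','columns')
find(MdlFinal.Beta ~= 0)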