%% Refit NCA Model for Classification with Modified Settings
%%
% Generate checkerboard data using the |generateCheckerBoardData.m|
% function.
rng(2016,'twister'); % For reproducibility
pps = 1375;
[X,y] = generateCheckerBoardData(pps);
X = X + 2;
%%
% Plot the data.
figure
plot(X(y==1,1),X(y==1,2),'rx')
hold on
plot(X(y==-1,1),X(y==-1,2),'bx')

[n,p] = size(X)

%%
% Add irrelevant predictors to the data.
Q = 98;
Xrnd = unifrnd(0,4,n,Q);
Xobs = [X,Xrnd];

%%
% This code creates 98 additional predictors, all uniformly distributed
% between 0 and 4 and unrelated to the class labels.
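%%
% As an added check (not part of the original example), confirm that the
% augmented data has 100 predictors: the 2 checkerboard coordinates plus
% the 98 irrelevant ones.
size(Xobs)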
%%
% Partition the data into training and test sets. To create stratified
% partitions, so that each partition has a similar proportion of classes,
% use |y| instead of |length(y)| as the partitioning criterion.
cvp = cvpartition(y,'holdout',2000);

%%
% |cvpartition| randomly assigns 2000 observations to the test set and the
% remaining observations to the training set. Create the training and
% validation sets using the assignments stored in the |cvpartition| object
% |cvp|.

Xtrain = Xobs(cvp.training(1),:);
ytrain = y(cvp.training(1),:);

Xval = Xobs(cvp.test(1),:);
yval = y(cvp.test(1),:);
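
%%
% As an added check (not part of the original example), confirm that the
% stratified partition keeps the class proportions similar in the training
% and validation sets.
propTrain = mean(ytrain == 1) % Fraction of class 1 in the training set
propVal = mean(yval == 1)     % Fraction of class 1 in the validation set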

%%
% Compute the misclassification error without feature selection.
nca = fscnca(Xtrain,ytrain,'FitMethod','none','Standardize',true, ...
    'Solver','lbfgs');
loss_nofs = loss(nca,Xval,yval)

%%
% The |'FitMethod','none'| option uses the default feature weights (all
% ones), which means all features are treated as equally important.
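%%
% To verify this (an added check, not part of the original example),
% inspect the fitted feature weights: with |'FitMethod','none'| they all
% remain at their default value of 1.
allWeightsAreOne = all(nca.FeatureWeights == 1)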
%%
% This time, perform feature selection using neighborhood component
% analysis for classification, with $\lambda = 1/n$.
w0 = rand(100,1); % Random initial feature weights for the 100 predictors
n = length(ytrain)
lambda = 1/n;
nca = refit(nca,'InitialFeatureWeights',w0,'FitMethod','exact', ...
    'Lambda',lambda,'Solver','sgd');
   
%%
% Plot the objective function value versus the iteration number.
figure()
plot(nca.FitInfo.Iteration,nca.FitInfo.Objective,'ro')
hold on
plot(nca.FitInfo.Iteration,movmean(nca.FitInfo.Objective,10),'k.-')
xlabel('Iteration number')
ylabel('Objective value')
%%
% Compute the misclassification error with feature selection.
loss_withfs = loss(nca,Xval,yval)

%%
% Plot the selected features.
figure
semilogx(nca.FeatureWeights,'ro')
xlabel('Feature index')
ylabel('Feature weight')
grid on

%%
% Select features using the feature weights and a relative threshold.
tol = 0.15;
selidx = find(nca.FeatureWeights > tol*max(1,max(nca.FeatureWeights)))
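
%%
% As an added check (not part of the original example), verify that the
% selected indices include the first two columns of |Xobs|, which hold the
% original checkerboard coordinates.
foundRelevant = all(ismember([1 2],selidx))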

%%
% Feature selection improves the results, and |fscnca| correctly identifies
% the two relevant features.
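%%
% A possible follow-up (a sketch, not part of the original example): refit
% a model using only the selected features and check that the validation
% loss is comparable to |loss_withfs|.
ncaSel = fscnca(Xtrain(:,selidx),ytrain,'FitMethod','exact', ...
    'Lambda',lambda,'Solver','sgd','Standardize',true);
loss_selected = loss(ncaSel,Xval(:,selidx),yval)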