% Source: www.gusucode.com > stats source code, MATLAB example code > stats/TrainAKNNClassifierUsingACustomDistanceMetricExample.m

    %% Train a _k_-Nearest Neighbor Classifier Using a Custom Distance Metric
% Train a _k_-nearest neighbor classifier using the
% chi-square distance.
%% 
% Load Fisher's iris data set.

% Copyright 2015 The MathWorks, Inc.

load('fisheriris')  % provides the variables meas and species used below
Y = species;        % Response
X = meas;           % Predictors
%%
% The chi-square distance between _J_-dimensional points _x_ and _z_ is 
%
% $$\chi(x,z) = \sqrt{\displaystyle\sum^J_{j = 1}w_j\left(x_j - z_j\right)^2},$$
%
% where $w_j$ is a weight associated with dimension _j_.  
%%
% Specify the chi-square distance function.  The distance function must: 
% 
% * Take one row of |X|, e.g., |x|, and the matrix |Z|.
% * Compare |x| to each row of |Z|.
% * Return a vector |D| of length $n_z$, where $n_z$ is the number of
% rows of |Z|.  Each element of |D| is the distance between the observation
% corresponding to |x| and the observations
% corresponding to each row of |Z|.
%
chiSqrDist = @(x,Z,wt)sqrt( ...      x: one row of X, Z: matrix of rows to compare against
    (bsxfun(@minus,x,Z).^2) ...      squared per-dimension differences, one row per row of Z
    * wt(:));                        % weighted sum per row; wt(:) accepts row- or column-vector weights
%%
% This example uses arbitrary weights for illustration.
%%
% Train a 3-nearest neighbor classifier.  It is good practice to
% standardize noncategorical predictor data.
w = [0.3; 0.3; 0.2; 0.2];  % weight vector handed to chiSqrDist
k = 3;                     % number of nearest neighbors
KNNMdl = fitcknn(X,Y, ...
    'NumNeighbors',k, ...
    'Standardize',true, ...
    'Distance',@(x,Z)chiSqrDist(x,Z,w));  % bind w so the metric takes only (x,Z)
%%
% |KNNMdl| is a <docid:stats_ug.bs85mom> classifier.
%%
% Cross validate the KNN classifier using the default 10-fold cross
% validation.  Examine the classification error.
rng(1); % For reproducibility
CVKNNMdl = crossval(KNNMdl,'KFold',10);  % 10 folds, spelled out (same as the default)
classError = kfoldLoss(CVKNNMdl)  % no semicolon: display the error
%%
% |CVKNNMdl| is a <docid:stats_ug.bsu1ox3-1> classifier.  The 10-fold
% classification error is 4%.
%%
% Compare the classifier with one that uses a different weighting scheme.
w2 = [0.2; 0.2; 0.3; 0.3];  % alternative weight vector
CVKNNMdl2 = fitcknn(X,Y, ...
    'NumNeighbors',k, ...
    'Standardize',true, ...
    'KFold',10, ...
    'Distance',@(x,Z)chiSqrDist(x,Z,w2));  % bind w2 so the metric takes only (x,Z)
classError2 = kfoldLoss(CVKNNMdl2)  % no semicolon: display the error
%%
% The second weighting scheme yields a classifier that has better
% out-of-sample performance.