
%% Illustrate Linear Discriminant Analysis
% Consider measuring two features on each sample unit and categorizing
% the unit into one of two classes: class 0 and class 1.
%
% * The members of class 0 correspond to inputs coming from a bivariate Gaussian
% distribution with mean [1 1]'.
% * The members of class 1 correspond to inputs coming from a bivariate
% Gaussian distribution with mean [3 2]'.
% * $P(y = 0) = P(y = 1) = \pi = 0.5$.
% * The covariance matrix for both distributions is
%
% $$\left[\matrix{0.25 & 0\cr 0 & 0.25}\right].$$
%

% Copyright 2015 The MathWorks, Inc.

rng(1); % For reproducibility

Mu = [1 1;3 2];      % Mean matrix
Sigma = 0.25*eye(2); % Covariance matrix
GMDist = gmdistribution(Mu,Sigma);
[X,idx] = random(GMDist,1000);
y = idx - 1;
%%
% |X| is the 1000-by-2 matrix of random variates from the Gaussian mixture
% distribution. |idx| is a 1000-by-1 vector mapping each random variate to
% one of the two component Gaussian distributions.
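%%
% As a quick check, confirm that the two classes occur in roughly equal
% proportion, consistent with $P(y = 0) = P(y = 1) = 0.5$.
tabulate(y)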
%%
% Compute the log-odds ratio for each input sample unit.
GMDist0 = gmdistribution(Mu(1,:),Sigma);
GMDist1 = gmdistribution(Mu(2,:),Sigma);
logOR = log(pdf(GMDist1,X)./pdf(GMDist0,X));
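%%
% For two Gaussian densities with a common covariance matrix, this
% log-odds ratio is linear in $x$:
%
% $$\log\frac{f_1(x)}{f_0(x)} = x^\prime\Sigma^{-1}(\mu_1-\mu_0)-0.5\mu_1^\prime\Sigma^{-1}\mu_1+0.5\mu_0^\prime\Sigma^{-1}\mu_0.$$
%
% Because the two classes have equal prior probabilities, the Bayes rule
% assigns a sample unit to class 1 whenever its log-odds ratio is
% positive. As a quick sketch (the variable names are illustrative):
yHatBayes = double(logOR > 0);  % Classify using the true parameters
bayesErr = mean(yHatBayes ~= y) % Empirical misclassification rate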
%%
% Plot the data with contour lines for the density function.
c = exp(-0.1*logOR.^2); % Color scale: points with logOR near 0 plot pink

pdfFun = @(x1,x2)(pdf(GMDist,[x1,x2]));
ezcontour(pdfFun,[-1,5,0,4],50)
hold on
for k = 1:1000
    line(X(k,1),X(k,2),'Marker','.','Color',[c(k) 0.5 0.5]);
end
title('{\bf Input Distribution}')
%%
% The plot displays the data and contour lines representing the mixture
% density function. The pink data points are difficult to classify
% because they are roughly equidistant from both means.
%%
% LDA defines a threshold for classifying inputs based on estimated
% parameters of the Gaussian distributions. Specifically, if
%
% $$x^\prime \hat\Sigma^{-1}(\hat\mu_1-\hat\mu_0)>0.5\hat\mu_1^\prime\hat\Sigma^{-1}\hat\mu_1-0.5\hat\mu_0^\prime\hat\Sigma^{-1}\hat\mu_0+\log\left(\frac{\hat\pi_0}{\hat\pi_1}\right),$$
%
% then LDA assigns a sample unit to class 1. Note that
%
% * $\hat\pi_k = \frac{n_k}{n}$, where $n$ is the sample size and $n_k$ is the
% number of sample units in class _k_.
% * $\hat\mu_k = \frac{1}{n_k}\displaystyle\sum_{\{j:y_j = k\}}x_j$, the within class sample means.
% * $\hat\Sigma = \frac{1}{n-K}\sum_{k=1}^K\displaystyle\sum_{\{j:y_j = k\}}(x_j-\hat\mu_k)(x_j-\hat\mu_k)^\prime$, the pooled within-class sample covariance matrix (computed directly in the sketch below).
%
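% As a sketch, compute these plug-in estimates directly from the data
% (the variable names are illustrative); the resulting class assignments
% should agree with the fitted LDA model below.
n = numel(y);
X0 = X(y == 0,:);
X1 = X(y == 1,:);
mu0Hat = mean(X0); % Within-class sample means (1-by-2)
mu1Hat = mean(X1);
SigmaHat = ((size(X0,1) - 1)*cov(X0) + ...
    (size(X1,1) - 1)*cov(X1))/(n - 2); % Pooled covariance, K = 2
pi0Hat = mean(y == 0);
pi1Hat = mean(y == 1);
threshold = 0.5*(mu1Hat/SigmaHat)*mu1Hat' - ...
    0.5*(mu0Hat/SigmaHat)*mu0Hat' + log(pi0Hat/pi1Hat);
yHatManual = double((X/SigmaHat)*(mu1Hat - mu0Hat)' > threshold);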
%%
% Apply LDA to the data.  Plot the decision boundary.
EstMdl = fitcdiscr(X,y);
decnBndLDA = @(x1,x2)(EstMdl.Coeffs(1,2).Const + ...
    EstMdl.Coeffs(1,2).Linear(1)*x1 + ...
    EstMdl.Coeffs(1,2).Linear(2)*x2);

figure;
gscatter(X(:,1),X(:,2),y,[1,0,0;0,0.5,1]);
hold on
h1 = gca;
h2 = ezplot(decnBndLDA,[h1.XLim h1.YLim]);
h2.Color = [0,0,0];
h2.LineWidth = 2;
legend('Class 0','Class 1','Decision Boundary')
title('LDA Decision Boundary')
%%
% The plot displays the two classes and the decision boundary. Note that
% LDA misclassifies some of the sample units.
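%%
% To quantify this, compute the resubstitution error, that is, the
% fraction of the training sample that the fitted model misclassifies.
ldaResubErr = resubLoss(EstMdl)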