% www.gusucode.com > 数据挖掘工具箱 - mitmatlab源码程序 > mitmatlab\ADDC.m

    function [features, targets] = ADDC(train_features, train_targets, Nmu, region, plot_on)
%ADDC Reduce the number of data points using agglomerative clustering.
%
%Inputs:
%	train_features	- Input features, one sample per column (D x L)
%	train_targets	- Input targets, one entry per sample (1 x L), thresholded at 0.5
%	Nmu				- Maximum number of output data points (must be >= 1)
%	region			- Decision region vector (unused)
%   plot_on         - If 1, call plot_process(mu) after every sample (default 0)
%
%Outputs
%	features		- New features: the centroids that gathered enough samples
%	targets			- New targets: majority label of the samples nearest to
%					  each returned centroid
%
%If only one centroid is requested, or no centroid gathers enough samples to
%survive the pruning step, the result is the mean of the data together with
%the majority label of all samples.

if (nargin < 5)
    plot_on = 0;
end

if (Nmu < 1)
    error('ADDC:invalidNmu', 'Nmu must be at least 1.');
end

if (Nmu == 1)
    %If one center is needed, it is simply the average of the data.
    %Average along the sample dimension explicitly so that a single-sample
    %input is not averaged across its features.
    features = mean(train_features, 2);
    targets  = (sum(train_targets)/length(train_targets) > 0.5);
    return
end

[D, L]         = size(train_features);
min_percentage = 0.001; %Centroids holding less than this fraction of the samples are removed
min_number     = 5;     %Centroids holding no more than this many samples are also removed

%Initialize the centroids
K     = 0;              %Number of centroids currently in use
mu    = zeros(D, Nmu);
count = zeros(1, Nmu);  %Number of samples absorbed by each centroid

for i = 1:L
   data = train_features(:,i);

   if (K > 0)
      %Move the closest centroid towards the sample (running mean).
      %Sum over dimension 1 so one-dimensional features still give one
      %distance per centroid.
      dist           = sum((mu(:,1:K) - data * ones(1,K)).^2, 1);
      [temp, min_d]  = min(dist);
      count(min_d)   = count(min_d) + 1;
      mu(:,min_d)    = mu(:,min_d) + (data - mu(:,min_d)) / count(min_d);
   end

   if (K < Nmu)
      %Add new centroid at the sample (it starts with a count of zero)
      K       = K + 1;
      mu(:,K) = data;
   else
      %Merge the two closest centroids and reuse the freed slot.
      %Only pairs with i1 < i2 are scanned: the distance is symmetric and
      %the strict '<' keeps the first minimum, so the selected pair is the
      %same as when scanning all ordered pairs.
      closest_i1 = 0;
      closest_i2 = 0;
      best_dist  = inf;
      for i1 = 1:K-1
         for i2 = i1+1:K
            temp_dist = norm(mu(:,i1) - mu(:,i2));
            if (temp_dist < best_dist)
               best_dist  = temp_dist;
               closest_i1 = i1;
               closest_i2 = i2;
            end
         end
      end

      %Skip the merge when no pair was found (e.g. NaN distances) or when
      %both centroids are still empty; the sample is then not inserted.
      if (closest_i1 > 0)
         merged_count = count(closest_i1) + count(closest_i2);
         if (merged_count > 0)
            mu(:,closest_i1)  = (mu(:,closest_i1)*count(closest_i1) + ...
                                 mu(:,closest_i2)*count(closest_i2)) / merged_count;
            count(closest_i1) = merged_count;
            mu(:,closest_i2)  = data;
            count(closest_i2) = 0;
         end
      end
   end

   if (plot_on == 1)
       plot_process(mu)
   end

end

%Post-processing: drop centroids that absorbed too few samples
keep = find(count(1:K) > max(min_percentage*L, min_number));

if isempty(keep)
   %No centroid survived; fall back to a single center so that features
   %and targets stay consistent with each other.
   features = mean(train_features, 2);
   targets  = (sum(train_targets)/length(train_targets) > .5);
   return
end

features = mu(:,keep);
Nkeep    = length(keep);

%Label the points
if (Nkeep > 1)
   %Classify all the samples to one of the kept centroids (1-NN).
   %The distances must be taken to the kept centroids, not to mu(:,1:Nkeep).
   dist = zeros(Nkeep, L);
   for i = 1:Nkeep
      dist(i,:) = sum((train_features - features(:,i)*ones(1,L)).^2, 1);
   end
   [m, label] = min(dist, [], 1);

   %Each centroid gets the majority target of the samples assigned to it;
   %a centroid with no assigned samples keeps target 0.
   targets = zeros(1, Nkeep);
   for i = 1:Nkeep
      members = (label == i);
      if any(members)
         targets(i) = (sum(train_targets(:,members))/nnz(members) > .5);
      end
   end
else
   %There is only one center
   targets = (sum(train_targets)/length(train_targets) > .5);
end