www.gusucode.com > bigdata 工具箱 matlab源码程序 > bigdata/@tall/discretize.m

    function [tb, edges] = discretize(tx, edges, varargin)
%DISCRETIZE Group tall data into bins or categories.
%   Supported syntaxes:
%   [BINS,EDGES] = discretize(X,EDGES)
%   [BINS,EDGES] = discretize(X,N)
%   [BINS,EDGES] = discretize(X,EDGES,VALUES)
%   [C,EDGES] = discretize(X,EDGES,'categorical')
%   [C,EDGES] = discretize(X,EDGES,'categorical',CATEGORYNAMES)
%   [BINS,EDGES] = discretize(...,'IncludedEdge',SIDE)
%
%   X is the tall input. EDGES and every trailing argument are passed
%   through checkNotTall before use. When EDGES is a scalar or one of the
%   date/time unit names, the concrete edges are computed from the finite
%   minimum and maximum of X before binning; otherwise the supplied EDGES
%   are used directly (sparse EDGES are converted to full first).
%
%   See also DISCRETIZE.

%   Copyright 2016 The MathWorks, Inc.

narginchk(2,6);
callerName = upper(mfilename);
checkNotTall(callerName, 1, edges);
checkNotTall(callerName, 2, varargin{:});

edgesAreExplicit = ~(isscalar(edges) || isdatetimeOption(edges));
if edgesAreExplicit
    % Edges were supplied by the caller: bin every element against them.
    if issparse(edges)
        edges = full(edges);
    end
    tb = elementfun(@(chunk,~)discretize(chunk,edges,varargin{:}), tx);
else
    % Only a bin count / width / unit name was given, so the real edges
    % have to be derived from the finite extent of the data.
    [dataLow, dataHigh] = reducefun(@finiteMinMax, tx, tx);
    inputClass = tall.getClass(tx);
    dataLow.Adaptor = matlab.bigdata.internal.adaptors.getAdaptorForType(inputClass);
    dataHigh.Adaptor = matlab.bigdata.internal.adaptors.getAdaptorForType(inputClass);
    % The handle captures the caller's original EDGES value before the
    % variable is overwritten with the computed edges.
    edges = clientfun(@(lo,hi)dummyDiscretize(lo,hi,edges,varargin{:}),dataLow,dataHigh);
    edges.Adaptor = matlab.bigdata.internal.adaptors.getAdaptorForType(inputClass);
    % Bin each element against the computed edges.
    tb = elementfun(@(chunk,binEdges)discretize(chunk,binEdges,varargin{:}), tx, matlab.bigdata.internal.broadcast(edges));
end

% Work out the class of the result: categorical when any trailing char
% argument is a (possibly abbreviated, case-insensitive) 'categorical',
% the class of VALUES when the first trailing argument is not a char,
% and double in every other case.
isCategoricalFlag = @(arg)ischar(arg) && strncmpi('categorical', arg, max(length(arg), 1));
if any(cellfun(isCategoricalFlag,varargin))
    outputClass = 'categorical';
elseif nargin > 2 && ~ischar(varargin{1})
    outputClass = class(varargin{1});
else
    outputClass = 'double';
end
tb.Adaptor = matlab.bigdata.internal.adaptors.getAdaptorForType(outputClass);

% Carry the size information of the input adaptor over to the output.
tb.Adaptor = copySizeInformation(tb.Adaptor, tx.Adaptor);
end

function edges = dummyDiscretize(xmin,xmax,edges,varargin)
% Return the EDGES that DISCRETIZE reports for data spanning XMIN..XMAX.
% Only the second output of DISCRETIZE is kept; the bin indices computed
% for the two-element stand-in data are discarded.
limits = [xmin;xmax];
[~, edges] = discretize(limits, edges, varargin{:});
end

function [xmin, xmax] = finiteMinMax(x, y)
% Minimum of the finite entries of X and maximum of the finite entries of Y.
% Non-finite entries (as reported by ISFINITE) are dropped before reducing
% along the first dimension.
if ~isa(x, 'duration')
    lowCandidates = x(isfinite(x));
    highCandidates = y(isfinite(y));
else
    % Duration inputs are filtered through their millis property; the
    % branch is selected by the class of X only.
    lowCandidates = x;
    lowCandidates.millis = x.millis(isfinite(x));
    highCandidates = y;
    highCandidates.millis = y.millis(isfinite(y));
end
xmin = min(lowCandidates,[],1);
xmax = max(highCandidates,[],1);
end

function tf = isdatetimeOption(x)
% True when X matches exactly one date/time unit name.
% The comparison is case-insensitive and limited to the first
% max(length(X),1) characters, so an abbreviation counts only if it
% selects a single name from the list.
unitNames = {'second', 'minute', 'hour', 'day', 'week', 'month', 'quarter', ...
    'year', 'decade', 'century'};
nChars = max(length(x), 1);
matches = strncmpi(unitNames, x, nChars);
tf = (sum(matches) == 1); % ambiguous abbreviations give more than one match
end