www.gusucode.com > bigdata 工具箱 matlab源码程序 > bigdata/@tall/discretize.m
function [tb, edges] = discretize(tx, edges, varargin)
%DISCRETIZE Group tall data into bins or categories.
%   Supported syntaxes:
%       [BINS,EDGES] = discretize(X,EDGES)
%       [BINS,EDGES] = discretize(X,N)
%       [BINS,EDGES] = discretize(X,EDGES,VALUES)
%       [C,EDGES] = discretize(X,EDGES,'categorical')
%       [C,EDGES] = discretize(X,EDGES,'categorical',CATEGORYNAMES)
%       [BINS,EDGES] = discretize(...,'IncludedEdge',SIDE)
%
%   The second input and every trailing argument are passed through
%   checkNotTall. When the second input is a scalar or a uniquely matching
%   date/time unit name, the bin edges are derived from the finite minimum
%   and maximum of X; otherwise the supplied edges are used directly.
%
%   See also DISCRETIZE.

%   Copyright 2016 The MathWorks, Inc.

narginchk(2,6);
fcnName = upper(mfilename);
checkNotTall(fcnName, 1, edges);
checkNotTall(fcnName, 2, varargin{:});

edgesMustBeDerived = isscalar(edges) || isdatetimeOption(edges);

if ~edgesMustBeDerived
    % Explicit edges were supplied: bin each piece of the data directly.
    if issparse(edges)
        edges = full(edges);
    end
    % The handle keeps two declared inputs (as in the original code); only
    % the first is ever used because a single array is passed to elementfun.
    tb = elementfun(@(chunk, ~) discretize(chunk, edges, varargin{:}), tx);
else
    % Only a bin count or a date/time unit was given, so the real edges have
    % to be worked out from the finite range of the data.
    requestedSpec = edges;
    [lowest, highest] = reducefun(@finiteMinMax, tx, tx);

    dataClass = tall.getClass(tx);
    lowest.Adaptor  = matlab.bigdata.internal.adaptors.getAdaptorForType(dataClass);
    highest.Adaptor = matlab.bigdata.internal.adaptors.getAdaptorForType(dataClass);

    edges = clientfun( ...
        @(lo, hi) dummyDiscretize(lo, hi, requestedSpec, varargin{:}), ...
        lowest, highest);
    edges.Adaptor = matlab.bigdata.internal.adaptors.getAdaptorForType(dataClass);

    % With the edges available, bin the data against the broadcast edges.
    tb = elementfun( ...
        @(chunk, binEdges) discretize(chunk, binEdges, varargin{:}), ...
        tx, matlab.bigdata.internal.broadcast(edges));
end

% Work out which type the output carries.
if any(cellfun(@isCategoricalFlag, varargin))
    outputType = 'categorical';
elseif ~isempty(varargin) && ~ischar(varargin{1})
    % First optional argument is not text, so it is a VALUES input.
    outputType = class(varargin{1});
else
    outputType = 'double';
end
tb.Adaptor = matlab.bigdata.internal.adaptors.getAdaptorForType(outputType);

% Carry the size information of the input over to the output.
tb.Adaptor = copySizeInformation(tb.Adaptor, tx.Adaptor);
end

function tf = isCategoricalFlag(arg)
% True when ARG is a char input that matches 'categorical' case-insensitively
% over its own length (at least one character is compared).
tf = ischar(arg) && strncmpi('categorical', arg, max(length(arg), 1));
end

function edges = dummyDiscretize(xmin,xmax,edges,varargin)
% Run the in-memory DISCRETIZE overload on the two extreme values purely to
% obtain the EDGES output; the bin indices are discarded.
extremes = [xmin;xmax];
[~, edges] = discretize(extremes, edges, varargin{:});
end

function [xmin, xmax] = finiteMinMax(x, y)
% Minimum of the finite values of X and maximum of the finite values of Y,
% both taken along the first dimension.
useMillis = isa(x, 'duration');  % the class of X alone selects the path for both inputs
lowCandidates  = dropNonFinite(x, useMillis);
highCandidates = dropNonFinite(y, useMillis);
xmin = min(lowCandidates, [], 1);
xmax = max(highCandidates, [], 1);
end

function v = dropNonFinite(v, useMillis)
% Remove the non-finite elements of V. When USEMILLIS is true the filtering
% is applied through the millis field of V instead of by direct indexing.
if useMillis
    keep = isfinite(v);
    v.millis = v.millis(keep);
else
    % Original author's note: the filtered result is always a column vector.
    v = v(isfinite(v));
end
end

function tf = isdatetimeOption(x)
% True when X matches exactly one of the date/time unit names below,
% compared case-insensitively over the length of X (minimum one character).
unitNames = {'second', 'minute', 'hour', 'day', 'week', 'month', ...
    'quarter', 'year', 'decade', 'century'};
numCompared = max(length(x), 1);
hits = strncmpi(unitNames, x, numCompared);
tf = nnz(hits) == 1; % exactly one match required
end