% www.gusucode.com > bigdata toolbox MATLAB source code > bigdata/@tall/topkrows.m

    function out = topkrows(tx, k, col, sortDirn)
%TOPKROWS  Return the top k rows of a matrix or table.
%
%   T = TOPKROWS(TX,K) returns up to the first K rows of tall matrix TX
%   when sorted in descending order as a group. TX can have any numeric
%   underlying class. If TX has fewer than K rows then the entire matrix is
%   returned.
%
%   T = TOPKROWS(TT,K) returns up to the first K rows of tall table TT when
%   sorted in descending order by all of the variables in TT.
%
%   T = TOPKROWS(TX,K,COL) returns up to the first K rows of tall matrix TX
%   when sorted by the columns specified by COL. COL is a positive integer
%   or a vector of positive integers.
%
%   T = TOPKROWS(TT,K,VARS) sorts the rows in tall table TT by the variables
%   specified by VARS. VARS is a positive integer, a vector of positive
%   integers, a variable name, a cell array containing one or more variable
%   names, or a logical vector.
%
%   T = TOPKROWS(TX,K,COL,DIRECTION) sorts tall matrix TX in the
%   direction(s) specified by DIRECTION. When DIRECTION is 'ascend' or
%   'descend' (the default), topkrows sorts TX in ascending or descending
%   order, respectively, for all columns specified by COL. DIRECTION may
%   also be a cell array containing the strings 'ascend' or 'descend' to
%   specify a different direction for each column specified by COL. Such a
%   cell array must have one entry per column in COL.
%
%   T = TOPKROWS(TT,K,VARS,DIRECTION) sorts tall table TT in the
%   direction(s) specified by DIRECTION.
%
%   Examples:
%   topkrows(tx, 10)
%   topkrows(tx, 15, [2 3], {'descend','ascend'})
%
%   See also: SORTROWS, TALL.

% Copyright 2016 The MathWorks, Inc.

narginchk(2,4);

% Check that k is a non-negative integer-valued scalar
validateattributes(k, ...
    {'numeric'}, {'real','scalar','nonnegative','integer'}, ...
    'topkrows', 'k')

isTallTable = istall(tx) && strcmp(tall.getClass(tx), 'table');

% Col list must be an integer-valued vector
if nargin<3
    % Use empty to signify all cols
    col = [];
else
    if isTallTable
        [tx, col] = iResolveTableCols(tx, col);
    else
        [tx, col] = iResolveMatrixCols(tx, col);
    end
end
if nargin<4
    sortDirn = [];
end
sortDirn = resolveSortDirection(sortDirn); % +1 for ascending, -1 for descending

% Fail fast on a direction list whose length does not match the explicitly
% requested columns. Both lengths are known here, so there is no reason to
% wait for the per-chunk function to hit the same condition later. When COL
% is empty the column count depends on the data, so that case is still left
% to the per-chunk check.
if ~isempty(col) && ~isscalar(sortDirn) && (numel(sortDirn)~=numel(col))
    error(message('MATLAB:bigdata:array:WrongLengthSortDirection'));
end

% Check that the input is a matrix
if ~isTallTable
    tx = lazyValidate(tx, {@ismatrix, 'MATLAB:bigdata:array:TopKBadX'});
end

% Hand the per-chunk selection to reducefun. K, COL and the resolved
% directions are captured by the anonymous function; the chunk is its only
% runtime argument.
outPa = reducefun(@(x) iSelectTopKRows(k, col, x, sortDirn), tx.ValueImpl);
out   = tall(outPa, resetSizeInformation(tx.Adaptor));

% Try to cache the result so that we don't have to revisit the original
% data again in future.
markforreuse(out);

end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [tx, col] = iResolveTableCols(tx, col)
% Translate a table variable specification into whatever the adaptor of TX
% returns from resolveVarNamesToIdxs (presumably variable indices - confirm
% against the adaptor implementation). TX is passed through untouched so
% that the signature matches iResolveMatrixCols.
col = tx.Adaptor.resolveVarNamesToIdxs(col);
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [tx, col] = iResolveMatrixCols(tx, col)
% Validate the sort-column specification for a tall matrix.
%
%   COL must be a real numeric vector of integer values, none of them below
%   1; anything else raises TopKBadCol straight away. The upper bound needs
%   the width of TX, so that test is passed to lazyValidate along with the
%   same message id. COL is returned unchanged.

isRealNumericVector = isnumeric(col) && isvector(col) && isreal(col);
isAcceptable = isRealNumericVector ...
    && all(floor(col)==col) && ~any(col<1);
if ~isAcceptable
    error(message('MATLAB:bigdata:array:TopKBadCol'));
end

% Range check against the number of columns; needs the size of TX
withinWidth = @(x) all(abs(col)<=size(x,2));
tx = lazyValidate(tx, {withinWidth, 'MATLAB:bigdata:array:TopKBadCol'});
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function out = iSelectTopKRows(numRows, col, x, sortDirn)
% Per-chunk worker: sort chunk X and return at most NUMROWS leading rows.
%
%   COL lists the columns to sort by; empty means every column of X in
%   order. SORTDIRN holds +1 (ascending) or -1 (descending), either as a
%   scalar applied to all sort columns or with one entry per sort column.

% Work out which columns drive the sort
if isempty(col)
    sortCols = 1:size(x,2);
else
    % SORTROWS only accepts double column indices
    sortCols = double(col);
end

% A non-scalar direction list must pair up one-to-one with the columns
numDirns = numel(sortDirn);
if numDirns~=1 && numDirns~=numel(sortCols)
    error(message('MATLAB:bigdata:array:WrongLengthSortDirection'));
end

% Fold the direction into the sign of each column index (as a row vector)
% and let SORTROWS do the work
signedCols = reshape(sortCols, 1, []) .* reshape(sortDirn, 1, []);
sorted = sortrows(x, signedCols);

% Trim to the requested number of rows, or fewer if the chunk is short
numKept = min(size(sorted,1), numRows);
out = sorted(1:numKept, :);
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function sortDirn = resolveSortDirection(sortDirn)
% Convert a sort-direction specification into numeric signs.
%
%   Empty input yields the scalar -1 (descending everywhere). Otherwise
%   the input must be a char array or a cell array of strings naming
%   'descend' or 'ascend' (case-insensitive); the result is a column
%   vector with -1 for each 'descend' and +1 for each 'ascend'.

if isempty(sortDirn)
    % Nothing specified: descending for all columns
    sortDirn = -1;
    return
end

% Normalise to a cell array of strings, rejecting every other type
if ischar(sortDirn)
    sortDirn = cellstr(sortDirn);
elseif ~iscellstr(sortDirn)
    error(message('MATLAB:bigdata:array:UnrecognizedSortDirection'));
end

% Position in the list: 1 for 'descend', 2 for 'ascend'
[isKnown, position] = ismember(lower(sortDirn(:)), {'descend','ascend'});
if ~all(isKnown)
    error(message('MATLAB:bigdata:array:UnrecognizedSortDirection'));
end

% Map position 1 -> -1 and position 2 -> +1
sortDirn = 2*position - 3;
end