www.gusucode.com > bigdata 工具箱 matlab源码程序 > bigdata/@tall/topkrows.m
function out = topkrows(tx, k, col, sortDirn)
%TOPKROWS Return the top k rows of a matrix or table.
%
%   T = TOPKROWS(TX,K) sorts the rows of tall matrix TX as a group in
%   descending order and returns up to the first K of them. TX can have
%   any numeric underlying class. When TX has fewer than K rows, every
%   row is returned.
%
%   T = TOPKROWS(TT,K) does the same for tall table TT, sorting in
%   descending order by all of the variables in TT.
%
%   T = TOPKROWS(TX,K,COL) sorts tall matrix TX by the columns listed in
%   COL before selecting rows. COL is a positive integer or a vector of
%   positive integers.
%
%   T = TOPKROWS(TT,K,VARS) sorts tall table TT by the variables listed
%   in VARS. VARS is a positive integer, a vector of positive integers, a
%   variable name, a cell array containing one or more variable names, or
%   a logical vector.
%
%   T = TOPKROWS(TX,K,COL,DIRECTION) sorts tall matrix TX in the
%   direction(s) given by DIRECTION. When DIRECTION is 'ascend' or
%   'descend' (the default), topkrows sorts TX in ascending or descending
%   order, respectively, for every column in COL. DIRECTION may also be a
%   cell array containing the strings 'ascend' or 'descend', giving a
%   separate direction for each column in COL.
%
%   T = TOPKROWS(TT,K,VARS,DIRECTION) sorts tall table TT in the
%   direction(s) given by DIRECTION.
%
%   Examples:
%      topkrows(tx, 10)
%      topkrows(tx, 15, [2 3], {'descend','ascend'})
%
%   See also: SORTROWS, TALL.

%   Copyright 2016 The MathWorks, Inc.

narginchk(2,4);

% K has to be a real, non-negative, integer-valued numeric scalar.
validateattributes(k, {'numeric'}, ...
    {'real','scalar','nonnegative','integer'}, ...
    'topkrows', 'k');

isTallTable = istall(tx) && strcmp(tall.getClass(tx), 'table');

% Resolve the column/variable selection. An empty COL is the marker for
% "use every column"; it is expanded per chunk in iSelectTopKRows.
if nargin >= 3
    if isTallTable
        [tx, col] = iResolveTableCols(tx, col);
    else
        [tx, col] = iResolveMatrixCols(tx, col);
    end
else
    col = [];
end

% Turn the direction argument into signs: +1 ascending, -1 descending.
% A missing argument is passed on as empty, which selects the default.
if nargin < 4
    sortDirn = [];
end
sortDirn = resolveSortDirection(sortDirn);

% Non-table input must be a matrix; the check is attached lazily.
if ~isTallTable
    tx = lazyValidate(tx, {@ismatrix, 'MATLAB:bigdata:array:TopKBadX'});
end

% Hand the per-chunk selection function to reducefun.
chunkFcn = @(x) iSelectTopKRows(k, col, x, sortDirn);
outPa = reducefun(chunkFcn, tx.ValueImpl);
out = tall(outPa, resetSizeInformation(tx.Adaptor));

% Try to cache the result so that the original data does not have to be
% revisited in future.
markforreuse(out);
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [tx, col] = iResolveTableCols(tx, col)
% Convert a table variable specification into indices using the adaptor
% attached to TX. TX itself is returned unchanged.
adaptor = tx.Adaptor;
col = adaptor.resolveVarNamesToIdxs(col);
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [tx, col] = iResolveMatrixCols(tx, col)
% Validate a column list for matrix input. COL is returned unchanged; TX
% gains a lazy check that no column index exceeds its width.

% Immediate checks: real numeric vector of integer values, all >= 1.
% The type tests come first so the arithmetic tests are only reached for
% real numeric vectors.
wrongKind = ~isnumeric(col) || ~isvector(col) || ~isreal(col);
if wrongKind || any(floor(col)~=col) || any(col<1)
    error(message('MATLAB:bigdata:array:TopKBadCol'));
end

% The upper bound depends on the size of TX, so it is validated lazily.
inRangeFcn = @(x) all(abs(col)<=size(x,2));
tx = lazyValidate(tx, {inRangeFcn, 'MATLAB:bigdata:array:TopKBadCol'});
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function out = iSelectTopKRows(numRows, col, x, sortDirn)
% Function to run on each chunk: sort X by the requested columns and keep
% at most numRows rows.

% Work out which columns drive the sort.
if isempty(col)
    % Empty means "all columns".
    sortCols = 1:size(x,2);
else
    % SORTROWS won't accept anything other than double.
    sortCols = double(col);
end

% A scalar direction applies to every column; otherwise there must be
% exactly one direction per sort column.
if ~isscalar(sortDirn) && (numel(sortDirn)~=numel(sortCols))
    error(message('MATLAB:bigdata:array:WrongLengthSortDirection'));
end

% Fold the direction into the sign of each column index, as a row vector.
signedCols = sortCols(:).' .* sortDirn(:).';
sortedChunk = sortrows(x, signedCols);

% Keep only the leading rows, limited by what the chunk actually holds.
numKept = min(size(sortedChunk,1), numRows);
out = sortedChunk(1:numKept, :);
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function sortDirn = resolveSortDirection(sortDirn)
% Map a direction specification onto signs: -1 for 'descend', +1 for
% 'ascend'. Empty input yields the scalar default -1 (descending);
% otherwise the result is a column vector with one sign per entry.

if isempty(sortDirn)
    % Default all to descending.
    sortDirn = -1;
    return
end

% Accept a char array or a cell array of strings; nothing else.
if ischar(sortDirn)
    sortDirn = cellstr(sortDirn);
elseif ~iscellstr(sortDirn)
    error(message('MATLAB:bigdata:array:UnrecognizedSortDirection'));
end

% Case-insensitive lookup: position 1 is 'descend', position 2 is 'ascend'.
[isKnown, position] = ismember(lower(sortDirn(:)), {'descend','ascend'});
if ~all(isKnown)
    error(message('MATLAB:bigdata:array:UnrecognizedSortDirection'));
end

% Position 1 -> -1 (descending), position 2 -> +1 (ascending).
sortDirn = 2*position - 3;
end