www.gusucode.com > datafun 工具箱matlab源码程序 > datafun/rmmissing.m
function [B,I] = rmmissing(A,varargin)
%RMMISSING Remove rows or columns with missing entries
%   The first argument must be a numeric or logical array, a datetime,
%   duration, or calendarDuration array, a string array, a categorical
%   array, a character array, a cell array of character vectors, a table,
%   or a timetable. It must have at most two dimensions.
%
%   Standard missing data is defined as:
%      NaN                     - for double and single floating-point arrays
%      NaN                     - for duration and calendarDuration arrays
%      NaT                     - for datetime arrays
%      <missing>               - for string arrays
%      <undefined>             - for categorical arrays
%      blank character [' ']   - for character arrays
%      empty character {''}    - for cell arrays of character vectors
%
%   B = RMMISSING(A) removes missing entries from a vector, or rows with
%   missing entries from a matrix or table.
%
%   B = RMMISSING(A,DIM) reduces the size of A along the dimension DIM.
%   DIM = 1 removes rows, and DIM = 2 removes columns with missing entries.
%   If A is a table, DIM = 2 removes table variables. By default, RMMISSING
%   reduces the size of A along its first non-singleton dimension: it
%   removes rows from matrices and tables.
%
%   B = RMMISSING(A,...,'MinNumMissing',N) removes rows (columns) that
%   contain at least N missing entries. N must be a nonnegative integer.
%   By default, N = 1.
%
%   [B,I] = RMMISSING(A,...) also returns a logical row (column) vector I
%   indicating which rows (columns) of A were removed.
%
%   When A is a timetable and rows are being removed, rows whose row time
%   is missing are removed as well.
%
%   Arguments supported only for table inputs:
%
%   B = RMMISSING(A,...,'DataVariables',DV) removes rows according to
%   missing data in table variables DV. The default is all table variables
%   in A. DV must be a table variable name, a cell array of table variable
%   names, a vector of table variable indices, a logical vector, or a
%   function handle that returns a logical scalar (such as @isnumeric).
%
%   Examples:
%
%     % Remove NaN entries from a vector
%       a = [NaN 1 2 NaN NaN 3]
%       b = rmmissing(a)
%
%     % Remove only rows that contain at least 2 missing entries
%       A = [NaN(1,3); 13 1 -20; NaN(4,1) ones(4,2); -1 7 -10; NaN(1,3)]
%       B = rmmissing(A,'MinNumMissing',2)
%
%     % Remove table rows that contain standard missing data
%       v1 = {'AB'; ''; ''; 'XYZZ'; 'CDE'};
%       v2 = [NaN; -1; 8; 10; 4];
%       v3 = categorical({'yes'; '--'; 'yes'; 'no'; 'yes'},{'yes' 'no'});
%       T = table(v1,v2,v3)
%       U = rmmissing(T)
%
%     % Ignore rows with NaN entries when using sortrows
%       a = [ [20 10 NaN 30 -5]', [1:5]' ]
%       [b,ia] = rmmissing(a)
%       a(~ia,:) = sortrows(b)
%
%   See also ISMISSING, STANDARDIZEMISSING, FILLMISSING, ISNAN, ISNAT

%   Copyright 2015-2016 The MathWorks, Inc.

[A,AisTable,byrows,kount,dataVars,allVars] = parseInputs(A,varargin{:});

if AisTable
    % Each table variable has to be at most two-dimensional
    if ~all(varfun(@ismatrix,A,'OutputFormat','uniform'))
        error(message('MATLAB:rmmissing:NDArrays'));
    end
    if byrows
        % Only the selected data variables decide which rows are dropped
        if allVars
            isMiss = ismissing(A);
        else
            isMiss = ismissing(A(:,dataVars));
        end
        I = computeIndex(isMiss,byrows,kount);
        if isa(A,'timetable')
            % Also remove the rows that correspond to missing RowTimes
            I = I | ismissing(A.Properties.RowTimes);
        end
    else
        % Variables outside dataVars are never flagged for removal
        I = false(1,width(A));
        for varIdx = dataVars
            I(varIdx) = computeIndex(ismissing(A(:,varIdx)),byrows,kount);
        end
    end
else
    I = computeIndex(ismissing(A),byrows,kount);
end

B = reduceSize(A,I,byrows);

%--------------------------------------------------------------------------
    function tf = computeIndex(missingMask,alongRows,minCount)
        % Flag the rows (alongRows true) or columns (alongRows false) of
        % the logical mask that hold at least minCount true entries.
        if alongRows
            sumDim = 2;
        else
            sumDim = 1;
        end
        tf = sum(missingMask,sumDim) >= minCount;
    end

%--------------------------------------------------------------------------
    function out = reduceSize(data,dropMask,alongRows)
        % Keep only the rows or columns that are not flagged in dropMask
        keepMask = ~dropMask;
        if alongRows
            out = data(keepMask,:);
        else
            out = data(:,keepMask);
        end
    end

%--------------------------------------------------------------------------
    function [A,AisTable,byrows,kount,dataVars,allVars]=parseInputs(A,varargin)
        % Validate the RMMISSING inputs and resolve the optional DIM
        % argument and the 'MinNumMissing' / 'DataVariables' options.
        AisTable = isa(A,'table') || isa(A,'timetable');

        isSupportedType = isnumeric(A) || islogical(A) || ...
            ischar(A) || iscategorical(A) || iscellstr(A) || isstring(A) || ...
            isdatetime(A) || isduration(A) || iscalendarduration(A) || ...
            AisTable;
        if ~isSupportedType
            error(message('MATLAB:rmmissing:FirstInputInvalid'));
        end
        if ~ismatrix(A)
            error(message('MATLAB:rmmissing:NDArrays'));
        end

        % Defaults
        kount = 1;
        allVars = true; % use all table variables
        if AisTable
            byrows = true;
            dataVars = 1:width(A);
        else
            % Non-scalar row vectors are reduced along columns by default
            byrows = ~(isrow(A) && ~isscalar(A));
            dataVars = []; % not supported for arrays
        end

        numExtra = numel(varargin); % inputs supplied after A
        if numExtra == 0
            return
        end

        firstArg = varargin{1};
        if ischar(firstArg) && numExtra > 1
            % rmmissing(A,'MinNumMissing',M): N-V pairs start right after A
            firstNV = 1;
        else
            % rmmissing(A,DIM,...): N-V pairs start after DIM
            firstNV = 2;
            isScalarNumber = (isnumeric(firstArg) || islogical(firstArg)) && ...
                isscalar(firstArg);
            if isScalarNumber && firstArg == 1
                byrows = true;
            elseif isScalarNumber && firstArg == 2
                byrows = false;
            else
                error(message('MATLAB:rmmissing:DimensionInvalid'));
            end
        end

        if numExtra < 2
            return
        end

        % Parse N-V pairs
        nvPositions = firstNV:numExtra;
        if rem(length(nvPositions),2) ~= 0
            error(message('MATLAB:rmmissing:NameValuePairs'));
        end
        for namePos = nvPositions(1:2:end)
            optName = varargin{namePos};
            optValue = varargin{namePos+1};
            if matlab.internal.math.checkInputName(optName,'MinNumMissing')
                kount = optValue;
                % Must be a real, nonnegative, integer-valued scalar
                isValidCount = (isnumeric(kount) || islogical(kount)) && ...
                    isscalar(kount) && isreal(kount) && ...
                    fix(kount) == kount && kount >= 0;
                if ~isValidCount
                    error(message('MATLAB:rmmissing:MinNumMissing'));
                end
            elseif matlab.internal.math.checkInputName(optName,'DataVariables')
                allVars = false;
                if ~AisTable
                    error(message('MATLAB:rmmissing:DataVariablesArray'));
                end
                dataVars = matlab.internal.math.checkDataVariables(A,optValue,'rmmissing');
            else
                error(message('MATLAB:rmmissing:NameValueNames'));
            end
        end
    end % parseInputs

end % rmmissing