www.gusucode.com > datafun 工具箱matlab源码程序 > datafun/fillmissing.m
function [B,FA] = fillmissing(A,fillMethod,varargin) %FILLMISSING Fill missing entries % First argument must be numeric, datetime, duration, calendarDuration, % string, categorical, character array, cell array of character vectors, % a table, or a timetable. % Standard missing data is defined as: % NaN - for double and single floating-point arrays % NaN - for duration and calendarDuration arrays % NaT - for datetime arrays % <missing> - for string arrays % <undefined> - for categorical arrays % blank character [' '] - for character arrays % empty character {''} - for cell arrays of character vectors % % B = FILLMISSING(A,'constant',C) fills missing entries in A with the % constant scalar value C. You can also use a vector C to specify % different fill constants for each column (or table variable) in A: C(i) % represents the fill constant used for the i-th column of A. For tables % A, C can also be a cell containing fill constants of different types. % % B = FILLMISSING(A,INTERP) fills standard missing entries using the % interpolation method specified by INTERP, which must be: % 'previous' - Previous non-missing entry. % 'next' - Next non-missing entry. % 'nearest' - Nearest non-missing entry. % 'linear' - Linear interpolation of non-missing entries. % 'spline' - Piecewise cubic spline interpolation. % 'pchip' - Shape-preserving piecewise cubic spline interpolation. % % Optional arguments: % % B = FILLMISSING(A,METHOD,...,'EndValues',E) also specifies how to % extrapolate leading and trailing missing values. E must be: % 'extrap' - (default) Uses METHOD to also extrapolate missing data. % 'previous' - Previous non-missing entry. % 'next' - Next non-missing entry. % 'nearest' - Nearest non-missing entry. % 'none' - No extrapolation of missing values. % VALUE - Uses an extrapolation constant. VALUE must be a scalar % or a vector of type numeric, duration, or datetime. % % B = FILLMISSING(A,METHOD,...,'SamplePoints',X) also specifies the % sample points X used by the fill method. X must be a floating-point, % duration, or datetime vector. X must be sorted. X must contain unique % points. You can use X to specify time stamps for the data. By default, % FILLMISSING uses data sampled uniformly at points X = [1 2 3 ... ]. % % B = FILLMISSING(A,METHOD,DIM,...) also specifies a dimension DIM to % operate along. A must be an array. % % [B,FA] = FILLMISSING(A,...) also returns a logical array FA indicating % the missing entries of A that were filled. FA has the same size as A. % % Arguments supported only for table inputs: % % B = FILLMISSING(A,...,'DataVariables',DV) fills missing data only in % the table variables specified by DV. The default is all table variables % in A. DV must be a table variable name, a cell array of table variable % names, a vector of table variable indices, a logical vector, or a % function handle that returns a logical scalar (such as @isnumeric). % Output table B has the same size as input table A. % % Examples: % % % Linear interpolation of NaN entries % a = [NaN 1 2 NaN 4 NaN] % b = fillmissing(a,'linear') % % % Fill leading and trailing NaN entries with their nearest neighbors % a = [NaN 1 2 NaN 4 NaN] % b = fillmissing(a,'linear','EndValues','nearest') % % % Fill NaN entries with their previous neighbors (zero-order-hold) % A = [1000 1 -10; NaN 1 NaN; NaN 1 NaN; -1 77 5; NaN(1,3)] % B = fillmissing(A,'previous') % % % Fill NaN entries with the mean of each column % A = [NaN(1,3); 13 1 -20; NaN(4,1) (1:4)' NaN(4,1); -1 7 -10; NaN(1,3)] % C = mean(A,'omitnan'); % B = fillmissing(A,'constant',C) % % % Linear interpolation of NaN entries for non-uniformly spaced data % x = [linspace(-3,1,120) linspace(1.1,7,30)]; % a = exp(-0.1*x).*sin(2*x); a(a > -0.2 & a < 0.2) = NaN; % [b,id] = fillmissing(a,'linear','SamplePoints',x); % plot(x,a,'.', x(id),b(id),'o') % title('''linear'' fill') % xlabel('Sample points x'); % legend('original data','filled missing data') % % % Fill missing entries in tables with their previous neighbors % temperature = [21.1 21.5 NaN 23.1 25.7 24.1 25.3 NaN 24.1 25.5]'; % windSpeed = [12.9 13.3 12.1 13.5 10.9 NaN NaN 12.2 10.8 17.1]'; % windDirection = categorical({'W' 'SW' 'SW' '' 'SW' 'S' ... % 'S' 'SW' 'SW' 'SW'})'; % conditions = {'PTCLDY' '' '' 'PTCLDY' 'FAIR' 'CLEAR' ... % 'CLEAR' 'FAIR' 'PTCLDY' 'MOSUNNY'}'; % T = table(temperature,windSpeed,windDirection,conditions) % U = fillmissing(T,'previous') % % See also ISMISSING, STANDARDIZEMISSING, RMMISSING, ISNAN, ISNAT % Copyright 2015-2016 The MathWorks, Inc. [A,AisTable,intM,intC,extM,x,dim,dataVars] = parseInputs(A,fillMethod,varargin{:}); if ~AisTable [intC,extM] = checkArrayType(A,intM,intC,extM,x,false); if nargout < 2 B = fillArray(A,intM,intC,extM,x,dim,false); else [B,FA] = fillArray(A,intM,intC,extM,x,dim,false); end else if nargout < 2 B = fillTable(A,intM,intC,extM,x,dataVars); else [B,FA] = fillTable(A,intM,intC,extM,x,dataVars); end end %-------------------------------------------------------------------------- function [B,FA] = fillTable(A,intMethod,intConst,extMethod,x,dataVars) % Fill table according to DataVariables B = A; if nargout > 1 FA = false(size(A)); end useJthFillConstant = strcmp(intMethod,'constant') && ~isscalar(intConst) && ~ischar(intConst); useJthExtrapConstant = ~ischar(extMethod) && ~isscalar(extMethod); indVj = 1; for vj = dataVars if nargout < 2 B.(vj) = fillTableVar(indVj,A.(vj),intMethod,intConst,extMethod,x,useJthFillConstant,useJthExtrapConstant); else [B.(vj),FA(:,vj)] = fillTableVar(indVj,A.(vj),intMethod,intConst,extMethod,x,useJthFillConstant,useJthExtrapConstant); end indVj = indVj+1; end end % fillTable %-------------------------------------------------------------------------- function [Bvj,FAvj] = fillTableVar(indVj,Avj,intMethod,intConst,extMethod,x,useJthFillConstant,useJthExtrapConstant) % Fill each table variable intConstVj = intConst; extMethodVj = extMethod; if useJthFillConstant intConstVj = intConst(indVj); end if iscell(intConstVj) intConstVj = checkConstantsSize(Avj,true,intConstVj{1},1,[],''); end if useJthExtrapConstant extMethodVj = extMethod(indVj); end % Validate types of array and fill constants [intConstVj,extMethodVj] = checkArrayType(Avj,intMethod,intConstVj,extMethodVj,x,true); % Treat row in a char table variable as a string AisCharTableVar = ischar(Avj); if AisCharTableVar Avj = matlab.internal.math.charRows2string(Avj); if strcmp(intMethod,'constant') intConstVj = matlab.internal.math.charRows2string(intConstVj); end end % Fill if nargout < 2 Bvj = fillArray(Avj,intMethod,intConstVj,extMethodVj,x,1,true); else [Bvj,FAvj] = fillArray(Avj,intMethod,intConstVj,extMethodVj,x,1,true); end % Convert back to char table variable if AisCharTableVar Bvj = matlab.internal.math.string2charRows(Bvj); end end % fillTableVar %-------------------------------------------------------------------------- function [B,FA] = fillArray(A,intMethod,intConst,extMethod,x,dim,AisTableVar) % Perform FILLMISSING of standard missing entries in an array A B = A; FA = ismissing(A); sizeBin = size(B); ndimsBin = ndims(B); % Quick return if ~AisTableVar && dim > ndimsBin B = extrapolateWithConstant(B,intMethod,intConst,extMethod,FA,FA); return end % Permute and reshape into a matrix perm = [dim, 1:(dim-1), (dim+1):ndimsBin]; sizeBperm = sizeBin(perm); ncolsB = prod(sizeBperm(2:end)); B = reshape(permute(B, perm),[sizeBperm(1), ncolsB]); FA = reshape(permute(FA, perm),[sizeBperm(1), ncolsB]); % Fill each column for jj = 1:ncolsB B(:,jj) = fillArrayColumn(jj,B(:,jj),FA(:,jj),intMethod,intConst,extMethod,x); end % Reshape and permute back to original size if AisTableVar && nargout > 1 FA = xor(any(FA,2),any(ismissing(B),2)); end B = ipermute(reshape(B,sizeBperm), perm); if ~AisTableVar && nargout > 1 FA = ipermute(reshape(FA,sizeBperm), perm); FA(FA) = xor(FA(FA),ismissing(B(FA))); end end % fillArray %-------------------------------------------------------------------------- function b = fillArrayColumn(jj,a,ma,intMethod,intConst,extMethod,x) % Fill one column. Do not error if we cannot fill all missing entries. % jj = j-th column numeric index. Used to select the j-th fill constant. % a = the j-th column itself. Can be numeric, logical, duration, datetime, % calendarDuration, char, string, cellstr, or categorical. % ma = logical mask of missing entries found in a. % intMethod = interpolation method. % intConst = interpolation constant. [] if intMethod is not 'constant'. % extMethod = extrap method. If not a char, it holds the extrap constant. % x = the abscissa ('SamplePoints'). Can be float, duration, or datetime. b = a; % Quick return if isinteger(b) || islogical(b) return end nma = find(~ma); numNonMissing = numel(nma); if numNonMissing == 0 % Columns full of missing data can only be filled with a constant. b = extrapolateWithConstant(b,intMethod,intConst,extMethod,ma,jj); return end % (1) Interpolate if issparse(b) b = full(b); end if strcmp(intMethod,'constant') b = assignConstant(b,intConst,ma,jj); else % griddedInterpolant/interp1 require at least 2 grid points. % Do not error if we cannot fill. Instead, return the original array. % For example, fillmissing([NaN 1 NaN],'linear') returns [NaN 1 NaN]. if numNonMissing > 1 isfloatb = isfloat(b); if isfloatb && isfloat(x) G = griddedInterpolant(x(nma),b(nma),intMethod); b(ma) = G(x(ma)); % faster than interp1 elseif isfloatb || isduration(b) || isdatetime(b) b(ma) = interp1(x(nma),b(nma),x(ma),intMethod,'extrap'); else % calendarDuration, char, string, cellstr, or categorical: % No griddedInterpolant because x may be datetime/duration vq = interp1(x(nma),nma,x(ma),intMethod,'extrap'); indvq = ~isnan(vq); % vq may have leading or trailing NaN iatmp = find(ma); b(iatmp(indvq)) = b(vq(indvq)); % copy non-missing to missing end end end % (2) Correct for EndValues indBeg = nma(1); % numNonMissing > 0 indEnd = nma(end); if ischar(extMethod) if strcmp(extMethod,'none') b(1:indBeg-1) = a(1:indBeg-1); b(indEnd+1:end) = a(indEnd+1:end); elseif strcmp(extMethod,'nearest') || (strcmp(extMethod,'extrap') && strcmp(intMethod,'nearest')) b(1:indBeg-1) = a(indBeg); b(indEnd+1:end) = a(indEnd); elseif strcmp(extMethod,'previous') || (strcmp(extMethod,'extrap') && strcmp(intMethod,'previous')) b(1:indBeg-1) = a(1:indBeg-1); b(indEnd+1:end) = a(indEnd); elseif strcmp(extMethod,'next') || (strcmp(extMethod,'extrap') && strcmp(intMethod,'next')) b(1:indBeg-1) = a(indBeg); b(indEnd+1:end) = a(indEnd+1:end); end else if isscalar(extMethod) b([1:indBeg-1, indEnd+1:end]) = extMethod; else b([1:indBeg-1, indEnd+1:end]) = extMethod(jj); end end end % fillArrayColumn %-------------------------------------------------------------------------- function B = extrapolateWithConstant(B,intMethod,intConst,extMethod,lhsIndex,rhsIndex) % Fill all missings with a constant. Used if B is full of missing data, or % for array B with dim > ndims(B). rhsIndex may be logical or numeric. % Fill only when we have specified an extrapolation constant: if ~ischar(extMethod) % Either through EndValues: % fillmissing(A,METHOD,'EndValues',ConstVals) B = assignConstant(B,extMethod,lhsIndex,rhsIndex); elseif strcmp(intMethod,'constant') && strcmp(extMethod,'extrap') % Or through the 'constant' fill method: % fillmissing(A,'constant',ConstVals) % fillmissing(A,'constant',ConstVals,'EndValues','extrap') B = assignConstant(B,intConst,lhsIndex,rhsIndex); end end % extrapolateWithConstant %-------------------------------------------------------------------------- function B = assignConstant(B,ConstVals,lhsIndex,rhsIndex) if isscalar(ConstVals) B(lhsIndex) = ConstVals; else B(lhsIndex) = ConstVals(rhsIndex); end end %-------------------------------------------------------------------------- function [A,AisTable,intMethod,intConst,extMethod,x,dim,dataVars] = parseInputs(A,fillMethod,varargin) % Parse FILLMISSING inputs AisTable = isa(A,'table') || isa(A,'timetable'); if ~isSupportedArray(A) && ~AisTable error(message('MATLAB:fillmissing:FirstInputInvalid')); end % Parse fill method. Empty '' or [] fill method is not allowed. validIntMethods = {'constant','previous','next','nearest','linear','spline','pchip'}; indIntMethod = matlab.internal.math.checkInputName(fillMethod,validIntMethods); if sum(indIntMethod) ~= 1 % Also catch ambiguities for fillmissing(A,'ne') and fillmissing(A,'p') error(message('MATLAB:fillmissing:MethodInvalid')); end intMethod = validIntMethods{indIntMethod}; intConst = []; % Parse fillmissing(A,'constant',c) intConstOffset = 0; if indIntMethod(1) if nargin > 2 intConst = varargin{1}; else error(message('MATLAB:fillmissing:ConstantInput')); end intConstOffset = 1; end % Parse optional inputs extMethod = 'extrap'; x = []; if ~AisTable dim = find(size(A) ~= 1,1); % default to first non-singleton dimension if isempty(dim) dim = 2; % dim = 2 for scalar and empty A end dataVars = []; % not supported for arrays else dim = 1; % Fill each table variable separately dataVars = 1:width(A); end if nargin > 2+intConstOffset % Third input can be a constant, the dimension, or an argument Name: % fillmissing(A,'constant',C,...) and C may be a char itself % fillmissing(A,'linear',DIM,...) % fillmissing(A,'linear','EndValues',...) firstOptionalInput = varargin{1+intConstOffset}; % The dimension dimOffset = 0; if isnumeric(firstOptionalInput) || islogical(firstOptionalInput) if AisTable error(message('MATLAB:fillmissing:DimensionTable')); end dimOffset = 1; dim = firstOptionalInput; if ~isscalar(dim) || ~isreal(dim) || fix(dim) ~= dim || dim < 1 || ~isfinite(dim) error(message('MATLAB:fillmissing:DimensionInvalid')); end end % Trailing N-V pairs indNV = (1+intConstOffset+dimOffset):numel(varargin); if rem(length(indNV),2) ~= 0 error(message('MATLAB:fillmissing:NameValuePairs')); end for i = indNV(1:2:end) if matlab.internal.math.checkInputName(varargin{i},'EndValues') extMethod = varargin{i+1}; if ischar(extMethod) validExtMethods = {'extrap','previous','next','nearest','none'}; indExtMethod = matlab.internal.math.checkInputName(extMethod,validExtMethods); if sum(indExtMethod) ~= 1 % Also catch ambiguities between nearest and next error(message('MATLAB:fillmissing:EndValuesInvalidMethod')); end extMethod = validExtMethods{indExtMethod}; end elseif matlab.internal.math.checkInputName(varargin{i},'SamplePoints') if isa(A,'timetable') error(message('MATLAB:fillmissing:SamplePointsTimeTable')); end x = checkSamplePoints(varargin{i+1},A,false,dim); elseif matlab.internal.math.checkInputName(varargin{i},'DataVariables') if AisTable dataVars = matlab.internal.math.checkDataVariables(A,varargin{i+1},'fillmissing'); else error(message('MATLAB:fillmissing:DataVariablesArray')); end else error(message('MATLAB:fillmissing:NameValueNames')); end end end % Validate fill constants size if strcmp(intMethod,'constant') intConst = checkConstantsSize(A,false,intConst,dim,dataVars,''); end if ~ischar(extMethod) extMethod = checkConstantsSize(A,false,extMethod,dim,dataVars,'Extrap'); end % Default abscissa if isempty(x) if isa(A,'timetable') x = checkSamplePoints(A.Properties.RowTimes,A,true,dim); else x = (1:size(A,dim)).'; end end end % parseInputs %-------------------------------------------------------------------------- function tf = isSupportedArray(A) % Check if array type is supported tf = isnumeric(A) || islogical(A) || ... isstring(A) || iscategorical(A) || iscellstr(A) || ischar(A) || ... isdatetime(A) || isduration(A) || iscalendarduration(A); end % isSupportedArray %-------------------------------------------------------------------------- function x = checkSamplePoints(x,A,AisTimeTable,dim) % Validate SamplePoints value if AisTimeTable tname = 'RowTimes'; else tname = 'SamplePoints'; end if (~isvector(x) && ~isempty(x)) || (~isfloat(x) && ~isduration(x) && ~isdatetime(x)) error(message('MATLAB:fillmissing:SamplePointsInvalidDatatype')); end if length(x) ~= size(A,dim) error(message('MATLAB:fillmissing:SamplePointsLength',size(A,dim))); end x = x(:); if (isfloat(x) || isduration(x)) && any(~isfinite(x)) error(message('MATLAB:fillmissing:SamplePointsNonFinite',tname,'NaN')); end if isdatetime(x) && any(~isfinite(x)) error(message('MATLAB:fillmissing:SamplePointsNonFinite',tname,'NaT')); end if isfloat(x) if ~isreal(x) error(message('MATLAB:fillmissing:SamplePointsComplex')); end if issparse(x) error(message('MATLAB:fillmissing:SamplePointsSparse')); end end if any(diff(x) <= 0) % && intConstOffset == 0 if any(diff(x) == 0) error(message('MATLAB:fillmissing:SamplePointsDuplicate',tname)); else error(message('MATLAB:fillmissing:SamplePointsSorted',tname)); end end end % checkSamplePoints %-------------------------------------------------------------------------- function C = checkConstantsSize(A,AisTableVar,C,dim,dataVars,eid) % Validate the size of the fill constant. We can fill all columns with the % same scalar, or use a different scalar for each column. if isempty(A) && ~isempty(C) error(message(['MATLAB:fillmissing:SizeConstantEmpty',eid])); end if ischar(C) && (~ischar(A) || AisTableVar) % A char fill constant is treated as a scalar for string, categorical % and cellstr (arrays or table variables), and char table variables if ~isrow(C) && ~isempty(C) % '' is not a row error(message('MATLAB:fillmissing:CharRowVector')); end elseif ~isscalar(C) sizeA = size(A); if isa(A,'table') || isa(A,'timetable') % numel(constant) must equal numel 'DataVariables' value sizeA(2) = length(dataVars); end if dim <= ndims(A) sizeA(dim) = []; nVects = prod(sizeA); else % fillmissing(A,'constant',c) supported % fillmissing(A,METHOD,'EndValues',constant_value) supported nVects = numel(A); end if (numel(C) ~= nVects) if nVects <= 1 error(message(['MATLAB:fillmissing:SizeConstantScalar',eid])); else error(message(['MATLAB:fillmissing:SizeConstant',eid],nVects)); end end C = C(:); end end % checkConstantsSize %-------------------------------------------------------------------------- function [intConst,extMethod] = checkArrayType(A,intMethod,intConst,extMethod,x,AisTableVar) % Check if array types match if AisTableVar && ~isSupportedArray(A) error(message('MATLAB:fillmissing:UnsupportedTableVariable',class(A))); end if ~(isnumeric(A) || islogical(A) || isduration(A) || isdatetime(A)) && ... ~any(strcmp(intMethod,{'nearest','next','previous','constant'})) if AisTableVar error(message('MATLAB:fillmissing:InterpolationInvalidTableVariable',intMethod)); else error(message('MATLAB:fillmissing:InterpolationInvalidArray',intMethod,class(A))); end end try if strcmp(intMethod,'constant') intConst = checkConstantType(A,intConst,''); end if ~ischar(extMethod) extMethod = checkConstantType(A,extMethod,'Extrap'); end catch ME if AisTableVar && matlab.internal.math.checkInputName('MATLAB:fillmissing:Constant',ME.identifier) % Generic error message for tables error(message('MATLAB:fillmissing:ConstantInvalidType')); else % Specific error message for arrays throw(ME); end end if isa(x,'single') && (isduration(A) || isdatetime(A)) error(message('MATLAB:fillmissing:SamplePointsSingle')); end end % checkArrayType %-------------------------------------------------------------------------- function C = checkConstantType(A,C,eid) % Check if constant type matches the array type if ~isempty(eid) && ~isnumeric(C) && ~islogical(C) && ... ~isdatetime(C) && ~isduration(C) && ~iscalendarduration(C) error(message('MATLAB:fillmissing:ConstantInvalidTypeExtrap')); end if isnumeric(A) && ~isnumeric(C) && ~islogical(C) error(message(['MATLAB:fillmissing:ConstantNumeric',eid])); elseif isdatetime(A) && ~isdatetime(C) error(message(['MATLAB:fillmissing:ConstantDatetime',eid])); elseif isduration(A) && ~isduration(C) error(message(['MATLAB:fillmissing:ConstantDuration',eid])); elseif iscalendarduration(A) && ~iscalendarduration(C) error(message(['MATLAB:fillmissing:ConstantCalendarDuration',eid])); elseif iscategorical(A) if ischar(C) C = string(C); % make char a scalar string elseif (~iscellstr(C) && ~isstring(C)) % categorical fill constants not supported error(message(['MATLAB:fillmissing:ConstantCategorical',eid])); end elseif ischar(A) && ~ischar(C) error(message(['MATLAB:fillmissing:ConstantChar',eid])); elseif iscellstr(A) if ischar(C) C = {C}; % make char a scalar cellstr elseif ~iscellstr(C) % string constants not supported error(message(['MATLAB:fillmissing:ConstantCellstr',eid])); end elseif isstring(A) && ~isstring(C) % char and cellstr constants not supported error(message(['MATLAB:fillmissing:ConstantString',eid])); end end % checkConstantType end % fillmissing