% Source: www.gusucode.com > bigdata toolbox MATLAB source code > bigdata/@tall/join.m

    function s = join(str, varargin)
%JOIN Combine elements of a tall string array, or merge a tall table with a table.
%   Tall string array syntaxes:
%   S = JOIN(STR)
%   S = JOIN(STR, DELIMITER)
%   S = JOIN(STR, DIM)
%   S = JOIN(STR, DELIMITER, DIM)
%
%   Tall table syntaxes:
%   C = JOIN(A, B)
%   C = JOIN(A, B, 'PARAM1',val1, 'PARAM2',val2, ...)
%
%   LIMITATIONS for tall tables:
%   (1) Only a JOIN between a tall table and an in-memory table is supported.
%   (2) [C,IB] = JOIN(...) is not supported.
%
%   See also TALL/STRING, TABLE/JOIN

%   Copyright 2016 The MathWorks, Inc.

% Only the first argument may be tall; every other argument must not be.
fname = mfilename;
if ~istall(str)
    error(message('MATLAB:bigdata:array:ArgMustBeTall', 1, upper(fname)));
end
checkNotTall(upper(fname), 1, varargin{:});

% The first input must be a tall 'string' or a tall 'table'.
str = tall.validateType(str, fname, {'string', 'table'}, 1);

if strcmp(tall.getClass(str), 'table')
    % ---- Table JOIN: tall table on the left, in-memory table on the right.
    narginchk(2,inf);

    % INPUTNAME has to be called from this function (it inspects how JOIN
    % itself was invoked). Fall back to fixed names when the caller passed
    % an expression rather than a named variable.
    Aname = inputname(1);
    if isempty(Aname), Aname = 'left'; end
    B = varargin{1};
    Bname = inputname(2);
    if isempty(Bname), Bname = 'right'; end

    if ~istable(B)
        error(message('MATLAB:table:join:InvalidInput'));
    end

    % Everything after B is forwarded untouched as join options.
    s = iJoinTable(str, Aname, B, Bname, varargin{2:end});
    return;
end

% ---- String JOIN.
narginchk(1,3);

% Defaults: single-space delimiter, dimension not yet known.
delim = ' ';
dim = [];
switch nargin
    case 3
        % JOIN(STR, DELIMITER, DIM)
        delim = varargin{1};
        dim = varargin{2};
    case 2
        % JOIN(STR, DIM) when the extra argument is numeric, otherwise
        % JOIN(STR, DELIMITER).
        if isnumeric(varargin{1})
            dim = varargin{1};
        else
            delim = varargin{1};
        end
end

if isempty(dim)
    % No dimension supplied: use the last non-singleton dimension, if the
    % adaptor knows enough about the sizes to work it out.
    dim = iGetLastNonsingletonDim(str);
end
if isempty(dim)
    % Could not deduce the dimension to use. For now we just error. In
    % future we could run both the reduction and the slice operations and
    % choose the right result lazily.
    error(message('MATLAB:bigdata:array:JoinNoDim'));
end

% The dimension is now known. Joining along dimension 1 (the tall
% dimension) is a reduction; any other dimension is handled via slicefun.
fcn = @(x) join(x,delim,dim);
if isequal(dim, 1)
    sPA = reducefun(fcn, str.ValueImpl);
else
    sPA = slicefun(fcn, str.ValueImpl);
end

% The output reuses the input adaptor after resetSizeInformation has been
% applied to it.
s = tall(sPA, resetSizeInformation(str.Adaptor));
end

function dim = iGetLastNonsingletonDim(x)
% Return the index of the last non-singleton dimension of x, or [] when the
% adaptor does not hold enough size information to decide.
%
% x is expected to expose x.Adaptor.NDims and x.Adaptor.SmallSizes; a NaN
% in either is treated as "unknown".
adaptor = x.Adaptor;
sizesKnown = ~isnan(adaptor.NDims) && ~any(isnan(adaptor.SmallSizes));

if sizesKnown
    % A leading zero stands in for the first dimension so that it always
    % counts as non-singleton: the answer is 1 whenever every entry of
    % SmallSizes equals one.
    paddedSizes = [0, adaptor.SmallSizes];
    dim = find(paddedSizes ~= 1, 1, 'last');
else
    dim = [];
end
end

function tt = iJoinTable(tA, Aname, B, Bname, varargin)
% Join the tall table tA with the in-memory table B.
%
% Aname and Bname are the names used to build suffixes for output variables
% in place of the dummy-table names. Any further arguments are forwarded to
% the underlying join calls.
fname = mfilename;

% Join stand-in tables first so the output variable names and adaptors can
% be determined up front.
% NOTE: the local names dummyA/dummyB are significant. The join below may
% append '_dummyA' / '_dummyB' to variable names, and the patterns further
% down match exactly those suffixes.
[dummyA, classKnownA] = makeDummyTable(tA, fname);
[dummyB, classKnownB] = makeDummyTable(B, fname);
dummyC = join(dummyA, dummyB, varargin{:});

% Swap any '_dummyA' / '_dummyB' suffix for '_<Aname>' / '_<Bname>'.
outputNames = regexprep(dummyC.Properties.VariableNames, '_dummyA$', ['_' Aname]);
outputNames = regexprep(outputNames, '_dummyB$', ['_' Bname]);
dummyC.Properties.VariableNames = outputNames;

% Build the output adaptors from the renamed dummy result.
outAdaptors = getAdaptorsFromDummyTable(dummyC, classKnownA & classKnownB);

% Apply the real join to tA via slicefun, forcing the precomputed names.
joinWithB = @(x)iLocalJoin(x,B,outputNames,varargin{:});
tt = slicefun(joinWithB, tA);
tt.Adaptor = outAdaptors;
end

function joined = iLocalJoin(A,B,outputNames,varargin)
% Join A with B, forwarding any extra arguments to join, then overwrite the
% variable names of the result with outputNames.
joined = join(A, B, varargin{:});
joined.Properties.VariableNames = outputNames;
end