% www.gusucode.com > mbc toolbox MATLAB source code > mbc/mbcMatchNames.m

    function [MatchInd, IsFound] = mbcMatchNames(ItemNames, AvailNames, varargin)
%MBCMATCHNAMES Match item names to a list of available names.
%
%   MATCHIND = MBCMATCHNAMES(ITEMNAMES, AVAILNAMES) looks for matches of
%   the cell array of strings in ITEMNAMES in the cell array of strings
%   AVAILNAMES and returns the indices into AVAILNAMES in MATCHIND.
%   MATCHIND will be a vector of integers the same size as ITEMNAMES and
%   may contain zeros if there is no appropriate match for an item.  The
%   matching algorithm is intended to find an appropriate matching index
%   even when no exact matches are available for an entry in ITEMNAMES.
%
%   [MATCHIND, ISFOUND] = MBCMATCHNAMES(...) returns a second output that
%   is a logical row vector indicating which item names were successfully
%   matched.  If AVAILNAMES is empty, MATCHIND is all zeros and ISFOUND is
%   all false.
%
%   MBCMATCHNAMES(..., 'OptionName', 'OptionValue') specifies parameters
%   for the matching algorithm.  Available options are listed below.
%
%       Option Name     |  Description
%   --------------------+--------------------------------------------------
%   CaseSensitive       |  'on' or 'off'.  Specifies how to do string
%                       |  comparisons.  Defaults to 'off'.
%                       |
%   MatchLevel          |  0, 1 or 2.  Specifies the level of matching that
%                       |  will be attempted.  Level 2 implies the full
%                       |  item name must exist in the match.  Level 1
%                       |  implies a partial match is good enough.  Level 0
%                       |  implies that any match is better than none.  The
%                       |  default level is 2.
%
%   Matching proceeds in stages; each available name is handed out by the
%   staged search at most once:
%     1. exact matches (then case-insensitive exact matches if allowed);
%     2. the item name found at the beginning, end, or middle of an
%        available name (item names of 3 or more characters only);
%     3. progressively shorter prefixes of the item name (MatchLevel < 2);
%     4. any remaining available name (MatchLevel < 1).

%   Copyright 2006 The MathWorks, Inc.


% Default options
opts = struct(...
    'CaseSensitive', false, ...
    'MatchLevel', 2);

% Parse user options.  Unrecognised option names are ignored.
for n = 1:2:length(varargin)
    switch lower(varargin{n})
        case 'casesensitive'
            opts.CaseSensitive = i_onoff(varargin{n+1});
        case 'matchlevel'
            opts.MatchLevel = varargin{n+1};
    end
end
if opts.CaseSensitive
    regexpopt = 'matchcase';
else
    regexpopt = 'ignorecase';
end

MatchInd = zeros(size(ItemNames));
if isempty(AvailNames)
    % Nothing to match against.  IsFound must still be assigned so that
    % callers requesting the second output do not get an error.
    IsFound = false(1, numel(ItemNames));
    return
end

% Remove any leading/trailing whitespace from the names to be matched
AvailNames = strtrim(AvailNames);

% Logical vector to track whether each name is still available
IsAvail = true(size(AvailNames));


% Stage one: look for any exact matches
[IsFound, MatchInd] = ismember(ItemNames, AvailNames);
% Make IsFound a row vector so that "for n = find(~IsFound)" iterates over
% individual indices
IsFound = IsFound(:)';

% Register the exact matches and mark the matched names as taken
nMatchInputs(true(1, length(ItemNames)), MatchInd);


% Do a case-insensitive exact match if allowed
if ~all(IsFound) && ~opts.CaseSensitive
    [fnd, idx] = ismember(lower(ItemNames(~IsFound)), lower(AvailNames(IsAvail)));

    if any(fnd)
        % Register matches for the items that aren't already found
        nMatchInputs(~IsFound, idx);
    end
end


% Stage two: look for matches of the name at the beginning, end then middle
% of a string, for each remaining name to be matched that is at least 3
% characters long (below this it doesn't really make sense to claim a
% match).
if ~all(IsFound)
    for n = find(~IsFound)
        nm = ItemNames{n};
        if length(nm)>2
            % Beginning
            matched = nCompareStrings(nm, AvailNames(IsAvail), length(nm));
            if any(matched)
                nMatchInputs(n, find(matched,1));
                % Move on to the next name; "break" here would abandon
                % all of the remaining unmatched names.
                continue
            end

            % The item name is literal text, not a pattern: escape any
            % regular expression metacharacters before searching.
            pattern = regexptranslate('escape', nm);

            % End
            if nDoRegexpMatch(n, [pattern '$'])
                continue
            end

            % Middle (regexp searches are unanchored, so the bare name
            % finds it anywhere in the string)
            nDoRegexpMatch(n, pattern);
        end
    end
end


% Stage three: look for partial matches at the beginning of the string,
% trying the longest prefix of the item name first
if ~all(IsFound) && opts.MatchLevel<2
    for n = find(~IsFound)
        nm = ItemNames{n};
        for m = length(nm):-1:1
            matched = nCompareStrings(nm, AvailNames(IsAvail), m);
            if any(matched)
                nMatchInputs(n, find(matched,1));
                break  % next name
            end
        end
    end
end


% Stage four: pick any remaining available names.  This is only done if
% MatchLevel is 0.
if ~all(IsFound) && opts.MatchLevel<1
    for n = find(~IsFound)
        nMatchInputs(n, 1);
    end
end


    % Record matches for the items selected by InputIdx.  UntakenIdx holds
    % indices into the list of still-available names (AvailNames(IsAvail));
    % zeros mean "no match".  Updates MatchInd, IsFound and IsAvail in the
    % parent workspace.
    function nMatchInputs(InputIdx, UntakenIdx)
        % Convert the untaken available index to an index into the full
        % list
        if any(UntakenIdx>0)
            f = find(IsAvail, max(UntakenIdx));
            if length(f)==max(UntakenIdx)
                UntakenIdx(UntakenIdx>0) = f(UntakenIdx(UntakenIdx>0));
            else
                % Tried to take one that doesn't exist
                UntakenIdx(:) = 0;
            end
        end

        % Save the match
        MatchInd(InputIdx) = UntakenIdx;
        IsFound(InputIdx) = MatchInd(InputIdx)>0;

        % Mark the used names as taken
        IsAvail(UntakenIdx(UntakenIdx>0)) = false;
    end


    % Search the still-available names with the regular expression expr
    % and, if any of them match, assign the first one to item idx.
    % Returns true when a match was registered.
    function found = nDoRegexpMatch(idx, expr)
        matches = regexp(AvailNames(IsAvail), expr, regexpopt);
        matches = ~cellfun(@isempty, matches);
        if any(matches)
            nMatchInputs(idx, find(matches, 1));
            found = true;
        else
            found = false;
        end
    end


    % String comparison helper.  This takes care of switching case
    % sensitivity.  With nComp supplied only the first nComp characters
    % are compared; otherwise the whole strings are compared.
    function ret = nCompareStrings(str1, str2, nComp)
        if nargin>2
            % Use strncmp
            if opts.CaseSensitive
                ret = strncmp(str1, str2, nComp);
            else
                ret = strncmpi(str1, str2, nComp);
            end
        else
            % Use strcmp
            if opts.CaseSensitive
                ret = strcmp(str1, str2);
            else
                ret = strcmpi(str1, str2);
            end
        end
    end
end


% Translate an 'on'/'off' option string into a logical flag.
function val = i_onoff(val)
    % Non-character inputs (e.g. values that are already logical) are
    % passed through untouched.
    if ~ischar(val)
        return
    end
    % Any string other than 'on' (compared case-insensitively) gives false.
    val = strcmpi('on', val);
end