// Source: www.gusucode.com > VC/C++ source code, UI programming, web crawler source program > code/webpageloader_SourceCode/RegExp1.cpp

    
#include "stdafx.h"
#include "regexp1.h"

// Does simple Pattern Matching (not regular expression).
// Recognises *, ? and [] with ranges.

// Although this function uses TCHAR it is not
// multibyte enabled.


// Tests String against a list of patterns separated by '|'.
//
//   String   - text to test; handed unchanged to MatchPattern().
//   Patterns - one or more patterns separated by '|'. Runs of consecutive
//              separators after a pattern are skipped as a whole.
//
// Returns TRUE as soon as MatchPattern() accepts one of the alternatives,
// FALSE if none does or if Patterns is empty.
//
// NOTE: if Patterns begins with '|', the first alternative is the empty
// pattern, which is passed to MatchPattern() like any other alternative.
BOOL MatchPatterns(LPCTSTR String, LPCTSTR Patterns)
{
   CString sRemaining = Patterns;
   while( !sRemaining.IsEmpty() ) {
      // Cut off the text in front of the next separator, or take everything
      // when no separator is left. A character literal is used on purpose:
      // it converts to TCHAR in both ANSI and Unicode builds, whereas a
      // narrow string literal is not accepted as LPCTSTR in a Unicode build.
      const int nSeparator = sRemaining.Find('|');
      const CString sPattern = ( nSeparator < 0 ) ? sRemaining
                                                  : sRemaining.Left(nSeparator);
      if( MatchPattern(String, sPattern) ) return TRUE;

      // Drop the alternative just tried together with the separator(s) after it.
      sRemaining = sRemaining.Mid(sPattern.GetLength());
      sRemaining.TrimLeft('|');
   }
   return FALSE;
}

// Matches the beginning of String against a single wildcard pattern.
//
//   String  - NUL-terminated text to test. Characters compared against
//             literals or set elements are upper-cased with _totupper() first.
//   Pattern - NUL-terminated pattern, used exactly as written (so it is
//             expected to be in upper case). Recognised elements:
//               *      any run of characters, including none
//               ?      exactly one character
//               [...]  one character out of a set; "a-b" inside the set
//                      denotes a range
//               other  that literal character
//
// Returns TRUE once the whole pattern has been consumed; characters left
// over in String do not prevent a match. Returns FALSE on a mismatch or on
// a malformed set (a range without upper limit, or a missing ']').
BOOL MatchPattern(LPCTSTR String, LPCTSTR Pattern)
{
  TCHAR c, p, l;
  for (; ;) {
    switch( p = *Pattern++ ) {
      case _T('\0'):                      // end of pattern
          return TRUE;                    // SUCCESS

      case _T('*'):
          // Try the rest of the pattern at every remaining position of
          // String, and finally against the empty remainder.
          while (*String) {               // match zero or more char
             if( MatchPattern(String++, Pattern) )
                return TRUE;
          }
          return MatchPattern(String, Pattern);

      case _T('?'):
          if( *String++ == 0 )            // match any one char
              return FALSE;               // String is already exhausted
          break;

      case _T('['):
          if( (c = *String++) == 0 )      // match char set
              return FALSE;               // String is already exhausted
          c = static_cast<TCHAR>(_totupper(c));
          l = _T('0');                    // low limit used when '-' comes first in the set
          while( (p = *Pattern++) != _T('\0') ) {
              if( p == _T(']') )          // if end of char set, then
                  return FALSE;           // no match found
              if( p == _T('-') ) {        // check a range of chars?
                  p = *Pattern;           // get high limit of range
                  if( p == _T('\0')  ||  p == _T(']') )
                      return FALSE;       // syntax
                  if( c >= l  &&  c <= p )
                      break;              // if in range, move on
              }
              l = p;                      // remember as low limit of a following range
              if( c == p )                // if char matches this element
                  break;                  // move on
          }
          while( p != _T('\0')  &&  p != _T(']') )  // got a match in char set
              p = *Pattern++;                       // skip to end of set
          // The pattern ended before the closing ']'. Pattern has already
          // moved past the terminating NUL, so stop here rather than let the
          // next loop iteration read beyond the end of the pattern.
          if( p == _T('\0') )
              return FALSE;               // syntax
          break;

      default:
          c = *String++;
          if( static_cast<TCHAR>(_totupper(c)) != p )  // check for exact char
              return FALSE;               // not a match
          break;
    }
  }
}