6325 lines
219 KiB
C++
6325 lines
219 KiB
C++
//+---------------------------------------------------------------------------
|
|
//
|
|
// Copyright ( C ) Microsoft, 1994 - 2002.
|
|
//
|
|
// File: regexpr2.cpp
|
|
//
|
|
// Contents: implementation for rpattern methods, definitions for all the
|
|
// subexpression types used to perform the matching, the
|
|
// charset class definition .
|
|
//
|
|
// Classes: too many to list here
|
|
//
|
|
// Functions:
|
|
//
|
|
// Author: Eric Niebler ( ericne@microsoft.com )
|
|
//
|
|
// History: 12-11-1998 ericne Created
|
|
// 01-05-2001 ericne Removed dependency on VC's choice
|
|
// of STL iterator types.
|
|
// 08-15-2001 ericne Removed regexpr class, moved match
|
|
// state to match_results container.
|
|
// 09-17-2001 nathann Add DEBUG_HEAP_SUPPORT
|
|
// 11-16-2001 ericne Add stack-conservative algorithm
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
#ifdef _MSC_VER
|
|
// unlimited inline expansion ( compile with /Ob1 or /Ob2 )
|
|
# pragma inline_recursion( on )
|
|
# pragma inline_depth( 255 )
|
|
// warning C4127: conditional expression is constant
|
|
// warning C4355: 'this' : used in base member initializer list
|
|
// warning C4702: unreachable code
|
|
// warning C4710: function 'blah' not inlined
|
|
// warning C4786: identifier was truncated to '255' characters in the debug information
|
|
# pragma warning( push )
|
|
# pragma warning( disable : 4127 4355 4702 4710 4786 )
|
|
#endif
|
|
|
|
#include <limits>
|
|
#include <cctype>
|
|
#include <cwchar>
|
|
#include <memory>
|
|
#include <cwctype>
|
|
#include <malloc.h>
|
|
#include <algorithm>
|
|
|
|
#ifdef __MWERKS__
|
|
# include <alloca.h>
|
|
#endif
|
|
|
|
// If the implementation file has been included in the header, then we
|
|
// need to mark some functions as inline to prevent them from being multiply
|
|
// defined. But if the implementation file is not included in the header,
|
|
// we can't mark them as inline, otherwise the linker won't find them.
|
|
#ifdef REGEXPR_H
|
|
# define REGEXPR_H_INLINE inline
|
|
#else
|
|
# define REGEXPR_H_INLINE
|
|
# include "regexpr2.h"
|
|
#endif
|
|
|
|
#ifdef REGEX_TO_INCLUDE
|
|
# include REGEX_TO_INCLUDE
|
|
#endif
|
|
|
|
// $PORT$
|
|
// _alloca is not standard
|
|
#ifndef alloca
|
|
# define alloca _alloca
|
|
#endif
|
|
|
|
namespace regex
|
|
{
|
|
|
|
namespace detail
|
|
{
|
|
|
|
// Returns the wide-character classification descriptor for the character
// class named by sz (for example "alpha"), as reported by wctype().
inline wctype_t REGEX_CDECL regex_wctype( char const * sz )
{
    // A using-directive (rather than std::wctype) lets the call resolve
    // whether the library declares wctype in std or in the global namespace.
    using namespace std;
    wctype_t const desc = wctype( sz );
    return desc;
}
|
|
|
|
namespace
|
|
{
|
|
|
|
#ifdef __GLIBC__
|
|
struct regex_ctype_t
|
|
{
|
|
int m_ctype;
|
|
wctype_t m_wctype;
|
|
};
|
|
|
|
#define REGEX_DECL_CTYPE(desc) \
|
|
inline regex_ctype_t const & wct_ ## desc() \
|
|
{ \
|
|
static regex_ctype_t const s_wct = { _IS ## desc, regex_wctype(#desc) };\
|
|
return s_wct; \
|
|
}
|
|
|
|
REGEX_DECL_CTYPE(alnum)
|
|
REGEX_DECL_CTYPE(alpha)
|
|
REGEX_DECL_CTYPE(blank)
|
|
REGEX_DECL_CTYPE(cntrl)
|
|
REGEX_DECL_CTYPE(digit)
|
|
REGEX_DECL_CTYPE(graph)
|
|
REGEX_DECL_CTYPE(lower)
|
|
REGEX_DECL_CTYPE(print)
|
|
REGEX_DECL_CTYPE(punct)
|
|
REGEX_DECL_CTYPE(space)
|
|
REGEX_DECL_CTYPE(upper)
|
|
REGEX_DECL_CTYPE(xdigit)
|
|
regex_ctype_t const wct_zero = { 0, 0 };
|
|
|
|
inline regex_ctype_t & operator |= ( regex_ctype_t & lhs, regex_ctype_t const & rhs )
|
|
{
|
|
lhs.m_ctype |= rhs.m_ctype;
|
|
lhs.m_wctype |= rhs.m_wctype;
|
|
return lhs;
|
|
}
|
|
inline regex_ctype_t operator | ( regex_ctype_t lhs, regex_ctype_t const & rhs )
|
|
{
|
|
return lhs |= rhs;
|
|
}
|
|
inline int REGEX_CDECL regex_isctype( int ch, regex_ctype_t const & desc )
|
|
{
|
|
return __isctype( ch, desc.m_ctype );
|
|
}
|
|
inline int REGEX_CDECL regex_iswctype( wint_t wc, regex_ctype_t desc )
|
|
{
|
|
using namespace std;
|
|
return iswctype( wc, desc.m_wctype );
|
|
}
|
|
inline bool operator == ( regex_ctype_t const & lhs, regex_ctype_t const & rhs )
|
|
{
|
|
return lhs.m_ctype == rhs.m_ctype && lhs.m_wctype == rhs.m_wctype;
|
|
}
|
|
inline bool operator != ( regex_ctype_t const & lhs, regex_ctype_t const & rhs )
|
|
{
|
|
return lhs.m_ctype != rhs.m_ctype || lhs.m_wctype != rhs.m_wctype;
|
|
}
|
|
#else
|
|
typedef wctype_t regex_ctype_t;
|
|
|
|
#define REGEX_DECL_CTYPE(desc) \
|
|
inline regex_ctype_t const wct_ ## desc() \
|
|
{ \
|
|
static regex_ctype_t const s_wct = regex_wctype(#desc); \
|
|
return s_wct; \
|
|
}
|
|
|
|
REGEX_DECL_CTYPE(alnum)
|
|
REGEX_DECL_CTYPE(alpha)
|
|
REGEX_DECL_CTYPE(cntrl)
|
|
REGEX_DECL_CTYPE(digit)
|
|
REGEX_DECL_CTYPE(graph)
|
|
REGEX_DECL_CTYPE(lower)
|
|
REGEX_DECL_CTYPE(print)
|
|
REGEX_DECL_CTYPE(punct)
|
|
REGEX_DECL_CTYPE(space)
|
|
REGEX_DECL_CTYPE(upper)
|
|
REGEX_DECL_CTYPE(xdigit)
|
|
regex_ctype_t const wct_zero = 0;
|
|
|
|
#if defined(_MSC_VER) & ( _MSC_VER==1200 | defined(_CPPLIB_VER) )
|
|
inline regex_ctype_t const wct_blank() { return _BLANK; } // work around for bug in VC++
|
|
inline int REGEX_CDECL regex_isctype( int ch, regex_ctype_t desc )
|
|
{
|
|
return _isctype( ch, static_cast<int>( desc ) );
|
|
}
|
|
#else
|
|
REGEX_DECL_CTYPE(blank)
|
|
inline int REGEX_CDECL regex_isctype( int ch, regex_ctype_t desc )
|
|
{
|
|
using namespace std;
|
|
return iswctype( btowc( ch ), desc );
|
|
}
|
|
#endif
|
|
inline int REGEX_CDECL regex_iswctype( wint_t wc, regex_ctype_t desc )
|
|
{
|
|
using namespace std;
|
|
return iswctype( wc, desc );
|
|
}
|
|
#endif
|
|
} // unnamed namespace
|
|
|
|
template< typename CStringsT, typename IterT >
|
|
bool _do_match_iterative( sub_expr_base<IterT> const * expr, match_param<IterT> & param, IterT icur, CStringsT );
|
|
|
|
// NathanN:
// When REGEX_DEBUG_HEAP is non-zero the allocator object no longer
// sub-allocates memory (see DEFAULT_BLOCK_SIZE). This enables heap checking
// tools like AppVerifier & PageHeap to find errors like buffer overruns.
//
// A value supplied by the user (e.g. -DREGEX_DEBUG_HEAP=1) is respected.
// Otherwise the setting follows REGEX_DEBUG. After this block the macro is
// always defined, so it can be tested with a plain #if.
#ifndef REGEX_DEBUG_HEAP
# if REGEX_DEBUG
#  define REGEX_DEBUG_HEAP 1
# else
#  define REGEX_DEBUG_HEAP 0
# endif
#endif
|
|
|
|
// Returns the default block size, selected at compile time by
// REGEX_DEBUG_HEAP.
REGEXPR_H_INLINE size_t DEFAULT_BLOCK_SIZE()
{
#if REGEX_DEBUG_HEAP
    // put each allocation in its own mem_block
    size_t const cb_block = 1;
#else
    // put multiple allocations in each mem_block
    size_t const cb_block = 352;
#endif
    return cb_block;
}
|
|
|
|
template< typename IBeginT, typename IEndT >
|
|
inline size_t parse_int( IBeginT & ibegin, IEndT iend, size_t const max_ = size_t( -1 ) )
|
|
{
|
|
typedef typename std::iterator_traits<IEndT>::value_type char_type;
|
|
size_t retval = 0;
|
|
while( iend != ibegin && REGEX_CHAR(char_type,'0') <= *ibegin && REGEX_CHAR(char_type,'9') >= *ibegin && max_ > retval )
|
|
{
|
|
retval *= 10;
|
|
retval += static_cast<size_t>( *ibegin - REGEX_CHAR(char_type,'0') );
|
|
++ibegin;
|
|
}
|
|
if( max_ < retval )
|
|
{
|
|
retval /= 10;
|
|
--ibegin;
|
|
}
|
|
return retval;
|
|
}
|
|
|
|
// --------------------------------------------------------------------------
|
|
//
|
|
// Class: boyer_moore
|
|
//
|
|
// Description: fast sub-string search algorithm
|
|
//
|
|
// Members: m_begin - iter to first char in pattern sequence
|
|
// m_last - iter to last char in pattern sequence
|
|
// m_len - length of the pattern sequence
|
|
// m_off - array of offsets, indexed by ASCII char values
|
|
//
|
|
// History: 6/8/2003 - ericne - Created
|
|
//
|
|
// --------------------------------------------------------------------------
|
|
template< typename IterT >
|
|
class boyer_moore
|
|
{
|
|
typedef typename std::iterator_traits<IterT>::value_type char_type;
|
|
typedef typename std::char_traits<char_type> traits_type;
|
|
|
|
enum { OFFSET_SIZE = UCHAR_MAX + 1 };
|
|
|
|
IterT m_begin;
|
|
IterT m_last;
|
|
char_type const* m_low_last;
|
|
unsigned char m_len;
|
|
unsigned char m_off[ OFFSET_SIZE ];
|
|
|
|
static unsigned char hash_char( char ch ) { return static_cast<unsigned char>( ch ); }
|
|
static unsigned char hash_char( signed char ch ) { return static_cast<unsigned char>( ch ); }
|
|
static unsigned char hash_char( unsigned char ch ) { return ch; }
|
|
static unsigned char hash_char( wchar_t ch ) { return static_cast<unsigned char>( ch % OFFSET_SIZE ); }
|
|
template< typename CharT >
|
|
static unsigned char REGEX_VC6(REGEX_CDECL) hash_char( CharT ch REGEX_VC6(...) )
|
|
{
|
|
return static_cast<unsigned char>( std::char_traits<CharT>::to_int_type( ch ) % OFFSET_SIZE );
|
|
}
|
|
|
|
// case-sensitive Boyer-Moore search
|
|
template< typename OtherT >
|
|
OtherT find_with_case( OtherT begin, OtherT end ) const
|
|
{
|
|
typedef typename std::iterator_traits<OtherT>::difference_type diff_type;
|
|
diff_type const endpos = std::distance( begin, end );
|
|
diff_type offset = m_len;
|
|
|
|
for( diff_type curpos = offset; curpos < endpos; curpos += offset )
|
|
{
|
|
std::advance( begin, offset );
|
|
|
|
IterT pat_tmp = m_last;
|
|
OtherT str_tmp = begin;
|
|
|
|
for( ; traits_type::eq( *str_tmp, *pat_tmp );
|
|
--pat_tmp, --str_tmp )
|
|
{
|
|
if( pat_tmp == m_begin )
|
|
{
|
|
return str_tmp;
|
|
}
|
|
}
|
|
|
|
offset = m_off[ hash_char( *begin ) ];
|
|
}
|
|
|
|
return end;
|
|
}
|
|
|
|
// case-insensitive Boyer-Moore search
|
|
template< typename OtherT >
|
|
OtherT find_without_case( OtherT begin, OtherT end ) const
|
|
{
|
|
typedef typename std::iterator_traits<OtherT>::difference_type diff_type;
|
|
diff_type const endpos = std::distance( begin, end );
|
|
diff_type offset = m_len;
|
|
|
|
for( diff_type curpos = offset; curpos < endpos; curpos += offset )
|
|
{
|
|
std::advance( begin, offset );
|
|
|
|
IterT pat_tmp = m_last;
|
|
char_type const* low_tmp = m_low_last;
|
|
OtherT str_tmp = begin;
|
|
|
|
for( ; traits_type::eq( *str_tmp, *pat_tmp ) || traits_type::eq( *str_tmp, *low_tmp );
|
|
--pat_tmp, --str_tmp, --low_tmp )
|
|
{
|
|
if( pat_tmp == m_begin )
|
|
{
|
|
return str_tmp;
|
|
}
|
|
}
|
|
|
|
offset = m_off[ hash_char( *begin ) ];
|
|
}
|
|
|
|
return end;
|
|
}
|
|
|
|
public:
|
|
// initialize the Boyer-Moore search data structure, using the
|
|
// search sub-sequence to prime the pump.
|
|
boyer_moore( IterT begin, IterT end, char_type const* lower = 0 )
|
|
: m_begin( begin )
|
|
, m_last( begin )
|
|
, m_low_last( lower )
|
|
{
|
|
typedef typename std::iterator_traits<IterT>::difference_type diff_type;
|
|
diff_type diff = std::distance( begin, end );
|
|
m_len = static_cast<unsigned char>( regex_min<diff_type>( diff, UCHAR_MAX ) );
|
|
std::fill_n( m_off, ARRAYSIZE( m_off ), m_len );
|
|
--m_len;
|
|
|
|
for( unsigned char offset = m_len; offset; --offset, ++m_last )
|
|
{
|
|
m_off[ hash_char( *m_last ) ] = offset;
|
|
}
|
|
|
|
if( m_low_last )
|
|
{
|
|
for( unsigned char offset = m_len; offset; --offset, ++m_low_last )
|
|
{
|
|
unsigned char hash = hash_char( *m_low_last );
|
|
m_off[ hash ] = regex_min( m_off[ hash ], offset );
|
|
}
|
|
}
|
|
}
|
|
|
|
template< typename OtherT >
|
|
OtherT find( OtherT begin, OtherT end ) const
|
|
{
|
|
if( m_low_last )
|
|
{
|
|
return find_without_case( begin, end );
|
|
}
|
|
else
|
|
{
|
|
return find_with_case( begin, end );
|
|
}
|
|
}
|
|
|
|
static void * operator new( size_t size, regex_arena & arena )
|
|
{
|
|
return arena.allocate( size );
|
|
}
|
|
static void operator delete( void *, regex_arena & )
|
|
{
|
|
}
|
|
};
|
|
|
|
// This class is used to speed up character set matching by providing
|
|
// a bitset that spans the ASCII range. std::bitset is not used because
|
|
// the range-checking slows it down.
|
|
// Note: The division and modulus operations are optimized by the compiler
|
|
// into bit-shift operations.
|
|
// Fixed-size bit set with one bit for every unsigned char value.
// Not copyable.
class ascii_bitvector
{
    typedef unsigned int elem_type;

    enum
    {
        CBELEM = CHAR_BIT * sizeof( elem_type ), // count of bits per element
        CELEMS = ( UCHAR_MAX+1 ) / CBELEM        // number of elements in array
    };

    elem_type m_rg[ CELEMS ];

    // Index of the array element holding the bit for ch.
    static unsigned int slot_of( unsigned char ch )
    {
        return ch / CBELEM;
    }

    // Mask selecting the bit for ch within its array element.
    static elem_type mask_of( unsigned char ch )
    {
        return ( elem_type )1U << ( ch % CBELEM );
    }

    // Used to inline operations like: bv1 |= ~bv2; without creating temp bit vectors.
    struct not_ascii_bitvector
    {
        ascii_bitvector const & m_ref;
        not_ascii_bitvector( ascii_bitvector const & ref )
            : m_ref( ref ) {}
    private:
        not_ascii_bitvector & operator=( not_ascii_bitvector const & );
    };

    // Copying is disabled (declared, never defined).
    ascii_bitvector( ascii_bitvector const & );
    ascii_bitvector & operator=( ascii_bitvector const & );

public:
    // Starts with every bit cleared.
    ascii_bitvector()
    {
        zero();
    }

    // Clears every bit.
    void zero()
    {
        for( int i=0; i<CELEMS; ++i )
            m_rg[ i ] = 0;
    }

    // Sets the bit for ch.
    void set( unsigned char ch )
    {
        m_rg[ slot_of( ch ) ] |= mask_of( ch );
    }

    // Returns true if the bit for ch is set.
    bool operator[]( unsigned char ch ) const
    {
        elem_type const hit = m_rg[ slot_of( ch ) ] & mask_of( ch );
        return 0 != hit;
    }

    // Returns a lightweight marker meaning "the complement of *this"; it is
    // only meaningful as the right-hand side of operator|=.
    not_ascii_bitvector const operator~() const
    {
        return not_ascii_bitvector( *this );
    }

    // Sets every bit that is set in that.
    ascii_bitvector & operator|=( ascii_bitvector const & that )
    {
        for( int i=0; i<CELEMS; ++i )
        {
            m_rg[ i ] |= that.m_rg[ i ];
        }
        return *this;
    }

    // Sets every bit that is clear in the vector referenced by that.
    ascii_bitvector & operator|=( not_ascii_bitvector const & that )
    {
        ascii_bitvector const & src = that.m_ref;
        for( int i=0; i<CELEMS; ++i )
        {
            m_rg[ i ] |= ~src.m_rg[ i ];
        }
        return *this;
    }
};
|
|
|
|
// An inclusive range of wide characters: [first, second].
typedef std::pair<wchar_t, wchar_t> range_type;

// Determines if one range is less than another: rg1 is "less" only when it
// ends strictly before rg2 begins, so overlapping ranges compare equivalent.
// Used in binary search of the range vector.
struct range_less
{
    bool operator()( range_type const & rg1, range_type const & rg2 ) const
    {
        wchar_t const end_of_first    = rg1.second;
        wchar_t const start_of_second = rg2.first;
        return end_of_first < start_of_second;
    }
};
|
|
|
|
// A singly-linked list, which works even if the allocator
|
|
// has per-instance state.
|
|
template< typename T, typename AllocT=std::allocator<T> >
|
|
class slist
|
|
{
|
|
struct cons
|
|
{
|
|
T car;
|
|
cons * cdr;
|
|
|
|
cons( T const & t, cons * nxt )
|
|
: car( t )
|
|
, cdr( nxt )
|
|
{
|
|
}
|
|
};
|
|
|
|
typedef typename rebind<AllocT, cons>::type cons_allocator;
|
|
typedef typename rebind<AllocT, char>::type char_allocator;
|
|
|
|
#if !defined(_MSC_VER) | 1200 < _MCS_VER
|
|
// Use the empty base optimization to avoid reserving
|
|
// space for the allocator if it is empty.
|
|
struct slist_impl : cons_allocator
|
|
{
|
|
cons * m_lst;
|
|
|
|
slist_impl( cons_allocator const & alloc, cons *lst )
|
|
: cons_allocator( alloc )
|
|
, m_lst( lst )
|
|
{
|
|
}
|
|
cons_allocator & allocator()
|
|
{
|
|
return *this;
|
|
}
|
|
};
|
|
#else
|
|
struct slist_impl
|
|
{
|
|
cons_allocator m_alloc;
|
|
cons *m_lst;
|
|
|
|
slist_impl( cons_allocator const & alloc, cons *lst )
|
|
: m_alloc( alloc )
|
|
, m_lst( lst )
|
|
{
|
|
}
|
|
cons_allocator & allocator()
|
|
{
|
|
return m_alloc;
|
|
}
|
|
};
|
|
#endif
|
|
|
|
slist_impl m_impl;
|
|
|
|
// find the previous node in the list (*prev(lst)==lst)
|
|
cons ** prev( cons *lst, cons *hint = 0 )
|
|
{
|
|
if( m_impl.m_lst == lst )
|
|
return &m_impl.m_lst;
|
|
if( !hint || hint->cdr != lst )
|
|
for( hint=m_impl.m_lst; hint->cdr != lst; hint=hint->cdr )
|
|
{}
|
|
return &hint->cdr;
|
|
}
|
|
public:
|
|
typedef T value_type;
|
|
typedef T* pointer;
|
|
typedef T& reference;
|
|
typedef T const* const_pointer;
|
|
typedef T const& const_reference;
|
|
typedef size_t size_type;
|
|
|
|
struct iterator : public std::iterator<std::forward_iterator_tag, T>
|
|
{
|
|
friend class slist<T,AllocT>;
|
|
explicit iterator( cons * pcons = 0 )
|
|
: m_pcons( pcons )
|
|
{
|
|
}
|
|
T & operator*() const
|
|
{
|
|
return m_pcons->car;
|
|
}
|
|
T * operator->() const
|
|
{
|
|
return &m_pcons->car;
|
|
}
|
|
iterator & operator++()
|
|
{
|
|
m_pcons = m_pcons->cdr;
|
|
return *this;
|
|
}
|
|
iterator operator++( int )
|
|
{
|
|
iterator i( *this );
|
|
++*this;
|
|
return i;
|
|
}
|
|
bool operator==( iterator it )
|
|
{
|
|
return m_pcons == it.m_pcons;
|
|
}
|
|
bool operator!=( iterator it )
|
|
{
|
|
return m_pcons != it.m_pcons;
|
|
}
|
|
private:
|
|
cons * m_pcons;
|
|
};
|
|
|
|
// not ideal, but good enough for gov'ment work....
|
|
typedef iterator const_iterator;
|
|
|
|
explicit slist( char_allocator const & al = char_allocator() )
|
|
: m_impl( convert_allocator<cons>( al, 0 ), 0 )
|
|
{
|
|
}
|
|
~slist()
|
|
{
|
|
clear();
|
|
}
|
|
void clear()
|
|
{
|
|
for( cons *nxt; m_impl.m_lst; m_impl.m_lst=nxt )
|
|
{
|
|
nxt = m_impl.m_lst->cdr;
|
|
m_impl.allocator().destroy( m_impl.m_lst );
|
|
m_impl.allocator().deallocate( m_impl.m_lst, 1 );
|
|
}
|
|
}
|
|
void push_front( T const & t )
|
|
{
|
|
cons * lst = m_impl.allocator().allocate( 1, 0 );
|
|
try
|
|
{
|
|
m_impl.allocator().construct( lst, cons( t, m_impl.m_lst ) );
|
|
}
|
|
catch(...)
|
|
{
|
|
m_impl.allocator().deallocate( lst, 1 );
|
|
throw;
|
|
}
|
|
m_impl.m_lst = lst;
|
|
}
|
|
template< typename PredT >
|
|
void sort( PredT pred )
|
|
{
|
|
// simple insertion sort
|
|
cons *rst=m_impl.m_lst;
|
|
m_impl.m_lst = 0;
|
|
while( rst )
|
|
{
|
|
cons *cur=m_impl.m_lst, *prv=0;
|
|
while( cur && ! pred( rst->car, cur->car ) )
|
|
prv=cur, cur=cur->cdr;
|
|
if( prv )
|
|
prv->cdr=rst, rst=rst->cdr, prv->cdr->cdr=cur;
|
|
else
|
|
m_impl.m_lst=rst, rst=rst->cdr, m_impl.m_lst->cdr=cur;
|
|
}
|
|
}
|
|
void sort()
|
|
{
|
|
this->sort( std::less<T>() );
|
|
}
|
|
iterator begin() const
|
|
{
|
|
return iterator( m_impl.m_lst );
|
|
}
|
|
iterator end() const
|
|
{
|
|
return iterator();
|
|
}
|
|
bool empty() const
|
|
{
|
|
return 0 == m_impl.m_lst;
|
|
}
|
|
size_t size() const
|
|
{
|
|
size_t len=0;
|
|
for( cons *lst=m_impl.m_lst; lst; lst=lst->cdr, ++len )
|
|
{}
|
|
return len;
|
|
}
|
|
iterator erase( iterator it, iterator hint = iterator() )
|
|
{
|
|
cons **prv = prev( it.m_pcons, hint.m_pcons ); // *prv==it.p
|
|
*prv = it.m_pcons->cdr;
|
|
m_impl.allocator().destroy( it.m_pcons );
|
|
m_impl.allocator().deallocate( it.m_pcons, 1 );
|
|
return iterator( *prv );
|
|
}
|
|
void reverse()
|
|
{
|
|
cons *prv=0, *nxt;
|
|
while( m_impl.m_lst )
|
|
nxt = m_impl.m_lst->cdr, m_impl.m_lst->cdr = prv, prv = m_impl.m_lst, m_impl.m_lst = nxt;
|
|
m_impl.m_lst = prv;
|
|
}
|
|
};
|
|
|
|
template< typename AllocT >
|
|
struct basic_charset;
|
|
|
|
template< typename CharT >
|
|
struct posixcharsoff_pred
|
|
{
|
|
CharT m_ch;
|
|
posixcharsoff_pred( CharT ch )
|
|
: m_ch( ch )
|
|
{
|
|
}
|
|
bool operator()( regex_ctype_t desc ) const
|
|
{
|
|
return ! local_isctype( m_ch, desc );
|
|
}
|
|
static int local_isctype( char ch, regex_ctype_t desc )
|
|
{
|
|
return regex_isctype( ch, desc );
|
|
}
|
|
static int local_isctype( wchar_t ch, regex_ctype_t desc )
|
|
{
|
|
return regex_iswctype( ch, desc );
|
|
}
|
|
};
|
|
|
|
template< typename CharT, bool CaseT >
|
|
struct in_charset_pred
|
|
{
|
|
CharT m_ch;
|
|
in_charset_pred( CharT ch )
|
|
: m_ch( ch )
|
|
{
|
|
}
|
|
template< typename AllocT >
|
|
bool operator()( basic_charset<AllocT> const * pcs ) const
|
|
{
|
|
REGEX_VC6( return pcs->in( m_ch COMMA bool2type<CaseT>() ); )
|
|
REGEX_NVC6( return pcs->template in<CaseT>( m_ch ); )
|
|
}
|
|
};
|
|
|
|
template< typename AllocT >
|
|
struct basic_charset
|
|
{
|
|
typedef basic_charset<std::allocator<char> > other_type;
|
|
typedef slist<range_type,std::allocator<char> > other_ranges_type;
|
|
|
|
typedef slist<range_type,AllocT> ranges_type;
|
|
typedef slist<regex_ctype_t,AllocT> posixcharsoff_type;
|
|
typedef slist<other_type const*,AllocT> nestedcharsets_type;
|
|
|
|
typedef typename rebind<AllocT, char>::type char_allocator_type;
|
|
|
|
bool m_fcompliment;
|
|
bool m_fskip_extended_check;
|
|
ascii_bitvector m_ascii_bitvector;
|
|
regex_ctype_t m_posixcharson;
|
|
ranges_type m_ranges;
|
|
posixcharsoff_type m_posixcharsoff;
|
|
nestedcharsets_type m_nestedcharsets;
|
|
|
|
explicit basic_charset( char_allocator_type const & al = char_allocator_type() )
|
|
: m_fcompliment( false )
|
|
, m_fskip_extended_check( false )
|
|
, m_ascii_bitvector()
|
|
, m_posixcharson( wct_zero )
|
|
, m_ranges( al )
|
|
, m_posixcharsoff( al )
|
|
, m_nestedcharsets( al )
|
|
{
|
|
}
|
|
|
|
// We'll be inheriting from this, so a virtual d'tor is regretably necessary.
|
|
virtual ~basic_charset()
|
|
{
|
|
}
|
|
|
|
void clear()
|
|
{
|
|
m_fcompliment = false;
|
|
m_fskip_extended_check = false;
|
|
m_ascii_bitvector.zero();
|
|
m_posixcharson = wct_zero;
|
|
m_ranges.clear();
|
|
m_posixcharsoff.clear();
|
|
m_nestedcharsets.clear();
|
|
}
|
|
|
|
// merge one charset into another
|
|
basic_charset & operator|=( other_type const & that )
|
|
{
|
|
if( that.m_fcompliment )
|
|
{
|
|
// If no posix-style character sets are used, then we can merge this
|
|
// nested character set directly into the enclosing character set.
|
|
if( wct_zero == that.m_posixcharson &&
|
|
that.m_posixcharsoff.empty() &&
|
|
that.m_nestedcharsets.empty() )
|
|
{
|
|
m_ascii_bitvector |= ~ that.m_ascii_bitvector;
|
|
|
|
// append the inverse of that.m_ranges to this->m_ranges
|
|
wchar_t chlow = UCHAR_MAX;
|
|
typedef typename other_ranges_type::const_iterator iter_type;
|
|
for( iter_type prg = that.m_ranges.begin(); that.m_ranges.end() != prg; ++prg )
|
|
{
|
|
if( UCHAR_MAX + 1 != prg->first )
|
|
m_ranges.push_front( range_type( wchar_t( chlow+1 ), wchar_t( prg->first-1 ) ) );
|
|
chlow = prg->second;
|
|
}
|
|
if( WCHAR_MAX != chlow )
|
|
m_ranges.push_front( range_type( wchar_t( chlow+1 ), WCHAR_MAX ) );
|
|
}
|
|
else
|
|
{
|
|
// There is no simple way to merge this nested character
|
|
// set into the enclosing character set, so we must save
|
|
// a pointer to the nested character set in a list.
|
|
m_nestedcharsets.push_front( &that );
|
|
}
|
|
}
|
|
else
|
|
{
|
|
m_ascii_bitvector |= that.m_ascii_bitvector;
|
|
std::copy( that.m_ranges.begin(),
|
|
that.m_ranges.end(),
|
|
std::front_inserter( m_ranges ) );
|
|
|
|
m_posixcharson |= that.m_posixcharson;
|
|
std::copy( that.m_posixcharsoff.begin(),
|
|
that.m_posixcharsoff.end(),
|
|
std::front_inserter( m_posixcharsoff ) );
|
|
|
|
std::copy( that.m_nestedcharsets.begin(),
|
|
that.m_nestedcharsets.end(),
|
|
std::front_inserter( m_nestedcharsets ) );
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
// Note overloading based on first parameter
|
|
void set_bit( char ch, bool const fnocase )
|
|
{
|
|
if( fnocase )
|
|
{
|
|
m_ascii_bitvector.set( static_cast<unsigned char>( regex_tolower( ch ) ) );
|
|
m_ascii_bitvector.set( static_cast<unsigned char>( regex_toupper( ch ) ) );
|
|
}
|
|
else
|
|
{
|
|
m_ascii_bitvector.set( static_cast<unsigned char>( ch ) );
|
|
}
|
|
}
|
|
|
|
// Note overloading based on first parameter
|
|
void set_bit( wchar_t ch, bool const fnocase )
|
|
{
|
|
if( UCHAR_MAX >= ch )
|
|
set_bit( static_cast<char>( ch ), fnocase );
|
|
else
|
|
m_ranges.push_front( range_type( ch, ch ) );
|
|
}
|
|
|
|
// Note overloading based on first two parameters
|
|
void set_bit_range( char ch1, char ch2, bool const fnocase )
|
|
{
|
|
if( static_cast<unsigned char>( ch1 ) > static_cast<unsigned char>( ch2 ) )
|
|
throw bad_regexpr( "invalid range specified in character set" );
|
|
|
|
if( fnocase )
|
|
{
|
|
// i is unsigned int to prevent overflow if ch2 is UCHAR_MAX
|
|
for( unsigned int i = static_cast<unsigned char>( ch1 );
|
|
i <= static_cast<unsigned char>( ch2 ); ++i )
|
|
{
|
|
m_ascii_bitvector.set( static_cast<unsigned char>( regex_toupper( (char) i ) ) );
|
|
m_ascii_bitvector.set( static_cast<unsigned char>( regex_tolower( (char) i ) ) );
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// i is unsigned int to prevent overflow if ch2 is UCHAR_MAX
|
|
for( unsigned int i = static_cast<unsigned char>( ch1 );
|
|
i <= static_cast<unsigned char>( ch2 ); ++i )
|
|
{
|
|
m_ascii_bitvector.set( static_cast<unsigned char>( i ) );
|
|
}
|
|
}
|
|
}
|
|
|
|
// Note overloading based on first two parameters
|
|
void set_bit_range( wchar_t ch1, wchar_t ch2, bool const fnocase )
|
|
{
|
|
if( ch1 > ch2 )
|
|
throw bad_regexpr( "invalid range specified in character set" );
|
|
|
|
if( UCHAR_MAX >= ch1 )
|
|
set_bit_range( static_cast<char>( ch1 ), static_cast<char>( regex_min<wchar_t>( UCHAR_MAX, ch2 ) ), fnocase );
|
|
|
|
if( UCHAR_MAX < ch2 )
|
|
m_ranges.push_front( range_type( regex_max( static_cast<wchar_t>( UCHAR_MAX + 1 ), ch1 ), ch2 ) );
|
|
}
|
|
|
|
void optimize( type2type<wchar_t> )
|
|
{
|
|
if( m_ranges.begin() != m_ranges.end() )
|
|
{
|
|
// this sorts on range_type.m_pfirst ( uses operator<() for pair templates )
|
|
m_ranges.sort();
|
|
|
|
// merge ranges that overlap
|
|
typename ranges_type::iterator icur=m_ranges.begin(), iprev=icur++;
|
|
while( icur != m_ranges.end() )
|
|
{
|
|
if( icur->first <= iprev->second + 1 )
|
|
{
|
|
iprev->second = regex_max( iprev->second, icur->second );
|
|
icur = m_ranges.erase( icur, iprev );
|
|
}
|
|
else
|
|
{
|
|
iprev=icur++;
|
|
}
|
|
}
|
|
}
|
|
|
|
// For the ASCII range, merge the m_posixcharson info
|
|
// into the ascii_bitvector
|
|
if( wct_zero != m_posixcharson )
|
|
{
|
|
// BUGBUG this is kind of expensive. Think of a better way.
|
|
for( unsigned int i=0; i<=UCHAR_MAX; ++i )
|
|
if( regex_isctype( i, m_posixcharson ) )
|
|
m_ascii_bitvector.set( static_cast<unsigned char>( i ) );
|
|
}
|
|
|
|
// m_fskip_extended_check is a cache which tells us whether we
|
|
// need to check the m_posixcharsoff and m_nestedcharsets vectors,
|
|
// which would only be used in nested user-defined character sets
|
|
m_fskip_extended_check = m_posixcharsoff.empty() && m_nestedcharsets.empty();
|
|
}
|
|
|
|
void optimize( type2type<char> )
|
|
{
|
|
optimize( type2type<wchar_t>() );
|
|
|
|
// the posixcharson info was merged into the ascii bitvector,
|
|
// so we don't need to ever call regex_isctype ever again.
|
|
m_posixcharson = wct_zero;
|
|
}
|
|
|
|
template< bool CaseT, typename CharT >
|
|
bool extended_check( CharT ch REGEX_VC6(COMMA bool2type<CaseT>) ) const
|
|
{
|
|
REGEX_ASSERT( m_fskip_extended_check == ( m_posixcharsoff.empty() && m_nestedcharsets.empty() ) );
|
|
|
|
if( m_fskip_extended_check )
|
|
{
|
|
return false;
|
|
}
|
|
|
|
return ( m_posixcharsoff.end() !=
|
|
std::find_if( m_posixcharsoff.begin(), m_posixcharsoff.end(),
|
|
posixcharsoff_pred<CharT>( ch ) ) )
|
|
|| ( m_nestedcharsets.end() !=
|
|
std::find_if( m_nestedcharsets.begin(), m_nestedcharsets.end(),
|
|
in_charset_pred<CharT, CaseT>( ch ) ) );
|
|
}
|
|
|
|
inline bool in_ranges( wchar_t ch, true_t ) const
|
|
{
|
|
typedef typename ranges_type::const_iterator iter_type;
|
|
iter_type ibegin = m_ranges.begin(), iend = m_ranges.end();
|
|
|
|
return ibegin != iend &&
|
|
std::binary_search( ibegin, iend, range_type( ch, ch ), range_less() );
|
|
}
|
|
|
|
inline bool in_ranges( wchar_t ch, false_t ) const
|
|
{
|
|
typedef typename ranges_type::const_iterator iter_type;
|
|
iter_type ibegin = m_ranges.begin(), iend = m_ranges.end();
|
|
|
|
if( ibegin == iend )
|
|
return false;
|
|
|
|
wchar_t const chup = regex_toupper( ch );
|
|
if( std::binary_search( ibegin, iend, range_type( chup, chup ), range_less() ) )
|
|
return true;
|
|
|
|
wchar_t const chlo = regex_tolower( ch );
|
|
if( chup == chlo )
|
|
return false;
|
|
|
|
return std::binary_search( ibegin, iend, range_type( chlo, chlo ), range_less() );
|
|
}
|
|
|
|
// Note overloading based on parameter
|
|
template< bool CaseT >
|
|
bool in( char ch REGEX_VC6(COMMA bool2type<CaseT>) ) const
|
|
{
|
|
// Whoops, forgot to call optimize() on this charset
|
|
REGEX_ASSERT( wct_zero == m_posixcharson );
|
|
|
|
return m_fcompliment !=
|
|
(
|
|
( m_ascii_bitvector[ static_cast<unsigned char>( ch ) ] )
|
|
|| ( extended_check REGEX_NVC6(<CaseT>) ( ch REGEX_VC6(COMMA bool2type<CaseT>()) ) )
|
|
);
|
|
}
|
|
|
|
// Note overloading based on parameter
|
|
template< bool CaseT >
|
|
bool in( wchar_t ch REGEX_VC6(COMMA bool2type<CaseT>) ) const
|
|
{
|
|
// use range_match_type to see if this character is within one of the
|
|
// ranges stored in m_rgranges.
|
|
return m_fcompliment !=
|
|
(
|
|
( ( UCHAR_MAX >= ch ) ?
|
|
( m_ascii_bitvector[ static_cast<unsigned char>( ch ) ] ) :
|
|
( ( in_ranges( ch, bool2type<CaseT>() ) )
|
|
|| ( wct_zero != m_posixcharson && regex_iswctype( ch, m_posixcharson ) ) ) )
|
|
|| ( extended_check REGEX_NVC6(<CaseT>) ( ch REGEX_VC6(COMMA bool2type<CaseT>()) ) )
|
|
);
|
|
}
|
|
|
|
private:
|
|
basic_charset & operator=( basic_charset const & that );
|
|
basic_charset( basic_charset const & that );
|
|
};
|
|
|
|
// Intrinsic character sets are allocated on the heap with the standard allocator.
|
|
// They are either the built-in character sets, or the user-defined ones.
|
|
// A basic_charset that uses std::allocator<char>. Not copyable.
struct charset : public basic_charset<std::allocator<char> >
{
    // Creates an empty set; the base class is default-constructed.
    charset()
        : basic_charset<std::allocator<char> >()
    {
    }

private:
    // Copying is disabled (declared, never defined).
    charset( charset const & );
    charset & operator=( charset const & );
};
|
|
|
|
// charset is no longer an incomplete type so we now
|
|
// know how to destroy one. free_charset() is used in syntax2.h
|
|
// Destroys a charset with delete. Passing a null pointer is harmless,
// because deleting a null pointer does nothing.
REGEXPR_H_INLINE void free_charset( charset const * pcharset )
{
    charset const * const doomed = pcharset;
    delete doomed;
}
|
|
|
|
// Custom character sets are the ones that appear in patterns between
|
|
// square brackets. They are allocated in a regex_arena to speed up
|
|
// pattern compilation and to make rpattern clean-up faster.
|
|
// A basic_charset whose storage and list nodes come from a regex_arena.
// Both operator delete overloads are no-ops, so deleting a custom_charset
// runs its destructor but returns no memory. Not copyable.
struct custom_charset : public basic_charset<regex_arena>
{
    // Placement form: new( arena ) custom_charset( arena )
    static void * operator new( size_t size, regex_arena & arena )
    {
        void * const pv = arena.allocate( size );
        return pv;
    }

    // Matching placement delete; does nothing.
    static void operator delete( void *, regex_arena & )
    {
    }

    // Ordinary delete; does nothing.
    static void operator delete( void * )
    {
    }

    // Creates an empty set; arena is passed on to the base class.
    custom_charset( regex_arena & arena )
        : basic_charset<regex_arena>( arena )
    {
    }

private:
    // Copying is disabled (declared, never defined).
    custom_charset( custom_charset const & );
    custom_charset & operator=( custom_charset const & );
};
|
|
|
|
template< typename CharT >
|
|
class intrinsic_charsets
|
|
{
|
|
struct intrinsic_charset : public charset
|
|
{
|
|
intrinsic_charset( bool fcompliment, regex_ctype_t desc, char const * sz )
|
|
{
|
|
reset( fcompliment, desc, sz );
|
|
}
|
|
void reset( bool fcompliment, regex_ctype_t desc, char const * sz )
|
|
{
|
|
clear();
|
|
m_fcompliment = fcompliment;
|
|
m_posixcharson = desc;
|
|
for( ; *sz; ++sz )
|
|
m_ascii_bitvector.set( static_cast<unsigned char>( *sz ) );
|
|
optimize( type2type<CharT>() );
|
|
}
|
|
private:
|
|
intrinsic_charset( intrinsic_charset const & );
|
|
intrinsic_charset & operator=( intrinsic_charset const & );
|
|
};
|
|
|
|
static intrinsic_charset & _get_word_charset()
|
|
{
|
|
static intrinsic_charset s_word_charset( false, wct_alpha()|wct_digit(), "_" );
|
|
return s_word_charset;
|
|
}
|
|
static intrinsic_charset & _get_digit_charset()
|
|
{
|
|
static intrinsic_charset s_digit_charset( false, wct_digit(), "" );
|
|
return s_digit_charset;
|
|
}
|
|
static intrinsic_charset & _get_space_charset()
|
|
{
|
|
static intrinsic_charset s_space_charset( false, wct_space(), "" );
|
|
return s_space_charset;
|
|
}
|
|
static intrinsic_charset & _get_not_word_charset()
|
|
{
|
|
static intrinsic_charset s_not_word_charset( true, wct_alpha()|wct_digit(), "_" );
|
|
return s_not_word_charset;
|
|
}
|
|
static intrinsic_charset & _get_not_digit_charset()
|
|
{
|
|
static intrinsic_charset s_not_digit_charset( true, wct_digit(), "" );
|
|
return s_not_digit_charset;
|
|
}
|
|
static intrinsic_charset & _get_not_space_charset()
|
|
{
|
|
static intrinsic_charset s_not_space_charset( true, wct_space(), "" );
|
|
return s_not_space_charset;
|
|
}
|
|
public:
|
|
static charset const & get_word_charset()
|
|
{
|
|
return _get_word_charset();
|
|
}
|
|
static charset const & get_digit_charset()
|
|
{
|
|
return _get_digit_charset();
|
|
}
|
|
static charset const & get_space_charset()
|
|
{
|
|
return _get_space_charset();
|
|
}
|
|
static charset const & get_not_word_charset()
|
|
{
|
|
return _get_not_word_charset();
|
|
}
|
|
static charset const & get_not_digit_charset()
|
|
{
|
|
return _get_not_digit_charset();
|
|
}
|
|
static charset const & get_not_space_charset()
|
|
{
|
|
return _get_not_space_charset();
|
|
}
|
|
static void reset()
|
|
{
|
|
_get_word_charset().reset( false, wct_alpha()|wct_digit(), "_" );
|
|
_get_digit_charset().reset( false, wct_digit(), "" );
|
|
_get_space_charset().reset( false, wct_space(), "" );
|
|
_get_not_word_charset().reset( true, wct_alpha()|wct_digit(), "_" );
|
|
_get_not_digit_charset().reset( true, wct_digit(), "" );
|
|
_get_not_space_charset().reset( true, wct_space(), "" );
|
|
}
|
|
};
|
|
|
|
//
|
|
// Operator implementations
|
|
//
|
|
|
|
// Evaluates the beginning-of-string condition
|
|
template< typename CStringsT >
|
|
struct bos_t
|
|
{
|
|
template< typename IterT >
|
|
static bool eval( match_param<IterT> const & param, IterT iter )
|
|
{
|
|
return param.m_ibufferbegin == iter;
|
|
}
|
|
};
|
|
|
|
// Find the beginning of a line, either beginning of a string, or the character
|
|
// immediately following a newline
|
|
template< typename CStringsT >
|
|
struct bol_t
|
|
{
|
|
template< typename IterT >
|
|
static bool eval( match_param<IterT> const & param, IterT iter )
|
|
{
|
|
typedef typename std::iterator_traits<IterT>::value_type char_type;
|
|
typedef std::char_traits<char_type> traits_type;
|
|
|
|
return param.m_ibufferbegin == iter || traits_type::eq( REGEX_CHAR(char_type,'\n'), *--iter );
|
|
}
|
|
};
|
|
|
|
// Evaluates end-of-string condition for string's
|
|
template< typename CStringsT >
|
|
struct eos_t
|
|
{
|
|
template< typename IterT >
|
|
static bool eval( match_param<IterT> const & param, IterT iter )
|
|
{
|
|
return param.m_iend == iter;
|
|
}
|
|
};
|
|
template<>
|
|
struct eos_t<true_t>
|
|
{
|
|
template< typename IterT >
|
|
static bool eval( match_param<IterT> const &, IterT iter )
|
|
{
|
|
typedef typename std::iterator_traits<IterT>::value_type char_type;
|
|
typedef std::char_traits<char_type> traits_type;
|
|
|
|
return traits_type::eq( *iter, char_type() );
|
|
}
|
|
};
|
|
|
|
// Evaluates end-of-line conditions, either the end of the string, or a
|
|
// newline character.
|
|
template< typename CStringsT >
|
|
struct eol_t
|
|
{
|
|
template< typename IterT >
|
|
static bool eval( match_param<IterT> const & param, IterT iter )
|
|
{
|
|
typedef typename std::iterator_traits<IterT>::value_type char_type;
|
|
typedef std::char_traits<char_type> traits_type;
|
|
|
|
return param.m_iend == iter
|
|
|| traits_type::eq( REGEX_CHAR(char_type,'\n'), *iter );
|
|
}
|
|
};
|
|
template<>
|
|
struct eol_t<true_t>
|
|
{
|
|
template< typename IterT >
|
|
static bool eval( match_param<IterT> const &, IterT iter )
|
|
{
|
|
typedef typename std::iterator_traits<IterT>::value_type char_type;
|
|
typedef std::char_traits<char_type> traits_type;
|
|
|
|
return traits_type::eq( *iter, char_type() )
|
|
|| traits_type::eq( *iter, REGEX_CHAR(char_type,'\n') );
|
|
}
|
|
};
|
|
|
|
// Evaluates perl's end-of-string conditions, either the end of the string, or a
|
|
// newline character followed by end of string. ( Only used by $ and /Z assertions )
|
|
template< typename CStringsT >
|
|
struct peos_t
|
|
{
|
|
template< typename IterT >
|
|
static bool eval( match_param<IterT> const & param, IterT iter )
|
|
{
|
|
typedef typename std::iterator_traits<IterT>::value_type char_type;
|
|
typedef std::char_traits<char_type> traits_type;
|
|
|
|
return param.m_iend == iter
|
|
|| ( traits_type::eq( REGEX_CHAR(char_type,'\n'), *iter ) && param.m_iend == ++iter );
|
|
}
|
|
};
|
|
template<>
|
|
struct peos_t<true_t>
|
|
{
|
|
template< typename IterT >
|
|
static bool eval( match_param<IterT> const &, IterT iter )
|
|
{
|
|
typedef typename std::iterator_traits<IterT>::value_type char_type;
|
|
typedef std::char_traits<char_type> traits_type;
|
|
|
|
return traits_type::eq( *iter, char_type() )
|
|
|| ( traits_type::eq( *iter, REGEX_CHAR(char_type,'\n') )
|
|
&& traits_type::eq( *++iter, char_type() ) );
|
|
}
|
|
};
|
|
|
|
// Case-sensitive character inequality.
// eval( ch1, ch2 ) returns true when the two characters differ according to
// std::char_traits<CharT>::eq.
template< typename CharT >
struct ch_neq_t
{
    typedef CharT char_type;
    typedef std::char_traits<char_type> traits_type;

    // NOTE: the 'register' storage-class specifier was dropped from the
    // parameters; it was deprecated in C++11 and removed in C++17.
    static bool eval( CharT ch1, CharT ch2 )
    {
        return ! traits_type::eq( ch1, ch2 );
    }
};
|
|
|
|
// Case-insensitive character inequality.
// eval( ch1, ch2 ) returns true when the two characters still differ after
// both have been passed through regex_toupper.
template< typename CharT >
struct ch_neq_nocase_t
{
    typedef CharT char_type;
    typedef std::char_traits<char_type> traits_type;

    // NOTE: the 'register' storage-class specifier was dropped from the
    // parameters; it was deprecated in C++11 and removed in C++17.
    static bool eval( CharT ch1, CharT ch2 )
    {
        return ! traits_type::eq( regex_toupper( ch1 ), regex_toupper( ch2 ) );
    }
};
|
|
|
|
//
|
|
// helper functions for dealing with widths.
|
|
//
|
|
// Adds two widths, where size_t( -1 ) stands for "unbounded".
// Returns size_t( -1 ) if either operand is unbounded, or if the sum would
// not fit in a size_t (saturating instead of silently wrapping around to a
// small, wrong width). Otherwise returns a + b.
inline size_t width_add( size_t a, size_t b )
{
    size_t const unbounded = size_t( -1 );

    if( unbounded == a || unbounded == b )
        return unbounded;

    // a + b overflows exactly when a > max - b.
    if( a > unbounded - b )
        return unbounded;

    return a + b;
}
|
|
|
|
// Multiplies two widths, where size_t( -1 ) stands for "unbounded".
// A zero operand always yields zero (even against an unbounded operand).
// Otherwise returns size_t( -1 ) if either operand is unbounded, or if the
// product would not fit in a size_t (saturating instead of silently wrapping
// around to a small, wrong width). Otherwise returns a * b.
inline size_t width_mult( size_t a, size_t b )
{
    size_t const unbounded = size_t( -1 );

    if( 0 == a || 0 == b )
        return 0;

    if( unbounded == a || unbounded == b )
        return unbounded;

    // b is non-zero here; a * b overflows exactly when a > max / b.
    if( a > unbounded / b )
        return unbounded;

    return a * b;
}
|
|
|
|
// Two widths are equal when both their minimums and their maximums are equal.
// (Note: despite its name, 'rhs' is the left-hand operand.)
inline bool operator==( width_type const & rhs, width_type const & lhs )
{
    if( rhs.m_min != lhs.m_min )
        return false;

    return rhs.m_max == lhs.m_max;
}
|
|
|
|
// Two widths differ when their minimums or their maximums differ.
// (Note: despite its name, 'rhs' is the left-hand operand.)
inline bool operator!=( width_type const & rhs, width_type const & lhs )
{
    if( rhs.m_min != lhs.m_min )
        return true;

    return rhs.m_max != lhs.m_max;
}
|
|
|
|
// Member-wise sum of two widths, computed with width_add.
inline width_type operator+( width_type const & rhs, width_type const & lhs )
{
    width_type sum;
    sum.m_min = width_add( rhs.m_min, lhs.m_min );
    sum.m_max = width_add( rhs.m_max, lhs.m_max );
    return sum;
}
|
|
|
|
// Member-wise product of two widths, computed with width_mult.
inline width_type operator*( width_type const & rhs, width_type const & lhs )
{
    width_type product;
    product.m_min = width_mult( rhs.m_min, lhs.m_min );
    product.m_max = width_mult( rhs.m_max, lhs.m_max );
    return product;
}
|
|
|
|
// Adds lhs to rhs in place (member-wise, via width_add) and returns rhs.
inline width_type & operator+=( width_type & rhs, width_type const & lhs )
{
    size_t const new_min = width_add( rhs.m_min, lhs.m_min );
    size_t const new_max = width_add( rhs.m_max, lhs.m_max );

    rhs.m_min = new_min;
    rhs.m_max = new_max;
    return rhs;
}
|
|
|
|
// Multiplies rhs by lhs in place (member-wise, via width_mult) and returns rhs.
inline width_type & operator*=( width_type & rhs, width_type const & lhs )
{
    size_t const new_min = width_mult( rhs.m_min, lhs.m_min );
    size_t const new_max = width_mult( rhs.m_max, lhs.m_max );

    rhs.m_min = new_min;
    rhs.m_max = new_max;
    return rhs;
}
|
|
|
|
namespace
|
|
{
|
|
width_type const zero_width = { 0, 0 };
|
|
width_type const worst_width = { 0, size_t( -1 ) };
|
|
}
|
|
|
|
template< typename IterT >
|
|
struct width_param
|
|
{
|
|
std::vector<match_group_base<IterT>*> & m_rggroups;
|
|
std::list<size_t> const & m_invisible_groups;
|
|
width_type m_width;
|
|
|
|
width_param
|
|
(
|
|
std::vector<match_group_base<IterT>*> & rggroups,
|
|
std::list<size_t> const & invisible_groups
|
|
)
|
|
: m_rggroups( rggroups )
|
|
, m_invisible_groups( invisible_groups )
|
|
, m_width( zero_width )
|
|
{
|
|
}
|
|
private:
|
|
width_param & operator=( width_param const & );
|
|
};
|
|
|
|
// Plain aggregate describing a "must have" string (see peek_param).
// NOTE(review): presumably m_begin/m_end delimit the required substring and
// m_lower points at a lower-cased copy of it -- confirm against the code
// that fills this struct in.
template< typename CharT >
struct must_have
{
    typedef std::basic_string<CharT> string_type;
    typedef typename string_type::const_iterator const_iterator;

    bool            m_has;      // false when there is no must-have string
    const_iterator  m_begin;
    const_iterator  m_end;
    CharT const *   m_lower;
};
|
|
|
|
template< typename CharT >
|
|
struct peek_param
|
|
{
|
|
// "chars" is a list of characters. If every alternate in a group
|
|
// begins with a character or string literal, the "chars" list can
|
|
// be used to speed up the matching of a group.
|
|
size_t m_cchars;
|
|
|
|
union
|
|
{
|
|
CharT m_rgchars[2];
|
|
CharT const * m_pchars;
|
|
};
|
|
|
|
// "must" is a string that must appear in the match. It is used
|
|
// to speed up the search.
|
|
must_have<CharT> m_must_have;
|
|
};
|
|
|
|
// --------------------------------------------------------------------------
|
|
//
|
|
// Class: sub_expr
|
|
//
|
|
// Description: patterns are "compiled" into a directed graph of sub_expr
|
|
// structs. Matching is accomplished by traversing this graph.
|
|
//
|
|
// Methods: sub_expr - construct a sub_expr
|
|
// recursive_match_this - does this sub_expr match at the given location
|
|
// width_this - what is the width of this sub_expr
|
|
// ~sub_expr - recursively delete the sub_expr graph
|
|
// next - pointer to the next node in the graph
|
|
// next - pointer to the next node in the graph
|
|
// recursive_match_next - match the rest of the graph
|
|
// recursive_match_all - recursive_match_this and recursive_match_next
|
|
// is_assertion - true if this sub_expr is a zero-width assertion
|
|
// get_width - find the width of the graph at this sub_expr
|
|
//
|
|
// Members: m_pnext - pointer to the next node in the graph
|
|
//
|
|
// History: 8/14/2000 - ericne - Created
|
|
//
|
|
// --------------------------------------------------------------------------
|
|
template< typename IterT >
|
|
class sub_expr : public sub_expr_base<IterT>
|
|
{
|
|
sub_expr * m_pnext;
|
|
|
|
protected:
|
|
// Only derived classes can instantiate sub_expr's
|
|
sub_expr()
|
|
: m_pnext( 0 )
|
|
{
|
|
}
|
|
|
|
public:
|
|
typedef IterT iterator_type;
|
|
typedef typename std::iterator_traits<IterT>::value_type char_type;
|
|
typedef std::char_traits<char_type> traits_type;
|
|
|
|
virtual ~sub_expr()
|
|
{
|
|
delete m_pnext;
|
|
}
|
|
|
|
sub_expr ** pnext()
|
|
{
|
|
return & m_pnext;
|
|
}
|
|
|
|
sub_expr const * next() const
|
|
{
|
|
return m_pnext;
|
|
}
|
|
|
|
virtual sub_expr * quantify( size_t, size_t, bool, regex_arena & )
|
|
{
|
|
throw bad_regexpr( "sub-expression cannot be quantified" );
|
|
}
|
|
|
|
// Match this object and all subsequent objects
|
|
// If recursive_match_all returns false, it must not change any of param's state
|
|
virtual bool recursive_match_all_s( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return ( recursive_match_this_s( param, icur ) && recursive_match_next( param, icur, false_t() ) );
|
|
}
|
|
|
|
virtual bool recursive_match_all_c( match_param<IterT> & param, IterT icur ) const // for C-style strings
|
|
{
|
|
return ( recursive_match_this_c( param, icur ) && recursive_match_next( param, icur, true_t() ) );
|
|
}
|
|
|
|
// match this object only
|
|
virtual bool recursive_match_this_s( match_param<IterT> &, IterT & ) const
|
|
{
|
|
return true;
|
|
}
|
|
|
|
virtual bool recursive_match_this_c( match_param<IterT> &, IterT & ) const // for C-style strings
|
|
{
|
|
return true;
|
|
}
|
|
|
|
// Match all subsequent objects
|
|
template< typename CStringsT >
|
|
bool recursive_match_next( match_param<IterT> & param, IterT icur, CStringsT ) const
|
|
{
|
|
return m_pnext->recursive_match_all( param, icur, CStringsT() );
|
|
}
|
|
|
|
virtual bool iterative_match_this_s( match_param<IterT> & param ) const
|
|
{
|
|
param.m_pnext = next();
|
|
return true;
|
|
}
|
|
|
|
virtual bool iterative_match_this_c( match_param<IterT> & param ) const // for C-style strings
|
|
{
|
|
param.m_pnext = next();
|
|
return true;
|
|
}
|
|
|
|
virtual bool iterative_rematch_this_s( match_param<IterT> & ) const
|
|
{
|
|
return false;
|
|
}
|
|
|
|
virtual bool iterative_rematch_this_c( match_param<IterT> & ) const // for C-style strings
|
|
{
|
|
return false;
|
|
}
|
|
|
|
virtual bool is_assertion() const
|
|
{
|
|
return false;
|
|
}
|
|
|
|
width_type get_width( width_param<IterT> & param )
|
|
{
|
|
width_type temp_width = width_this( param );
|
|
if( m_pnext )
|
|
temp_width += m_pnext->get_width( param );
|
|
return temp_width;
|
|
}
|
|
|
|
virtual width_type width_this( width_param<IterT> & ) = 0;
|
|
|
|
virtual bool peek_this( peek_param<char_type> & ) const
|
|
{
|
|
return false;
|
|
}
|
|
};
|
|
|
|
// An object of type end_of_pattern is used to mark the
|
|
// end of the pattern. (Duh!) It is responsible for ending
|
|
// the recursion, or for letting the search continue if
|
|
// the match is zero-width and we are trying to find a
|
|
// non-zero-width match
|
|
template< typename IterT >
|
|
class end_of_pattern : public sub_expr<IterT>
|
|
{
|
|
bool _do_match_this( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return ! param.m_no0len || param.m_imatchbegin != icur;
|
|
}
|
|
public:
|
|
virtual bool recursive_match_all_s( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return _do_match_this( param, icur );
|
|
}
|
|
virtual bool recursive_match_all_c( match_param<IterT> & param, IterT icur ) const // for C-style strings
|
|
{
|
|
return _do_match_this( param, icur );
|
|
}
|
|
virtual bool iterative_match_this_s( match_param<IterT> & param ) const
|
|
{
|
|
param.m_pnext = 0;
|
|
return _do_match_this( param, param.m_icur );
|
|
}
|
|
virtual bool iterative_match_this_c( match_param<IterT> & param ) const // for C-style strings
|
|
{
|
|
param.m_pnext = 0;
|
|
return _do_match_this( param, param.m_icur );
|
|
}
|
|
virtual width_type width_this( width_param<IterT> & )
|
|
{
|
|
return zero_width;
|
|
}
|
|
};
|
|
|
|
// Base class for sub-expressions which are zero-width
|
|
// ( i.e., assertions eat no characters during matching )
|
|
// Assertions cannot be quantified.
|
|
template< typename IterT >
|
|
class assertion : public sub_expr<IterT>
|
|
{
|
|
public:
|
|
virtual bool is_assertion() const
|
|
{
|
|
return true;
|
|
}
|
|
virtual width_type width_this( width_param<IterT> & )
|
|
{
|
|
return zero_width;
|
|
}
|
|
virtual bool peek_this( peek_param<char_type> & peek ) const
|
|
{
|
|
return this->next()->peek_this( peek );
|
|
}
|
|
};
|
|
|
|
// Bundles a pair of operation types so they can be passed as one template
// argument: op_type is the first, opc_type the second.
template< typename OpT, typename OpCT >
struct opwrap
{
    typedef OpCT opc_type;
    typedef OpT  op_type;
};

// Builds the opwrap for a template x from its false_t and true_t instances.
#define REGEX_OP(x) opwrap< x<false_t>, x<true_t> >
|
|
|
|
template< typename IterT, typename OpWrapT >
|
|
class assert_op : public assertion<IterT>
|
|
{
|
|
public:
|
|
virtual bool recursive_match_all_s( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return ( assert_op::recursive_match_this_s( param, icur ) && this->recursive_match_next( param, icur, false_t() ) );
|
|
}
|
|
virtual bool recursive_match_all_c( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return ( assert_op::recursive_match_this_c( param, icur ) && this->recursive_match_next( param, icur, true_t() ) );
|
|
}
|
|
virtual bool recursive_match_this_s( match_param<IterT> & param, IterT & icur ) const
|
|
{
|
|
return OpWrapT::op_type::eval( param, icur );
|
|
}
|
|
virtual bool recursive_match_this_c( match_param<IterT> & param, IterT & icur ) const
|
|
{
|
|
return OpWrapT::opc_type::eval( param, icur );
|
|
}
|
|
virtual bool iterative_match_this_s( match_param<IterT> & param ) const
|
|
{
|
|
param.m_pnext = this->next();
|
|
return OpWrapT::op_type::eval( param, param.m_icur );
|
|
}
|
|
virtual bool iterative_match_this_c( match_param<IterT> & param ) const
|
|
{
|
|
param.m_pnext = this->next();
|
|
return OpWrapT::opc_type::eval( param, param.m_icur );
|
|
}
|
|
};
|
|
|
|
template< typename IterT >
|
|
inline assertion<IterT> * create_bos( REGEX_FLAGS, regex_arena & arena )
|
|
{
|
|
return new( arena ) assert_op<IterT, REGEX_OP(bos_t) >();
|
|
}
|
|
|
|
template< typename IterT >
|
|
inline assertion<IterT> * create_eos( REGEX_FLAGS, regex_arena & arena )
|
|
{
|
|
return new( arena ) assert_op<IterT, REGEX_OP(peos_t) >();
|
|
}
|
|
|
|
template< typename IterT >
|
|
inline assertion<IterT> * create_eoz( REGEX_FLAGS, regex_arena & arena )
|
|
{
|
|
return new( arena ) assert_op<IterT, REGEX_OP(eos_t) >();
|
|
}
|
|
|
|
template< typename IterT >
|
|
inline assertion<IterT> * create_bol( REGEX_FLAGS flags, regex_arena & arena )
|
|
{
|
|
switch( MULTILINE & flags )
|
|
{
|
|
case 0:
|
|
return new( arena ) assert_op<IterT, REGEX_OP(bos_t) >();
|
|
case MULTILINE:
|
|
return new( arena ) assert_op<IterT, REGEX_OP(bol_t) >();
|
|
default:
|
|
REGEX_ASSERT(false);
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
template< typename IterT >
|
|
inline assertion<IterT> * create_eol( REGEX_FLAGS flags, regex_arena & arena )
|
|
{
|
|
switch( MULTILINE & flags )
|
|
{
|
|
case 0:
|
|
return new( arena ) assert_op<IterT, REGEX_OP(peos_t) >();
|
|
case MULTILINE:
|
|
return new( arena ) assert_op<IterT, REGEX_OP(eol_t) >();
|
|
default:
|
|
REGEX_ASSERT(false);
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
template< typename IterT, typename SubExprT = sub_expr<IterT> >
|
|
class match_wrapper : public sub_expr<IterT>
|
|
{
|
|
match_wrapper & operator=( match_wrapper const & );
|
|
public:
|
|
match_wrapper( SubExprT * psub )
|
|
: m_psub( psub )
|
|
{
|
|
}
|
|
virtual ~match_wrapper()
|
|
{
|
|
_cleanup();
|
|
}
|
|
virtual width_type width_this( width_param<IterT> & param )
|
|
{
|
|
return m_psub->width_this( param );
|
|
}
|
|
virtual bool peek_this( peek_param<char_type> & peek ) const
|
|
{
|
|
return m_psub->peek_this( peek );
|
|
}
|
|
protected:
|
|
void _cleanup()
|
|
{
|
|
delete m_psub;
|
|
m_psub = 0;
|
|
}
|
|
|
|
SubExprT * m_psub;
|
|
};
|
|
|
|
template< typename IterT, typename SubExprT = sub_expr<IterT> >
|
|
class match_quantifier : public match_wrapper<IterT, SubExprT>
|
|
{
|
|
match_quantifier & operator=( match_quantifier const & );
|
|
public:
|
|
match_quantifier( SubExprT * psub, size_t lbound, size_t ubound )
|
|
: match_wrapper<IterT, SubExprT>( psub )
|
|
, m_lbound( lbound )
|
|
, m_ubound( ubound )
|
|
{
|
|
}
|
|
virtual width_type width_this( width_param<IterT> & param )
|
|
{
|
|
width_type this_width = match_wrapper<IterT, SubExprT>::width_this( param );
|
|
width_type quant_width = { m_lbound, m_ubound };
|
|
return this_width * quant_width;
|
|
}
|
|
virtual bool peek_this( peek_param<char_type> & peek ) const
|
|
{
|
|
return 0 != m_lbound && this->m_psub->peek_this( peek );
|
|
}
|
|
protected:
|
|
size_t const m_lbound;
|
|
size_t const m_ubound;
|
|
};
|
|
|
|
template< typename IterT, typename SubExprT >
|
|
class atom_quantifier : public match_quantifier<IterT, SubExprT>
|
|
{
|
|
atom_quantifier & operator=( atom_quantifier const & );
|
|
public:
|
|
atom_quantifier( SubExprT * psub, size_t lbound, size_t ubound )
|
|
: match_quantifier<IterT, SubExprT>( psub, lbound, ubound )
|
|
{
|
|
}
|
|
protected:
|
|
void _push_frame( unsafe_stack * pstack, IterT curr, size_t count ) const
|
|
{
|
|
std::pair<IterT, size_t> p( curr, count );
|
|
pstack->push( p );
|
|
}
|
|
|
|
void _pop_frame( match_param<IterT> & param ) const
|
|
{
|
|
std::pair<IterT, size_t> p;
|
|
param.m_pstack->pop( p );
|
|
param.m_icur = p.first;
|
|
}
|
|
};
|
|
|
|
template< typename IterT, typename SubExprT >
|
|
class max_atom_quantifier : public atom_quantifier<IterT, SubExprT>
|
|
{
|
|
max_atom_quantifier & operator=( max_atom_quantifier const & );
|
|
|
|
public:
|
|
max_atom_quantifier( SubExprT * psub, size_t lbound, size_t ubound )
|
|
: atom_quantifier<IterT, SubExprT>( psub, lbound, ubound )
|
|
{
|
|
}
|
|
|
|
// Why a macro instead of a template, you ask? Performance. Due to a known
|
|
// bug in the VC7 inline heuristic, I cannot get VC7 to inline the calls to
|
|
// m_psub methods unless I use these macros. And the performance win is
|
|
// nothing to sneeze at. It's on the order of a 25% speed up to use a macro
|
|
// here instead of a template.
|
|
#define DECLARE_RECURSIVE_MATCH_ALL(CSTRINGS,EXT) \
|
|
virtual bool recursive_match_all ## EXT( match_param<IterT> & param, IterT icur ) const \
|
|
{ \
|
|
typedef typename std::iterator_traits<IterT>::difference_type diff_type; \
|
|
/* In an ideal world, ibegin and cdiff would be members of a union */ \
|
|
/* to conserve stack, but I don't know if IterT is a POD type or not. */ \
|
|
IterT ibegin = icur; \
|
|
diff_type cdiff = 0; /* must be a signed integral type */ \
|
|
size_t cmatches = 0; \
|
|
/* greedily match as much as we can*/ \
|
|
if( this->m_ubound && this->m_psub->SubExprT::recursive_match_this ## EXT( param, icur ) ) \
|
|
{ \
|
|
if( 0 == ( cdiff = -std::distance( ibegin, icur ) ) ) \
|
|
return this->recursive_match_next( param, icur, CSTRINGS() ); \
|
|
while( ++cmatches < this->m_ubound && this->m_psub->SubExprT::recursive_match_this ## EXT( param, icur ) )\
|
|
{} \
|
|
} \
|
|
if( this->m_lbound > cmatches ) \
|
|
return false; \
|
|
/* try matching the rest of the pattern, and back off if necessary */ \
|
|
for( ; ; --cmatches, std::advance( icur, cdiff ) ) \
|
|
{ \
|
|
if( this->recursive_match_next( param, icur, CSTRINGS() ) ) \
|
|
return true; \
|
|
if( this->m_lbound == cmatches ) \
|
|
return false; \
|
|
} \
|
|
}
|
|
|
|
#define DECLARE_ITERATIVE_MATCH_THIS(EXT) \
|
|
virtual bool iterative_match_this ## EXT( match_param<IterT> & param ) const \
|
|
{ \
|
|
IterT ibegin = param.m_icur; \
|
|
size_t cmatches = 0; \
|
|
if( this->m_ubound && this->m_psub->SubExprT::iterative_match_this ## EXT( param ) ) \
|
|
{ \
|
|
if( 0 == std::distance( ibegin, param.m_icur ) ) \
|
|
{ \
|
|
cmatches = this->m_lbound; \
|
|
} \
|
|
else \
|
|
{ \
|
|
while( ++cmatches < this->m_ubound && this->m_psub->SubExprT::iterative_match_this ## EXT( param ) )\
|
|
{} \
|
|
} \
|
|
} \
|
|
if( cmatches >= this->m_lbound ) \
|
|
{ \
|
|
this->_push_frame( param.m_pstack, ibegin, cmatches ); \
|
|
param.m_pnext = this->next(); \
|
|
return true; \
|
|
} \
|
|
param.m_icur = ibegin; \
|
|
return false; \
|
|
}
|
|
|
|
#define DECLARE_ITERATIVE_REMATCH_THIS(EXT) \
|
|
virtual bool iterative_rematch_this ## EXT( match_param<IterT> & param ) const \
|
|
{ \
|
|
typedef std::pair<IterT, size_t> top_type; \
|
|
size_t & cmatches = REGEX_VC6( param.m_pstack->top( type2type<top_type>() ).second ) \
|
|
REGEX_NVC6( param.m_pstack->template top<top_type>().second ); \
|
|
if( this->m_lbound != cmatches ) \
|
|
{ \
|
|
--cmatches; \
|
|
this->m_psub->SubExprT::iterative_rematch_this ## EXT( param ); \
|
|
param.m_pnext = this->next(); \
|
|
return true; \
|
|
} \
|
|
this->_pop_frame( param ); \
|
|
return false; \
|
|
}
|
|
|
|
DECLARE_RECURSIVE_MATCH_ALL(false_t,_s)
|
|
DECLARE_RECURSIVE_MATCH_ALL(true_t,_c)
|
|
DECLARE_ITERATIVE_MATCH_THIS(_s)
|
|
DECLARE_ITERATIVE_MATCH_THIS(_c)
|
|
DECLARE_ITERATIVE_REMATCH_THIS(_s)
|
|
DECLARE_ITERATIVE_REMATCH_THIS(_c)
|
|
|
|
#undef DECLARE_RECURSIVE_MATCH_ALL
|
|
#undef DECLARE_ITERATIVE_MATCH_THIS
|
|
#undef DECLARE_ITERATIVE_REMATCH_THIS
|
|
};
|
|
|
|
template< typename IterT, typename SubExprT >
|
|
class min_atom_quantifier : public atom_quantifier<IterT, SubExprT>
|
|
{
|
|
min_atom_quantifier & operator=( min_atom_quantifier const & );
|
|
|
|
public:
|
|
min_atom_quantifier( SubExprT * psub, size_t lbound, size_t ubound )
|
|
: atom_quantifier<IterT, SubExprT>( psub, lbound, ubound )
|
|
{
|
|
}
|
|
|
|
// Why a macro instead of a template, you ask? Performance. Due to a known
|
|
// bug in the VC7 inline heuristic, I cannot get VC7 to inline the calls to
|
|
// m_psub methods unless I use these macros. And the performance win is
|
|
// nothing to sneeze at. It's on the order of a 25% speed up to use a macro
|
|
// here instead of a template.
|
|
#define DECLARE_RECURSIVE_MATCH_ALL(CSTRINGS,EXT) \
|
|
virtual bool recursive_match_all ## EXT( match_param<IterT> & param, IterT icur ) const \
|
|
{ \
|
|
IterT icur_tmp = icur; \
|
|
size_t cmatches = 0; \
|
|
if( this->m_psub->SubExprT::recursive_match_this ## EXT( param, icur_tmp ) ) \
|
|
{ \
|
|
if( icur_tmp == icur ) \
|
|
return this->recursive_match_next( param, icur, CSTRINGS() ); \
|
|
if( this->m_lbound ) \
|
|
{ \
|
|
icur = icur_tmp; \
|
|
++cmatches; \
|
|
} \
|
|
for( ; cmatches < this->m_lbound; ++cmatches ) \
|
|
{ \
|
|
if( ! this->m_psub->SubExprT::recursive_match_this ## EXT( param, icur ) ) \
|
|
return false; \
|
|
} \
|
|
} \
|
|
else if( this->m_lbound ) \
|
|
{ \
|
|
return false; \
|
|
} \
|
|
do \
|
|
{ \
|
|
if( this->recursive_match_next( param, icur, CSTRINGS() ) ) \
|
|
return true; \
|
|
} \
|
|
while( cmatches < this->m_ubound && \
|
|
( ++cmatches, this->m_psub->SubExprT::recursive_match_this ## EXT( param, icur ) ) ); \
|
|
return false; \
|
|
}
|
|
|
|
#define DECLARE_ITERATIVE_MATCH_THIS(EXT) \
|
|
virtual bool iterative_match_this ## EXT( match_param<IterT> & param ) const \
|
|
{ \
|
|
IterT ibegin = param.m_icur; \
|
|
size_t cmatches = 0; \
|
|
if( this->m_psub->SubExprT::iterative_match_this ## EXT( param ) ) \
|
|
{ \
|
|
if( 0 == std::distance( ibegin, param.m_icur ) ) \
|
|
{ \
|
|
cmatches = this->m_ubound; \
|
|
} \
|
|
else if( this->m_lbound ) \
|
|
{ \
|
|
for( ++cmatches; cmatches < this->m_lbound; ++cmatches ) \
|
|
{ \
|
|
if( ! this->m_psub->SubExprT::iterative_match_this ## EXT( param ) ) \
|
|
{ \
|
|
param.m_icur = ibegin; \
|
|
return false; \
|
|
} \
|
|
} \
|
|
} \
|
|
else \
|
|
{ \
|
|
param.m_icur = ibegin; \
|
|
} \
|
|
} \
|
|
else if( this->m_lbound ) \
|
|
{ \
|
|
return false; \
|
|
} \
|
|
this->_push_frame( param.m_pstack, ibegin, cmatches ); \
|
|
param.m_pnext = this->next(); \
|
|
return true; \
|
|
}
|
|
|
|
#define DECLARE_ITERATIVE_REMATCH_THIS(EXT) \
|
|
virtual bool iterative_rematch_this ## EXT( match_param<IterT> & param ) const \
|
|
{ \
|
|
typedef std::pair<IterT, size_t> top_type; \
|
|
size_t & cmatches = REGEX_VC6( param.m_pstack->top( type2type<top_type>() ).second ) \
|
|
REGEX_NVC6( param.m_pstack->template top<top_type>().second ); \
|
|
if( cmatches == this->m_ubound || ! this->m_psub->SubExprT::iterative_match_this ## EXT( param ) ) \
|
|
{ \
|
|
this->_pop_frame( param ); \
|
|
return false; \
|
|
} \
|
|
++cmatches; \
|
|
param.m_pnext = this->next(); \
|
|
return true; \
|
|
}
|
|
|
|
DECLARE_RECURSIVE_MATCH_ALL(false_t,_s)
|
|
DECLARE_RECURSIVE_MATCH_ALL(true_t,_c)
|
|
DECLARE_ITERATIVE_MATCH_THIS(_s)
|
|
DECLARE_ITERATIVE_MATCH_THIS(_c)
|
|
DECLARE_ITERATIVE_REMATCH_THIS(_s)
|
|
DECLARE_ITERATIVE_REMATCH_THIS(_c)
|
|
|
|
#undef DECLARE_RECURSIVE_MATCH_ALL
|
|
#undef DECLARE_ITERATIVE_MATCH_THIS
|
|
#undef DECLARE_ITERATIVE_REMATCH_THIS
|
|
};
|
|
|
|
// A character stored as a pair of case variants, used for case-insensitive
// single-character matching.  Plain aggregate: initialize as { lo, hi }.
template< typename CharT >
struct char_nocase
{
    CharT m_chlo;   // first variant  (create_char passes regex_tolower( ch ))
    CharT m_chhi;   // second variant (create_char passes regex_toupper( ch ))
};
|
|
|
|
template< typename IterT >
|
|
class match_char : public sub_expr<IterT>
|
|
{
|
|
match_char & operator=( match_char const & );
|
|
public:
|
|
typedef typename sub_expr<IterT>::char_type char_type;
|
|
|
|
virtual width_type width_this( width_param<IterT> & )
|
|
{
|
|
width_type width = { 1, 1 };
|
|
return width;
|
|
}
|
|
virtual bool iterative_rematch_this_s( match_param<IterT> & param ) const
|
|
{
|
|
--param.m_icur;
|
|
return false;
|
|
}
|
|
virtual bool iterative_rematch_this_c( match_param<IterT> & param ) const
|
|
{
|
|
--param.m_icur;
|
|
return false;
|
|
}
|
|
};
|
|
|
|
template< typename IterT, typename CharT >
|
|
class match_char_t : public match_char<IterT>
|
|
{
|
|
match_char_t & operator=( match_char_t const & );
|
|
|
|
public:
|
|
match_char_t( CharT const & ch )
|
|
: m_ch( ch )
|
|
{
|
|
}
|
|
|
|
virtual sub_expr<IterT> * quantify( size_t lbound, size_t ubound, bool greedy, regex_arena & arena )
|
|
{
|
|
if( greedy )
|
|
return new( arena ) max_atom_quantifier<IterT, match_char_t<IterT, CharT> >( this, lbound, ubound );
|
|
else
|
|
return new( arena ) min_atom_quantifier<IterT, match_char_t<IterT, CharT> >( this, lbound, ubound );
|
|
}
|
|
virtual bool recursive_match_all_s( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return ( match_char_t::recursive_match_this_s( param, icur ) && this->recursive_match_next( param, icur, false_t() ) );
|
|
}
|
|
virtual bool recursive_match_all_c( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return ( match_char_t::recursive_match_this_c( param, icur ) && this->recursive_match_next( param, icur, true_t() ) );
|
|
}
|
|
virtual bool recursive_match_this_s( match_param<IterT> & param, IterT & icur ) const
|
|
{
|
|
return _do_match_this REGEX_NVC6(<false_t>) ( param, icur REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool recursive_match_this_c( match_param<IterT> & param, IterT & icur ) const
|
|
{
|
|
return _do_match_this REGEX_NVC6(<true_t>) ( param, icur REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
virtual bool iterative_match_this_s( match_param<IterT> & param ) const
|
|
{
|
|
param.m_pnext = this->next();
|
|
return _do_match_this REGEX_NVC6(<false_t>) ( param, param.m_icur REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool iterative_match_this_c( match_param<IterT> & param ) const
|
|
{
|
|
param.m_pnext = this->next();
|
|
return _do_match_this REGEX_NVC6(<true_t>) ( param, param.m_icur REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
virtual bool peek_this( peek_param<char_type> & peek ) const
|
|
{
|
|
_do_peek_this( peek, m_ch );
|
|
return true;
|
|
}
|
|
private:
|
|
static bool eq( char_type left, char_type right )
|
|
{
|
|
return traits_type::eq( left, right );
|
|
}
|
|
static bool eq( char_type left, char_nocase<char_type> right )
|
|
{
|
|
return traits_type::eq( left, right.m_chlo ) ||
|
|
traits_type::eq( left, right.m_chhi );
|
|
}
|
|
static void _do_peek_this( peek_param<char_type> & peek, char_type ch )
|
|
{
|
|
peek.m_cchars = 1;
|
|
peek.m_rgchars[0] = ch;
|
|
peek.m_must_have.m_has = false;
|
|
}
|
|
static void _do_peek_this( peek_param<char_type> & peek, char_nocase<char_type> ch )
|
|
{
|
|
peek.m_cchars = 2;
|
|
peek.m_rgchars[0] = ch.m_chlo;
|
|
peek.m_rgchars[1] = ch.m_chhi;
|
|
peek.m_must_have.m_has = false;
|
|
}
|
|
template< typename CStringsT >
|
|
bool _do_match_this( match_param<IterT> & param, IterT & icur REGEX_VC6(COMMA CStringsT) ) const
|
|
{
|
|
if( eos_t<CStringsT>::eval( param, icur ) || ! eq( *icur, m_ch ) )
|
|
return false;
|
|
++icur;
|
|
return true;
|
|
}
|
|
CharT const m_ch;
|
|
};
|
|
|
|
template< typename IterT >
|
|
inline match_char<IterT> * create_char
|
|
(
|
|
typename std::iterator_traits<IterT>::value_type ch,
|
|
REGEX_FLAGS flags,
|
|
regex_arena & arena
|
|
)
|
|
{
|
|
typedef typename std::iterator_traits<IterT>::value_type char_type;
|
|
typedef std::char_traits<char_type> traits_type;
|
|
|
|
switch( NOCASE & flags )
|
|
{
|
|
case 0:
|
|
return new( arena ) match_char_t<IterT, char_type>( ch );
|
|
|
|
case NOCASE:
|
|
{
|
|
char_nocase<char_type> nocase = { regex_tolower( ch ), regex_toupper( ch ) };
|
|
|
|
if( traits_type::eq( nocase.m_chlo, nocase.m_chhi ) )
|
|
return new( arena ) match_char_t<IterT, char_type>( ch );
|
|
else
|
|
return new( arena ) match_char_t<IterT, char_nocase<char_type> >( nocase );
|
|
}
|
|
default:
|
|
REGEX_ASSERT(false);
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
template< typename IterT >
|
|
class match_literal : public sub_expr<IterT>
|
|
{
|
|
match_literal & operator=( match_literal const & );
|
|
public:
|
|
typedef typename sub_expr<IterT>::char_type char_type;
|
|
typedef std::basic_string<char_type> string_type;
|
|
typedef typename string_type::iterator iterator;
|
|
typedef typename string_type::const_iterator const_iterator;
|
|
typedef typename std::iterator_traits<IterT>::difference_type diff_type;
|
|
|
|
match_literal( const_iterator ibegin, const_iterator iend )
|
|
: m_ibegin( ibegin )
|
|
, m_iend( iend )
|
|
, m_dist( std::distance( m_ibegin, m_iend ) )
|
|
{
|
|
}
|
|
|
|
const_iterator const m_ibegin;
|
|
const_iterator const m_iend;
|
|
diff_type const m_dist; // must be signed integral type
|
|
|
|
virtual width_type width_this( width_param<IterT> & )
|
|
{
|
|
width_type width = { static_cast<size_t>( m_dist ), static_cast<size_t>( m_dist ) };
|
|
return width;
|
|
}
|
|
virtual bool iterative_rematch_this_s( match_param<IterT> & param ) const
|
|
{
|
|
std::advance( param.m_icur, -m_dist );
|
|
return false;
|
|
}
|
|
virtual bool iterative_rematch_this_c( match_param<IterT> & param ) const
|
|
{
|
|
std::advance( param.m_icur, -m_dist );
|
|
return false;
|
|
}
|
|
};
|
|
|
|
template< typename IterT >
|
|
class match_literal_t : public match_literal<IterT>
|
|
{
|
|
match_literal_t & operator=( match_literal_t const & );
|
|
public:
|
|
typedef typename match_literal<IterT>::char_type char_type;
|
|
typedef typename match_literal<IterT>::string_type string_type;
|
|
typedef typename match_literal<IterT>::iterator iterator;
|
|
typedef typename match_literal<IterT>::const_iterator const_iterator;
|
|
|
|
match_literal_t( const_iterator ibegin, const_iterator iend )
|
|
: match_literal<IterT>( ibegin, iend )
|
|
{
|
|
}
|
|
|
|
virtual sub_expr<IterT> * quantify( size_t lbound, size_t ubound, bool greedy, regex_arena & arena )
|
|
{
|
|
if( greedy )
|
|
return new( arena ) max_atom_quantifier<IterT, match_literal_t<IterT> >( this, lbound, ubound );
|
|
else
|
|
return new( arena ) min_atom_quantifier<IterT, match_literal_t<IterT> >( this, lbound, ubound );
|
|
}
|
|
virtual bool recursive_match_all_s( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return ( match_literal_t::recursive_match_this_s( param, icur ) && this->recursive_match_next( param, icur, false_t() ) );
|
|
}
|
|
virtual bool recursive_match_all_c( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return ( match_literal_t::recursive_match_this_c( param, icur ) && this->recursive_match_next( param, icur, true_t() ) );
|
|
}
|
|
virtual bool recursive_match_this_s( match_param<IterT> & param, IterT & icur ) const
|
|
{
|
|
return _do_match_this REGEX_NVC6(<false_t>) ( param, icur REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool recursive_match_this_c( match_param<IterT> & param, IterT & icur ) const
|
|
{
|
|
return _do_match_this REGEX_NVC6(<true_t>) ( param, icur REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
virtual bool iterative_match_this_s( match_param<IterT> & param ) const
|
|
{
|
|
param.m_pnext = this->next();
|
|
return _do_match_this REGEX_NVC6(<false_t>) ( param, param.m_icur REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool iterative_match_this_c( match_param<IterT> & param ) const
|
|
{
|
|
param.m_pnext = this->next();
|
|
return _do_match_this REGEX_NVC6(<true_t>) ( param, param.m_icur REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
virtual bool peek_this( peek_param<char_type> & peek ) const
|
|
{
|
|
peek.m_cchars = 1;
|
|
peek.m_rgchars[0] = *this->m_ibegin;
|
|
peek.m_must_have.m_has = true;
|
|
peek.m_must_have.m_begin = this->m_ibegin;
|
|
peek.m_must_have.m_end = this->m_iend;
|
|
peek.m_must_have.m_lower = 0;
|
|
return true;
|
|
}
|
|
private:
|
|
template< typename CStringsT >
|
|
bool _do_match_this( match_param<IterT> & param, IterT & icur REGEX_VC6(COMMA CStringsT) ) const
|
|
{
|
|
IterT icur_tmp = icur;
|
|
const_iterator ithis = this->m_ibegin;
|
|
for( ; this->m_iend != ithis; ++icur_tmp, ++ithis )
|
|
{
|
|
if( eos_t<CStringsT>::eval( param, icur_tmp ) || ! traits_type::eq( *ithis, *icur_tmp ) )
|
|
return false;
|
|
}
|
|
icur = icur_tmp;
|
|
return true;
|
|
}
|
|
};
|
|
|
|
template< typename IterT >
|
|
class match_literal_nocase_t : public match_literal<IterT>
|
|
{
|
|
match_literal_nocase_t & operator=( match_literal_nocase_t const & );
|
|
|
|
public:
|
|
typedef typename match_literal<IterT>::char_type char_type;
|
|
typedef typename match_literal<IterT>::string_type string_type;
|
|
typedef typename match_literal<IterT>::iterator iterator;
|
|
typedef typename match_literal<IterT>::const_iterator const_iterator;
|
|
|
|
match_literal_nocase_t( iterator ibegin, const_iterator iend, regex_arena & arena )
|
|
: match_literal<IterT>( ibegin, iend )
|
|
, m_szlower( arena_allocator<char_type>( arena ).allocate( m_dist ) )
|
|
{
|
|
// Copy from ibegin to m_szlower
|
|
std::copy( this->m_ibegin, this->m_iend, m_szlower );
|
|
// Store the uppercase version of the literal in [ m_ibegin, m_iend ).
|
|
regex_toupper( ibegin, iend );
|
|
// Store the lowercase version of the literal in m_strlower.
|
|
regex_tolower( m_szlower, m_szlower + this->m_dist );
|
|
}
|
|
|
|
virtual sub_expr<IterT> * quantify( size_t lbound, size_t ubound, bool greedy, regex_arena & arena )
|
|
{
|
|
if( greedy )
|
|
return new( arena ) max_atom_quantifier<IterT, match_literal_nocase_t<IterT> >( this, lbound, ubound );
|
|
else
|
|
return new( arena ) min_atom_quantifier<IterT, match_literal_nocase_t<IterT> >( this, lbound, ubound );
|
|
}
|
|
virtual bool recursive_match_all_s( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return ( match_literal_nocase_t::recursive_match_this_s( param, icur ) && this->recursive_match_next( param, icur, false_t() ) );
|
|
}
|
|
virtual bool recursive_match_all_c( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return ( match_literal_nocase_t::recursive_match_this_c( param, icur ) && this->recursive_match_next( param, icur, true_t() ) );
|
|
}
|
|
virtual bool recursive_match_this_s( match_param<IterT> & param, IterT & icur ) const
|
|
{
|
|
return _do_match_this REGEX_NVC6(<false_t>) ( param, icur REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool recursive_match_this_c( match_param<IterT> & param, IterT & icur ) const
|
|
{
|
|
return _do_match_this REGEX_NVC6(<true_t>) ( param, icur REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
virtual bool iterative_match_this_s( match_param<IterT> & param ) const
|
|
{
|
|
param.m_pnext = this->next();
|
|
return _do_match_this REGEX_NVC6(<false_t>) ( param, param.m_icur REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool iterative_match_this_c( match_param<IterT> & param ) const
|
|
{
|
|
param.m_pnext = this->next();
|
|
return _do_match_this REGEX_NVC6(<true_t>) ( param, param.m_icur REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
virtual bool peek_this( peek_param<char_type> & peek ) const
|
|
{
|
|
peek.m_cchars = 2;
|
|
peek.m_rgchars[0] = *this->m_ibegin;
|
|
peek.m_rgchars[1] = *m_szlower;
|
|
peek.m_must_have.m_has = true;
|
|
peek.m_must_have.m_begin = this->m_ibegin;
|
|
peek.m_must_have.m_end = this->m_iend;
|
|
peek.m_must_have.m_lower = m_szlower;
|
|
return true;
|
|
}
|
|
private:
|
|
// Allocated from a regex arena. The memory will be cleaned up
|
|
// when the arena is deallocated.
|
|
char_type *const m_szlower;
|
|
|
|
template< typename CStringsT >
|
|
bool _do_match_this( match_param<IterT> & param, IterT & icur REGEX_VC6(COMMA CStringsT) ) const
|
|
{
|
|
IterT icur_tmp = icur;
|
|
const_iterator ithisu = this->m_ibegin; // uppercase
|
|
char_type const * ithisl = m_szlower; // lowercase
|
|
for( ; this->m_iend != ithisu; ++icur_tmp, ++ithisu, ++ithisl )
|
|
{
|
|
if( eos_t<CStringsT>::eval( param, icur_tmp ) ||
|
|
( ! traits_type::eq( *ithisu, *icur_tmp ) &&
|
|
! traits_type::eq( *ithisl, *icur_tmp ) ) )
|
|
return false;
|
|
}
|
|
icur = icur_tmp;
|
|
return true;
|
|
}
|
|
};
|
|
|
|
template< typename IterT, typename IBeginT, typename IEndT >
|
|
inline sub_expr<IterT> * create_literal
|
|
(
|
|
IBeginT ibegin,
|
|
IEndT iend,
|
|
REGEX_FLAGS flags,
|
|
regex_arena & arena
|
|
)
|
|
{
|
|
// A match_char is faster than a match_literal, so prefer it
|
|
// when the literal to match is only 1 char wide.
|
|
if( 1 == std::distance<IEndT>( ibegin, iend ) )
|
|
{
|
|
return create_char<IterT>( *ibegin, flags, arena );
|
|
}
|
|
|
|
switch( NOCASE & flags )
|
|
{
|
|
case 0:
|
|
return new( arena ) match_literal_t<IterT>( ibegin, iend );
|
|
case NOCASE:
|
|
return new( arena ) match_literal_nocase_t<IterT>( ibegin, iend, arena );
|
|
default:
|
|
REGEX_ASSERT(false);
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
template< typename IterT >
|
|
class match_any : public sub_expr<IterT>
|
|
{
|
|
public:
|
|
virtual width_type width_this( width_param<IterT> & )
|
|
{
|
|
width_type width = { 1, 1 };
|
|
return width;
|
|
}
|
|
virtual bool iterative_rematch_this_s( match_param<IterT> & param ) const
|
|
{
|
|
--param.m_icur;
|
|
return false;
|
|
}
|
|
virtual bool iterative_rematch_this_c( match_param<IterT> & param ) const
|
|
{
|
|
--param.m_icur;
|
|
return false;
|
|
}
|
|
};
|
|
|
|
template< typename IterT, typename EosWrapT >
|
|
class match_any_t : public match_any<IterT>
|
|
{
|
|
bool _do_match_this_s( match_param<IterT> & param, IterT & icur ) const
|
|
{
|
|
if( EosWrapT::op_type::eval( param, icur ) )
|
|
return false;
|
|
++icur;
|
|
return true;
|
|
}
|
|
bool _do_match_this_c( match_param<IterT> & param, IterT & icur ) const
|
|
{
|
|
if( EosWrapT::opc_type::eval( param, icur ) )
|
|
return false;
|
|
++icur;
|
|
return true;
|
|
}
|
|
public:
|
|
virtual sub_expr<IterT> * quantify( size_t lbound, size_t ubound, bool greedy, regex_arena & arena )
|
|
{
|
|
if( greedy )
|
|
return new( arena ) max_atom_quantifier<IterT, match_any_t<IterT, EosWrapT> >( this, lbound, ubound );
|
|
else
|
|
return new( arena ) min_atom_quantifier<IterT, match_any_t<IterT, EosWrapT> >( this, lbound, ubound );
|
|
}
|
|
|
|
virtual bool recursive_match_all_s( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return ( match_any_t::recursive_match_this_s( param, icur ) && this->recursive_match_next( param, icur, false_t() ) );
|
|
}
|
|
virtual bool recursive_match_all_c( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return ( match_any_t::recursive_match_this_c( param, icur ) && this->recursive_match_next( param, icur, true_t() ) );
|
|
}
|
|
virtual bool recursive_match_this_s( match_param<IterT> & param, IterT & icur ) const
|
|
{
|
|
return _do_match_this_s( param, icur );
|
|
}
|
|
virtual bool recursive_match_this_c( match_param<IterT> & param, IterT & icur ) const
|
|
{
|
|
return _do_match_this_c( param, icur );
|
|
}
|
|
virtual bool iterative_match_this_s( match_param<IterT> & param ) const
|
|
{
|
|
param.m_pnext = this->next();
|
|
return _do_match_this_s( param, param.m_icur );
|
|
}
|
|
virtual bool iterative_match_this_c( match_param<IterT> & param ) const
|
|
{
|
|
param.m_pnext = this->next();
|
|
return _do_match_this_c( param, param.m_icur );
|
|
}
|
|
};
|
|
|
|
template< typename IterT >
|
|
inline match_any<IterT> * create_any( REGEX_FLAGS flags, regex_arena & arena )
|
|
{
|
|
switch( SINGLELINE & flags )
|
|
{
|
|
case 0:
|
|
return new( arena ) match_any_t<IterT, REGEX_OP(eol_t) >();
|
|
case SINGLELINE:
|
|
return new( arena ) match_any_t<IterT, REGEX_OP(eos_t) >();
|
|
default:
|
|
REGEX_ASSERT(false);
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
template< typename IterT >
|
|
class match_charset : public sub_expr<IterT>
|
|
{
|
|
public:
|
|
virtual width_type width_this( width_param<IterT> & )
|
|
{
|
|
width_type width = { 1, 1 };
|
|
return width;
|
|
}
|
|
virtual bool iterative_rematch_this_s( match_param<IterT> & param ) const
|
|
{
|
|
--param.m_icur;
|
|
return false;
|
|
}
|
|
virtual bool iterative_rematch_this_c( match_param<IterT> & param ) const
|
|
{
|
|
--param.m_icur;
|
|
return false;
|
|
}
|
|
};
|
|
|
|
template< typename IterT, typename CharSetPtrT, bool CaseT >
|
|
class match_charset_t : public match_charset<IterT>
|
|
{
|
|
CharSetPtrT const m_pcs;
|
|
|
|
match_charset_t & operator=( match_charset_t const & );
|
|
|
|
template< typename CStringsT >
|
|
bool _do_match_this( match_param<IterT> & param, IterT & icur REGEX_VC6(COMMA CStringsT) ) const
|
|
{
|
|
if( eos_t<CStringsT>::eval( param, icur ) ||
|
|
! m_pcs->REGEX_NVC6(template) in REGEX_NVC6(<CaseT>)( *icur REGEX_VC6(COMMA bool2type<CaseT>()) ) )
|
|
return false;
|
|
++icur;
|
|
return true;
|
|
}
|
|
public:
|
|
match_charset_t( CharSetPtrT pcs )
|
|
: m_pcs( pcs )
|
|
{
|
|
}
|
|
|
|
virtual sub_expr<IterT> * quantify( size_t lbound, size_t ubound, bool greedy, regex_arena & arena )
|
|
{
|
|
if( greedy )
|
|
return new( arena ) max_atom_quantifier<IterT, match_charset_t<IterT, CharSetPtrT, CaseT> >( this, lbound, ubound );
|
|
else
|
|
return new( arena ) min_atom_quantifier<IterT, match_charset_t<IterT, CharSetPtrT, CaseT> >( this, lbound, ubound );
|
|
}
|
|
virtual bool recursive_match_all_s( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return ( match_charset_t::recursive_match_this_s( param, icur ) && this->recursive_match_next( param, icur, false_t() ) );
|
|
}
|
|
virtual bool recursive_match_all_c( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return ( match_charset_t::recursive_match_this_c( param, icur ) && this->recursive_match_next( param, icur, true_t() ) );
|
|
}
|
|
virtual bool recursive_match_this_s( match_param<IterT> & param, IterT & icur ) const
|
|
{
|
|
return _do_match_this REGEX_NVC6(<false_t>) ( param, icur REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool recursive_match_this_c( match_param<IterT> & param, IterT & icur ) const
|
|
{
|
|
return _do_match_this REGEX_NVC6(<true_t>) ( param, icur REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
virtual bool iterative_match_this_s( match_param<IterT> & param ) const
|
|
{
|
|
param.m_pnext = this->next();
|
|
return _do_match_this REGEX_NVC6(<false_t>) ( param, param.m_icur REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool iterative_match_this_c( match_param<IterT> & param ) const
|
|
{
|
|
param.m_pnext = this->next();
|
|
return _do_match_this REGEX_NVC6(<true_t>) ( param, param.m_icur REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
};
|
|
|
|
template< typename IterT >
|
|
inline match_charset<IterT> * create_charset
|
|
(
|
|
charset const & cs,
|
|
REGEX_FLAGS flags,
|
|
regex_arena & arena
|
|
)
|
|
{
|
|
switch( NOCASE & flags )
|
|
{
|
|
case 0:
|
|
return new( arena ) match_charset_t<IterT, charset const*, true>( &cs );
|
|
case NOCASE:
|
|
return new( arena ) match_charset_t<IterT, charset const*, false>( &cs );
|
|
default:
|
|
REGEX_ASSERT(false);
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
template< typename IterT >
|
|
inline match_charset<IterT> * create_custom_charset
|
|
(
|
|
custom_charset const * pcs,
|
|
REGEX_FLAGS flags,
|
|
regex_arena & arena
|
|
)
|
|
{
|
|
typedef std::auto_ptr<custom_charset const> auto_charset;
|
|
auto_charset acs( pcs );
|
|
|
|
switch( NOCASE & flags )
|
|
{
|
|
case 0:
|
|
return new( arena ) match_charset_t<IterT, auto_charset, true>( acs );
|
|
case NOCASE:
|
|
return new( arena ) match_charset_t<IterT, auto_charset, false>( acs );
|
|
default:
|
|
REGEX_ASSERT(false);
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
// Condition for the word-boundary assertions. A boundary exists where the
// word-ness of the previous and current characters differs.
// word_boundary<true> holds at a boundary, word_boundary<false> holds
// everywhere else.
template< bool IsBoundaryT >
struct word_boundary
{
    static bool eval( bool fprevword, bool fthisword )
    {
        bool const fatboundary = ( fprevword != fthisword );
        return fatboundary == IsBoundaryT;
    }
};
|
|
|
|
// Condition for the start-of-word assertion: the current character is a
// word character and the previous one is not.
struct word_start
{
    static bool eval( bool fprevword, bool fthisword )
    {
        if( fprevword )
            return false;
        return fthisword;
    }
};
|
|
|
|
// Condition for the end-of-word assertion: the previous character is a
// word character and the current one is not.
struct word_stop
{
    static bool eval( bool fprevword, bool fthisword )
    {
        if( ! fprevword )
            return false;
        return ! fthisword;
    }
};
|
|
|
|
template< typename IterT, typename CondT >
|
|
class word_assertion_t : public assertion<IterT>
|
|
{
|
|
word_assertion_t & operator=( word_assertion_t const & );
|
|
public:
|
|
typedef typename assertion<IterT>::char_type char_type;
|
|
|
|
word_assertion_t()
|
|
: m_isword( intrinsic_charsets<char_type>::get_word_charset() )
|
|
{
|
|
}
|
|
virtual bool recursive_match_all_s( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return ( word_assertion_t::recursive_match_this_s( param, icur ) && this->recursive_match_next( param, icur, false_t() ) );
|
|
}
|
|
virtual bool recursive_match_all_c( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return ( word_assertion_t::recursive_match_this_c( param, icur ) && this->recursive_match_next( param, icur, true_t() ) );
|
|
}
|
|
virtual bool recursive_match_this_s( match_param<IterT> & param, IterT & icur ) const
|
|
{
|
|
return _do_match_this REGEX_NVC6(<false_t>) ( param, icur REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool recursive_match_this_c( match_param<IterT> & param, IterT & icur ) const
|
|
{
|
|
return _do_match_this REGEX_NVC6(<true_t>) ( param, icur REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
virtual bool iterative_match_this_s( match_param<IterT> & param ) const
|
|
{
|
|
param.m_pnext = this->next();
|
|
return _do_match_this REGEX_NVC6(<false_t>) ( param, param.m_icur REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool iterative_match_this_c( match_param<IterT> & param ) const
|
|
{
|
|
param.m_pnext = this->next();
|
|
return _do_match_this REGEX_NVC6(<true_t>) ( param, param.m_icur REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
private:
|
|
bool _is_word( char_type ch ) const
|
|
{
|
|
return REGEX_VC6( m_isword.in( ch COMMA true_t() ) )
|
|
REGEX_NVC6( m_isword.template in<true>( ch ) );
|
|
}
|
|
template< typename CStringsT >
|
|
bool _do_match_this( match_param<IterT> & param, IterT icur REGEX_VC6(COMMA CStringsT) ) const
|
|
{
|
|
bool const fthisword = ! eos_t<CStringsT>::eval( param, icur ) && _is_word( *icur );
|
|
bool const fprevword = ! bos_t<CStringsT>::eval( param, icur ) && _is_word( *--icur );
|
|
|
|
return CondT::eval( fprevword, fthisword );
|
|
}
|
|
charset const & m_isword;
|
|
};
|
|
|
|
template< typename IterT >
|
|
inline assertion<IterT> * create_word_boundary
|
|
(
|
|
bool fisboundary,
|
|
REGEX_FLAGS, // flags
|
|
regex_arena & arena
|
|
)
|
|
{
|
|
if( fisboundary )
|
|
return new( arena ) word_assertion_t<IterT, word_boundary<true> >();
|
|
else
|
|
return new( arena ) word_assertion_t<IterT, word_boundary<false> >();
|
|
}
|
|
|
|
template< typename IterT >
|
|
inline assertion<IterT> * create_word_start( REGEX_FLAGS, regex_arena & arena )
|
|
{
|
|
return new( arena ) word_assertion_t<IterT, word_start>();
|
|
}
|
|
|
|
template< typename IterT >
|
|
inline assertion<IterT> * create_word_stop( REGEX_FLAGS, regex_arena & arena )
|
|
{
|
|
return new( arena ) word_assertion_t<IterT, word_stop>();
|
|
}
|
|
|
|
// An "extent" is the range of backrefs that a look-ahead or look-behind
// can modify.
typedef std::pair<size_t, size_t> extent_type;

// The group quantifiers are defined later; match_group::quantify needs
// their names.
template< typename IterT > class min_group_quantifier;
template< typename IterT > class max_group_quantifier;
|
|
|
|
template< typename IterT >
|
|
class match_group_base : public sub_expr<IterT>
|
|
{
|
|
protected:
|
|
typedef slist<sub_expr<IterT>*,regex_arena> alt_list_type;
|
|
|
|
private:
|
|
match_group_base & operator=( match_group_base const & );
|
|
|
|
void _push_frame( match_param<IterT> & param ) const
|
|
{
|
|
unsafe_stack * ps = param.m_pstack;
|
|
|
|
if( size_t( -1 ) != m_cgroup )
|
|
{
|
|
IterT & reserved1 = param.m_prgbackrefs[ m_cgroup ].reserved1;
|
|
ps->push( reserved1 );
|
|
reserved1 = param.m_icur;
|
|
}
|
|
|
|
ps->push( m_rgalternates.begin() );
|
|
}
|
|
|
|
void _pop_frame( match_param<IterT> & param ) const
|
|
{
|
|
typedef typename alt_list_type::const_iterator iter_type;
|
|
unsafe_stack * ps = param.m_pstack;
|
|
|
|
REGEX_VC6( ps->pop( type2type<iter_type>() COMMA 0 ); )
|
|
REGEX_NVC6( ps->template pop<iter_type>(); )
|
|
|
|
if( size_t( -1 ) != m_cgroup )
|
|
ps->pop( param.m_prgbackrefs[ m_cgroup ].reserved1 );
|
|
}
|
|
|
|
template< typename CStringsT >
|
|
bool _do_recursive_match_all( match_param<IterT> & param, IterT icur REGEX_VC6(COMMA CStringsT) ) const
|
|
{
|
|
typedef typename alt_list_type::const_iterator iter_type;
|
|
|
|
if( 0 != m_peek_chars_begin &&
|
|
( eos_t<CStringsT>::eval( param, icur ) ||
|
|
m_peek_chars_end == std::find( m_peek_chars_begin, m_peek_chars_end, *icur ) ) )
|
|
{
|
|
return false;
|
|
}
|
|
|
|
if( size_t( -1 ) != m_cgroup ) // could be -1 if this is a lookahead_assertion
|
|
{
|
|
IterT & reserved1 = param.m_prgbackrefs[ m_cgroup ].reserved1;
|
|
IterT old_ibegin = reserved1;
|
|
reserved1 = icur;
|
|
|
|
for( iter_type ialt = m_rgalternates.begin(); m_rgalternates.end() != ialt; ++ialt )
|
|
{
|
|
if( (*ialt)->recursive_match_all( param, icur, CStringsT() ) )
|
|
return true;
|
|
}
|
|
|
|
reserved1 = old_ibegin;
|
|
}
|
|
else
|
|
{
|
|
for( iter_type ialt = m_rgalternates.begin(); m_rgalternates.end() != ialt; ++ialt )
|
|
{
|
|
if( (*ialt)->recursive_match_all( param, icur, CStringsT() ) )
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
template< typename CStringsT >
|
|
bool _do_iterative_match_this( match_param<IterT> & param REGEX_VC6(COMMA CStringsT) ) const
|
|
{
|
|
if( 0 != m_peek_chars_begin &&
|
|
( eos_t<CStringsT>::eval( param, param.m_icur ) ||
|
|
m_peek_chars_end == std::find( m_peek_chars_begin, m_peek_chars_end, *param.m_icur ) ) )
|
|
{
|
|
return false;
|
|
}
|
|
|
|
_push_frame( param );
|
|
param.m_pnext = *m_rgalternates.begin();
|
|
return true;
|
|
}
|
|
bool _do_iterative_rematch_this( match_param<IterT> & param ) const
|
|
{
|
|
typedef typename alt_list_type::const_iterator iter_type;
|
|
iter_type next_iter = ++param.m_pstack->REGEX_NVC6(template) top REGEX_NVC6(<iter_type>) ( REGEX_VC6(type2type<iter_type>()) );
|
|
if( m_rgalternates.end() != next_iter )
|
|
{
|
|
param.m_pnext = *next_iter;
|
|
return true;
|
|
}
|
|
_pop_frame( param );
|
|
return false;
|
|
}
|
|
public:
|
|
typedef typename sub_expr<IterT>::char_type char_type;
|
|
|
|
match_group_base( size_t cgroup, regex_arena & arena )
|
|
: m_rgalternates( arena_allocator<sub_expr<IterT>*>( arena ) )
|
|
, m_cgroup( cgroup )
|
|
, m_nwidth( uninit_width() )
|
|
, m_pptail( 0 )
|
|
, m_peek_chars_end( 0 )
|
|
{
|
|
}
|
|
|
|
// Derived classes that own the end_group object must have a
|
|
// destructor, and that destructor must call _cleanup().
|
|
virtual ~match_group_base() = 0;
|
|
|
|
virtual bool recursive_match_all_s( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return _do_recursive_match_all REGEX_NVC6(<false_t>) ( param, icur REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool recursive_match_all_c( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return _do_recursive_match_all REGEX_NVC6(<true_t>) ( param, icur REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
virtual bool iterative_match_this_s( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_match_this REGEX_NVC6(<false_t>) ( param REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool iterative_match_this_c( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_match_this REGEX_NVC6(<true_t>) ( param REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
virtual bool iterative_rematch_this_s( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_rematch_this( param );
|
|
}
|
|
virtual bool iterative_rematch_this_c( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_rematch_this( param );
|
|
}
|
|
size_t group_number() const
|
|
{
|
|
return m_cgroup;
|
|
}
|
|
void add_item( sub_expr<IterT> * pitem )
|
|
{
|
|
*m_pptail = pitem;
|
|
m_pptail = pitem->pnext();
|
|
}
|
|
void add_alternate()
|
|
{
|
|
m_rgalternates.push_front( 0 );
|
|
m_pptail = &*m_rgalternates.begin();
|
|
}
|
|
void end_alternate()
|
|
{
|
|
*m_pptail = _get_end_group();
|
|
}
|
|
void open_group()
|
|
{
|
|
add_alternate();
|
|
}
|
|
must_have<char_type> close_group( regex_arena & arena )
|
|
{
|
|
end_alternate();
|
|
m_rgalternates.reverse();
|
|
return get_peek_chars( arena );
|
|
}
|
|
must_have<char_type> get_peek_chars( regex_arena & arena )
|
|
{
|
|
m_peek_chars_begin = 0;
|
|
|
|
// optimization: find the lookahead characters for each alternate
|
|
size_t total_chars = 0;
|
|
peek_param<char_type> peek;
|
|
typename alt_list_type::const_iterator ialt;
|
|
for( ialt = m_rgalternates.begin(); m_rgalternates.end() != ialt; ++ialt )
|
|
{
|
|
if( ! (*ialt)->peek_this( peek ) )
|
|
{
|
|
peek.m_must_have.m_has = false;
|
|
return peek.m_must_have;
|
|
}
|
|
total_chars += peek.m_cchars;
|
|
}
|
|
|
|
arena_allocator<char_type> alloc( arena );
|
|
m_peek_chars_begin = alloc.allocate( total_chars, 0 );
|
|
m_peek_chars_end = m_peek_chars_begin;
|
|
|
|
for( ialt = m_rgalternates.begin(); m_rgalternates.end() != ialt; ++ialt )
|
|
{
|
|
(*ialt)->peek_this( peek );
|
|
char_type const * in = ( peek.m_cchars > 2 ) ? peek.m_pchars : peek.m_rgchars;
|
|
m_peek_chars_end = std::copy( in, in + peek.m_cchars, m_peek_chars_end );
|
|
}
|
|
|
|
std::sort( m_peek_chars_begin, m_peek_chars_end );
|
|
m_peek_chars_end = std::unique( m_peek_chars_begin, m_peek_chars_end );
|
|
|
|
if( 1 < m_rgalternates.size() )
|
|
peek.m_must_have.m_has = false;
|
|
|
|
return peek.m_must_have;
|
|
}
|
|
size_t calternates() const
|
|
{
|
|
return m_rgalternates.size();
|
|
}
|
|
virtual void set_extent( extent_type const & )
|
|
{
|
|
}
|
|
width_type group_width
|
|
(
|
|
std::vector<match_group_base<IterT>*> & rggroups,
|
|
std::list<size_t> const & invisible_groups
|
|
)
|
|
{
|
|
// This should only be called on the top node
|
|
REGEX_ASSERT( 0 == m_cgroup );
|
|
if( uninit_width() == m_nwidth )
|
|
{
|
|
width_param<IterT> param( rggroups, invisible_groups );
|
|
match_group_base<IterT>::width_this( param );
|
|
}
|
|
return m_nwidth;
|
|
}
|
|
virtual width_type width_this( width_param<IterT> & param )
|
|
{
|
|
typedef typename alt_list_type::const_iterator iter_type;
|
|
width_type width = { size_t( -1 ), 0 };
|
|
for( iter_type ialt = m_rgalternates.begin(); worst_width != width && m_rgalternates.end() != ialt; ++ialt )
|
|
{
|
|
// prevent possible infinite recursion
|
|
if( m_cgroup < param.m_rggroups.size() )
|
|
param.m_rggroups[ m_cgroup ] = 0;
|
|
|
|
width_type temp_width = ( *ialt )->get_width( param );
|
|
|
|
if( m_cgroup < param.m_rggroups.size() )
|
|
param.m_rggroups[ m_cgroup ] = this;
|
|
|
|
width.m_min = regex_min( width.m_min, temp_width.m_min );
|
|
width.m_max = regex_max( width.m_max, temp_width.m_max );
|
|
}
|
|
return m_nwidth = width;
|
|
}
|
|
virtual bool peek_this( peek_param<char_type> & peek ) const
|
|
{
|
|
if( 0 == m_peek_chars_begin )
|
|
return false;
|
|
|
|
peek.m_cchars = std::distance( m_peek_chars_begin, m_peek_chars_end );
|
|
if( 2 < peek.m_cchars )
|
|
peek.m_pchars = m_peek_chars_begin;
|
|
else
|
|
std::copy( m_peek_chars_begin, m_peek_chars_end, peek.m_rgchars );
|
|
|
|
peek.m_must_have.m_has = false;
|
|
if( 1 == m_rgalternates.size() )
|
|
{
|
|
peek_param<char_type> local_peek;
|
|
(*m_rgalternates.begin())->peek_this( local_peek );
|
|
peek.m_must_have = local_peek.m_must_have;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
protected:
|
|
void _cleanup()
|
|
{
|
|
typedef typename alt_list_type::const_iterator iter_type;
|
|
for( iter_type ialt = m_rgalternates.begin(); m_rgalternates.end() != ialt; ++ialt )
|
|
delete *ialt;
|
|
m_rgalternates.clear();
|
|
}
|
|
|
|
virtual sub_expr<IterT> * _get_end_group() = 0;
|
|
|
|
alt_list_type m_rgalternates;
|
|
size_t const m_cgroup;
|
|
width_type m_nwidth;
|
|
|
|
union
|
|
{
|
|
sub_expr<IterT> ** m_pptail; // only used when adding elements
|
|
char_type * m_peek_chars_begin;
|
|
};
|
|
|
|
char_type * m_peek_chars_end;
|
|
};
|
|
|
|
template< typename IterT >
|
|
inline match_group_base<IterT>::~match_group_base()
|
|
{
|
|
}
|
|
|
|
// A indestructable_sub_expr is an object that brings itself back
|
|
// to life after explicitly being deleted. It is used
|
|
// to ease clean-up of the sub_expr graph, where most
|
|
// nodes are dynamically allocated, but some nodes are
|
|
// members of other nodes and are not dynamically allocated.
|
|
// The recursive delete of the sub_expr graph causes
|
|
// delete to be ( incorrectly ) called on these members.
|
|
// By inheriting these members from indestructable_sub_expr,
|
|
// explicit attempts to delete the object will have no
|
|
// effect. ( Actually, the object will be destructed and
|
|
// then immediately reconstructed. ) This is accomplished
|
|
// by calling placement new in operator delete.
|
|
template< typename IterT, typename T >
|
|
class indestructable_sub_expr : public sub_expr<IterT>
|
|
{
|
|
static void * operator new( size_t, regex_arena & );
|
|
static void operator delete( void *, regex_arena & );
|
|
protected:
|
|
static void * operator new( size_t, void * pv ) { return pv; }
|
|
static void operator delete( void *, void * ) {}
|
|
public:
|
|
virtual ~indestructable_sub_expr() {}
|
|
static void operator delete( void * pv ) { ::new( pv ) T; }
|
|
};
|
|
|
|
template< typename IterT >
|
|
class match_group : public match_group_base<IterT>
|
|
{
|
|
match_group( match_group const & );
|
|
match_group & operator=( match_group const & );
|
|
public:
|
|
match_group( size_t cgroup, regex_arena & arena )
|
|
: match_group_base<IterT>( cgroup, arena )
|
|
, m_end_group( this )
|
|
{
|
|
}
|
|
|
|
virtual ~match_group()
|
|
{
|
|
this->_cleanup();
|
|
}
|
|
|
|
virtual sub_expr<IterT> * quantify( size_t lbound, size_t ubound, bool greedy, regex_arena & arena )
|
|
{
|
|
if( greedy )
|
|
return new( arena ) max_group_quantifier<IterT>( this, lbound, ubound );
|
|
else
|
|
return new( arena ) min_group_quantifier<IterT>( this, lbound, ubound );
|
|
}
|
|
|
|
protected:
|
|
typedef typename match_group_base<IterT>::alt_list_type alt_list_type;
|
|
|
|
struct old_backref
|
|
{
|
|
IterT m_ibegin;
|
|
IterT m_iend;
|
|
bool m_matched;
|
|
|
|
old_backref() {}
|
|
old_backref( backref_tag<IterT> const & br )
|
|
: m_ibegin( br.first )
|
|
, m_iend( br.second )
|
|
, m_matched( br.matched )
|
|
{
|
|
}
|
|
};
|
|
|
|
static void restore_backref( backref_tag<IterT> & br, old_backref const & old_br )
|
|
{
|
|
br.first = old_br.m_ibegin;
|
|
br.second = old_br.m_iend;
|
|
br.matched = old_br.m_matched;
|
|
}
|
|
|
|
template< typename CStringsT >
|
|
bool _do_call_back( match_param<IterT> & param, IterT icur REGEX_VC6(COMMA CStringsT) ) const
|
|
{
|
|
if( size_t( -1 ) != this->m_cgroup )
|
|
{
|
|
backref_tag<IterT> & br = param.m_prgbackrefs[ this->m_cgroup ];
|
|
|
|
// Save the relevant portions of the backref in an old_backref struct
|
|
old_backref old_br( br );
|
|
|
|
br.first = br.reserved1;
|
|
br.second = icur;
|
|
br.matched = true;
|
|
|
|
if( this->recursive_match_next( param, icur, CStringsT() ) )
|
|
return true;
|
|
|
|
// Restore the backref to its saved state
|
|
restore_backref( br, old_br );
|
|
}
|
|
else
|
|
{
|
|
if( this->recursive_match_next( param, icur, CStringsT() ) )
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
class end_group : public indestructable_sub_expr<IterT, end_group>
|
|
{
|
|
match_group<IterT> const *const m_pgroup;
|
|
|
|
end_group & operator=( end_group const & );
|
|
|
|
void _push_frame( match_param<IterT> & param ) const
|
|
{
|
|
size_t cgroup = m_pgroup->group_number();
|
|
|
|
if( size_t( -1 ) != cgroup )
|
|
{
|
|
backref_tag<IterT> & br = param.m_prgbackrefs[ cgroup ];
|
|
old_backref old_br( br );
|
|
param.m_pstack->push( old_br );
|
|
|
|
br.first = br.reserved1;
|
|
br.second = param.m_icur;
|
|
br.matched = true;
|
|
}
|
|
}
|
|
void _pop_frame( match_param<IterT> & param ) const
|
|
{
|
|
size_t cgroup = m_pgroup->group_number();
|
|
|
|
if( size_t( -1 ) != cgroup )
|
|
{
|
|
old_backref old_br;
|
|
param.m_pstack->pop( old_br );
|
|
match_group<IterT>::restore_backref( param.m_prgbackrefs[ cgroup ], old_br );
|
|
}
|
|
}
|
|
bool _do_iterative_match_this( match_param<IterT> & param ) const
|
|
{
|
|
_push_frame( param );
|
|
param.m_pnext = m_pgroup->next();
|
|
return true;
|
|
}
|
|
bool _do_iterative_rematch_this( match_param<IterT> & param ) const
|
|
{
|
|
_pop_frame( param );
|
|
return false;
|
|
}
|
|
public:
|
|
end_group( match_group<IterT> const * pgroup = 0 )
|
|
: m_pgroup( pgroup )
|
|
{
|
|
}
|
|
virtual bool recursive_match_all_s( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return m_pgroup->REGEX_NVC6(template) _do_call_back REGEX_NVC6(<false_t>)( param, icur REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool recursive_match_all_c( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return m_pgroup->REGEX_NVC6(template) _do_call_back REGEX_NVC6(<true_t>)( param, icur REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
virtual bool iterative_match_this_s( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_match_this( param );
|
|
}
|
|
virtual bool iterative_match_this_c( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_match_this( param );
|
|
}
|
|
virtual bool iterative_rematch_this_s( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_rematch_this( param );
|
|
}
|
|
virtual bool iterative_rematch_this_c( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_rematch_this( param );
|
|
}
|
|
virtual width_type width_this( width_param<IterT> & )
|
|
{
|
|
return zero_width;
|
|
}
|
|
} m_end_group;
|
|
|
|
friend class end_group;
|
|
|
|
virtual sub_expr<IterT> * _get_end_group()
|
|
{
|
|
return & m_end_group;
|
|
}
|
|
};
|
|
|
|
template< typename IterT >
|
|
inline void save_backrefs( backref_tag<IterT> const * ibegin, backref_tag<IterT> const * iend, IterT * prgci )
|
|
{
|
|
for( ; ibegin != iend; ++ibegin, ++prgci )
|
|
{
|
|
new( prgci ) IterT( ibegin->reserved1 );
|
|
}
|
|
}
|
|
|
|
template< typename IterT >
|
|
inline void restore_backrefs( backref_tag<IterT> * ibegin, backref_tag<IterT> * iend, IterT const * prgci )
|
|
{
|
|
for( ; ibegin != iend; ++ibegin, ++prgci )
|
|
{
|
|
ibegin->reserved1 = *prgci;
|
|
prgci->~IterT();
|
|
}
|
|
}
|
|
|
|
template< typename IterT >
|
|
class group_wrapper : public sub_expr<IterT>
|
|
{
|
|
match_group_base<IterT> const *const m_pgroup;
|
|
|
|
group_wrapper & operator=( group_wrapper const & );
|
|
public:
|
|
group_wrapper( match_group_base<IterT> const * pgroup )
|
|
: m_pgroup( pgroup )
|
|
{
|
|
}
|
|
virtual bool iterative_match_this_s( match_param<IterT> & param ) const
|
|
{
|
|
return m_pgroup->match_group_base<IterT>::iterative_match_this_s( param );
|
|
}
|
|
virtual bool iterative_match_this_c( match_param<IterT> & param ) const
|
|
{
|
|
return m_pgroup->match_group_base<IterT>::iterative_match_this_c( param );
|
|
}
|
|
virtual bool iterative_rematch_this_s( match_param<IterT> & param ) const
|
|
{
|
|
return m_pgroup->match_group_base<IterT>::iterative_rematch_this_s( param );
|
|
}
|
|
virtual bool iterative_rematch_this_c( match_param<IterT> & param ) const
|
|
{
|
|
return m_pgroup->match_group_base<IterT>::iterative_rematch_this_c( param );
|
|
}
|
|
virtual width_type width_this( width_param<IterT> & )
|
|
{
|
|
return zero_width;
|
|
}
|
|
};
|
|
|
|
// Function object that runs the destructor of whatever it is handed,
// without releasing the storage. Used with std::for_each to tear down
// objects that were constructed into raw memory.
struct deleter
{
    template< typename ElemT >
    void operator()( ElemT const & elem )
    {
        // Qualified call: invokes ElemT's own destructor (no virtual dispatch).
        elem.ElemT::~ElemT();
    }
};
|
|
|
|
// Behaves like a lookahead assertion if m_cgroup is -1, or like
|
|
// an independent group otherwise.
|
|
template< typename IterT >
|
|
class independent_group_base : public match_group_base<IterT>
|
|
{
|
|
independent_group_base( independent_group_base const & );
|
|
independent_group_base & operator=( independent_group_base const & );
|
|
|
|
template< typename CStringsT >
|
|
bool _do_recursive_match_all( match_param<IterT> & param, IterT icur REGEX_VC6(COMMA CStringsT) ) const
|
|
{
|
|
backref_tag<IterT> * prgbr = 0;
|
|
|
|
// Copy onto the stack the part of the backref vector that could
|
|
// be modified by the lookahead.
|
|
if( m_extent.second )
|
|
{
|
|
prgbr = static_cast<backref_tag<IterT>*>( alloca( m_extent.second * sizeof( backref_tag<IterT> ) ) );
|
|
std::uninitialized_copy(
|
|
param.m_prgbackrefs + m_extent.first,
|
|
param.m_prgbackrefs + m_extent.first + m_extent.second,
|
|
prgbr );
|
|
}
|
|
|
|
// Match until the end of this group and then return
|
|
// BUGBUG can the compiler optimize this?
|
|
bool const fdomatch = CStringsT::value ?
|
|
match_group_base<IterT>::recursive_match_all_c( param, icur ) :
|
|
match_group_base<IterT>::recursive_match_all_s( param, icur );
|
|
|
|
if( m_fexpected == fdomatch )
|
|
{
|
|
// If m_cgroup != 1, then this is not a zero-width assertion.
|
|
if( fdomatch && size_t( -1 ) != this->m_cgroup )
|
|
icur = param.m_prgbackrefs[ this->m_cgroup ].second;
|
|
|
|
if( this->recursive_match_next( param, icur, CStringsT() ) )
|
|
{
|
|
std::for_each( prgbr, prgbr + m_extent.second, deleter() );
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// if match_group::recursive_match_all returned true, the backrefs must be restored
|
|
if( m_extent.second && fdomatch )
|
|
std::copy( prgbr, prgbr + m_extent.second, param.m_prgbackrefs + m_extent.first );
|
|
|
|
std::for_each( prgbr, prgbr + m_extent.second, deleter() );
|
|
return false;
|
|
}
|
|
template< typename CStringsT >
|
|
bool _do_iterative_match_this( match_param<IterT> & param REGEX_VC6(COMMA CStringsT) ) const
|
|
{
|
|
group_wrapper<IterT> expr( this );
|
|
|
|
_push_frame( param );
|
|
IterT ibegin = param.m_icur;
|
|
|
|
bool const fdomatch = _do_match_iterative( &expr, param, param.m_icur, CStringsT() );
|
|
|
|
if( m_fexpected == fdomatch )
|
|
{
|
|
// If m_cgroup == -1, then this is a zero-width assertion.
|
|
if( fdomatch && size_t( -1 ) == this->m_cgroup )
|
|
param.m_icur = ibegin;
|
|
|
|
param.m_pnext = this->next();
|
|
return true;
|
|
}
|
|
|
|
_pop_frame( param );
|
|
return false;
|
|
}
|
|
bool _do_iterative_rematch_this( match_param<IterT> & param ) const
|
|
{
|
|
_pop_frame( param );
|
|
return false;
|
|
}
|
|
public:
|
|
independent_group_base( size_t cgroup, regex_arena & arena )
|
|
: match_group_base<IterT>( cgroup, arena )
|
|
, m_fexpected( true )
|
|
, m_extent( 0, 0 )
|
|
{
|
|
}
|
|
virtual void set_extent( extent_type const & ex )
|
|
{
|
|
m_extent = ex;
|
|
}
|
|
virtual bool recursive_match_all_s( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return _do_recursive_match_all REGEX_NVC6(<false_t>) ( param, icur REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool recursive_match_all_c( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return _do_recursive_match_all REGEX_NVC6(<true_t>) ( param, icur REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
virtual bool iterative_match_this_s( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_match_this REGEX_NVC6(<false_t>) ( param REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool iterative_match_this_c( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_match_this REGEX_NVC6(<true_t>) ( param REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
virtual bool iterative_rematch_this_s( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_rematch_this( param );
|
|
}
|
|
virtual bool iterative_rematch_this_c( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_rematch_this( param );
|
|
}
|
|
virtual bool peek_this( peek_param<char_type> & peek ) const
|
|
{
|
|
if( size_t( -1 ) == this->m_cgroup )
|
|
return false;
|
|
return match_group_base<IterT>::peek_this( peek );
|
|
}
|
|
protected:
|
|
|
|
void _push_frame( match_param<IterT> & param ) const
|
|
{
|
|
unsafe_stack * pstack = param.m_pstack;
|
|
typedef typename match_param<IterT>::backref_type backref_type;
|
|
backref_type * ibegin = param.m_prgbackrefs + m_extent.first;
|
|
backref_type * iend = ibegin + m_extent.second;
|
|
|
|
for( ; iend != ibegin; ++ibegin )
|
|
{
|
|
pstack->push( *ibegin );
|
|
}
|
|
pstack->push( param.m_icur );
|
|
}
|
|
|
|
void _pop_frame( match_param<IterT> & param ) const
|
|
{
|
|
unsafe_stack * pstack = param.m_pstack;
|
|
typedef typename match_param<IterT>::backref_type backref_type;
|
|
|
|
backref_type * ibegin = param.m_prgbackrefs + m_extent.first;
|
|
backref_type * iend = ibegin + m_extent.second;
|
|
|
|
pstack->pop( param.m_icur );
|
|
while( iend != ibegin )
|
|
{
|
|
pstack->pop( *--iend );
|
|
}
|
|
}
|
|
|
|
independent_group_base( bool const fexpected, regex_arena & arena )
|
|
: match_group_base<IterT>( size_t( -1 ), arena )
|
|
, m_fexpected( fexpected )
|
|
{
|
|
}
|
|
|
|
bool const m_fexpected;
|
|
extent_type m_extent;
|
|
};
|
|
|
|
template< typename IterT >
|
|
class independent_group : public independent_group_base<IterT>
|
|
{
|
|
independent_group( independent_group const & );
|
|
independent_group & operator=( independent_group const & );
|
|
public:
|
|
independent_group( size_t cgroup, regex_arena & arena )
|
|
: independent_group_base<IterT>( cgroup, arena )
|
|
, m_end_group( this )
|
|
{
|
|
}
|
|
|
|
virtual ~independent_group()
|
|
{
|
|
this->_cleanup();
|
|
}
|
|
|
|
virtual sub_expr<IterT> * quantify( size_t lbound, size_t ubound, bool greedy, regex_arena & arena )
|
|
{
|
|
if( greedy )
|
|
return new( arena ) max_group_quantifier<IterT>( this, lbound, ubound );
|
|
else
|
|
return new( arena ) min_group_quantifier<IterT>( this, lbound, ubound );
|
|
}
|
|
|
|
protected:
|
|
independent_group( bool const fexpected, regex_arena & arena )
|
|
: independent_group_base<IterT>( fexpected, arena )
|
|
, m_end_group( this )
|
|
{
|
|
}
|
|
|
|
bool _do_call_back( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
if( size_t( -1 ) != this->m_cgroup )
|
|
{
|
|
backref_tag<IterT> & br = param.m_prgbackrefs[ this->m_cgroup ];
|
|
br.first = br.reserved1;
|
|
br.second = icur;
|
|
br.matched = true;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
class end_group : public indestructable_sub_expr<IterT, end_group>
|
|
{
|
|
independent_group<IterT> const *const m_pgroup;
|
|
|
|
end_group & operator=( end_group const & );
|
|
bool _do_iterative_match_this( match_param<IterT> & param ) const
|
|
{
|
|
size_t cgroup = m_pgroup->group_number();
|
|
if( size_t( -1 ) != cgroup )
|
|
{
|
|
backref_tag<IterT> & br = param.m_prgbackrefs[ cgroup ];
|
|
br.first = br.reserved1;
|
|
br.second = param.m_icur;
|
|
br.matched = true;
|
|
}
|
|
param.m_pnext = 0;
|
|
return true;
|
|
}
|
|
public:
|
|
end_group( independent_group<IterT> const * pgroup = 0 )
|
|
: m_pgroup( pgroup )
|
|
{
|
|
}
|
|
virtual bool recursive_match_all_s( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return m_pgroup->_do_call_back( param, icur );
|
|
}
|
|
virtual bool recursive_match_all_c( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return m_pgroup->_do_call_back( param, icur );
|
|
}
|
|
virtual bool iterative_match_this_s( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_match_this( param );
|
|
}
|
|
virtual bool iterative_match_this_c( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_match_this( param );
|
|
}
|
|
virtual width_type width_this( width_param<IterT> & )
|
|
{
|
|
return zero_width;
|
|
}
|
|
} m_end_group;
|
|
|
|
friend class end_group;
|
|
|
|
virtual sub_expr<IterT> * _get_end_group()
|
|
{
|
|
return & m_end_group;
|
|
}
|
|
};
|
|
|
|
template< typename IterT >
|
|
class lookahead_assertion : public independent_group<IterT>
|
|
{
|
|
lookahead_assertion( lookahead_assertion const & );
|
|
lookahead_assertion & operator=( lookahead_assertion const & );
|
|
public:
|
|
lookahead_assertion( bool const fexpected, regex_arena & arena )
|
|
: independent_group<IterT>( fexpected, arena )
|
|
{
|
|
}
|
|
virtual sub_expr<IterT> * quantify( size_t, size_t, bool, regex_arena & )
|
|
{
|
|
throw bad_regexpr( "look-ahead assertion cannot be quantified" );
|
|
}
|
|
virtual bool is_assertion() const
|
|
{
|
|
return true;
|
|
}
|
|
virtual width_type width_this( width_param<IterT> & param )
|
|
{
|
|
// calculate the group's width and store it, but return zero_width
|
|
match_group_base<IterT>::width_this( param );
|
|
return zero_width;
|
|
}
|
|
virtual bool peek_this( peek_param<char_type> & peek ) const
|
|
{
|
|
return this->next()->peek_this( peek );
|
|
}
|
|
};
|
|
|
|
template< typename IterT >
|
|
class lookbehind_assertion : public independent_group_base<IterT>
|
|
{
|
|
lookbehind_assertion( lookbehind_assertion const & );
|
|
lookbehind_assertion & operator=( lookbehind_assertion const & );
|
|
|
|
template< typename CStringsT >
|
|
bool _do_recursive_match_all( match_param<IterT> & param, IterT icur REGEX_VC6(COMMA CStringsT) ) const
|
|
{
|
|
typedef typename std::iterator_traits<IterT>::difference_type diff_type;
|
|
|
|
// This is the room in the string from the start to the current position
|
|
diff_type room = std::distance( param.m_ibufferbegin, icur );
|
|
|
|
// If we don't have enough room to match the lookbehind, the match fails.
|
|
// If we wanted the match to fail, try to match the rest of the pattern.
|
|
if( this->m_nwidth.m_min > static_cast<size_t>( room ) )
|
|
return this->m_fexpected ? false : this->recursive_match_next( param, icur, CStringsT() );
|
|
|
|
backref_tag<IterT> * prgbr = 0;
|
|
|
|
// Copy onto the stack the part of the backref vector that could
|
|
// be modified by the lookbehind.
|
|
if( this->m_extent.second )
|
|
{
|
|
prgbr = static_cast<backref_tag<IterT>*>( alloca( this->m_extent.second * sizeof( backref_tag<IterT> ) ) );
|
|
std::uninitialized_copy(
|
|
param.m_prgbackrefs + this->m_extent.first,
|
|
param.m_prgbackrefs + this->m_extent.first + this->m_extent.second,
|
|
prgbr );
|
|
}
|
|
|
|
IterT local_ibegin = icur;
|
|
std::advance( local_ibegin, -static_cast<diff_type>( regex_min<size_t>( this->m_nwidth.m_max, room ) ) );
|
|
|
|
IterT local_iend = icur;
|
|
std::advance( local_iend, -static_cast<diff_type>( this->m_nwidth.m_min ) );
|
|
|
|
// Create a local param struct that has icur as param.m_iend
|
|
match_param<IterT> local_param( param.m_ibufferbegin, param.m_imatchbegin, icur, param.m_prgbackrefs, param.m_cbackrefs );
|
|
|
|
// Find the rightmost match that ends at icur.
|
|
for( IterT local_icur = local_ibegin; ; ++local_icur )
|
|
{
|
|
// Match until the end of this group and then return
|
|
// Note that we're calling recursive_match_all_s regardless of the CStringsT switch.
|
|
// This is because for the lookbehind assertion, the termination condition is when
|
|
// icur == param.m_iend, not when *icur == '\0'
|
|
bool const fmatched = match_group_base<IterT>::recursive_match_all_s( local_param, local_icur );
|
|
|
|
// If the match results were what we were expecting, try to match the
|
|
// rest of the pattern. If that succeeds, return true.
|
|
if( this->m_fexpected == fmatched && this->recursive_match_next( param, icur, CStringsT() ) )
|
|
{
|
|
std::for_each( prgbr, prgbr + this->m_extent.second, deleter() );
|
|
return true;
|
|
}
|
|
|
|
// if match_group::recursive_match_all returned true, the backrefs must be restored
|
|
if( fmatched )
|
|
{
|
|
if( this->m_extent.second )
|
|
std::copy( prgbr, prgbr + this->m_extent.second, param.m_prgbackrefs + this->m_extent.first );
|
|
|
|
// Match succeeded. If this is a negative lookbehind, we didn't want it
|
|
// to succeed, so return false.
|
|
if( ! this->m_fexpected )
|
|
{
|
|
std::for_each( prgbr, prgbr + this->m_extent.second, deleter() );
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if( local_icur == local_iend )
|
|
break;
|
|
}
|
|
|
|
// No variation of the lookbehind was satisfied in a way that permited
|
|
// the rest of the pattern to match successfully, so return false.
|
|
std::for_each( prgbr, prgbr + this->m_extent.second, deleter() );
|
|
return false;
|
|
}
|
|
|
|
template< typename CStringsT >
|
|
bool _do_iterative_match_this( match_param<IterT> & param REGEX_VC6(COMMA CStringsT) ) const
|
|
{
|
|
typedef typename std::iterator_traits<IterT>::difference_type diff_type;
|
|
|
|
// Save the backrefs
|
|
this->_push_frame( param );
|
|
|
|
// This is the room in the string from the start to the current position
|
|
diff_type room = std::distance( param.m_ibufferbegin, param.m_icur );
|
|
|
|
// If we don't have enough room to match the lookbehind, the match fails.
|
|
// If we wanted the match to fail, try to match the rest of the pattern.
|
|
if( this->m_nwidth.m_min > static_cast<size_t>( room ) )
|
|
{
|
|
if( this->m_fexpected )
|
|
{
|
|
this->_pop_frame( param );
|
|
return false;
|
|
}
|
|
param.m_pnext = this->next();
|
|
return true;
|
|
}
|
|
|
|
IterT local_ibegin = param.m_icur;
|
|
std::advance( local_ibegin, -static_cast<diff_type>( regex_min<size_t>( this->m_nwidth.m_max, room ) ) );
|
|
|
|
IterT local_iend = param.m_icur;
|
|
std::advance( local_iend, -static_cast<diff_type>( this->m_nwidth.m_min ) );
|
|
|
|
// Create a local param struct that has icur as param.m_iend
|
|
match_param<IterT> local_param( param.m_ibufferbegin, param.m_imatchbegin, param.m_icur, param.m_prgbackrefs, param.m_cbackrefs );
|
|
local_param.m_pstack = param.m_pstack;
|
|
|
|
group_wrapper<IterT> expr( this );
|
|
|
|
// Find the rightmost match that ends at icur.
|
|
for( IterT local_icur = local_ibegin; ; ++local_icur )
|
|
{
|
|
// Match until the end of this group and then return
|
|
// Note that we're calling _do_match_iterative_helper_s regardless of the CStringsT switch.
|
|
// This is because for the lookbehind assertion, the termination condition is when
|
|
// icur == param.m_iend, not when *icur == '\0'
|
|
bool const fmatched = regex_access<IterT>::_do_match_iterative_helper_s( &expr, local_param, local_icur );
|
|
|
|
// If the match results were what we were expecting, try to match the
|
|
// rest of the pattern. If that succeeds, return true.
|
|
if( this->m_fexpected == fmatched )
|
|
{
|
|
param.m_pnext = this->next();
|
|
return true;
|
|
}
|
|
|
|
// if match_group::recursive_match_all returned true, the backrefs must be restored
|
|
if( fmatched )
|
|
{
|
|
// Restore the backrefs
|
|
this->_pop_frame( param );
|
|
|
|
// Match succeeded. If this is a negative lookbehind, we didn't want it
|
|
// to succeed, so return false.
|
|
if( ! this->m_fexpected )
|
|
return false;
|
|
|
|
// Save the backrefs again.
|
|
this->_push_frame( param );
|
|
}
|
|
|
|
if( local_icur == local_iend )
|
|
break;
|
|
}
|
|
|
|
// No variation of the lookbehind was satisfied in a way that permited
|
|
// the rest of the pattern to match successfully, so return false.
|
|
this->_pop_frame( param );
|
|
return false;
|
|
}
|
|
bool _do_iterative_rematch_this( match_param<IterT> & param ) const
|
|
{
|
|
this->_pop_frame( param );
|
|
return false;
|
|
}
|
|
public:
|
|
lookbehind_assertion( bool const fexpected, regex_arena & arena )
|
|
: independent_group_base<IterT>( fexpected, arena )
|
|
{
|
|
}
|
|
|
|
virtual ~lookbehind_assertion()
|
|
{
|
|
this->_cleanup();
|
|
}
|
|
|
|
virtual bool recursive_match_all_s( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return _do_recursive_match_all REGEX_NVC6(<false_t>) ( param, icur REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool recursive_match_all_c( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return _do_recursive_match_all REGEX_NVC6(<true_t>) ( param, icur REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
virtual bool iterative_match_this_s( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_match_this REGEX_NVC6(<false_t>) ( param REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool iterative_match_this_c( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_match_this REGEX_NVC6(<true_t>) ( param REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
virtual bool iterative_rematch_this_s( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_rematch_this( param );
|
|
}
|
|
virtual bool iterative_rematch_this_c( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_rematch_this( param );
|
|
}
|
|
|
|
virtual bool is_assertion() const
|
|
{
|
|
return true;
|
|
}
|
|
virtual width_type width_this( width_param<IterT> & param )
|
|
{
|
|
// calculate the group's width and store it, but return zero_width
|
|
match_group_base<IterT>::width_this( param );
|
|
return zero_width;
|
|
}
|
|
virtual bool peek_this( peek_param<char_type> & peek ) const
|
|
{
|
|
return this->next()->peek_this( peek );
|
|
}
|
|
|
|
protected:
|
|
struct end_group : public indestructable_sub_expr<IterT, end_group>
|
|
{
|
|
virtual bool recursive_match_all_s( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return param.m_iend == icur;
|
|
}
|
|
virtual bool recursive_match_all_c( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return param.m_iend == icur;
|
|
}
|
|
virtual bool iterative_match_this_s( match_param<IterT> & param ) const
|
|
{
|
|
param.m_pnext = 0;
|
|
return param.m_iend == param.m_icur;
|
|
}
|
|
virtual bool iterative_match_this_c( match_param<IterT> & param ) const
|
|
{
|
|
param.m_pnext = 0;
|
|
return param.m_iend == param.m_icur;
|
|
}
|
|
virtual width_type width_this( width_param<IterT> & )
|
|
{
|
|
return zero_width;
|
|
}
|
|
} m_end_group;
|
|
|
|
virtual sub_expr<IterT> * _get_end_group()
|
|
{
|
|
return & m_end_group;
|
|
}
|
|
};
|
|
|
|
template< typename IterT >
|
|
class group_quantifier : public match_quantifier<IterT>
|
|
{
|
|
group_quantifier & operator=( group_quantifier const & );
|
|
|
|
bool _do_iterative_match_this( match_param<IterT> & param ) const
|
|
{
|
|
_push_frame( param );
|
|
param.m_pnext = this->m_psub->next(); // ptr to end_quant
|
|
return true;
|
|
}
|
|
bool _do_iterative_rematch_this( match_param<IterT> & param ) const
|
|
{
|
|
_pop_frame( param );
|
|
return false;
|
|
}
|
|
public:
|
|
group_quantifier
|
|
(
|
|
match_group_base<IterT> * psub,
|
|
size_t lbound,
|
|
size_t ubound,
|
|
sub_expr<IterT> * pend_quant
|
|
)
|
|
: match_quantifier<IterT>( psub, lbound, ubound )
|
|
, m_group( *psub )
|
|
{
|
|
*psub->pnext() = pend_quant;
|
|
}
|
|
|
|
// sub-classes of group_quantifer that own the end_quant
|
|
// object must declare a destructor, and it must call _cleanup
|
|
virtual ~group_quantifier() = 0;
|
|
|
|
virtual bool iterative_match_this_s( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_match_this( param );
|
|
}
|
|
virtual bool iterative_match_this_c( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_match_this( param );
|
|
}
|
|
virtual bool iterative_rematch_this_s( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_rematch_this( param );
|
|
}
|
|
virtual bool iterative_rematch_this_c( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_rematch_this( param );
|
|
}
|
|
|
|
protected:
|
|
struct old_quant
|
|
{
|
|
typedef typename backref_tag<IterT>::smart_iter_type smart_iter_type;
|
|
|
|
size_t reserved2;
|
|
bool reserved3;
|
|
smart_iter_type reserved4;
|
|
smart_iter_type reserved5;
|
|
|
|
old_quant()
|
|
{
|
|
}
|
|
old_quant( backref_tag<IterT> const & br )
|
|
: reserved2( br.reserved2 )
|
|
, reserved3( br.reserved3 )
|
|
, reserved4( br.reserved4 )
|
|
, reserved5( br.reserved5 )
|
|
{
|
|
}
|
|
};
|
|
|
|
void _push_frame( match_param<IterT> & param ) const
|
|
{
|
|
typedef typename backref_tag<IterT>::smart_iter_type smart_iter_type;
|
|
|
|
backref_tag<IterT> & br = param.m_prgbackrefs[ group_number() ];
|
|
old_quant old_qt( br );
|
|
param.m_pstack->push( old_qt );
|
|
|
|
br.reserved2 = 0; // nbr of times this group has matched
|
|
br.reserved3 = true; // toggle used for backtracking
|
|
br.reserved4 = static_init<smart_iter_type>::value;
|
|
br.reserved5 = static_init<smart_iter_type>::value;
|
|
}
|
|
|
|
void _pop_frame( match_param<IterT> & param ) const
|
|
{
|
|
backref_tag<IterT> & br = param.m_prgbackrefs[ group_number() ];
|
|
old_quant old_qt;
|
|
param.m_pstack->pop( old_qt );
|
|
|
|
br.reserved2 = old_qt.reserved2;
|
|
br.reserved3 = old_qt.reserved3;
|
|
br.reserved4 = old_qt.reserved4;
|
|
br.reserved5 = old_qt.reserved5;
|
|
}
|
|
|
|
size_t group_number() const
|
|
{
|
|
return m_group.group_number();
|
|
}
|
|
|
|
size_t & cmatches( match_param<IterT> & param ) const
|
|
{
|
|
return param.m_prgbackrefs[ group_number() ].reserved2;
|
|
}
|
|
|
|
typename backref_tag<IterT>::smart_iter_type & highwater1( match_param<IterT> & param ) const
|
|
{
|
|
return param.m_prgbackrefs[ group_number() ].reserved4;
|
|
}
|
|
|
|
typename backref_tag<IterT>::smart_iter_type & highwater2( match_param<IterT> & param ) const
|
|
{
|
|
return param.m_prgbackrefs[ group_number() ].reserved5;
|
|
}
|
|
|
|
match_group_base<IterT> const & m_group;
|
|
};
|
|
|
|
template< typename IterT >
|
|
inline group_quantifier<IterT>::~group_quantifier()
|
|
{
|
|
}
|
|
|
|
template< typename IterT >
|
|
class max_group_quantifier : public group_quantifier<IterT>
|
|
{
|
|
max_group_quantifier & operator=( max_group_quantifier const & );
|
|
|
|
template< typename CStringsT >
|
|
bool _do_recursive_match_all( match_param<IterT> & param, IterT icur REGEX_VC6(COMMA CStringsT) ) const
|
|
{
|
|
typedef typename backref_tag<IterT>::smart_iter_type smart_iter_type;
|
|
|
|
smart_iter_type old_highwater1 = this->highwater1( param );
|
|
smart_iter_type old_highwater2 = this->highwater2( param );
|
|
size_t old_cmatches = this->cmatches( param );
|
|
|
|
this->highwater1( param ) = static_init<smart_iter_type>::value;
|
|
this->highwater2( param ) = icur;
|
|
this->cmatches( param ) = 0;
|
|
|
|
if( _do_recurse REGEX_NVC6(<CStringsT>) ( param, icur REGEX_VC6(COMMA CStringsT()) ) )
|
|
return true;
|
|
|
|
this->cmatches( param ) = old_cmatches;
|
|
this->highwater2( param ) = old_highwater2;
|
|
this->highwater1( param ) = old_highwater1;
|
|
|
|
return false;
|
|
}
|
|
public:
|
|
max_group_quantifier( match_group_base<IterT> * psub, size_t lbound, size_t ubound )
|
|
: group_quantifier<IterT>( psub, lbound, ubound, & m_end_quant )
|
|
, m_end_quant( this )
|
|
{
|
|
}
|
|
|
|
virtual ~max_group_quantifier()
|
|
{
|
|
// Must call _cleanup() here before the end_quant object
|
|
// gets destroyed.
|
|
this->_cleanup();
|
|
}
|
|
|
|
virtual bool recursive_match_all_s( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return _do_recursive_match_all REGEX_NVC6(<false_t>) ( param, icur REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool recursive_match_all_c( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return _do_recursive_match_all REGEX_NVC6(<true_t>) ( param, icur REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
|
|
protected:
|
|
template< typename CStringsT >
|
|
bool _do_recurse( match_param<IterT> & param, IterT icur REGEX_VC6(COMMA CStringsT) ) const
|
|
{
|
|
if( this->m_ubound == this->cmatches( param ) )
|
|
return this->recursive_match_next( param, icur, CStringsT() );
|
|
|
|
++this->cmatches( param );
|
|
if( this->m_psub->recursive_match_all( param, icur, CStringsT() ) )
|
|
return true;
|
|
|
|
if( --this->cmatches( param ) < this->m_lbound )
|
|
return false;
|
|
|
|
return this->recursive_match_next( param, icur, CStringsT() );
|
|
}
|
|
|
|
class end_quantifier : public indestructable_sub_expr<IterT, end_quantifier>
|
|
{
|
|
max_group_quantifier<IterT> const *const m_pquant;
|
|
|
|
end_quantifier & operator=( end_quantifier const & );
|
|
|
|
void _push_frame( match_param<IterT> & param ) const
|
|
{
|
|
backref_tag<IterT> & br = param.m_prgbackrefs[ m_pquant->group_number() ];
|
|
param.m_pstack->push( br.reserved4 );
|
|
br.reserved4 = br.reserved5;
|
|
br.reserved5 = param.m_icur;
|
|
}
|
|
|
|
void _pop_frame( match_param<IterT> & param ) const
|
|
{
|
|
backref_tag<IterT> & br = param.m_prgbackrefs[ m_pquant->group_number() ];
|
|
br.reserved5 = br.reserved4;
|
|
param.m_pstack->pop( br.reserved4 );
|
|
}
|
|
|
|
template< typename CStringsT >
|
|
bool _do_recursive_match_all( match_param<IterT> & param, IterT icur REGEX_VC6(COMMA CStringsT) ) const
|
|
{
|
|
typedef typename backref_tag<IterT>::smart_iter_type smart_iter_type;
|
|
smart_iter_type old_highwater1 = m_pquant->highwater1( param );
|
|
|
|
if( icur == old_highwater1 )
|
|
return m_pquant->recursive_match_next( param, icur, CStringsT() );
|
|
|
|
m_pquant->highwater1( param ) = m_pquant->highwater2( param );
|
|
m_pquant->highwater2( param ) = icur;
|
|
|
|
if( m_pquant->REGEX_NVC6(template) _do_recurse REGEX_NVC6(<CStringsT>) ( param, icur REGEX_VC6(COMMA CStringsT()) ) )
|
|
return true;
|
|
|
|
m_pquant->highwater2( param ) = m_pquant->highwater1( param );
|
|
m_pquant->highwater1( param ) = old_highwater1;
|
|
|
|
return false;
|
|
}
|
|
bool _do_iterative_match_this( match_param<IterT> & param ) const
|
|
{
|
|
backref_tag<IterT> & br = param.m_prgbackrefs[ m_pquant->group_number() ];
|
|
|
|
// forcibly break the infinite loop
|
|
if( param.m_icur == br.reserved4 )
|
|
{
|
|
_push_frame( param );
|
|
param.m_pnext = m_pquant->next();
|
|
return true;
|
|
}
|
|
|
|
_push_frame( param );
|
|
|
|
// If we've matched the max nbr of times, move on to the next
|
|
// sub-expr.
|
|
if( m_pquant->m_ubound == br.reserved2 )
|
|
{
|
|
param.m_pnext = m_pquant->next();
|
|
br.reserved3 = false;
|
|
return true;
|
|
}
|
|
|
|
// Rematch the group.
|
|
br.reserved3 = true;
|
|
param.m_pnext = m_pquant->m_psub;
|
|
++br.reserved2;
|
|
return true;
|
|
}
|
|
bool _do_iterative_rematch_this( match_param<IterT> & param ) const
|
|
{
|
|
typedef typename backref_tag<IterT>::smart_iter_type smart_iter_type;
|
|
|
|
backref_tag<IterT> & br = param.m_prgbackrefs[ m_pquant->group_number() ];
|
|
|
|
// infinite loop forcibly broken
|
|
if( param.m_icur == param.m_pstack->REGEX_NVC6(template) top REGEX_NVC6(<smart_iter_type>) ( REGEX_VC6(type2type<smart_iter_type>()) ) )
|
|
{
|
|
_pop_frame( param );
|
|
return false;
|
|
}
|
|
|
|
if( br.reserved3 )
|
|
{
|
|
--br.reserved2;
|
|
param.m_pnext = m_pquant->next();
|
|
if( m_pquant->m_lbound <= br.reserved2 )
|
|
{
|
|
br.reserved3 = false;
|
|
return true;
|
|
}
|
|
_pop_frame( param );
|
|
return false;
|
|
}
|
|
br.reserved3 = true;
|
|
_pop_frame( param );
|
|
return false;
|
|
}
|
|
public:
|
|
end_quantifier( max_group_quantifier<IterT> const * pquant = 0 )
|
|
: m_pquant( pquant )
|
|
{
|
|
}
|
|
virtual bool recursive_match_all_s( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return _do_recursive_match_all REGEX_NVC6(<false_t>) ( param, icur REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool recursive_match_all_c( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return _do_recursive_match_all REGEX_NVC6(<true_t>) ( param, icur REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
virtual bool iterative_match_this_s( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_match_this( param );
|
|
}
|
|
virtual bool iterative_match_this_c( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_match_this( param );
|
|
}
|
|
virtual bool iterative_rematch_this_s( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_rematch_this( param );
|
|
}
|
|
virtual bool iterative_rematch_this_c( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_rematch_this( param );
|
|
}
|
|
virtual width_type width_this( width_param<IterT> & )
|
|
{
|
|
return zero_width;
|
|
}
|
|
} m_end_quant;
|
|
|
|
friend class end_quantifier;
|
|
};
|
|
|
|
template< typename IterT >
|
|
class min_group_quantifier : public group_quantifier<IterT>
|
|
{
|
|
min_group_quantifier & operator=( min_group_quantifier const & );
|
|
|
|
template< typename CStringsT >
|
|
bool _do_recursive_match_all( match_param<IterT> & param, IterT icur REGEX_VC6(COMMA CStringsT) ) const
|
|
{
|
|
typedef typename backref_tag<IterT>::smart_iter_type smart_iter_type;
|
|
|
|
smart_iter_type old_highwater1 = this->highwater1( param );
|
|
smart_iter_type old_highwater2 = this->highwater2( param );
|
|
size_t old_cmatches = this->cmatches( param );
|
|
|
|
this->highwater1( param ) = static_init<smart_iter_type>::value;
|
|
this->highwater2( param ) = icur;
|
|
this->cmatches( param ) = 0;
|
|
|
|
if( _do_recurse REGEX_NVC6(<CStringsT>) ( param, icur REGEX_VC6(COMMA CStringsT()) ) )
|
|
return true;
|
|
|
|
this->cmatches( param ) = old_cmatches;
|
|
this->highwater2( param ) = old_highwater2;
|
|
this->highwater1( param ) = old_highwater1;
|
|
|
|
return false;
|
|
}
|
|
public:
|
|
min_group_quantifier( match_group_base<IterT> * psub, size_t lbound, size_t ubound )
|
|
: group_quantifier<IterT>( psub, lbound, ubound, & m_end_quant )
|
|
, m_end_quant( this )
|
|
{
|
|
}
|
|
|
|
virtual ~min_group_quantifier()
|
|
{
|
|
// Must call _cleanup() here before the end_quant object
|
|
// gets destroyed.
|
|
this->_cleanup();
|
|
}
|
|
|
|
virtual bool recursive_match_all_s( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return _do_recursive_match_all REGEX_NVC6(<false_t>) ( param, icur REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool recursive_match_all_c( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return _do_recursive_match_all REGEX_NVC6(<true_t>) ( param, icur REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
|
|
protected:
|
|
|
|
template< typename CStringsT >
|
|
bool _do_recurse( match_param<IterT> & param, IterT icur REGEX_VC6(COMMA CStringsT) ) const
|
|
{
|
|
if( this->m_lbound <= this->cmatches( param ) )
|
|
{
|
|
if( this->recursive_match_next( param, icur, CStringsT() ) )
|
|
return true;
|
|
}
|
|
|
|
if( this->m_ubound > this->cmatches( param ) )
|
|
{
|
|
++this->cmatches( param );
|
|
if( this->m_psub->recursive_match_all( param, icur, CStringsT() ) )
|
|
return true;
|
|
--this->cmatches( param );
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
// Node that the owning min_group_quantifier passes to its base class as the
// quantifier's end node. It decides, via the owning quantifier (m_pquant),
// whether matching continues with the rest of the pattern or with another
// pass over the quantified group.
class end_quantifier : public indestructable_sub_expr<IterT, end_quantifier>
{
    // Back-pointer to the owning quantifier (may be 0 only for the default argument).
    min_group_quantifier<IterT> const *const m_pquant;

    // Not assignable (declared, never defined).
    end_quantifier & operator=( end_quantifier const & );

    // Saves reserved4 on the match stack, then shifts reserved5 -> reserved4
    // and records the current position in reserved5.
    void _push_frame( match_param<IterT> & param ) const
    {
        backref_tag<IterT> & slot = param.m_prgbackrefs[ m_pquant->group_number() ];
        param.m_pstack->push( slot.reserved4 );
        slot.reserved4 = slot.reserved5;
        slot.reserved5 = param.m_icur;
    }

    // Exact inverse of _push_frame.
    void _pop_frame( match_param<IterT> & param ) const
    {
        backref_tag<IterT> & slot = param.m_prgbackrefs[ m_pquant->group_number() ];
        slot.reserved5 = slot.reserved4;
        param.m_pstack->pop( slot.reserved4 );
    }

    // Recursive-mode handling of the end of one pass over the group.
    template< typename CStringsT >
    bool _do_recursive_match_all( match_param<IterT> & param, IterT icur REGEX_VC6(COMMA CStringsT) ) const
    {
        typedef typename backref_tag<IterT>::smart_iter_type smart_iter_type;

        smart_iter_type saved_hw1 = m_pquant->highwater1( param );

        // Position equals the saved highwater1 mark: do not loop again,
        // continue with whatever follows the quantifier.
        if( icur == saved_hw1 )
            return m_pquant->recursive_match_next( param, icur, CStringsT() );

        // Shift the marks: highwater2 -> highwater1, current position -> highwater2.
        m_pquant->highwater1( param ) = m_pquant->highwater2( param );
        m_pquant->highwater2( param ) = icur;

        bool const matched =
            m_pquant->REGEX_NVC6(template) _do_recurse REGEX_NVC6(<CStringsT>) ( param, icur REGEX_VC6(COMMA CStringsT()) );

        if( ! matched )
        {
            // Shift the marks back to their previous values.
            m_pquant->highwater2( param ) = m_pquant->highwater1( param );
            m_pquant->highwater1( param ) = saved_hw1;
        }
        return matched;
    }

    // Iterative-mode counterpart: always succeeds, selecting the next node
    // to run through param.m_pnext.
    bool _do_iterative_match_this( match_param<IterT> & param ) const
    {
        backref_tag<IterT> & slot = param.m_prgbackrefs[ m_pquant->group_number() ];

        // Must be evaluated before _push_frame, which overwrites reserved4.
        bool const stalled = ( param.m_icur == slot.reserved4 );

        _push_frame( param );

        // forcibly break the infinite loop
        if( stalled )
        {
            param.m_pnext = m_pquant->next();
            return true;
        }

        // Lower bound reached: leave the loop first. reserved3 == false
        // tells the rematch step that another repetition was not tried yet.
        if( m_pquant->m_lbound <= slot.reserved2 )
        {
            slot.reserved3 = false;
            param.m_pnext = m_pquant->next();
            return true;
        }

        // Below the lower bound: another repetition is mandatory.
        ++slot.reserved2;
        param.m_pnext = m_pquant->m_psub;
        return true;
    }

    // Backtracking step for _do_iterative_match_this. Returns true only when
    // it schedules one more repetition of the group; every other path pops
    // the frame and reports failure.
    bool _do_iterative_rematch_this( match_param<IterT> & param ) const
    {
        typedef typename backref_tag<IterT>::smart_iter_type smart_iter_type;

        backref_tag<IterT> & slot = param.m_prgbackrefs[ m_pquant->group_number() ];

        if( param.m_icur == param.m_pstack->REGEX_NVC6(template) top REGEX_NVC6(<smart_iter_type>) ( REGEX_VC6(type2type<smart_iter_type>()) ) )
        {
            // infinite loop forcibly broken: nothing else to undo
        }
        else if( slot.reserved3 )
        {
            // The previous step counted a repetition; take it back.
            --slot.reserved2;
        }
        else
        {
            slot.reserved3 = true;

            if( m_pquant->m_ubound > slot.reserved2 )
            {
                ++slot.reserved2;
                param.m_pnext = m_pquant->m_psub;
                return true;
            }
        }

        _pop_frame( param );
        return false;
    }

public:
    end_quantifier( min_group_quantifier<IterT> const * pquant = 0 )
        : m_pquant( pquant )
    {
    }

    virtual bool recursive_match_all_s( match_param<IterT> & param, IterT icur ) const
    {
        return _do_recursive_match_all REGEX_NVC6(<false_t>) ( param, icur REGEX_VC6(COMMA false_t()) );
    }

    virtual bool recursive_match_all_c( match_param<IterT> & param, IterT icur ) const
    {
        return _do_recursive_match_all REGEX_NVC6(<true_t>) ( param, icur REGEX_VC6(COMMA true_t()) );
    }

    // The _s and _c iterative variants share one implementation.
    virtual bool iterative_match_this_s( match_param<IterT> & param ) const
    {
        return _do_iterative_match_this( param );
    }

    virtual bool iterative_match_this_c( match_param<IterT> & param ) const
    {
        return _do_iterative_match_this( param );
    }

    virtual bool iterative_rematch_this_s( match_param<IterT> & param ) const
    {
        return _do_iterative_rematch_this( param );
    }

    virtual bool iterative_rematch_this_c( match_param<IterT> & param ) const
    {
        return _do_iterative_rematch_this( param );
    }

    // This node reports zero_width.
    virtual width_type width_this( width_param<IterT> & )
    {
        return zero_width;
    }
} m_end_quant;
|
|
|
|
friend class end_quantifier;
|
|
};
|
|
|
|
// Adjusts a back-reference number in place: walking invisible_groups in list
// order, the number is bumped by one for every entry that is less than or
// equal to its current (already bumped) value. The walk stops at the first
// entry that is greater.
inline void fixup_backref( size_t & cbackref, std::list<size_t> const & invisible_groups )
{
    typedef std::list<size_t>::const_iterator group_iter;

    group_iter pos = invisible_groups.begin();
    while( invisible_groups.end() != pos )
    {
        if( cbackref < *pos )
            break;
        ++cbackref;
        ++pos;
    }
}
|
|
|
|
template< typename IterT >
|
|
class match_backref : public sub_expr<IterT>
|
|
{
|
|
bool _do_iterative_rematch_this( match_param<IterT> & param ) const
|
|
{
|
|
typedef typename std::iterator_traits<IterT>::difference_type diff_type;
|
|
backref_tag<IterT> const & br = param.m_prgbackrefs[ m_nbackref ];
|
|
diff_type dist = std::distance( br.first, br.second );
|
|
std::advance( param.m_icur, -dist );
|
|
return false;
|
|
}
|
|
public:
|
|
match_backref( size_t nbackref )
|
|
: m_nbackref( nbackref )
|
|
{
|
|
}
|
|
|
|
// Return the width specifications of the group to which this backref refers
|
|
virtual width_type width_this( width_param<IterT> & param )
|
|
{
|
|
// fix up the backref to take into account the number of invisible groups
|
|
fixup_backref( m_nbackref, param.m_invisible_groups );
|
|
|
|
if( m_nbackref >= param.m_rggroups.size() )
|
|
throw bad_regexpr( "reference to nonexistent group" );
|
|
|
|
// If the entry in the backref vector has been nulled out, then we are
|
|
// calculating the width for this group.
|
|
if( 0 == param.m_rggroups[ m_nbackref ] )
|
|
return worst_width; // can't tell how wide this group will be. :-(
|
|
|
|
return param.m_rggroups[ m_nbackref ]->width_this( param );
|
|
}
|
|
virtual bool iterative_rematch_this_s( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_rematch_this( param );
|
|
}
|
|
virtual bool iterative_rematch_this_c( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_rematch_this( param );
|
|
}
|
|
protected:
|
|
size_t m_nbackref;
|
|
};
|
|
|
|
template< typename CmpT, typename IterT >
|
|
class match_backref_t : public match_backref<IterT>
|
|
{
|
|
public:
|
|
match_backref_t( size_t nbackref )
|
|
: match_backref<IterT>( nbackref )
|
|
{
|
|
}
|
|
virtual sub_expr<IterT> * quantify( size_t lbound, size_t ubound, bool greedy, regex_arena & arena )
|
|
{
|
|
if( greedy )
|
|
return new( arena ) max_atom_quantifier<IterT, match_backref_t<CmpT, IterT> >( this, lbound, ubound );
|
|
else
|
|
return new( arena ) min_atom_quantifier<IterT, match_backref_t<CmpT, IterT> >( this, lbound, ubound );
|
|
}
|
|
virtual bool recursive_match_all_s( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return ( match_backref_t::recursive_match_this_s( param, icur ) && this->recursive_match_next( param, icur, false_t() ) );
|
|
}
|
|
virtual bool recursive_match_all_c( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return ( match_backref_t::recursive_match_this_c( param, icur ) && this->recursive_match_next( param, icur, true_t() ) );
|
|
}
|
|
virtual bool recursive_match_this_s( match_param<IterT> & param, IterT & icur ) const
|
|
{
|
|
return _do_match_this REGEX_NVC6(<false_t>) ( param, icur REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool recursive_match_this_c( match_param<IterT> & param, IterT & icur ) const
|
|
{
|
|
return _do_match_this REGEX_NVC6(<true_t>) ( param, icur REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
virtual bool iterative_match_this_s( match_param<IterT> & param ) const
|
|
{
|
|
param.m_pnext = this->next();
|
|
return _do_match_this REGEX_NVC6(<false_t>) ( param, param.m_icur REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool iterative_match_this_c( match_param<IterT> & param ) const
|
|
{
|
|
param.m_pnext = this->next();
|
|
return _do_match_this REGEX_NVC6(<true_t>) ( param, param.m_icur REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
protected:
|
|
template< typename CStringsT >
|
|
bool _do_match_this( match_param<IterT> & param, IterT & icur REGEX_VC6(COMMA CStringsT) ) const
|
|
{
|
|
// Pattern compilation should have failed if the following is false:
|
|
REGEX_ASSERT( this->m_nbackref < param.m_cbackrefs );
|
|
|
|
// Don't match a backref that hasn't match anything
|
|
if( ! param.m_prgbackrefs[ this->m_nbackref ].matched )
|
|
return false;
|
|
|
|
IterT ithis = param.m_prgbackrefs[ this->m_nbackref ].first;
|
|
IterT const iend = param.m_prgbackrefs[ this->m_nbackref ].second;
|
|
IterT icur_tmp = icur;
|
|
|
|
for( ; iend != ithis; ++icur_tmp, ++ithis )
|
|
{
|
|
if( eos_t<CStringsT>::eval( param, icur_tmp ) || CmpT::eval( *icur_tmp, *ithis ) )
|
|
return false;
|
|
}
|
|
icur = icur_tmp;
|
|
return true;
|
|
}
|
|
};
|
|
|
|
template< typename IterT >
|
|
inline match_backref<IterT> * create_backref(
|
|
size_t cbackref,
|
|
REGEX_FLAGS flags, regex_arena & arena )
|
|
{
|
|
typedef typename std::iterator_traits<IterT>::value_type char_type;
|
|
|
|
switch( NOCASE & flags )
|
|
{
|
|
case 0:
|
|
return new( arena ) match_backref_t<ch_neq_t<char_type>, IterT>( cbackref );
|
|
case NOCASE:
|
|
return new( arena ) match_backref_t<ch_neq_nocase_t<char_type>, IterT>( cbackref );
|
|
default:
|
|
REGEX_ASSERT(false);
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
template< typename IterT >
|
|
class match_recurse : public sub_expr<IterT>
|
|
{
|
|
match_recurse & operator=( match_recurse const & );
|
|
|
|
void _push_frame( match_param<IterT> & param ) const
|
|
{
|
|
typedef typename match_param<IterT>::backref_type backref_type;
|
|
unsafe_stack * pstack = param.m_pstack;
|
|
backref_type * ibegin = param.m_prgbackrefs;
|
|
backref_type * iend = ibegin + param.m_cbackrefs;
|
|
for( ; iend != ibegin; ++ibegin )
|
|
{
|
|
pstack->push( ibegin->reserved1 );
|
|
}
|
|
}
|
|
|
|
void _pop_frame( match_param<IterT> & param ) const
|
|
{
|
|
typedef typename match_param<IterT>::backref_type backref_type;
|
|
unsafe_stack * pstack = param.m_pstack;
|
|
backref_type * ibegin = param.m_prgbackrefs;
|
|
backref_type * iend = ibegin + param.m_cbackrefs;
|
|
while( iend != ibegin )
|
|
{
|
|
--iend;
|
|
pstack->pop( iend->reserved1 );
|
|
}
|
|
}
|
|
|
|
template< typename CStringsT >
|
|
bool _do_recursive_match_all( match_param<IterT> & param, IterT icur REGEX_VC6(COMMA CStringsT) ) const
|
|
{
|
|
// Prevent infinite recursion. If icur == param.m_prgbackrefs[ 0 ].reserved1,
|
|
// then the pattern has eaten 0 chars to date, and we would recurse forever.
|
|
if( icur == param.m_prgbackrefs[ 0 ].reserved1 )
|
|
return this->recursive_match_next( param, icur, CStringsT() );
|
|
|
|
// copy the backref vector onto the stack
|
|
IterT * prgci = static_cast<IterT*>( alloca( param.m_cbackrefs * sizeof( IterT ) ) );
|
|
save_backrefs<IterT>( param.m_prgbackrefs, param.m_prgbackrefs + param.m_cbackrefs, prgci );
|
|
|
|
// Recurse.
|
|
if( param.m_pfirst->recursive_match_all( param, icur, CStringsT() ) )
|
|
{
|
|
// Restore the backref vector
|
|
restore_backrefs<IterT>( param.m_prgbackrefs, param.m_prgbackrefs + param.m_cbackrefs, prgci );
|
|
|
|
// Recursive match succeeded. Try to match the rest of the pattern
|
|
// using the end of the recursive match as the start of the next
|
|
return this->recursive_match_next( param, param.m_prgbackrefs[ 0 ].second, CStringsT() );
|
|
}
|
|
|
|
// Recursion failed
|
|
std::for_each( prgci, prgci + param.m_cbackrefs, deleter() );
|
|
return false;
|
|
}
|
|
template< typename CStringsT >
|
|
bool _do_iterative_match_this( match_param<IterT> & param REGEX_VC6(COMMA CStringsT) ) const
|
|
{
|
|
param.m_pstack->push( param.m_icur );
|
|
|
|
// Prevent infine recursion
|
|
if( param.m_icur == param.m_prgbackrefs[ 0 ].reserved1 )
|
|
{
|
|
param.m_pnext = this->next();
|
|
return true;
|
|
}
|
|
|
|
_push_frame( param );
|
|
|
|
if( _do_match_iterative( param.m_pfirst, param, param.m_icur, CStringsT() ) )
|
|
{
|
|
_pop_frame( param );
|
|
param.m_pnext = this->next();
|
|
return true;
|
|
}
|
|
|
|
_pop_frame( param );
|
|
param.m_pstack->pop( param.m_icur );
|
|
return false;
|
|
}
|
|
bool _do_iterative_rematch_this( match_param<IterT> & param ) const
|
|
{
|
|
param.m_pstack->pop( param.m_icur );
|
|
return false;
|
|
}
|
|
public:
|
|
match_recurse()
|
|
{
|
|
}
|
|
|
|
virtual sub_expr<IterT> * quantify( size_t, size_t, bool, regex_arena & )
|
|
{
|
|
throw bad_regexpr( "recursion sub-expression cannot be quantified" );
|
|
}
|
|
|
|
virtual bool recursive_match_all_s( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return _do_recursive_match_all REGEX_NVC6(<false_t>) ( param, icur REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool recursive_match_all_c( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return _do_recursive_match_all REGEX_NVC6(<true_t>) ( param, icur REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
virtual bool iterative_match_this_s( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_match_this REGEX_NVC6(<false_t>) ( param REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool iterative_match_this_c( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_match_this REGEX_NVC6(<true_t>) ( param REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
virtual bool iterative_rematch_this_s( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_rematch_this( param );
|
|
}
|
|
virtual bool iterative_rematch_this_c( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_rematch_this( param );
|
|
}
|
|
virtual width_type width_this( width_param<IterT> & )
|
|
{
|
|
return worst_width;
|
|
}
|
|
};
|
|
|
|
template< typename IterT >
|
|
inline match_recurse<IterT> * create_recurse( regex_arena & arena )
|
|
{
|
|
return new( arena ) match_recurse<IterT>();
|
|
}
|
|
|
|
template< typename IterT >
|
|
struct backref_condition
|
|
{
|
|
size_t m_cbackref;
|
|
|
|
backref_condition( size_t cbackref )
|
|
: m_cbackref( cbackref )
|
|
{
|
|
}
|
|
|
|
template< typename CStringsT >
|
|
bool recursive_match_this( match_param<IterT> & param, IterT, CStringsT ) const
|
|
{
|
|
return m_cbackref < param.m_cbackrefs && param.m_prgbackrefs[ m_cbackref ].matched;
|
|
}
|
|
template< typename CStringsT >
|
|
bool iterative_match_this( match_param<IterT> & param, CStringsT ) const
|
|
{
|
|
return m_cbackref < param.m_cbackrefs && param.m_prgbackrefs[ m_cbackref ].matched;
|
|
}
|
|
template< typename CStringsT >
|
|
bool iterative_rematch_this( match_param<IterT> &, CStringsT ) const
|
|
{
|
|
return false;
|
|
}
|
|
void width_this( width_param<IterT> & param )
|
|
{
|
|
// fix up the backref to take into account the number of invisible groups
|
|
fixup_backref( m_cbackref, param.m_invisible_groups );
|
|
}
|
|
};
|
|
|
|
template< typename IterT >
|
|
struct assertion_condition
|
|
{
|
|
std::auto_ptr<match_group_base<IterT> > m_passert;
|
|
|
|
assertion_condition( match_group_base<IterT> * passert , regex_arena & arena )
|
|
: m_passert( passert )
|
|
{
|
|
*passert->pnext() = new( arena ) end_of_pattern<IterT>;
|
|
}
|
|
|
|
bool recursive_match_this( match_param<IterT> & param, IterT icur, false_t ) const
|
|
{
|
|
return m_passert->recursive_match_all_s( param, icur );
|
|
}
|
|
bool recursive_match_this( match_param<IterT> & param, IterT icur, true_t ) const
|
|
{
|
|
return m_passert->recursive_match_all_c( param, icur );
|
|
}
|
|
bool iterative_match_this( match_param<IterT> & param, false_t ) const
|
|
{
|
|
return m_passert->iterative_match_this_s( param );
|
|
}
|
|
bool iterative_match_this( match_param<IterT> & param, true_t ) const
|
|
{
|
|
return m_passert->iterative_match_this_c( param );
|
|
}
|
|
bool iterative_rematch_this( match_param<IterT> & param, false_t ) const
|
|
{
|
|
return m_passert->iterative_rematch_this_s( param );
|
|
}
|
|
bool iterative_rematch_this( match_param<IterT> & param, true_t ) const
|
|
{
|
|
return m_passert->iterative_rematch_this_c( param );
|
|
}
|
|
void width_this( width_param<IterT> & param )
|
|
{
|
|
( void ) m_passert->width_this( param );
|
|
}
|
|
};
|
|
|
|
template< typename IterT, typename CondT >
|
|
class match_conditional : public match_group<IterT>
|
|
{
|
|
protected:
|
|
typedef typename match_group<IterT>::alt_list_type alt_list_type;
|
|
|
|
private:
|
|
match_conditional & operator=( match_conditional const & );
|
|
|
|
template< typename CStringsT >
|
|
bool _do_recursive_match_all( match_param<IterT> & param, IterT icur REGEX_VC6(COMMA CStringsT) ) const
|
|
{
|
|
typedef typename alt_list_type::const_iterator iter_type;
|
|
iter_type ialt = this->m_rgalternates.begin();
|
|
|
|
if( m_condition.recursive_match_this( param, icur, CStringsT() ) || this->m_rgalternates.end() != ++ialt )
|
|
{
|
|
return (*ialt)->recursive_match_all( param, icur, CStringsT() );
|
|
}
|
|
return this->recursive_match_next( param, icur, CStringsT() );
|
|
}
|
|
template< typename CStringsT >
|
|
bool _do_iterative_match_this( match_param<IterT> & param REGEX_VC6(COMMA CStringsT) ) const
|
|
{
|
|
typedef typename alt_list_type::const_iterator iter_type;
|
|
iter_type ialt = this->m_rgalternates.begin();
|
|
|
|
if( m_condition.iterative_match_this( param, CStringsT() ) )
|
|
{
|
|
param.m_pstack->push( true );
|
|
param.m_pnext = *ialt;
|
|
return true;
|
|
}
|
|
param.m_pstack->push( false );
|
|
param.m_pnext = ( this->m_rgalternates.end() != ++ialt ) ? *ialt : this->next();
|
|
return true;
|
|
}
|
|
template< typename CStringsT >
|
|
bool _do_iterative_rematch_this( match_param<IterT> & param REGEX_VC6(COMMA CStringsT) ) const
|
|
{
|
|
bool condition;
|
|
param.m_pstack->pop( condition );
|
|
if( condition )
|
|
m_condition.iterative_rematch_this( param, CStringsT() );
|
|
return false;
|
|
}
|
|
public:
|
|
typedef CondT condition_type;
|
|
|
|
match_conditional( size_t cgroup, condition_type condition, regex_arena & arena )
|
|
: match_group<IterT>( cgroup, arena )
|
|
, m_condition( condition )
|
|
{
|
|
}
|
|
|
|
virtual bool recursive_match_all_s( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return _do_recursive_match_all REGEX_NVC6(<false_t>) ( param, icur REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool recursive_match_all_c( match_param<IterT> & param, IterT icur ) const
|
|
{
|
|
return _do_recursive_match_all REGEX_NVC6(<true_t>) ( param, icur REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
virtual bool iterative_match_this_s( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_match_this REGEX_NVC6(<false_t>) ( param REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool iterative_match_this_c( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_match_this REGEX_NVC6(<true_t>) ( param REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
virtual bool iterative_rematch_this_s( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_rematch_this REGEX_NVC6(<false_t>) ( param REGEX_VC6(COMMA false_t()) );
|
|
}
|
|
virtual bool iterative_rematch_this_c( match_param<IterT> & param ) const
|
|
{
|
|
return _do_iterative_rematch_this REGEX_NVC6(<true_t>) ( param REGEX_VC6(COMMA true_t()) );
|
|
}
|
|
virtual width_type width_this( width_param<IterT> & param )
|
|
{
|
|
typedef typename alt_list_type::const_iterator iter_type;
|
|
iter_type ialt = this->m_rgalternates.begin();
|
|
|
|
width_type width = ( *ialt )->get_width( param );
|
|
|
|
if( this->m_rgalternates.end() != ++ialt )
|
|
{
|
|
width_type temp_width = ( *ialt )->get_width( param );
|
|
width.m_min = regex_min( width.m_min, temp_width.m_min );
|
|
width.m_max = regex_max( width.m_max, temp_width.m_max );
|
|
}
|
|
else
|
|
{
|
|
width.m_min = 0;
|
|
}
|
|
|
|
// Have the condition calculate its width, too. This is important
|
|
// if the condition is a lookbehind assertion.
|
|
m_condition.width_this( param );
|
|
|
|
return this->m_nwidth = width;
|
|
}
|
|
|
|
protected:
|
|
condition_type m_condition;
|
|
};
|
|
|
|
template< typename IterT >
|
|
inline match_conditional<IterT, backref_condition<IterT> > * create_backref_conditional(
|
|
size_t cgroup,
|
|
size_t cbackref,
|
|
regex_arena & arena )
|
|
{
|
|
backref_condition<IterT> cond( cbackref );
|
|
return new( arena ) match_conditional<IterT, backref_condition<IterT> >(
|
|
cgroup, cond, arena );
|
|
}
|
|
|
|
template< typename IterT >
|
|
inline match_conditional<IterT, assertion_condition<IterT> > * create_assertion_conditional(
|
|
size_t cgroup,
|
|
match_group_base<IterT> * passert,
|
|
regex_arena & arena )
|
|
{
|
|
assertion_condition<IterT> cond( passert, arena );
|
|
return new( arena ) match_conditional<IterT, assertion_condition<IterT> >(
|
|
cgroup, cond, arena );
|
|
}
|
|
|
|
//
|
|
// From basic_rpattern_base_impl
|
|
//
|
|
|
|
template< typename IterT >
|
|
REGEXPR_H_INLINE bool basic_rpattern_base_impl<IterT>::_ok_to_recurse() const //throw()
|
|
{
|
|
switch( m_mode )
|
|
{
|
|
case MODE_FAST:
|
|
return true;
|
|
case MODE_SAFE:
|
|
return false;
|
|
case MODE_MIXED:
|
|
return m_fok_to_recurse;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
template< typename IterT >
|
|
REGEXPR_H_INLINE void basic_rpattern_base_impl<IterT>::swap( basic_rpattern_base_impl<IterT> & that ) // throw()
|
|
{
|
|
using std::swap;
|
|
swap( m_fuses_backrefs, that.m_fuses_backrefs );
|
|
swap( m_floop, that.m_floop );
|
|
swap( m_fok_to_recurse, that.m_fok_to_recurse );
|
|
swap( m_cgroups, that.m_cgroups );
|
|
swap( m_cgroups_visible, that.m_cgroups_visible );
|
|
swap( m_flags, that.m_flags );
|
|
swap( m_mode, that.m_mode );
|
|
swap( m_nwidth, that.m_nwidth );
|
|
swap( m_pfirst, that.m_pfirst );
|
|
swap( m_search, that.m_search );
|
|
|
|
swap_auto_ptr( m_pat, that.m_pat );
|
|
swap_auto_ptr( m_subst, that.m_subst );
|
|
|
|
m_subst_list.swap( that.m_subst_list );
|
|
m_invisible_groups.swap( that.m_invisible_groups );
|
|
m_arena.swap( that.m_arena );
|
|
}
|
|
|
|
// A helper class for automatically deallocating the arena when
|
|
// parsing the pattern results in an exception
|
|
class arena_guard
|
|
{
|
|
arena_guard( arena_guard const & );
|
|
arena_guard & operator=( arena_guard const & );
|
|
regex_arena * m_parena;
|
|
public:
|
|
explicit arena_guard( regex_arena & arena )
|
|
: m_parena( &arena )
|
|
{
|
|
}
|
|
~arena_guard()
|
|
{
|
|
if( m_parena )
|
|
m_parena->clear();
|
|
}
|
|
void dismiss()
|
|
{
|
|
m_parena = 0;
|
|
}
|
|
};
|
|
|
|
// Maps an iterator category tag to a compile-time flag.
// Primary template: any tag is assumed not to be random access.
template< typename TagT >
struct is_random_access_helper
{
    enum { value = false };
};

// Exact match on std::random_access_iterator_tag: random access.
template<>
struct is_random_access_helper<std::random_access_iterator_tag>
{
    enum { value = true };
};
|
|
template< typename IterT >
|
|
struct is_random_access
|
|
{
|
|
typedef typename std::iterator_traits<IterT>::iterator_category cat_type;
|
|
enum { value = is_random_access_helper<cat_type>::value };
|
|
};
|
|
|
|
} // namespace detail
|
|
|
|
//
|
|
// Implementation of basic_rpattern_base:
|
|
//
|
|
|
|
template< typename IterT, typename SyntaxT >
|
|
REGEXPR_H_INLINE void basic_rpattern_base<IterT, SyntaxT>::init( string_type const & pat, REGEX_FLAGS flags, REGEX_MODE mode )
|
|
{
|
|
basic_rpattern_base<IterT, SyntaxT> temp( pat, flags, mode );
|
|
swap( temp );
|
|
}
|
|
|
|
template< typename IterT, typename SyntaxT >
|
|
REGEXPR_H_INLINE void basic_rpattern_base<IterT, SyntaxT>::init( string_type const & pat, string_type const & subst, REGEX_FLAGS flags, REGEX_MODE mode )
|
|
{
|
|
basic_rpattern_base<IterT, SyntaxT> temp( pat, subst, flags, mode );
|
|
swap( temp );
|
|
}
|
|
|
|
template< typename IterT, typename SyntaxT >
|
|
REGEXPR_H_INLINE void basic_rpattern_base<IterT, SyntaxT>::_common_init( REGEX_FLAGS flags )
|
|
{
|
|
this->m_cgroups = 0;
|
|
std::vector<detail::match_group_base<IterT>*> rggroups;
|
|
typename string_type::iterator ipat = this->m_pat->begin();
|
|
syntax_type sy( flags );
|
|
detail::match_group_base<IterT> * pgroup;
|
|
|
|
// Set up a sentry that will free the arena memory
|
|
// automatically on parse failure.
|
|
{
|
|
detail::arena_guard guard( this->m_arena );
|
|
|
|
// This will throw on failure
|
|
pgroup = _find_next_group( ipat, 0, sy, rggroups );
|
|
|
|
// terminate the pattern with the end_of_pattern marker
|
|
*pgroup->pnext() = new( this->m_arena ) detail::end_of_pattern<IterT>;
|
|
|
|
// The parse was successful. Dismiss the parse sentry
|
|
guard.dismiss();
|
|
}
|
|
|
|
REGEX_ASSERT( 0 == m_pfirst );
|
|
m_pfirst = pgroup;
|
|
|
|
// Calculate the width of the pattern and all groups
|
|
this->m_nwidth = pgroup->group_width( rggroups, m_invisible_groups );
|
|
|
|
//
|
|
// determine if we can get away with only calling m_pfirst->recursive_match_all only once
|
|
//
|
|
|
|
this->m_floop = true;
|
|
|
|
// Optimization: if first character of pattern string is '^'
|
|
// and we are not doing a multiline match, then we only
|
|
// need to try recursive_match_all once
|
|
typename string_type::iterator icur = this->m_pat->begin();
|
|
if( MULTILINE != ( MULTILINE & this->m_flags ) &&
|
|
1 == pgroup->calternates() &&
|
|
this->m_pat->end() != icur &&
|
|
BEGIN_LINE == sy.reg_token( icur, this->m_pat->end() ) )
|
|
{
|
|
this->m_flags = ( REGEX_FLAGS ) ( m_flags & ~RIGHTMOST );
|
|
this->m_floop = false;
|
|
}
|
|
|
|
// Optimization: if first 2 characters of pattern string are ".*" or ".+",
|
|
// then we only need to try recursive_match_all once
|
|
icur = this->m_pat->begin();
|
|
if( RIGHTMOST != ( RIGHTMOST & this->m_flags ) &&
|
|
SINGLELINE == ( SINGLELINE & this->m_flags ) &&
|
|
1 == pgroup->calternates() &&
|
|
this->m_pat->end() != icur &&
|
|
MATCH_ANY == sy.reg_token( icur, this->m_pat->end() ) &&
|
|
this->m_pat->end() != icur )
|
|
{
|
|
switch( sy.quant_token( icur, this->m_pat->end() ) )
|
|
{
|
|
case ONE_OR_MORE:
|
|
case ZERO_OR_MORE:
|
|
case ONE_OR_MORE_MIN:
|
|
case ZERO_OR_MORE_MIN:
|
|
this->m_floop = false;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
template< typename IterT, typename SyntaxT >
|
|
REGEXPR_H_INLINE void basic_rpattern_base<IterT, SyntaxT>::set_substitution( string_type const & subst )
|
|
{
|
|
using std::swap;
|
|
std::auto_ptr<string_type> temp_subst( new string_type( subst ) );
|
|
detail::subst_list_type temp_subst_list;
|
|
bool uses_backrefs = false;
|
|
|
|
_normalize_string( *temp_subst );
|
|
basic_rpattern_base<IterT, SyntaxT>::_parse_subst( *temp_subst, uses_backrefs, temp_subst_list );
|
|
|
|
detail::swap_auto_ptr( temp_subst, this->m_subst );
|
|
swap( uses_backrefs, this->m_fuses_backrefs );
|
|
temp_subst_list.swap( this->m_subst_list );
|
|
}
|
|
|
|
template< typename IterT, typename SyntaxT >
|
|
inline detail::match_group_base<IterT> * basic_rpattern_base<IterT, SyntaxT>::_find_next_group(
|
|
typename string_type::iterator & ipat,
|
|
detail::match_group_base<IterT> * pgroup_enclosing, syntax_type & sy,
|
|
std::vector<detail::match_group_base<IterT>*> & rggroups )
|
|
{
|
|
std::auto_ptr<detail::match_group_base<IterT> > pgroup;
|
|
typename string_type::iterator itemp = ipat;
|
|
REGEX_FLAGS old_flags = sy.get_flags();
|
|
TOKEN tok = NO_TOKEN;
|
|
size_t extent_start = this->m_cgroups;
|
|
bool fconditional = false;
|
|
|
|
// Look for group extensions.
|
|
if( this->m_pat->end() != ipat && NO_TOKEN != ( tok = sy.ext_token( ipat, this->m_pat->end() ) ) )
|
|
{
|
|
if( this->m_pat->begin() == itemp || this->m_pat->end() == ipat )
|
|
throw bad_regexpr( "ill-formed regular expression" );
|
|
|
|
// Is this a recursion element?
|
|
if( EXT_RECURSE == tok )
|
|
{
|
|
pgroup_enclosing->add_item( detail::create_recurse<IterT>( this->m_arena ) );
|
|
|
|
// This pattern could recurse deeply. Note that fact here so that
|
|
// we can opt to use a stack-conservative algorithm at match time.
|
|
this->m_fok_to_recurse = false;
|
|
}
|
|
|
|
// Don't process empty groups like (?:) or (?i) or (?R)
|
|
if( END_GROUP != sy.reg_token( itemp = ipat, this->m_pat->end() ) )
|
|
{
|
|
switch( tok )
|
|
{
|
|
case EXT_NOBACKREF:
|
|
// note that this group is not visible, so we can fix
|
|
// up offsets into the backref vector later
|
|
m_invisible_groups.push_back( this->m_cgroups );
|
|
detail::reset_auto_ptr( pgroup, new( this->m_arena ) detail::match_group<IterT>( _get_next_group_nbr(), this->m_arena ) );
|
|
break;
|
|
|
|
case EXT_INDEPENDENT:
|
|
m_invisible_groups.push_back( this->m_cgroups );
|
|
detail::reset_auto_ptr( pgroup, new( this->m_arena ) detail::independent_group<IterT>( _get_next_group_nbr(), this->m_arena ) );
|
|
break;
|
|
|
|
case EXT_POS_LOOKAHEAD:
|
|
detail::reset_auto_ptr( pgroup, new( this->m_arena ) detail::lookahead_assertion<IterT>( true, this->m_arena ) );
|
|
break;
|
|
|
|
case EXT_NEG_LOOKAHEAD:
|
|
detail::reset_auto_ptr( pgroup, new( this->m_arena ) detail::lookahead_assertion<IterT>( false, this->m_arena ) );
|
|
break;
|
|
|
|
case EXT_POS_LOOKBEHIND:
|
|
detail::reset_auto_ptr( pgroup, new( this->m_arena ) detail::lookbehind_assertion<IterT>( true, this->m_arena ) );
|
|
break;
|
|
|
|
case EXT_NEG_LOOKBEHIND:
|
|
detail::reset_auto_ptr( pgroup, new( this->m_arena ) detail::lookbehind_assertion<IterT>( false, this->m_arena ) );
|
|
break;
|
|
|
|
case EXT_CONDITION:
|
|
fconditional = true;
|
|
m_invisible_groups.push_back( this->m_cgroups );
|
|
|
|
if( size_t cbackref = detail::parse_int( ipat, this->m_pat->end() ) &&
|
|
END_GROUP == sy.reg_token( ipat, this->m_pat->end() ) )
|
|
{
|
|
detail::reset_auto_ptr(
|
|
pgroup, detail::create_backref_conditional<IterT>(
|
|
_get_next_group_nbr(), cbackref, this->m_arena ) );
|
|
}
|
|
else
|
|
{
|
|
switch( sy.ext_token( itemp = ipat, this->m_pat->end() ) )
|
|
{
|
|
case EXT_POS_LOOKAHEAD:
|
|
case EXT_NEG_LOOKAHEAD:
|
|
case EXT_POS_LOOKBEHIND:
|
|
case EXT_NEG_LOOKBEHIND:
|
|
{
|
|
std::auto_ptr<detail::match_group_base<IterT> > pgroup_tmp(
|
|
_find_next_group( ipat, 0, sy, rggroups ) );
|
|
detail::reset_auto_ptr(
|
|
pgroup, detail::create_assertion_conditional<IterT>(
|
|
_get_next_group_nbr(), pgroup_tmp.get(), this->m_arena ) );
|
|
pgroup_tmp.release();
|
|
}
|
|
break;
|
|
default:
|
|
throw bad_regexpr( "bad extension sequence" );
|
|
}
|
|
}
|
|
break;
|
|
|
|
case EXT_COMMENT:
|
|
while( END_GROUP != ( tok = sy.reg_token( ipat, this->m_pat->end() ) ) )
|
|
{
|
|
if( NO_TOKEN == tok && this->m_pat->end() != ipat )
|
|
++ipat;
|
|
if( this->m_pat->end() == ipat )
|
|
throw bad_regexpr( "Expecting end of comment" );
|
|
}
|
|
break;
|
|
|
|
default:
|
|
throw bad_regexpr( "bad extension sequence" );
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// Skip over the END_GROUP token
|
|
ipat = itemp;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
detail::reset_auto_ptr( pgroup, new( this->m_arena ) detail::match_group<IterT>( _get_next_group_nbr(), this->m_arena ) );
|
|
++this->m_cgroups_visible;
|
|
}
|
|
|
|
if( 0 != pgroup.get() )
|
|
{
|
|
detail::must_have<char_type> must;
|
|
|
|
pgroup->open_group();
|
|
while( _find_next( ipat, pgroup.get(), sy, rggroups ) ) {}
|
|
must = pgroup->close_group( this->m_arena );
|
|
|
|
// if this is a conditional group, then there must be at
|
|
// most 2 alternates.
|
|
if( fconditional && 2 < pgroup->calternates() )
|
|
throw bad_regexpr( "Too many alternates in conditional subexpression" );
|
|
|
|
// if this is the top-level group and it returned a "must have"
|
|
// string, then use that to initialize a boyer-moore search structure
|
|
if( detail::is_random_access<IterT>::value && must.m_has && 0 == pgroup->group_number() )
|
|
{
|
|
typedef typename string_type::const_iterator iter_type;
|
|
m_search = new( this->m_arena ) detail::boyer_moore<iter_type>
|
|
( must.m_begin, must.m_end, must.m_lower );
|
|
}
|
|
|
|
// Add this group to the rggroups array
|
|
if( size_t( -1 ) != pgroup->group_number() )
|
|
{
|
|
if( pgroup->group_number() >= rggroups.size() )
|
|
rggroups.resize( pgroup->group_number() + 1, 0 );
|
|
rggroups[ pgroup->group_number() ] = pgroup.get();
|
|
}
|
|
|
|
// tell this group how many groups are contained within it
|
|
pgroup->set_extent( detail::extent_type( extent_start, this->m_cgroups - extent_start ) );
|
|
|
|
// If this is not a pattern modifier, restore the
|
|
// flags to their previous settings. This causes
|
|
// pattern modifiers to have the scope of their
|
|
// enclosing group.
|
|
sy.set_flags( old_flags );
|
|
}
|
|
|
|
return pgroup.release();
|
|
}
|
|
|
|
namespace detail
|
|
{
|
|
|
|
// If we reached the end of the string before finding the end of the
|
|
// character set, then this is an ill-formed regex
|
|
template< typename IterT >
|
|
inline void check_iter( IterT icur, IterT iend )
|
|
{
|
|
if( iend == icur )
|
|
throw bad_regexpr( "expecting end of character set" );
|
|
}
|
|
|
|
template< typename IBeginT, typename IEndT >
|
|
inline typename std::iterator_traits<IEndT>::value_type get_escaped_char( IBeginT & icur, IEndT iend, bool normalize )
|
|
{
|
|
typedef typename std::iterator_traits<IEndT>::value_type char_type;
|
|
char_type ch = 0, i;
|
|
check_iter<IEndT>( icur, iend );
|
|
|
|
switch( *icur )
|
|
{
|
|
// octal escape sequence
|
|
case REGEX_CHAR(char_type,'0'): case REGEX_CHAR(char_type,'1'): case REGEX_CHAR(char_type,'2'): case REGEX_CHAR(char_type,'3'):
|
|
case REGEX_CHAR(char_type,'4'): case REGEX_CHAR(char_type,'5'): case REGEX_CHAR(char_type,'6'): case REGEX_CHAR(char_type,'7'):
|
|
ch = char_type( *icur++ - REGEX_CHAR(char_type,'0') );
|
|
for( i=0; i<2 && REGEX_CHAR(char_type,'0') <= *icur && REGEX_CHAR(char_type,'7') >= *icur; check_iter<IEndT>( ++icur, iend ) )
|
|
ch = char_type( ch * 8 + ( *icur - REGEX_CHAR(char_type,'0') ) );
|
|
break;
|
|
// bell character
|
|
case REGEX_CHAR(char_type,'a'):
|
|
if( ! normalize )
|
|
goto default_;
|
|
ch = REGEX_CHAR(char_type,'\a');
|
|
++icur;
|
|
break;
|
|
// control character
|
|
case REGEX_CHAR(char_type,'c'):
|
|
check_iter<IEndT>( ++icur, iend );
|
|
ch = *icur++;
|
|
if( REGEX_CHAR(char_type,'a') <= ch && REGEX_CHAR(char_type,'z') >= ch )
|
|
ch = detail::regex_toupper( ch );
|
|
ch ^= 0x40;
|
|
break;
|
|
// escape character
|
|
case REGEX_CHAR(char_type,'e'):
|
|
ch = 27;
|
|
++icur;
|
|
break;
|
|
// formfeed character
|
|
case REGEX_CHAR(char_type,'f'):
|
|
if( ! normalize )
|
|
goto default_;
|
|
ch = REGEX_CHAR(char_type,'\f');
|
|
++icur;
|
|
break;
|
|
// newline
|
|
case REGEX_CHAR(char_type,'n'):
|
|
if( ! normalize )
|
|
goto default_;
|
|
ch = REGEX_CHAR(char_type,'\n');
|
|
++icur;
|
|
break;
|
|
// return
|
|
case REGEX_CHAR(char_type,'r'):
|
|
if( ! normalize )
|
|
goto default_;
|
|
ch = REGEX_CHAR(char_type,'\r');
|
|
++icur;
|
|
break;
|
|
// horizontal tab
|
|
case REGEX_CHAR(char_type,'t'):
|
|
if( ! normalize )
|
|
goto default_;
|
|
ch = REGEX_CHAR(char_type,'\t');
|
|
++icur;
|
|
break;
|
|
// vertical tab
|
|
case REGEX_CHAR(char_type,'v'):
|
|
if( ! normalize )
|
|
goto default_;
|
|
ch = REGEX_CHAR(char_type,'\v');
|
|
++icur;
|
|
break;
|
|
// hex escape sequence
|
|
case REGEX_CHAR(char_type,'x'):
|
|
for( ++icur, ch=i=0; i<2 && detail::regex_isxdigit( *icur ); check_iter<IEndT>( ++icur, iend ) )
|
|
ch = char_type( ch * 16 + detail::regex_xdigit2int( *icur ) );
|
|
break;
|
|
// backslash
|
|
case REGEX_CHAR(char_type,'\\'):
|
|
if( ! normalize )
|
|
goto default_;
|
|
ch = REGEX_CHAR(char_type,'\\');
|
|
++icur;
|
|
break;
|
|
// all other escaped characters represent themselves
|
|
default: default_:
|
|
ch = *icur;
|
|
++icur;
|
|
break;
|
|
}
|
|
|
|
return ch;
|
|
}
|
|
|
|
// Parses the body of a character set starting at icur and records what it
// finds in *pnew. On return icur has been advanced past the CHARSET_END
// token that closes the set.
//
//   pnew - the charset object being filled in (dereferenced unconditionally)
//   icur - in/out position in the pattern
//   iend - end of the pattern
//   sy   - syntax object that supplies charset_token() and the current flags
//
// Throws bad_regexpr (via check_iter) if the pattern runs out before the
// set is closed.
template< typename CharT, typename CharSetT, typename SyntaxT >
|
|
inline void parse_charset(
|
|
std::auto_ptr<CharSetT> & pnew,
|
|
typename std::basic_string<CharT>::iterator & icur,
|
|
typename std::basic_string<CharT>::const_iterator iend,
|
|
SyntaxT & sy )
|
|
{
|
|
typedef CharT char_type;
|
|
typedef std::basic_string<CharT> string_type;
|
|
typedef typename string_type::const_iterator iter_type;
|
|
typename string_type::iterator itemp = icur;
|
|
bool const normalize = ( NORMALIZE == ( NORMALIZE & sy.get_flags() ) );
|
|
|
|
// A leading negation token marks the set as complemented; icur is only
// advanced past it when the token really is CHARSET_NEGATE.
if( iend != itemp && CHARSET_NEGATE == sy.charset_token( itemp, iend ) )
|
|
{
|
|
pnew->m_fcompliment = true;
|
|
icur = itemp;
|
|
}
|
|
|
|
TOKEN tok;
|
|
// ch_prev / fhave_prev hold a character that has been read but not yet
// added to the set, because it may turn out to be the lower bound of a range.
char_type ch_prev = 0;
|
|
bool fhave_prev = false;
|
|
charset const * pcharset = 0;
|
|
// iprev is the position where the current token started, so that the
// token can be "un-got" and re-read as an ordinary character.
typename string_type::iterator iprev = icur;
|
|
bool const fnocase = ( NOCASE == ( NOCASE & sy.get_flags() ) );
|
|
|
|
check_iter<iter_type>( icur, iend );
|
|
|
|
// remember the current position and grab the next token
|
|
tok = sy.charset_token( icur, iend );
|
|
// NOTE: every `continue` inside this do/while jumps to the loop condition
// at the bottom, which records iprev and fetches the next token.
do
|
|
{
|
|
check_iter<iter_type>( icur, iend );
|
|
|
|
// A range token that follows a pending character: the token after it
// decides what the upper bound of the range is.
if( CHARSET_RANGE == tok && fhave_prev )
|
|
{
|
|
// remember the current position
|
|
typename string_type::iterator iprev2 = icur;
|
|
fhave_prev = false;
|
|
|
|
// ch_prev is lower bound of a range
|
|
switch( sy.charset_token( icur, iend ) )
|
|
{
|
|
case CHARSET_RANGE:
|
|
case CHARSET_NEGATE:
|
|
icur = iprev2; // un-get these tokens and fall through
|
|
case NO_TOKEN:
|
|
pnew->set_bit_range( ch_prev, *icur++, fnocase );
|
|
continue;
|
|
case CHARSET_ESCAPE: // BUGBUG user-defined charset?
|
|
pnew->set_bit_range( ch_prev, get_escaped_char( icur, iend, normalize ), fnocase );
|
|
continue;
|
|
case CHARSET_BACKSPACE:
|
|
pnew->set_bit_range( ch_prev, char_type( 8 ), fnocase ); // backspace
|
|
continue;
|
|
case CHARSET_END: // fall through
|
|
default: // not a range.
|
|
icur = iprev; // backup to range token
|
|
pnew->set_bit( ch_prev, fnocase );
|
|
// the range token itself is added as an ordinary character
pnew->set_bit( *icur++, fnocase );
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// The pending character was not the start of a range; commit it.
if( fhave_prev )
|
|
pnew->set_bit( ch_prev, fnocase );
|
|
fhave_prev = false;
|
|
|
|
switch( tok )
|
|
{
|
|
// None of the intrinsic charsets are case-sensitive,
|
|
// so no special handling must be done when the NOCASE
|
|
// flag is set.
|
|
// These tokens have no special meaning at this position: re-read the
// character at the token start and hold it as the pending character.
case CHARSET_RANGE:
|
|
case CHARSET_NEGATE:
|
|
case CHARSET_END:
|
|
icur = iprev; // un-get these tokens
|
|
ch_prev = *icur++;
|
|
fhave_prev = true;
|
|
continue;
|
|
case CHARSET_BACKSPACE:
|
|
ch_prev = char_type( 8 ); // backspace
|
|
fhave_prev = true;
|
|
continue;
|
|
case ESC_DIGIT:
|
|
*pnew |= intrinsic_charsets<char_type>::get_digit_charset();
|
|
continue;
|
|
case ESC_NOT_DIGIT:
|
|
*pnew |= intrinsic_charsets<char_type>::get_not_digit_charset();
|
|
continue;
|
|
case ESC_SPACE:
|
|
*pnew |= intrinsic_charsets<char_type>::get_space_charset();
|
|
continue;
|
|
case ESC_NOT_SPACE:
|
|
*pnew |= intrinsic_charsets<char_type>::get_not_space_charset();
|
|
continue;
|
|
case ESC_WORD:
|
|
*pnew |= intrinsic_charsets<char_type>::get_word_charset();
|
|
continue;
|
|
case ESC_NOT_WORD:
|
|
*pnew |= intrinsic_charsets<char_type>::get_not_word_charset();
|
|
continue;
|
|
// The CHARSET_xxx class tokens are OR-ed into m_posixcharson; the
// CHARSET_NOT_xxx tokens are pushed onto the m_posixcharsoff list.
case CHARSET_ALNUM:
|
|
pnew->m_posixcharson |= ( wct_alnum() );
|
|
continue;
|
|
case CHARSET_NOT_ALNUM:
|
|
pnew->m_posixcharsoff.push_front( wct_alnum() );
|
|
continue;
|
|
case CHARSET_ALPHA:
|
|
pnew->m_posixcharson |= ( wct_alpha() );
|
|
continue;
|
|
case CHARSET_NOT_ALPHA:
|
|
pnew->m_posixcharsoff.push_front( wct_alpha() );
|
|
continue;
|
|
case CHARSET_BLANK:
|
|
pnew->m_posixcharson |= ( wct_blank() );
|
|
continue;
|
|
case CHARSET_NOT_BLANK:
|
|
pnew->m_posixcharsoff.push_front( wct_blank() );
|
|
continue;
|
|
case CHARSET_CNTRL:
|
|
pnew->m_posixcharson |= ( wct_cntrl() );
|
|
continue;
|
|
case CHARSET_NOT_CNTRL:
|
|
pnew->m_posixcharsoff.push_front( wct_cntrl() );
|
|
continue;
|
|
case CHARSET_DIGIT:
|
|
pnew->m_posixcharson |= ( wct_digit() );
|
|
continue;
|
|
case CHARSET_NOT_DIGIT:
|
|
pnew->m_posixcharsoff.push_front( wct_digit() );
|
|
continue;
|
|
case CHARSET_GRAPH:
|
|
pnew->m_posixcharson |= ( wct_graph() );
|
|
continue;
|
|
case CHARSET_NOT_GRAPH:
|
|
pnew->m_posixcharsoff.push_front( wct_graph() );
|
|
continue;
|
|
// For the lower/upper classes, the NOCASE flag widens the class to
// cover both cases.
case CHARSET_LOWER:
|
|
if( NOCASE == ( NOCASE & sy.get_flags() ) )
|
|
pnew->m_posixcharson |= ( wct_lower()|wct_upper() );
|
|
else
|
|
pnew->m_posixcharson |= ( wct_lower() );
|
|
continue;
|
|
case CHARSET_NOT_LOWER:
|
|
if( NOCASE == ( NOCASE & sy.get_flags() ) )
|
|
pnew->m_posixcharsoff.push_front( wct_lower()|wct_upper() );
|
|
else
|
|
pnew->m_posixcharsoff.push_front( wct_lower() );
|
|
continue;
|
|
case CHARSET_PRINT:
|
|
pnew->m_posixcharson |= ( wct_print() );
|
|
continue;
|
|
case CHARSET_NOT_PRINT:
|
|
pnew->m_posixcharsoff.push_front( wct_print() );
|
|
continue;
|
|
case CHARSET_PUNCT:
|
|
pnew->m_posixcharson |= ( wct_punct() );
|
|
continue;
|
|
case CHARSET_NOT_PUNCT:
|
|
pnew->m_posixcharsoff.push_front( wct_punct() );
|
|
continue;
|
|
case CHARSET_SPACE:
|
|
pnew->m_posixcharson |= ( wct_space() );
|
|
continue;
|
|
case CHARSET_NOT_SPACE:
|
|
pnew->m_posixcharsoff.push_front( wct_space() );
|
|
continue;
|
|
case CHARSET_UPPER:
|
|
if( NOCASE == ( NOCASE & sy.get_flags() ) )
|
|
pnew->m_posixcharson |= ( wct_upper()|wct_lower() );
|
|
else
|
|
pnew->m_posixcharson |= ( wct_upper() );
|
|
continue;
|
|
case CHARSET_NOT_UPPER:
|
|
if( NOCASE == ( NOCASE & sy.get_flags() ) )
|
|
pnew->m_posixcharsoff.push_front( wct_upper()|wct_lower() );
|
|
else
|
|
pnew->m_posixcharsoff.push_front( wct_upper() );
|
|
continue;
|
|
case CHARSET_XDIGIT:
|
|
pnew->m_posixcharson |= ( wct_xdigit() );
|
|
continue;
|
|
case CHARSET_NOT_XDIGIT:
|
|
pnew->m_posixcharsoff.push_front( wct_xdigit() );
|
|
continue;
|
|
case CHARSET_ESCAPE:
|
|
// Maybe this is a user-defined intrinsic charset
|
|
pcharset = get_altern_charset( *icur, sy );
|
|
if( 0 != pcharset )
|
|
{
|
|
*pnew |= *pcharset;
|
|
++icur;
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
// otherwise decode the escape and hold the result as the pending character
ch_prev = get_escaped_char( icur, iend, normalize );
|
|
fhave_prev = true;
|
|
}
|
|
continue;
|
|
default:
|
|
ch_prev = *icur++;
|
|
fhave_prev = true;
|
|
continue;
|
|
}
|
|
}
|
|
// Loop condition: remember where the next token starts (iprev), make sure
// the pattern has not run out, then fetch the token; stop at CHARSET_END.
while( check_iter<iter_type>( iprev = icur, iend ),
|
|
CHARSET_END != ( tok = sy.charset_token( icur, iend ) ) );
|
|
|
|
// Commit a character that was still pending when the set was closed.
if( fhave_prev )
|
|
pnew->set_bit( ch_prev, fnocase );
|
|
|
|
pnew->optimize( type2type<char_type>() );
|
|
}
|
|
|
|
template< typename CharT, typename SyntaxT >
|
|
inline charset const * get_altern_charset( CharT ch, SyntaxT & sy )
|
|
{
|
|
typedef std::basic_string<CharT> string_type;
|
|
charset const * pcharset = 0;
|
|
regex::detail::charset_map<CharT> & charset_map = sy.get_charset_map();
|
|
typename regex::detail::charset_map<CharT>::iterator iter = charset_map.find( ch );
|
|
if( charset_map.end() != iter )
|
|
{
|
|
bool const fnocase = ( NOCASE == ( sy.get_flags() & NOCASE ) );
|
|
pcharset = iter->second.m_rgcharsets[ fnocase ];
|
|
if( 0 == pcharset )
|
|
{
|
|
// tmp takes ownership of any ptrs.
|
|
charset_map_node<CharT> tmp = iter->second;
|
|
charset_map.erase( iter ); // prevent possible infinite recursion
|
|
typename string_type::iterator ibegin = tmp.m_str.begin();
|
|
std::auto_ptr<charset> pnew( new charset );
|
|
std::auto_ptr<charset const> pold( tmp.m_rgcharsets[ !fnocase ] );
|
|
parse_charset<CharT, charset>( pnew, ibegin, tmp.m_str.end(), sy );
|
|
tmp.m_rgcharsets[ fnocase ] = pcharset = pnew.get();
|
|
charset_map[ ch ] = tmp; // could throw
|
|
// charset_map has taken ownership of these pointers now.
|
|
pnew.release();
|
|
pold.release();
|
|
}
|
|
}
|
|
return pcharset;
|
|
}
|
|
|
|
} // namespace detail
|
|
|
|
//
|
|
// Read ahead through the pattern and treat sequential atoms
|
|
// as a single atom, making sure to handle quantification
|
|
// correctly. Warning: dense code ahead.
|
|
//
|
|
//   ipat   - in/out position in this->m_pat; on return it refers to the
//            first character that was not consumed
//   pgroup - group that receives the literal / quantified-character items
//   sy     - syntax object supplying quant_token(), reg_token() and flags
//
// Throws bad_regexpr("quantifier not expected") when a quantifier appears
// with nothing in front of it to quantify.
template< typename IterT, typename SyntaxT >
|
|
inline void basic_rpattern_base<IterT, SyntaxT>::_find_atom(
|
|
typename string_type::iterator & ipat,
|
|
detail::match_group_base<IterT> * pgroup,
|
|
syntax_type & sy )
|
|
{
|
|
typedef typename string_type::iterator iter_type;
|
|
typedef typename std::iterator_traits<iter_type>::difference_type diff_type;
|
|
iter_type itemp = ipat, ibegin;
|
|
// The start of the atom is remembered as an offset, not an iterator,
// because the erase() below modifies the pattern string; ibegin is
// recomputed from this offset whenever it is needed.
diff_type const nstart = std::distance( this->m_pat->begin(), ipat );
|
|
|
|
do
|
|
{
|
|
// itemp differs from ipat only when the reg_token() call in the loop
// condition advanced it without reporting a token.
if( itemp != ipat ) // Is there whitespace to skip?
|
|
{
|
|
diff_type dist = std::distance( this->m_pat->begin(), ipat );
|
|
this->m_pat->erase( ipat, itemp ); // erase the whitespace from the pattern
|
|
// re-derive ipat from its offset after the erase
std::advance( ipat = this->m_pat->begin(), dist );
|
|
if( this->m_pat->end() == ( itemp = ipat ) ) // are we at the end of the pattern?
|
|
break;
|
|
}
|
|
switch( sy.quant_token( itemp, this->m_pat->end() ) )
|
|
{
|
|
// if {, } can't be interpreted as quantifiers, treat them as regular chars
|
|
case BEGIN_RANGE:
|
|
std::advance( ibegin = this->m_pat->begin(), nstart );
|
|
if( ibegin != ipat ) // treat as a quantifier
|
|
goto quantify;
|
|
// nothing precedes the range token: fall through and keep it as a literal
case NO_TOKEN:
|
|
case END_RANGE:
|
|
case END_RANGE_MIN:
|
|
case RANGE_SEPARATOR:
|
|
break;
|
|
|
|
// any other quantifier token
default:
|
|
std::advance( ibegin = this->m_pat->begin(), nstart );
|
|
if( ibegin == ipat ) // must be able to quantify something.
|
|
throw bad_regexpr( "quantifier not expected" );
|
|
|
|
// Step back to the last character read: everything before it becomes a
// literal, and that last character alone is handed to _quantify.
quantify: if( ibegin != --ipat )
|
|
pgroup->add_item( detail::create_literal<IterT>( ibegin, ipat, sy.get_flags(), this->m_arena ) );
|
|
std::auto_ptr<detail::sub_expr<IterT> > pnew( detail::create_char<IterT>( *ipat++, sy.get_flags(), this->m_arena ) );
|
|
_quantify( pnew, ipat, false, sy );
|
|
pgroup->add_item( pnew.release() );
|
|
return;
|
|
}
|
|
// Advance one character; keep going until the pattern ends or
// reg_token() reports a token at the new position.
} while( this->m_pat->end() != ++ipat && ! sy.reg_token( itemp = ipat, this->m_pat->end() ) );
|
|
|
|
// No quantifier was found: the whole run [ibegin, ipat) is one literal.
std::advance( ibegin = this->m_pat->begin(), nstart );
|
|
REGEX_ASSERT( ipat != ibegin );
|
|
pgroup->add_item( detail::create_literal<IterT>( ibegin, ipat, sy.get_flags(), this->m_arena ) );
|
|
}
|
|
|
|
// Parses the next element of the pattern at ipat and appends the resulting
// sub-expression (if any) to pgroup.
//
//   ipat     - in/out position in this->m_pat
//   pgroup   - the group currently being built
//   sy       - syntax object supplying the tokenizer and the current flags
//   rggroups - passed through to _find_next_group for nested groups
//
// Returns true when the caller should keep parsing this group, and false
// when the group has ended (END_GROUP token, or end of pattern for group
// number 0).
//
// Throws bad_regexpr for mismatched parentheses, an incomplete \c escape,
// or a quotemeta-off token that has no matching quotemeta-on.
template< typename IterT, typename SyntaxT >
|
|
inline bool basic_rpattern_base<IterT, SyntaxT>::_find_next(
|
|
typename string_type::iterator & ipat,
|
|
detail::match_group_base<IterT> * pgroup,
|
|
syntax_type & sy,
|
|
std::vector<detail::match_group_base<IterT>*> & rggroups )
|
|
{
|
|
std::auto_ptr<detail::sub_expr<IterT> > pnew;
|
|
std::auto_ptr<detail::custom_charset> pcs;
|
|
typename string_type::iterator ibegin, itemp;
|
|
bool fdone, is_group = false;
|
|
bool const normalize = ( NORMALIZE == ( NORMALIZE & sy.get_flags() ) );
|
|
|
|
// End of pattern: legal only while parsing group number 0.
if( this->m_pat->end() == ipat )
|
|
{
|
|
if( 0 != pgroup->group_number() )
|
|
throw bad_regexpr( "mismatched parenthesis" );
|
|
return false;
|
|
}
|
|
|
|
switch( sy.reg_token( ipat, this->m_pat->end() ) )
|
|
{
|
|
case NO_TOKEN: // not a token. Must be an atom
|
|
// reg_token() may have advanced ipat to the end without finding a token
if( this->m_pat->end() == ipat )
|
|
{
|
|
if( 0 != pgroup->group_number() )
|
|
throw bad_regexpr( "mismatched parenthesis" );
|
|
return false;
|
|
}
|
|
_find_atom( ipat, pgroup, sy );
|
|
return true;
|
|
|
|
case END_GROUP:
|
|
// group number 0 has no opening parenthesis to match
if( 0 == pgroup->group_number() )
|
|
throw bad_regexpr( "mismatched parenthesis" );
|
|
return false;
|
|
|
|
case ALTERNATION:
|
|
pgroup->end_alternate();
|
|
pgroup->add_alternate();
|
|
return true;
|
|
|
|
case BEGIN_GROUP:
|
|
// Find next group. could return NULL if the group is really
|
|
// a pattern modifier, like: ( ?s-i )
|
|
detail::reset_auto_ptr( pnew, _find_next_group( ipat, pgroup, sy, rggroups ) );
|
|
is_group = true;
|
|
break;
|
|
|
|
case BEGIN_LINE:
|
|
detail::reset_auto_ptr( pnew, detail::create_bol<IterT>( sy.get_flags(), this->m_arena ) );
|
|
break;
|
|
|
|
case END_LINE:
|
|
detail::reset_auto_ptr( pnew, detail::create_eol<IterT>( sy.get_flags(), this->m_arena ) );
|
|
break;
|
|
|
|
case BEGIN_CHARSET:
|
|
// pcs owns the charset while it is being parsed; ownership is given up
// only after create_custom_charset has succeeded.
detail::reset_auto_ptr( pcs, new( this->m_arena ) detail::custom_charset( this->m_arena ) );
|
|
detail::parse_charset<char_type, detail::custom_charset>(
|
|
pcs, ipat, this->m_pat->end(), sy );
|
|
detail::reset_auto_ptr( pnew,
|
|
detail::create_custom_charset<IterT>( pcs.get(), sy.get_flags(), this->m_arena ) );
|
|
pcs.release();
|
|
break;
|
|
|
|
case MATCH_ANY:
|
|
detail::reset_auto_ptr( pnew, detail::create_any<IterT>( sy.get_flags(), this->m_arena ) );
|
|
break;
|
|
|
|
case ESC_WORD_BOUNDARY:
|
|
detail::reset_auto_ptr( pnew, detail::create_word_boundary<IterT>( true, sy.get_flags(), this->m_arena ) );
|
|
break;
|
|
|
|
case ESC_NOT_WORD_BOUNDARY:
|
|
detail::reset_auto_ptr( pnew, detail::create_word_boundary<IterT>( false, sy.get_flags(), this->m_arena ) );
|
|
break;
|
|
|
|
case ESC_WORD_START:
|
|
detail::reset_auto_ptr( pnew, detail::create_word_start<IterT>( sy.get_flags(), this->m_arena ) );
|
|
break;
|
|
|
|
case ESC_WORD_STOP:
|
|
detail::reset_auto_ptr( pnew, detail::create_word_stop<IterT>( sy.get_flags(), this->m_arena ) );
|
|
break;
|
|
|
|
case ESC_DIGIT:
|
|
detail::reset_auto_ptr( pnew, detail::create_charset<IterT>( detail::intrinsic_charsets<char_type>::get_digit_charset(), sy.get_flags(), this->m_arena ) );
|
|
break;
|
|
|
|
case ESC_NOT_DIGIT:
|
|
detail::reset_auto_ptr( pnew, detail::create_charset<IterT>( detail::intrinsic_charsets<char_type>::get_not_digit_charset(), sy.get_flags(), this->m_arena ) );
|
|
break;
|
|
|
|
case ESC_WORD:
|
|
detail::reset_auto_ptr( pnew, detail::create_charset<IterT>( detail::intrinsic_charsets<char_type>::get_word_charset(), sy.get_flags(), this->m_arena ) );
|
|
break;
|
|
|
|
case ESC_NOT_WORD:
|
|
detail::reset_auto_ptr( pnew, detail::create_charset<IterT>( detail::intrinsic_charsets<char_type>::get_not_word_charset(), sy.get_flags(), this->m_arena ) );
|
|
break;
|
|
|
|
case ESC_SPACE:
|
|
detail::reset_auto_ptr( pnew, detail::create_charset<IterT>( detail::intrinsic_charsets<char_type>::get_space_charset(), sy.get_flags(), this->m_arena ) );
|
|
break;
|
|
|
|
case ESC_NOT_SPACE:
|
|
detail::reset_auto_ptr( pnew, detail::create_charset<IterT>( detail::intrinsic_charsets<char_type>::get_not_space_charset(), sy.get_flags(), this->m_arena ) );
|
|
break;
|
|
|
|
case ESC_BEGIN_STRING:
|
|
detail::reset_auto_ptr( pnew, detail::create_bos<IterT>( sy.get_flags(), this->m_arena ) );
|
|
break;
|
|
|
|
case ESC_END_STRING:
|
|
detail::reset_auto_ptr( pnew, detail::create_eos<IterT>( sy.get_flags(), this->m_arena ) );
|
|
break;
|
|
|
|
case ESC_END_STRING_z:
|
|
detail::reset_auto_ptr( pnew, detail::create_eoz<IterT>( sy.get_flags(), this->m_arena ) );
|
|
break;
|
|
|
|
case ESCAPE:
|
|
// An escape token at the very end of the pattern: step back and use the
// character just before the end as a literal.
if( this->m_pat->end() == ipat )
|
|
{
|
|
// BUGBUG what if the escape sequence is more than 1 character?
|
|
detail::reset_auto_ptr( pnew, detail::create_char<IterT>( *--ipat, sy.get_flags(), this->m_arena ) );
|
|
++ipat;
|
|
}
|
|
// Escape followed by a decimal digit: either a back-reference or an
// octal character code.
else if( REGEX_CHAR(char_type,'0') <= *ipat && REGEX_CHAR(char_type,'9') >= *ipat )
|
|
{
|
|
// Parse at most 3 decimal digits.
|
|
size_t nbackref = detail::parse_int( itemp = ipat, this->m_pat->end(), 999 );
|
|
// If the resulting number could conceivably be a backref, then it is.
|
|
if( REGEX_CHAR(char_type,'0') != *ipat && ( 10 > nbackref || nbackref < _cgroups_total() ) )
|
|
{
|
|
detail::reset_auto_ptr( pnew, detail::create_backref<IterT>( nbackref, sy.get_flags(), this->m_arena ) );
|
|
ipat = itemp;
|
|
}
|
|
else
|
|
{
|
|
// It's an octal character escape sequence. If *ipat is 8 or 9, insert
|
|
// a NULL character, and leave the 8 or 9 as a character literal.
|
|
char_type ch = 0, i = 0;
|
|
for( ; i < 3 && this->m_pat->end() != ipat && REGEX_CHAR(char_type,'0') <= *ipat && REGEX_CHAR(char_type,'7') >= *ipat; ++i, ++ipat )
|
|
ch = char_type( ch * 8 + ( *ipat - REGEX_CHAR(char_type,'0') ) );
|
|
detail::reset_auto_ptr( pnew, detail::create_char<IterT>( ch, sy.get_flags(), this->m_arena ) );
|
|
}
|
|
}
|
|
// \e : escape character (27)
else if( REGEX_CHAR(char_type,'e') == *ipat )
|
|
{
|
|
++ipat;
|
|
detail::reset_auto_ptr( pnew, detail::create_char<IterT>( char_type( 27 ), sy.get_flags(), this->m_arena ) );
|
|
}
|
|
// \x : at most two hex digits
else if( REGEX_CHAR(char_type,'x') == *ipat )
|
|
{
|
|
char_type ch = 0, i = 0;
|
|
for( ++ipat; i < 2 && this->m_pat->end() != ipat && detail::regex_isxdigit( *ipat ); ++i, ++ipat )
|
|
ch = char_type( ch * 16 + detail::regex_xdigit2int( *ipat ) );
|
|
detail::reset_auto_ptr( pnew, detail::create_char<IterT>( ch, sy.get_flags(), this->m_arena ) );
|
|
}
|
|
// \cX : control character (X upper-cased if a..z, then XOR 0x40)
else if( REGEX_CHAR(char_type,'c') == *ipat )
|
|
{
|
|
if( this->m_pat->end() == ++ipat )
|
|
throw bad_regexpr( "incomplete escape sequence \\c" );
|
|
char_type ch = *ipat++;
|
|
if( REGEX_CHAR(char_type,'a') <= ch && REGEX_CHAR(char_type,'z') >= ch )
|
|
ch = detail::regex_toupper( ch );
|
|
detail::reset_auto_ptr( pnew, detail::create_char<IterT>( char_type( ch ^ 0x40 ), sy.get_flags(), this->m_arena ) );
|
|
}
|
|
// The following escapes are translated only when the NORMALIZE flag is set.
else if( REGEX_CHAR(char_type,'a') == *ipat && normalize )
|
|
{
|
|
++ipat;
|
|
detail::reset_auto_ptr( pnew, detail::create_char<IterT>( REGEX_CHAR(char_type,'\a'), sy.get_flags(), this->m_arena ) );
|
|
}
|
|
else if( REGEX_CHAR(char_type,'f') == *ipat && normalize )
|
|
{
|
|
++ipat;
|
|
detail::reset_auto_ptr( pnew, detail::create_char<IterT>( REGEX_CHAR(char_type,'\f'), sy.get_flags(), this->m_arena ) );
|
|
}
|
|
else if( REGEX_CHAR(char_type,'n') == *ipat && normalize )
|
|
{
|
|
++ipat;
|
|
detail::reset_auto_ptr( pnew, detail::create_char<IterT>( REGEX_CHAR(char_type,'\n'), sy.get_flags(), this->m_arena ) );
|
|
}
|
|
else if( REGEX_CHAR(char_type,'r') == *ipat && normalize )
|
|
{
|
|
++ipat;
|
|
detail::reset_auto_ptr( pnew, detail::create_char<IterT>( REGEX_CHAR(char_type,'\r'), sy.get_flags(), this->m_arena ) );
|
|
}
|
|
else if( REGEX_CHAR(char_type,'t') == *ipat && normalize )
|
|
{
|
|
++ipat;
|
|
detail::reset_auto_ptr( pnew, detail::create_char<IterT>( REGEX_CHAR(char_type,'\t'), sy.get_flags(), this->m_arena ) );
|
|
}
|
|
else if( REGEX_CHAR(char_type,'\\') == *ipat && normalize )
|
|
{
|
|
++ipat;
|
|
detail::reset_auto_ptr( pnew, detail::create_char<IterT>( REGEX_CHAR(char_type,'\\'), sy.get_flags(), this->m_arena ) );
|
|
}
|
|
else
|
|
{
|
|
// Is this a user-defined intrinsic character set?
|
|
detail::charset const * pcharset = detail::get_altern_charset( *ipat, sy );
|
|
if( 0 != pcharset )
|
|
detail::reset_auto_ptr( pnew, detail::create_charset<IterT>( *pcharset, sy.get_flags(), this->m_arena ) );
|
|
else
|
|
// any other escaped character represents itself
detail::reset_auto_ptr( pnew, detail::create_char<IterT>( *ipat, sy.get_flags(), this->m_arena ) );
|
|
++ipat;
|
|
}
|
|
break;
|
|
|
|
// If quotemeta, loop until we find quotemeta off or end of string
|
|
case ESC_QUOTE_META_ON:
|
|
// ibegin marks the start of the quoted text; itemp tracks its end and is
// not moved past the quotemeta-off token itself.
for( ibegin = itemp = ipat, fdone = false; !fdone && this->m_pat->end() != ipat; )
|
|
{
|
|
switch( sy.reg_token( ipat, this->m_pat->end() ) )
|
|
{
|
|
case ESC_QUOTE_META_OFF:
|
|
fdone = true;
|
|
break;
|
|
case NO_TOKEN:
|
|
if( this->m_pat->end() != ipat )
|
|
++ipat; // fallthrough
|
|
default:
|
|
itemp = ipat;
|
|
break;
|
|
}
|
|
}
|
|
if( itemp != ibegin )
|
|
pgroup->add_item( detail::create_literal<IterT>( ibegin, itemp, sy.get_flags(), this->m_arena ) );
|
|
|
|
// skip the quantification code below
|
|
return true;
|
|
|
|
// Should never get here for valid patterns
|
|
case ESC_QUOTE_META_OFF:
|
|
throw bad_regexpr( "quotemeta turned off, but was never turned on" );
|
|
|
|
default:
|
|
REGEX_ASSERT( ! "Unhandled token type" );
|
|
break;
|
|
}
|
|
|
|
// If pnew is null, then the current subexpression is a no-op.
|
|
if( pnew.get() )
|
|
{
|
|
// Look for quantifiers
|
|
_quantify( pnew, ipat, is_group, sy );
|
|
|
|
// Add the item to the group
|
|
pgroup->add_item( pnew.release() );
|
|
}
|
|
return true;
|
|
}
|
|
|
|
template< typename IterT, typename SyntaxT >
|
|
inline void basic_rpattern_base<IterT, SyntaxT>::_quantify(
|
|
std::auto_ptr<detail::sub_expr<IterT> > & pnew,
|
|
typename string_type::iterator & ipat,
|
|
bool is_group,
|
|
syntax_type & sy )
|
|
{
|
|
if( this->m_pat->end() != ipat && ! pnew->is_assertion() )
|
|
{
|
|
typename string_type::iterator itemp = ipat, itemp2;
|
|
bool fmin = false;
|
|
|
|
// Since size_t is unsigned, -1 is really the largest size_t
|
|
size_t lbound = ( size_t )-1;
|
|
size_t ubound = ( size_t )-1;
|
|
size_t ubound_tmp;
|
|
|
|
switch( sy.quant_token( itemp, this->m_pat->end() ) )
|
|
{
|
|
case ZERO_OR_MORE_MIN:
|
|
fmin = true;
|
|
case ZERO_OR_MORE:
|
|
lbound = 0;
|
|
break;
|
|
|
|
case ONE_OR_MORE_MIN:
|
|
fmin = true;
|
|
case ONE_OR_MORE:
|
|
lbound = 1;
|
|
break;
|
|
|
|
case ZERO_OR_ONE_MIN:
|
|
fmin = true;
|
|
case ZERO_OR_ONE:
|
|
lbound = 0;
|
|
ubound = 1;
|
|
break;
|
|
|
|
case BEGIN_RANGE:
|
|
lbound = detail::parse_int( itemp, this->m_pat->end() );
|
|
if( this->m_pat->end() == itemp )
|
|
return; // not a valid quantifier - treat as atom
|
|
|
|
switch( sy.quant_token( itemp, this->m_pat->end() ) )
|
|
{
|
|
case END_RANGE_MIN:
|
|
fmin = true;
|
|
case END_RANGE:
|
|
ubound = lbound;
|
|
break;
|
|
|
|
case RANGE_SEPARATOR:
|
|
itemp2 = itemp;
|
|
ubound_tmp = detail::parse_int( itemp, this->m_pat->end() );
|
|
if( itemp != itemp2 )
|
|
ubound = ubound_tmp;
|
|
if( itemp == this->m_pat->end() )
|
|
return; // not a valid quantifier - treat as atom
|
|
switch( sy.quant_token( itemp, this->m_pat->end() ) )
|
|
{
|
|
case END_RANGE_MIN:
|
|
fmin = true;
|
|
case END_RANGE:
|
|
break;
|
|
default:
|
|
return; // not a valid quantifier - treat as atom
|
|
}
|
|
break;
|
|
|
|
default:
|
|
return; // not a valid quantifier - treat as atom
|
|
}
|
|
|
|
if( ubound < lbound )
|
|
throw bad_regexpr( "Can't do {n, m} with n > m" );
|
|
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
if( ( size_t )-1 != lbound )
|
|
{
|
|
// If we are quantifying a group, then this pattern could recurse
|
|
// deeply. Note that fact here so that we can opt to use a stack-
|
|
// conservative algorithm at match time.
|
|
if( is_group && ubound > 16 )
|
|
this->m_fok_to_recurse = false;
|
|
|
|
std::auto_ptr<detail::sub_expr<IterT> > pquant( pnew->quantify( lbound, ubound, ! fmin, this->m_arena ) );
|
|
pnew.release();
|
|
detail::reset_auto_ptr( pnew, pquant.release() );
|
|
ipat = itemp;
|
|
}
|
|
}
|
|
}
|
|
|
|
template< typename IterT, typename SyntaxT >
|
|
inline void basic_rpattern_base<IterT, SyntaxT>::_add_subst_backref(
|
|
detail::subst_node & snode,
|
|
size_t nbackref,
|
|
ptrdiff_t rstart,
|
|
bool & uses_backrefs,
|
|
detail::subst_list_type & subst_list ) const
|
|
{
|
|
uses_backrefs = true;
|
|
REGEX_ASSERT( detail::subst_node::SUBST_STRING == snode.m_stype );
|
|
if( snode.m_subst_string.m_rlength )
|
|
subst_list.push_back( snode );
|
|
|
|
snode.m_stype = detail::subst_node::SUBST_BACKREF;
|
|
snode.m_subst_backref = nbackref;
|
|
subst_list.push_back( snode );
|
|
|
|
// re-initialize the subst_node
|
|
snode.m_stype = detail::subst_node::SUBST_STRING;
|
|
snode.m_subst_string.m_rstart = rstart;
|
|
snode.m_subst_string.m_rlength = 0;
|
|
}
|
|
|
|
template< typename IterT, typename SyntaxT >
|
|
inline void basic_rpattern_base<IterT, SyntaxT>::_parse_subst(
|
|
string_type & subst,
|
|
bool & uses_backrefs,
|
|
detail::subst_list_type & subst_list ) const
|
|
{
|
|
TOKEN tok;
|
|
detail::subst_node snode;
|
|
typename string_type::iterator icur = subst.begin();
|
|
size_t nbackref;
|
|
typename string_type::iterator itemp;
|
|
bool fdone;
|
|
syntax_type sy( this->m_flags );
|
|
|
|
uses_backrefs = false;
|
|
|
|
// Initialize the subst_node
|
|
snode.m_stype = detail::subst_node::SUBST_STRING;
|
|
snode.m_subst_string.m_rstart = 0;
|
|
snode.m_subst_string.m_rlength = 0;
|
|
|
|
while( subst.end() != icur )
|
|
{
|
|
switch( tok = sy.subst_token( icur, subst.end() ) )
|
|
{
|
|
case SUBST_MATCH:
|
|
_add_subst_backref( snode, 0, std::distance( subst.begin(), icur ), uses_backrefs, subst_list );
|
|
break;
|
|
|
|
case SUBST_PREMATCH:
|
|
_add_subst_backref( snode, ( size_t )detail::subst_node::PREMATCH, std::distance( subst.begin(), icur ), uses_backrefs, subst_list );
|
|
break;
|
|
|
|
case SUBST_POSTMATCH:
|
|
_add_subst_backref( snode, ( size_t )detail::subst_node::POSTMATCH, std::distance( subst.begin(), icur ), uses_backrefs, subst_list );
|
|
break;
|
|
|
|
case SUBST_BACKREF:
|
|
nbackref = detail::parse_int( icur, subst.end(), cgroups() - 1 ); // always at least 1 group
|
|
if( 0 == nbackref )
|
|
throw bad_regexpr( "invalid backreference in substitution" );
|
|
|
|
_add_subst_backref( snode, nbackref, std::distance( subst.begin(), icur ), uses_backrefs, subst_list );
|
|
break;
|
|
|
|
case SUBST_QUOTE_META_ON:
|
|
REGEX_ASSERT( detail::subst_node::SUBST_STRING == snode.m_stype );
|
|
if( snode.m_subst_string.m_rlength )
|
|
subst_list.push_back( snode );
|
|
|
|
snode.m_subst_string.m_rstart = std::distance( subst.begin(), icur );
|
|
for( itemp = icur, fdone = false; !fdone && subst.end() != icur; )
|
|
{
|
|
switch( tok = sy.subst_token( icur, subst.end() ) )
|
|
{
|
|
case SUBST_ALL_OFF:
|
|
fdone = true;
|
|
break;
|
|
case NO_TOKEN:
|
|
++icur; // fall-through
|
|
default:
|
|
itemp = icur;
|
|
break;
|
|
}
|
|
}
|
|
snode.m_subst_string.m_rlength = std::distance( subst.begin(), itemp ) - snode.m_subst_string.m_rstart;
|
|
if( snode.m_subst_string.m_rlength )
|
|
subst_list.push_back( snode );
|
|
|
|
if( tok == SUBST_ALL_OFF )
|
|
{
|
|
snode.m_stype = detail::subst_node::SUBST_OP;
|
|
snode.m_op = detail::subst_node::ALL_OFF;
|
|
subst_list.push_back( snode );
|
|
}
|
|
|
|
// re-initialize the subst_node
|
|
snode.m_stype = detail::subst_node::SUBST_STRING;
|
|
snode.m_subst_string.m_rstart = std::distance( subst.begin(), icur );
|
|
snode.m_subst_string.m_rlength = 0;
|
|
break;
|
|
|
|
case SUBST_UPPER_ON:
|
|
case SUBST_UPPER_NEXT:
|
|
case SUBST_LOWER_ON:
|
|
case SUBST_LOWER_NEXT:
|
|
case SUBST_ALL_OFF:
|
|
REGEX_ASSERT( detail::subst_node::SUBST_STRING == snode.m_stype );
|
|
if( snode.m_subst_string.m_rlength )
|
|
subst_list.push_back( snode );
|
|
|
|
snode.m_stype = detail::subst_node::SUBST_OP;
|
|
snode.m_op = static_cast<detail::subst_node::op_type>( tok );
|
|
subst_list.push_back( snode );
|
|
|
|
// re-initialize the subst_node
|
|
snode.m_stype = detail::subst_node::SUBST_STRING;
|
|
snode.m_subst_string.m_rstart = std::distance( subst.begin(), icur );
|
|
snode.m_subst_string.m_rlength = 0;
|
|
break;
|
|
|
|
case SUBST_ESCAPE:
|
|
if( subst.end() == icur )
|
|
throw bad_regexpr( "expecting escape sequence in substitution string" );
|
|
REGEX_ASSERT( detail::subst_node::SUBST_STRING == snode.m_stype );
|
|
if( snode.m_subst_string.m_rlength )
|
|
subst_list.push_back( snode );
|
|
snode.m_subst_string.m_rstart = std::distance( subst.begin(), icur++ );
|
|
snode.m_subst_string.m_rlength = 1;
|
|
break;
|
|
|
|
case NO_TOKEN:
|
|
default:
|
|
++snode.m_subst_string.m_rlength;
|
|
++icur;
|
|
break;
|
|
}
|
|
}
|
|
REGEX_ASSERT( detail::subst_node::SUBST_STRING == snode.m_stype );
|
|
if( snode.m_subst_string.m_rlength )
|
|
subst_list.push_back( snode );
|
|
}
|
|
|
|
|
|
|
|
template< typename CharT >
|
|
REGEXPR_H_INLINE void reset_intrinsic_charsets( CharT )
|
|
{
|
|
detail::intrinsic_charsets<CharT>::reset();
|
|
}
|
|
|
|
// lpcstr_t: chosen between std::string::const_iterator and char const *.
// The condition is REGEX_FOLD_INSTANTIATIONS combined with whether
// char const * converts to std::string::const_iterator.
// NOTE(review): presumably select<C, T, F>::type is T when C is true and
// F otherwise - confirm against the definition of detail::select.
typedef regex::detail::select
|
|
<
|
|
REGEX_FOLD_INSTANTIATIONS &&
|
|
detail::is_convertible<char const *,std::string::const_iterator>::value,
|
|
std::string::const_iterator,
|
|
char const *
|
|
>::type lpcstr_t;
|
|
|
|
// lpcwstr_t: the wide-character counterpart of lpcstr_t, chosen between
// std::wstring::const_iterator and wchar_t const * by the same rule.
typedef regex::detail::select
|
|
<
|
|
REGEX_FOLD_INSTANTIATIONS &&
|
|
detail::is_convertible<wchar_t const *,std::wstring::const_iterator>::value,
|
|
std::wstring::const_iterator,
|
|
wchar_t const *
|
|
>::type lpcwstr_t;
|
|
|
|
namespace detail
|
|
{
|
|
|
|
// Here is the main dispatch loop for the iterative match routine.
|
|
// It is responsible for calling match on the current sub-expression
|
|
// and repeating for the next sub-expression. It also backtracks
|
|
// the match when it needs to.
|
|
template< typename CStringsT, typename IterT >
|
|
inline bool _do_match_iterative( sub_expr_base<IterT> const * expr, match_param<IterT> & param, IterT icur, CStringsT )
|
|
{
|
|
unsafe_stack::stack_guard guard( param.m_pstack );
|
|
unsafe_stack & s = *param.m_pstack;
|
|
void *const jump_ptr = s.set_jump(); // the bottom of the stack
|
|
param.m_icur = icur;
|
|
|
|
if( ! expr->iterative_match_this( param, CStringsT() ) )
|
|
{
|
|
return false;
|
|
}
|
|
|
|
for( ;; )
|
|
{
|
|
do
|
|
{
|
|
if( param.m_pnext == 0 ) // This means we're done
|
|
return true;
|
|
s.push( expr );
|
|
expr = param.m_pnext;
|
|
}
|
|
while( expr->iterative_match_this( param, CStringsT() ) );
|
|
|
|
do
|
|
{
|
|
if( jump_ptr == s.set_jump() ) // No more posibilities to try
|
|
return false;
|
|
s.pop( expr );
|
|
}
|
|
while( ! expr->iterative_rematch_this( param, CStringsT() ) );
|
|
}
|
|
}
|
|
|
|
template< typename IterT >
|
|
REGEXPR_H_INLINE bool regex_access<IterT>::_do_match_iterative_helper_s( sub_expr_base<IterT> const * expr, match_param<IterT> & param, IterT icur )
|
|
{
|
|
return _do_match_iterative( expr, param, icur, false_t() );
|
|
}
|
|
|
|
template< typename IterT >
|
|
REGEXPR_H_INLINE bool regex_access<IterT>::_do_match_iterative_helper_c( sub_expr_base<IterT> const * expr, match_param<IterT> & param, IterT icur )
|
|
{
|
|
return _do_match_iterative( expr, param, icur, true_t() );
|
|
}
|
|
|
|
template< typename IterT >
|
|
REGEXPR_H_INLINE bool regex_access<IterT>::_do_match_recursive_s( sub_expr_base<IterT> const * expr, match_param<IterT> & param, IterT icur )
|
|
{
|
|
return static_cast<match_group_base<IterT> const*>(expr)->match_group_base<IterT>::recursive_match_all_s( param, icur );
|
|
}
|
|
|
|
template< typename IterT >
|
|
REGEXPR_H_INLINE bool regex_access<IterT>::_do_match_recursive_c( sub_expr_base<IterT> const * expr, match_param<IterT> & param, IterT icur )
|
|
{
|
|
return static_cast<match_group_base<IterT> const*>(expr)->match_group_base<IterT>::recursive_match_all_c( param, icur );
|
|
}
|
|
|
|
template< typename IterT >
|
|
REGEX_NOINLINE bool regex_access<IterT>::_do_match_with_stack( rpattern_type const & pat, match_param<IterT> & param, bool const use_null )
|
|
{
|
|
unsafe_stack s;
|
|
param.m_pstack = &s;
|
|
return _do_match_impl( pat, param, use_null );
|
|
}
|
|
|
|
template< typename IterT >
|
|
REGEXPR_H_INLINE bool regex_access<IterT>::_do_match_impl( rpattern_type const & pat, match_param<IterT> & param, bool const use_null )
|
|
{
|
|
typedef bool ( *pfndomatch_t )( sub_expr_base<IterT> const * expr, match_param<IterT> & param, IterT icur );
|
|
|
|
bool floop = pat._loops();
|
|
unsigned flags = pat.flags();
|
|
width_type nwidth = pat.get_width();
|
|
|
|
// Create some aliases for convenience and effeciency.
|
|
REGEX_ASSERT( 0 != param.m_prgbackrefs );
|
|
|
|
// If the pstack parameter is not NULL, we should do a safe, iterative match.
|
|
// Otherwise, we should do a fast, recursive match.
|
|
pfndomatch_t pfndomatch;
|
|
if( 0 != param.m_pstack )
|
|
if( use_null )
|
|
pfndomatch = &_do_match_iterative_helper_c;
|
|
else
|
|
pfndomatch = &_do_match_iterative_helper_s;
|
|
else
|
|
if( use_null )
|
|
pfndomatch = &_do_match_recursive_c;
|
|
else
|
|
pfndomatch = &_do_match_recursive_s;
|
|
|
|
sub_expr_base<IterT> const * pfirst = pat._get_first_subexpression();
|
|
param.m_pfirst = pfirst;
|
|
|
|
REGEX_ASSERT( param.m_cbackrefs == pat._cgroups_total() );
|
|
std::fill_n( param.m_prgbackrefs, param.m_cbackrefs, static_init<backref_type>::value );
|
|
|
|
if( ! use_null )
|
|
{
|
|
// If the minimum width of the pattern exceeds the width of the
|
|
// string, a succesful match is impossible
|
|
typedef typename std::iterator_traits<IterT>::difference_type diff_type;
|
|
diff_type room = std::distance( param.m_imatchbegin, param.m_iend );
|
|
|
|
if( nwidth.m_min <= static_cast<size_t>( room ) )
|
|
{
|
|
IterT local_iend = param.m_iend;
|
|
std::advance( local_iend, -static_cast<diff_type>( nwidth.m_min ) );
|
|
|
|
if( RIGHTMOST & flags )
|
|
{
|
|
// begin trying to match after the last character.
|
|
// Continue to the beginning
|
|
for( IterT icur = local_iend; ; --icur, param.m_no0len = false )
|
|
{
|
|
if( ( *pfndomatch )( pfirst, param, icur ) )
|
|
break; // m_floop not used for rightmost matches
|
|
if( icur == param.m_imatchbegin )
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// begin trying to match before the first character.
|
|
// Continue to the end
|
|
if( is_random_access<IterT>::value && pat.m_search )
|
|
{
|
|
IterT icur = pat.m_search->find( param.m_imatchbegin, param.m_iend );
|
|
while( icur != param.m_iend )
|
|
{
|
|
if( ( *pfndomatch )( pfirst, param, icur ) || ! floop )
|
|
break;
|
|
param.m_no0len = false;
|
|
icur = pat.m_search->find( ++icur, param.m_iend );
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for( IterT icur = param.m_imatchbegin; ; ++icur, param.m_no0len = false )
|
|
{
|
|
if( ( *pfndomatch )( pfirst, param, icur ) || ! floop )
|
|
break;
|
|
if( icur == local_iend )
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
REGEX_ASSERT( 0 == ( RIGHTMOST & flags ) );
|
|
// begin trying to match before the first character.
|
|
// Continue to the end
|
|
for( IterT icur = param.m_imatchbegin; ; ++icur, param.m_no0len = false )
|
|
{
|
|
if( ( *pfndomatch )( pfirst, param, icur ) || ! floop )
|
|
break;
|
|
if( traits_type::eq( *icur, char_type() ) )
|
|
break;
|
|
}
|
|
}
|
|
|
|
return param.m_prgbackrefs[0].matched;
|
|
}
|
|
|
|
// A rudimentary typelist facility. It lets the REGEX_TO_INSTANTIATE list
// recursively generate the instantiations we are interested in.

// Terminator: marks the end of a typelist.
struct empty_typelist
{
};

// One list cell: a first element plus the remainder of the list.
template< typename FirstT, typename RestT >
struct cons
{
    typedef FirstT head_type;
    typedef RestT  tail_type;
};

// typelist<A,B,...> accepts up to twelve types. It derives from
// cons< A, typelist<B,...> >, so head_type is the first type and tail_type is
// the typelist of the remaining ones (unused slots are empty_typelist).
template
<
    typename T1  = empty_typelist,
    typename T2  = empty_typelist,
    typename T3  = empty_typelist,
    typename T4  = empty_typelist,
    typename T5  = empty_typelist,
    typename T6  = empty_typelist,
    typename T7  = empty_typelist,
    typename T8  = empty_typelist,
    typename T9  = empty_typelist,
    typename T10 = empty_typelist,
    typename T11 = empty_typelist,
    typename T12 = empty_typelist
>
struct typelist
    : public cons< T1, typelist<T2,T3,T4,T5,T6,T7,T8,T9,T10,T11,T12> >
{
};

// End of the recursion: a typelist with every slot empty is itself an
// empty_typelist rather than a cons cell.
template<>
struct typelist
<
    empty_typelist, empty_typelist, empty_typelist,
    empty_typelist, empty_typelist, empty_typelist,
    empty_typelist, empty_typelist, empty_typelist,
    empty_typelist, empty_typelist, empty_typelist
>
    : public empty_typelist
{
};
|
|
|
|
// This class is responsible for instantiating basic_rpattern
|
|
// with the template parameters we are interested in. It also
|
|
// instntiates any helper routines this basic_rpattern relies
|
|
// on.
|
|
template< typename IterT, typename SyntaxT >
|
|
struct rpattern_instantiator : protected regex::basic_rpattern<IterT,SyntaxT>
|
|
{
|
|
static instantiator instantiate()
|
|
{
|
|
typedef typename std::iterator_traits<IterT>::value_type char_type;
|
|
void (*pfn)( char_type ) = &reset_intrinsic_charsets;
|
|
|
|
return regex::basic_rpattern<IterT,SyntaxT>::instantiate() +
|
|
regex_access<IterT>::instantiate() +
|
|
instantiator_helper( pfn );
|
|
}
|
|
};
|
|
|
|
// The regex_instantiate uses typelists and the rpattern_instantiator
|
|
// to generate instantiations for all the types in the typelist.
|
|
template< typename SyntaxT >
|
|
instantiator regex_instantiate( empty_typelist, type2type<SyntaxT> )
|
|
{
|
|
return instantiator();
|
|
}
|
|
|
|
template< typename HeadT, typename TailT, typename SyntaxT >
|
|
instantiator regex_instantiate( cons<HeadT,TailT>, type2type<SyntaxT> )
|
|
{
|
|
typedef typename std::iterator_traits<HeadT>::value_type char_type;
|
|
typedef typename SyntaxT::template rebind<char_type>::other syntax_type;
|
|
|
|
return rpattern_instantiator<HeadT,syntax_type>::instantiate() +
|
|
regex_instantiate( TailT(), type2type<SyntaxT>() );
|
|
}
|
|
|
|
// The list of iterator types to instantiate. It can be supplied from outside
// by pre-defining REGEX_TO_INSTANTIATE. Otherwise REGEX_WIDE_AND_NARROW
// selects both the narrow and wide string / C-string iterators, and the
// default is the restring iterator plus lpctstr_t.
#if !defined( REGEX_TO_INSTANTIATE )
# if defined( REGEX_WIDE_AND_NARROW )
#  define REGEX_TO_INSTANTIATE std::string::const_iterator,  \
                               std::wstring::const_iterator, \
                               lpcstr_t,                     \
                               lpcwstr_t
# else
#  define REGEX_TO_INSTANTIATE restring::const_iterator, \
                               lpctstr_t
# endif
#endif
|
|
|
|
typedef typelist<REGEX_TO_INSTANTIATE> regex_typelist;
|
|
typedef type2type<perl_syntax<char> > perl_type;
|
|
typedef type2type<posix_syntax<char> > posix_type;
|
|
|
|
namespace
|
|
{
|
|
// Create the perl instantiations
|
|
#ifndef REGEX_NO_PERL
|
|
instantiator const perl_inst = regex_instantiate( regex_typelist(), perl_type() );
|
|
#endif
|
|
|
|
// Create the posix instantiations
|
|
#ifdef REGEX_POSIX
|
|
instantiator const posix_inst = regex_instantiate( regex_typelist(), posix_type() );
|
|
#endif
|
|
}
|
|
|
|
} // namespace detail
|
|
|
|
} // namespace regex
|
|
|
|
#ifdef _MSC_VER
|
|
# pragma warning( pop )
|
|
#endif
|