1470 lines
48 KiB
C++
1470 lines
48 KiB
C++
//+---------------------------------------------------------------------------
|
|
//
|
|
// Copyright ( C ) Microsoft, 1994 - 2002.
|
|
//
|
|
// File: reimpl2.h
|
|
//
|
|
// Functions: helpers for matching and substituting regular expressions
|
|
//
|
|
// Notes: implementation details that really belong in a cpp file,
|
|
// but can't because of template weirdness
|
|
//
|
|
// Author: Eric Niebler ( ericne@microsoft.com )
|
|
//
|
|
// History: 8/15/2001 ericne Created
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
#ifndef REIMPL_H
|
|
#define REIMPL_H
|
|
|
|
//
|
|
// Helper functions for match and substitute
|
|
//
|
|
|
|
namespace detail
|
|
{
|
|
|
|
// For use while doing uppercase/lowercase conversions:
|
|
inline char regex_toupper( char ch ) { using namespace std; return ( char )toupper( ch ); }
|
|
inline char regex_tolower( char ch ) { using namespace std; return ( char )tolower( ch ); }
|
|
inline wchar_t regex_toupper( wchar_t ch ) { using namespace std; return ( wchar_t )towupper( ch ); }
|
|
inline wchar_t regex_tolower( wchar_t ch ) { using namespace std; return ( wchar_t )towlower( ch ); }
|
|
|
|
template< typename IBeginT, typename IEndT >
|
|
inline void regex_toupper( IBeginT ibegin, IEndT iend )
|
|
{
|
|
typedef typename std::iterator_traits<IEndT>::value_type char_type;
|
|
typedef std::char_traits<char_type> traits_type;
|
|
|
|
for( ; iend != ibegin; ++ibegin )
|
|
traits_type::assign( *ibegin, regex_toupper( *ibegin ) );
|
|
}
|
|
|
|
template< typename IBeginT, typename IEndT >
|
|
inline void regex_tolower( IBeginT ibegin, IEndT iend )
|
|
{
|
|
typedef typename std::iterator_traits<IEndT>::value_type char_type;
|
|
typedef std::char_traits<char_type> traits_type;
|
|
|
|
for( ; iend != ibegin; ++ibegin )
|
|
traits_type::assign( *ibegin, regex_tolower( *ibegin ) );
|
|
}
|
|
|
|
//
|
|
// Helper fn for swapping two auto_ptr's
|
|
//
|
|
template< typename T >
|
|
inline void swap_auto_ptr( std::auto_ptr<T> & left, std::auto_ptr<T> & right )
|
|
{
|
|
std::auto_ptr<T> temp( left );
|
|
left = right;
|
|
right = temp;
|
|
}
|
|
|
|
template< typename T >
|
|
inline void reset_auto_ptr( std::auto_ptr<T> & left )
|
|
{
|
|
std::auto_ptr<T> temp( 0 );
|
|
left = temp;
|
|
}
|
|
|
|
template< typename T, typename U >
|
|
inline void reset_auto_ptr( std::auto_ptr<T> & left, U * right )
|
|
{
|
|
std::auto_ptr<T> temp( right );
|
|
left = temp;
|
|
}
|
|
|
|
typedef int instantiator;
|
|
|
|
inline instantiator REGEX_CDECL instantiator_helper( ... )
|
|
{
|
|
return instantiator();
|
|
}
|
|
|
|
// --------------------------------------------------------------------------
|
|
//
|
|
// Class: match_param
|
|
//
|
|
// Description: Struct that contains the state of the matching operation.
|
|
// Passed by reference to all recursive_match_all and recursive_match_this routines.
|
|
//
|
|
// Methods: match_param - ctor
|
|
//
|
|
// Members: ibufferbegin - start of the buffer
|
|
// ibegin - start of this iteration
|
|
// iend - end of the string
|
|
// prgbackrefs - pointer to backref array
|
|
//
|
|
// History: 8/14/2000 - ericne - Created
|
|
//
|
|
// --------------------------------------------------------------------------
|
|
template< typename IterT >
|
|
struct match_param
|
|
{
|
|
typedef backref_tag<IterT> backref_type;
|
|
typedef sub_expr_base<IterT> const * sub_expr_ptr;
|
|
|
|
// for performance reasons, the most frequently used fields
|
|
// are placed at offsets which are a power of 2 (assuming
|
|
// a 32-bit architecture, and iterators which are 32 bits).
|
|
|
|
backref_type * m_prgbackrefs; // offsetof == 0
|
|
IterT m_iend; // offsetof == 4
|
|
IterT m_icur; // offsetof == 8
|
|
size_t m_cbackrefs;
|
|
sub_expr_ptr m_pnext; // offsetof == 16
|
|
IterT m_ibufferbegin;
|
|
IterT m_imatchbegin;
|
|
sub_expr_ptr m_pfirst;
|
|
unsafe_stack * m_pstack; // offsetof == 32
|
|
bool m_no0len;
|
|
bool m_reserved;
|
|
|
|
match_param
|
|
(
|
|
IterT ibufferbegin,
|
|
IterT imatchbegin,
|
|
IterT iend,
|
|
backref_type * prgbackrefs,
|
|
size_t cbackrefs
|
|
)
|
|
: m_prgbackrefs( prgbackrefs )
|
|
, m_iend( iend )
|
|
, m_icur( imatchbegin )
|
|
, m_cbackrefs( cbackrefs )
|
|
, m_pnext( 0 )
|
|
, m_ibufferbegin( ibufferbegin )
|
|
, m_imatchbegin( imatchbegin )
|
|
, m_pfirst( 0 )
|
|
, m_pstack( 0 )
|
|
, m_no0len( false )
|
|
, m_reserved( false )
|
|
{
|
|
}
|
|
};
|
|
|
|
// --------------------------------------------------------------------------
|
|
//
|
|
// Class: arena_allocator
|
|
//
|
|
// Description: A small, fast allocator for speeding up pattern compilation.
|
|
// Every basic_rpattern object has an arena as a member.
|
|
// sub_expr objects can only be allocated from this arena.
|
|
// Memory is alloc'ed in chunks using the underlying allocator.
|
|
// Chunks are freed en-masse when clear() or finalize() is called.
|
|
//
|
|
// History: 8/17/2001 - ericne - Created
|
|
//
|
|
// Notes: This is NOT a std-compliant allocator and CANNOT be used with
|
|
// STL containers. arena_allocator objects maintain state, and
|
|
// STL containers are allowed to assume their allocators do
|
|
// not maintain state. In regexpr2.cpp, I define slist<>, a simple
|
|
// arena-friendly singly-linked list for use with the arena
|
|
// allocator.
|
|
//
|
|
// --------------------------------------------------------------------------
|
|
template< typename AllocT = std::allocator<char> >
|
|
struct pool_impl
|
|
{
|
|
typedef typename rebind<AllocT, char>::type char_allocator_type;
|
|
|
|
struct mem_block
|
|
{
|
|
size_t m_offset;
|
|
size_t m_blocksize;
|
|
mem_block * m_pnext;
|
|
unsigned char m_data[ 1 ];
|
|
};
|
|
#if !defined(_MSC_VER) | 1200 < _MSC_VER
|
|
struct pool_data : char_allocator_type
|
|
{
|
|
pool_data( size_t default_size, char_allocator_type const & alloc )
|
|
: char_allocator_type( alloc )
|
|
, m_pfirst( 0 )
|
|
, m_default_size( default_size )
|
|
{
|
|
}
|
|
mem_block * m_pfirst;
|
|
size_t m_default_size;
|
|
char_allocator_type & get_allocator()
|
|
{
|
|
return *this;
|
|
}
|
|
} m_data;
|
|
#else
|
|
struct pool_data
|
|
{
|
|
pool_data( size_t default_size, char_allocator_type const & alloc )
|
|
: m_alloc( alloc )
|
|
, m_pfirst( 0 )
|
|
, m_default_size( default_size )
|
|
{
|
|
}
|
|
char_allocator_type m_alloc;
|
|
mem_block * m_pfirst;
|
|
size_t m_default_size;
|
|
char_allocator_type & get_allocator()
|
|
{
|
|
return m_alloc;
|
|
}
|
|
} m_data;
|
|
#endif
|
|
void new_block( size_t size );
|
|
void clear();
|
|
void * allocate( size_t size );
|
|
explicit pool_impl( size_t default_size, char_allocator_type const & alloc = char_allocator_type() );
|
|
~pool_impl();
|
|
char_allocator_type get_allocator() const
|
|
{
|
|
return const_cast<pool_impl*>(this)->m_data.get_allocator();
|
|
}
|
|
};
|
|
|
|
template< typename T, typename AllocT = std::allocator<char> >
|
|
class arena_allocator
|
|
{
|
|
public:
|
|
typedef size_t size_type;
|
|
typedef ptrdiff_t difference_type;
|
|
typedef T *pointer;
|
|
typedef T const *const_pointer;
|
|
typedef T & reference;
|
|
typedef T const & const_reference;
|
|
typedef T value_type;
|
|
|
|
typedef typename rebind<AllocT, char>::type char_alloc_type;
|
|
typedef pool_impl<AllocT> pool_impl_t;
|
|
typedef typename rebind<AllocT, pool_impl_t>::type pool_alloc_type;
|
|
|
|
explicit arena_allocator( size_t default_size, char_alloc_type const & alloc = char_alloc_type() )
|
|
: m_pool( 0 )
|
|
{
|
|
char_alloc_type char_alloc( alloc );
|
|
pool_alloc_type pool_alloc( convert_allocator<pool_impl_t>( char_alloc, 0 ) );
|
|
m_pool = pool_alloc.allocate( 1, 0 );
|
|
pool_alloc.construct( m_pool, pool_impl_t( default_size, char_alloc ) ); // can't throw
|
|
}
|
|
#if !defined(_MSC_VER) | 1200 < _MSC_VER
|
|
arena_allocator( arena_allocator const & that )
|
|
: m_pool( that.m_pool )
|
|
{
|
|
}
|
|
#endif
|
|
template< typename U >
|
|
arena_allocator( arena_allocator<U> const & that )
|
|
: m_pool( that.m_pool )
|
|
{
|
|
}
|
|
~arena_allocator()
|
|
{ // Many arena_allocators may point to m_pool, so don't delete it.
|
|
} // Rather, wait for someone to call finalize().
|
|
pointer allocate( size_type size, void const * =0 )
|
|
{
|
|
return static_cast<T*>( m_pool->allocate( size * sizeof(T) ) );
|
|
}
|
|
void deallocate( void *, size_type )
|
|
{ // no-op. deallocation happens when pool is finalized or cleared.
|
|
}
|
|
void construct( pointer p, T const & t )
|
|
{
|
|
new( static_cast<void*>(p) ) T( t );
|
|
}
|
|
void destroy( pointer p )
|
|
{
|
|
regex::detail::destroy( p );
|
|
}
|
|
#if !defined(_MSC_VER) | 1200 < _MSC_VER
|
|
template< typename U > struct rebind
|
|
{
|
|
typedef arena_allocator<U> other;
|
|
};
|
|
#endif
|
|
void clear()
|
|
{
|
|
m_pool->clear();
|
|
}
|
|
void finalize()
|
|
{
|
|
char_alloc_type char_alloc( m_pool->get_allocator() );
|
|
pool_alloc_type pool_alloc( convert_allocator<pool_impl_t>( char_alloc, 0 ) );
|
|
pool_alloc.destroy( m_pool );
|
|
pool_alloc.deallocate( m_pool, 1 );
|
|
m_pool = 0;
|
|
}
|
|
void swap( arena_allocator & that )
|
|
{
|
|
using std::swap;
|
|
swap( m_pool, that.m_pool );
|
|
}
|
|
|
|
// the pool lives here
|
|
pool_impl_t * m_pool;
|
|
};
|
|
|
|
// Dummy struct used by the pool allocator to align returned pointers
|
|
struct not_pod
|
|
{
|
|
virtual ~not_pod() {}
|
|
};
|
|
|
|
template< typename AllocT >
|
|
inline pool_impl<AllocT>::pool_impl( size_t default_size, char_allocator_type const & alloc )
|
|
: m_data( default_size, alloc )
|
|
{
|
|
}
|
|
|
|
template< typename AllocT >
|
|
inline pool_impl<AllocT>::~pool_impl()
|
|
{
|
|
clear();
|
|
}
|
|
|
|
template< typename AllocT >
|
|
inline void pool_impl<AllocT>::clear()
|
|
{
|
|
for( mem_block * pnext; m_data.m_pfirst; m_data.m_pfirst = pnext )
|
|
{
|
|
pnext = m_data.m_pfirst->m_pnext;
|
|
m_data.get_allocator().deallocate( reinterpret_cast<char*>( m_data.m_pfirst ), m_data.m_pfirst->m_blocksize );
|
|
}
|
|
}
|
|
|
|
template< typename AllocT >
|
|
inline void pool_impl<AllocT>::new_block( size_t size )
|
|
{
|
|
size_t blocksize = regex_max( m_data.m_default_size, size ) + offsetof( mem_block, m_data );
|
|
mem_block * pnew = reinterpret_cast<mem_block*>( m_data.get_allocator().allocate( blocksize, 0 ) );
|
|
if( 0 == pnew )
|
|
{
|
|
throw std::bad_alloc();
|
|
}
|
|
pnew->m_offset = 0;
|
|
pnew->m_blocksize = blocksize;
|
|
pnew->m_pnext = m_data.m_pfirst;
|
|
m_data.m_pfirst = pnew;
|
|
}
|
|
|
|
template< typename AllocT >
|
|
inline void * pool_impl<AllocT>::allocate( size_t size )
|
|
{
|
|
if( 0 == size )
|
|
size = 1;
|
|
|
|
if( 0 == m_data.m_pfirst || m_data.m_pfirst->m_offset + size > m_data.m_default_size )
|
|
new_block( size );
|
|
|
|
void * pnew = m_data.m_pfirst->m_data + m_data.m_pfirst->m_offset;
|
|
|
|
// ensure returned pointers are always suitably aligned
|
|
m_data.m_pfirst->m_offset += ( ( size + alignof<not_pod>::value - 1 )
|
|
& ~( alignof<not_pod>::value - 1 ) );
|
|
|
|
return pnew;
|
|
}
|
|
|
|
// The regex_arena is a basic, vanilla arena_allocator.
|
|
typedef arena_allocator<char> regex_arena;
|
|
|
|
template< typename T >
|
|
type_with_size<3> allocator_picker( arena_allocator<T> const &, int );
|
|
|
|
template<> struct rebind_helper<3>
|
|
{
|
|
template< typename, typename ElemT>
|
|
struct inner
|
|
{
|
|
typedef arena_allocator<ElemT> type;
|
|
};
|
|
};
|
|
|
|
// --------------------------------------------------------------------------
|
|
//
|
|
// Class: sub_expr_base
|
|
//
|
|
// Description: patterns are "compiled" into a directed graph of sub_expr_base
|
|
// structs. Matching is accomplished by traversing this graph.
|
|
//
|
|
// Methods: ~sub_expr_base - virt dtor so cleanup happens correctly
|
|
// recursive_match_all - match this sub-expression and all following
|
|
// sub-expression
|
|
//
|
|
// History: 8/14/2000 - ericne - Created
|
|
//
|
|
// --------------------------------------------------------------------------
|
|
template< typename IterT >
|
|
struct sub_expr_base
|
|
{
|
|
virtual bool recursive_match_all_s( match_param<IterT> &, IterT ) const = 0; //throw() (offset 0)
|
|
virtual bool recursive_match_all_c( match_param<IterT> &, IterT ) const = 0; //throw() (offset 4)
|
|
virtual bool iterative_match_this_s( match_param<IterT> & ) const = 0; //throw() (offset 8)
|
|
virtual bool iterative_match_this_c( match_param<IterT> & ) const = 0; //throw() (offset 12)
|
|
virtual bool iterative_rematch_this_s( match_param<IterT> & ) const = 0; //throw() (offset 16)
|
|
virtual bool iterative_rematch_this_c( match_param<IterT> & ) const = 0; //throw() (offset 20)
|
|
|
|
virtual ~sub_expr_base() = 0; // (offset 24)
|
|
|
|
// Use the regex_arena for memory management
|
|
static void * operator new( size_t size, regex_arena & arena )
|
|
{
|
|
return arena.allocate( size );
|
|
}
|
|
static void operator delete( void *, regex_arena & )
|
|
{
|
|
}
|
|
|
|
// Invoke the d'tor, but don't bother freeing memory. That will
|
|
// happen automatically when the arena object gets destroyed.
|
|
static void operator delete( void * )
|
|
{
|
|
}
|
|
|
|
// For choosing an appropriate virtual function based on a compile time constant
|
|
bool recursive_match_all( match_param<IterT> & param, IterT icur, false_t ) const //throw()
|
|
{
|
|
return recursive_match_all_s( param, icur );
|
|
}
|
|
bool recursive_match_all( match_param<IterT> & param, IterT icur, true_t ) const //throw()
|
|
{
|
|
return recursive_match_all_c( param, icur );
|
|
}
|
|
bool iterative_match_this( match_param<IterT> & param, false_t ) const //throw()
|
|
{
|
|
return iterative_match_this_s( param );
|
|
}
|
|
bool iterative_match_this( match_param<IterT> & param, true_t ) const //throw()
|
|
{
|
|
return iterative_match_this_c( param );
|
|
}
|
|
bool iterative_rematch_this( match_param<IterT> & param, false_t ) const //throw()
|
|
{
|
|
return iterative_rematch_this_s( param );
|
|
}
|
|
bool iterative_rematch_this( match_param<IterT> & param, true_t ) const //throw()
|
|
{
|
|
return iterative_rematch_this_c( param );
|
|
}
|
|
private:
|
|
// don't allocate sub-expressions directly on the heap; they should
|
|
// be allocated from an arena
|
|
static void * operator new( size_t size ) throw( std::bad_alloc );
|
|
// disable all the vector new's and delete's.
|
|
static void * operator new[]( size_t size, regex_arena & arena ) throw( std::bad_alloc );
|
|
static void operator delete[]( void *, regex_arena & );
|
|
static void * operator new[]( size_t size ) throw( std::bad_alloc );
|
|
static void operator delete[]( void * );
|
|
};
|
|
|
|
template< typename IterT >
|
|
inline sub_expr_base<IterT>::~sub_expr_base()
|
|
{
|
|
}
|
|
|
|
// --------------------------------------------------------------------------
|
|
//
|
|
// Class: subst_node
|
|
//
|
|
// Description: Substitution strings are parsed into an array of these
|
|
// structures in order to speed up subst operations.
|
|
//
|
|
// Members: stype - type of this struct
|
|
// .m_subst_string - do a string substitution
|
|
// .m_subst_backref - do a bacref substitution
|
|
// op - execute an operation
|
|
//
|
|
// History: 8/14/2000 - ericne - Created
|
|
//
|
|
// --------------------------------------------------------------------------
|
|
struct subst_node
|
|
{
|
|
enum
|
|
{
|
|
PREMATCH = -1,
|
|
POSTMATCH = -2
|
|
};
|
|
|
|
enum subst_type
|
|
{
|
|
SUBST_STRING,
|
|
SUBST_BACKREF,
|
|
SUBST_OP
|
|
};
|
|
|
|
enum op_type
|
|
{
|
|
UPPER_ON = SUBST_UPPER_ON,
|
|
UPPER_NEXT = SUBST_UPPER_NEXT,
|
|
LOWER_ON = SUBST_LOWER_ON,
|
|
LOWER_NEXT = SUBST_LOWER_NEXT,
|
|
ALL_OFF = SUBST_ALL_OFF
|
|
};
|
|
|
|
struct string_offsets
|
|
{
|
|
ptrdiff_t m_rstart;
|
|
ptrdiff_t m_rlength;
|
|
};
|
|
|
|
subst_type m_stype;
|
|
|
|
union
|
|
{
|
|
string_offsets m_subst_string;
|
|
size_t m_subst_backref;
|
|
op_type m_op;
|
|
};
|
|
};
|
|
|
|
typedef std::list<subst_node> subst_list_type;
|
|
size_t DEFAULT_BLOCK_SIZE();
|
|
|
|
template< typename IterT >
|
|
class boyer_moore;
|
|
|
|
// --------------------------------------------------------------------------
|
|
//
|
|
// Class: basic_rpattern_base_impl
|
|
//
|
|
// Description:
|
|
//
|
|
// Methods: basic_rpattern_base_impl - ctor
|
|
// flags - get the state of the flags
|
|
// uses_backrefs - true if the backrefs are referenced
|
|
// get_first_subexpression - return ptr to first sub_expr struct
|
|
// get_width - get min/max nbr chars this pattern can match
|
|
// loops - if false, we only need to try to match at 1st position
|
|
// cgroups - number of visible groups
|
|
// _cgroups_total - total number of groups, including hidden ( ?: ) groups
|
|
// get_pat - get string representing the pattern
|
|
// get_subst - get string representing the substitution string
|
|
// get_subst_list - get the list of subst nodes
|
|
// _normalize_string - perform character escaping
|
|
//
|
|
// Members: m_fuses_backrefs - true if subst string refers to backrefs
|
|
// m_floop - false if pat only needs to be matched in one place
|
|
// m_cgroups - total count of groups
|
|
// m_cgroups_visible - count of visible groups
|
|
// m_flags - the flags
|
|
// m_nwidth - width of this pattern
|
|
// m_pat - pattern string
|
|
// m_subst - substitution string
|
|
// m_subst_list - list of substitution nodes
|
|
// m_pfirst - ptr to first subexpression to match
|
|
//
|
|
// Typedefs: char_type -
|
|
// string_type -
|
|
// size_type -
|
|
//
|
|
// History: 8/14/2000 - ericne - Created
|
|
//
|
|
// --------------------------------------------------------------------------
|
|
template< typename IterT >
|
|
class basic_rpattern_base_impl
|
|
{
|
|
basic_rpattern_base_impl( basic_rpattern_base_impl<IterT> const & );
|
|
basic_rpattern_base_impl & operator=( basic_rpattern_base_impl<IterT> const & );
|
|
protected:
|
|
typedef typename std::iterator_traits<IterT>::value_type char_type;
|
|
typedef std::char_traits<char_type> traits_type;
|
|
|
|
typedef std::basic_string<char_type> string_type;
|
|
typedef size_t size_type;
|
|
|
|
typedef backref_tag<IterT> backref_type;
|
|
typedef std::vector<backref_type> backref_vector;
|
|
|
|
friend struct regex_access<IterT>;
|
|
|
|
explicit basic_rpattern_base_impl
|
|
(
|
|
REGEX_FLAGS flags = NOFLAGS,
|
|
REGEX_MODE mode = MODE_DEFAULT,
|
|
string_type const & pat = string_type(),
|
|
string_type const & subst = string_type()
|
|
) //throw()
|
|
: m_arena( DEFAULT_BLOCK_SIZE() )
|
|
, m_fuses_backrefs( false )
|
|
, m_floop( true )
|
|
, m_fok_to_recurse( true )
|
|
, m_cgroups( 0 )
|
|
, m_cgroups_visible( 0 )
|
|
, m_flags( flags )
|
|
, m_mode( mode )
|
|
, m_nwidth( uninit_width() )
|
|
, m_pat( new string_type( pat ) )
|
|
, m_subst( new string_type( subst ) )
|
|
, m_subst_list()
|
|
, m_pfirst( 0 )
|
|
, m_invisible_groups()
|
|
, m_search( 0 )
|
|
{
|
|
}
|
|
|
|
virtual ~basic_rpattern_base_impl()
|
|
{
|
|
// We're not going to be calling destructors because all allocated
|
|
// memory associated with the parsed pattern resides in the arena.
|
|
// The memory will be freed when the arena gets destroyed.
|
|
//delete m_pfirst;
|
|
reset_auto_ptr( m_pat );
|
|
reset_auto_ptr( m_subst );
|
|
m_arena.finalize();
|
|
}
|
|
|
|
regex_arena m_arena; // The sub_expr arena
|
|
|
|
bool m_fuses_backrefs; // true if the substitution uses backrefs
|
|
bool m_floop; // false if m_pfirst->recursive_match_all only needs to be called once
|
|
bool m_fok_to_recurse; // false if the pattern would recurse too deeply
|
|
size_t m_cgroups; // number of groups ( always at least one )
|
|
size_t m_cgroups_visible; // number of visible groups
|
|
REGEX_FLAGS m_flags; // flags used to customize search/replace
|
|
REGEX_MODE m_mode; // Used to pick the fast or safe algorithm
|
|
width_type m_nwidth; // width of the pattern
|
|
|
|
std::auto_ptr<string_type> m_pat; // contains the unparsed pattern
|
|
std::auto_ptr<string_type> m_subst; // contains the unparsed substitution
|
|
|
|
subst_list_type m_subst_list; // used to speed up substitution
|
|
sub_expr_base<IterT> const * m_pfirst; // first subexpression in pattern
|
|
|
|
std::list<size_t> m_invisible_groups; // groups w/o backrefs
|
|
|
|
boyer_moore<typename string_type::const_iterator> * m_search;
|
|
|
|
size_t _cgroups_total() const //throw()
|
|
{
|
|
return m_cgroups;
|
|
}
|
|
|
|
bool _loops() const //throw()
|
|
{
|
|
return m_floop;
|
|
}
|
|
|
|
size_t _get_next_group_nbr()
|
|
{
|
|
return m_cgroups++;
|
|
}
|
|
|
|
void _normalize_string( string_type & str ) const //throw()
|
|
{
|
|
if( NORMALIZE & flags() )
|
|
process_escapes( str, true );
|
|
}
|
|
|
|
bool _save_backrefs() const //throw()
|
|
{
|
|
return m_fuses_backrefs || ! ( flags() & NOBACKREFS );
|
|
}
|
|
|
|
sub_expr_base<IterT> const * _get_first_subexpression() const //throw()
|
|
{
|
|
return m_pfirst;
|
|
}
|
|
|
|
REGEX_FLAGS flags() const //throw()
|
|
{
|
|
return m_flags;
|
|
}
|
|
|
|
REGEX_MODE mode() const // throw()
|
|
{
|
|
return m_mode;
|
|
}
|
|
|
|
width_type get_width() const //throw()
|
|
{
|
|
return m_nwidth;
|
|
}
|
|
|
|
size_t cgroups() const //throw()
|
|
{
|
|
return m_cgroups_visible;
|
|
}
|
|
|
|
string_type const & get_pat() const //throw()
|
|
{
|
|
return *m_pat;
|
|
}
|
|
|
|
string_type const & get_subst() const //throw()
|
|
{
|
|
return *m_subst;
|
|
}
|
|
|
|
bool _ok_to_recurse() const; //throw();
|
|
|
|
void swap( basic_rpattern_base_impl<IterT> & that ); // throw();
|
|
|
|
enum { npos = static_cast<size_type>( -1 ) };
|
|
|
|
static instantiator instantiate()
|
|
{
|
|
typedef basic_rpattern_base_impl this_type;
|
|
|
|
return instantiator_helper
|
|
(
|
|
&this_type::_ok_to_recurse,
|
|
&this_type::swap
|
|
);
|
|
}
|
|
};
|
|
|
|
template< typename IterT >
|
|
struct regex_access
|
|
{
|
|
typedef basic_rpattern_base_impl< IterT > rpattern_type;
|
|
typedef typename rpattern_type::size_type size_type;
|
|
typedef typename rpattern_type::char_type char_type;
|
|
typedef typename rpattern_type::traits_type traits_type;
|
|
typedef typename rpattern_type::backref_type backref_type;
|
|
|
|
static bool _do_match_iterative_helper_s
|
|
(
|
|
sub_expr_base<IterT> const * expr,
|
|
match_param<IterT> & param,
|
|
IterT icur
|
|
);
|
|
|
|
static bool _do_match_iterative_helper_c
|
|
(
|
|
sub_expr_base<IterT> const * expr,
|
|
match_param<IterT> & param,
|
|
IterT icur
|
|
);
|
|
|
|
static bool _do_match_recursive_s
|
|
(
|
|
sub_expr_base<IterT> const * expr,
|
|
match_param<IterT> & param,
|
|
IterT icur
|
|
);
|
|
|
|
static bool _do_match_recursive_c
|
|
(
|
|
sub_expr_base<IterT> const * expr,
|
|
match_param<IterT> & param,
|
|
IterT icur
|
|
);
|
|
|
|
static bool _do_match_impl
|
|
(
|
|
rpattern_type const & pat,
|
|
match_param<IterT> & param,
|
|
bool const use_null
|
|
);
|
|
|
|
static bool _do_match_with_stack
|
|
(
|
|
rpattern_type const & pat,
|
|
match_param<IterT> & param,
|
|
bool const use_null
|
|
);
|
|
|
|
template< typename Alloc1T, typename Alloc2T >
|
|
static void _fixup_backrefs
|
|
(
|
|
std::vector<backref_type,Alloc1T> & rgbackrefs,
|
|
std::list<size_t,Alloc2T> const & invisible
|
|
)
|
|
{
|
|
typedef typename std::list<size_t,Alloc2T>::const_iterator iter_type;
|
|
|
|
// Remove information about the "invisible" groups
|
|
if( rgbackrefs[0].matched )
|
|
{
|
|
size_t dropped = 0;
|
|
iter_type const end = invisible.end();
|
|
iter_type curr = invisible.begin(), next = invisible.begin();
|
|
|
|
for( ; end != curr; curr = next, ++dropped )
|
|
{
|
|
if( end == ++next )
|
|
{
|
|
std::copy(
|
|
rgbackrefs.begin() + *curr + 1,
|
|
rgbackrefs.end(),
|
|
rgbackrefs.begin() + *curr - dropped );
|
|
}
|
|
else
|
|
{
|
|
std::copy(
|
|
rgbackrefs.begin() + *curr + 1,
|
|
rgbackrefs.begin() + *next,
|
|
rgbackrefs.begin() + *curr - dropped );
|
|
}
|
|
}
|
|
|
|
rgbackrefs.resize( rgbackrefs.size() - dropped );
|
|
}
|
|
else
|
|
{
|
|
rgbackrefs.resize( rgbackrefs.size() - invisible.size() );
|
|
}
|
|
}
|
|
|
|
template< typename AllocT >
|
|
static bool _do_try_match
|
|
(
|
|
rpattern_type const & pat,
|
|
match_param<IterT> & param,
|
|
std::vector<backref_type,AllocT> & rgbackrefs,
|
|
bool const use_null
|
|
)
|
|
{
|
|
bool success;
|
|
rgbackrefs.resize( pat._cgroups_total() );
|
|
param.m_prgbackrefs = & rgbackrefs[0];
|
|
param.m_cbackrefs = rgbackrefs.size();
|
|
|
|
REGEX_SEH_TRY
|
|
{
|
|
if( pat._ok_to_recurse() )
|
|
{
|
|
success = _do_match_impl( pat, param, use_null );
|
|
}
|
|
else
|
|
{
|
|
success = _do_match_with_stack( pat, param, use_null );
|
|
}
|
|
}
|
|
REGEX_SEH_EXCEPT( REGEX_SEH_STACK_OVERFLOW == _exception_code() )
|
|
{
|
|
// we have overflowed the stack. reset the guard page.
|
|
REGEX_RESET_STK_OFLW();
|
|
|
|
// This match fails silently.
|
|
for( size_t i=0; i < param.m_cbackrefs; ++i )
|
|
{
|
|
param.m_prgbackrefs[i] = static_init<backref_type>::value;
|
|
}
|
|
|
|
success = false;
|
|
}
|
|
|
|
_fixup_backrefs( rgbackrefs, pat.m_invisible_groups );
|
|
return success;
|
|
}
|
|
|
|
template< typename AllocT >
|
|
static bool _do_match
|
|
(
|
|
rpattern_type const & pat,
|
|
basic_match_results<IterT,AllocT> & results,
|
|
IterT ibegin,
|
|
IterT iend,
|
|
bool use_null
|
|
)
|
|
{
|
|
typedef typename basic_match_results<IterT,AllocT>::backref_vector backref_vector;
|
|
|
|
results.m_ibegin = ibegin;
|
|
match_param<IterT> param( ibegin, ibegin, iend, 0, 0 );
|
|
|
|
if( GLOBAL & pat.flags() ) // do a global find
|
|
{
|
|
// The NOBACKREFS flag is ignored in the match method.
|
|
bool const fAll = ( ALLBACKREFS == ( ALLBACKREFS & pat.flags() ) );
|
|
bool const fFirst = ( FIRSTBACKREFS == ( FIRSTBACKREFS & pat.flags() ) );
|
|
|
|
backref_vector rgtempbackrefs( results.m_rgbackrefs.get_allocator() );
|
|
|
|
while( _do_try_match( pat, param, results.m_rgbackrefs, use_null ) )
|
|
{
|
|
backref_type const & br = param.m_prgbackrefs[0];
|
|
|
|
// Handle specially the backref flags
|
|
if( fFirst )
|
|
{
|
|
rgtempbackrefs.push_back( br );
|
|
}
|
|
else if( fAll )
|
|
{
|
|
rgtempbackrefs.insert(
|
|
rgtempbackrefs.end(),
|
|
results.m_rgbackrefs.begin(),
|
|
results.m_rgbackrefs.end() );
|
|
}
|
|
else
|
|
{
|
|
rgtempbackrefs.swap( results.m_rgbackrefs );
|
|
}
|
|
|
|
param.m_imatchbegin = br.second;
|
|
param.m_no0len = ( br.first == br.second );
|
|
}
|
|
|
|
// restore the backref vectors
|
|
results.m_rgbackrefs.swap( rgtempbackrefs );
|
|
return ! results.m_rgbackrefs.empty();
|
|
}
|
|
else
|
|
{
|
|
return _do_try_match( pat, param, results.m_rgbackrefs, use_null );
|
|
}
|
|
}
|
|
|
|
template< typename AllocT >
|
|
static bool _do_match_c
|
|
(
|
|
rpattern_type const & pat,
|
|
basic_match_results<IterT,AllocT> & results,
|
|
char_type const * szbegin
|
|
)
|
|
{
|
|
if( RIGHTMOST & pat.flags() )
|
|
{
|
|
// We need to know the end of the string if we're doing a
|
|
// RIGHTMOST match.
|
|
char_type const * szend = szbegin;
|
|
std::advance( szend, traits_type::length( szbegin ) );
|
|
return _do_match( pat, results, szbegin, szend, false );
|
|
}
|
|
else
|
|
{
|
|
return _do_match( pat, results, szbegin, 0, true );
|
|
}
|
|
}
|
|
|
|
static size_t _do_count
|
|
(
|
|
rpattern_type const & pat,
|
|
IterT ibegin,
|
|
IterT iend,
|
|
bool use_null
|
|
)
|
|
{
|
|
size_t cmatches = 0;
|
|
std::vector<backref_type> rgbackrefs;
|
|
|
|
// If your compile breaks here, it is because CharT const * is not
|
|
// convertible to type IterT. Check the declaration of your rpattern object.
|
|
match_param<IterT> param( ibegin, ibegin, iend, 0, 0 );
|
|
|
|
while( _do_try_match( pat, param, rgbackrefs, use_null ) )
|
|
{
|
|
backref_type const & br = param.m_prgbackrefs[0];
|
|
++cmatches;
|
|
param.m_imatchbegin = br.second;
|
|
|
|
param.m_no0len = ( br.first == br.second );
|
|
}
|
|
|
|
return cmatches;
|
|
}
|
|
|
|
template< typename CharT, typename TraitsT, typename AllocT >
|
|
static size_t _do_split
|
|
(
|
|
rpattern_type const & pat,
|
|
basic_split_results<CharT, TraitsT, AllocT> & results,
|
|
IterT ibegin,
|
|
IterT iend,
|
|
int limit,
|
|
bool use_null
|
|
)
|
|
{
|
|
typedef typename basic_split_results<CharT, TraitsT, AllocT>::string_type string_type;
|
|
typedef typename rebind<AllocT, backref_type>::type backref_allocator;
|
|
|
|
std::vector<backref_type,backref_allocator> rgbackrefs(
|
|
convert_allocator<backref_type>( results.strings().get_allocator(), 0 ) );
|
|
|
|
typedef typename rebind<AllocT, CharT>::type char_allocator_type;
|
|
char_allocator_type char_allocator =
|
|
convert_allocator<CharT>( results.strings().get_allocator(), 0 );
|
|
|
|
// reserve some initial space
|
|
results.strings().clear();
|
|
results.strings().reserve( 10 );
|
|
|
|
match_param<IterT> param( ibegin, ibegin, iend, 0, 0 );
|
|
|
|
while( 1 != limit && _do_try_match( pat, param, rgbackrefs, use_null ) )
|
|
{
|
|
backref_type const & br = param.m_prgbackrefs[0];
|
|
param.m_no0len = ( br.first == br.second );
|
|
|
|
// discard zero-width matches at the beginning and end of the buffer
|
|
if( param.m_no0len )
|
|
{
|
|
// if we're at the beginning, skip
|
|
if( br.first == param.m_ibufferbegin )
|
|
continue;
|
|
|
|
// if we're at the end, break
|
|
if( use_null ? 0 == *param.m_imatchbegin : param.m_imatchbegin == param.m_iend )
|
|
break;
|
|
}
|
|
|
|
string_type tmp( param.m_imatchbegin, br.first, char_allocator );
|
|
results.strings().push_back( tmp );
|
|
param.m_imatchbegin = br.second;
|
|
|
|
// add any groups
|
|
for( size_t i = 1; i < rgbackrefs.size(); ++i )
|
|
{
|
|
backref_type const & br = rgbackrefs[i];
|
|
string_type tmp( br.first, br.second, char_allocator );
|
|
results.strings().push_back( tmp );
|
|
}
|
|
|
|
if( limit > 0 )
|
|
--limit;
|
|
}
|
|
|
|
// append the last string, unless it's empty and limit is 0
|
|
if( use_null )
|
|
{
|
|
if( *param.m_imatchbegin || 0 != limit )
|
|
results.strings().push_back( string_type( &*param.m_imatchbegin, char_allocator ) );
|
|
}
|
|
else
|
|
{
|
|
if( param.m_imatchbegin != param.m_iend || 0 != limit )
|
|
results.strings().push_back( string_type( param.m_imatchbegin, param.m_iend, char_allocator ) );
|
|
}
|
|
|
|
// remove trailing empty fields
|
|
if( 0 == limit )
|
|
{
|
|
while( results.size() && results.back().empty() )
|
|
{
|
|
results.strings().pop_back();
|
|
}
|
|
}
|
|
|
|
return results.size();
|
|
}
|
|
|
|
template< typename CharT, typename TraitsT, typename AllocT >
|
|
static size_t _do_subst_internal
|
|
(
|
|
std::basic_string<CharT, TraitsT, AllocT> & str,
|
|
basic_subst_results<CharT, TraitsT, AllocT> const & results,
|
|
rpattern_type const & pat,
|
|
size_type strpos,
|
|
size_type strlen
|
|
)
|
|
{
|
|
typedef subst_list_type::const_iterator iter_type;
|
|
enum { UPPER = -1, NIL, LOWER } next = NIL, rest = NIL;
|
|
bool first = true;
|
|
size_t old_strpos = strpos;
|
|
typename std::basic_string<CharT, TraitsT, AllocT>::iterator itstrlen = str.begin();
|
|
std::advance( itstrlen, strpos + strlen );
|
|
std::basic_string<char_type> const & subst = pat.get_subst();
|
|
|
|
for( iter_type isubst = pat.m_subst_list.begin(); pat.m_subst_list.end() != isubst; ++isubst )
|
|
{
|
|
size_t sublen = 0;
|
|
typename std::basic_string<CharT, TraitsT, AllocT>::const_iterator itsubpos1; // iter into str
|
|
typename std::basic_string<CharT, TraitsT, AllocT>::const_iterator itsublen1;
|
|
typename std::basic_string<char_type>::const_iterator itsubpos2; // iter into subst string
|
|
typename std::basic_string<char_type>::const_iterator itsublen2;
|
|
typename std::basic_string<CharT, TraitsT, AllocT>::iterator itstrpos = str.begin();
|
|
std::advance( itstrpos, strpos );
|
|
|
|
switch( isubst->m_stype )
|
|
{
|
|
case subst_node::SUBST_STRING:
|
|
itsubpos2 = subst.begin();
|
|
std::advance( itsubpos2, isubst->m_subst_string.m_rstart );
|
|
itsublen2 = itsubpos2;
|
|
std::advance( itsublen2, isubst->m_subst_string.m_rlength );
|
|
|
|
if( first )
|
|
str.replace( itstrpos, itstrlen, itsubpos2, itsublen2 );
|
|
else
|
|
str.insert( itstrpos, itsubpos2, itsublen2 );
|
|
sublen = std::distance( itsubpos2, itsublen2 );
|
|
break;
|
|
|
|
case subst_node::SUBST_BACKREF:
|
|
switch( isubst->m_subst_backref )
|
|
{
|
|
case subst_node::PREMATCH:
|
|
itsubpos1 = results.backref_str().begin();
|
|
itsublen1 = itsubpos1;
|
|
std::advance( itsublen1, sublen = results.rstart() );
|
|
break;
|
|
case subst_node::POSTMATCH:
|
|
itsubpos1 = results.backref_str().begin();
|
|
std::advance( itsubpos1, results.rstart() + results.rlength() );
|
|
itsublen1 = results.backref_str().end();
|
|
break;
|
|
default:
|
|
itsubpos1 = results.backref_str().begin();
|
|
std::advance( itsubpos1, results.rstart( isubst->m_subst_backref ) );
|
|
itsublen1 = itsubpos1;
|
|
std::advance( itsublen1, results.rlength( isubst->m_subst_backref ) );
|
|
break;
|
|
}
|
|
|
|
if( first )
|
|
str.replace( itstrpos, itstrlen, itsubpos1, itsublen1 );
|
|
else
|
|
str.insert( itstrpos, itsubpos1, itsublen1 );
|
|
sublen = std::distance( itsubpos1, itsublen1 );
|
|
break;
|
|
|
|
case subst_node::SUBST_OP:
|
|
switch( isubst->m_op )
|
|
{
|
|
case subst_node::UPPER_ON:
|
|
rest = UPPER;
|
|
break;
|
|
case subst_node::UPPER_NEXT:
|
|
next = UPPER;
|
|
break;
|
|
case subst_node::LOWER_ON:
|
|
rest = LOWER;
|
|
break;
|
|
case subst_node::LOWER_NEXT:
|
|
next = LOWER;
|
|
break;
|
|
case subst_node::ALL_OFF:
|
|
rest = NIL;
|
|
break;
|
|
default:
|
|
REGEX_ASSERT(false);
|
|
break;
|
|
}
|
|
continue; // jump to the next item in the list
|
|
|
|
default:
|
|
REGEX_ASSERT(false);
|
|
break;
|
|
}
|
|
|
|
first = false;
|
|
|
|
// Are we upper- or lower-casing this string?
|
|
if( rest )
|
|
{
|
|
typename std::basic_string<CharT, TraitsT, AllocT>::iterator ibegin = str.begin();
|
|
std::advance( ibegin, strpos );
|
|
typename std::basic_string<CharT, TraitsT, AllocT>::const_iterator iend = ibegin;
|
|
std::advance( iend, sublen );
|
|
switch( rest )
|
|
{
|
|
case UPPER:
|
|
regex_toupper( ibegin, iend );
|
|
break;
|
|
case LOWER:
|
|
regex_tolower( ibegin, iend );
|
|
break;
|
|
default:
|
|
REGEX_ASSERT(false);
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Are we upper- or lower-casing the next character?
|
|
if( next )
|
|
{
|
|
switch( next )
|
|
{
|
|
case UPPER:
|
|
traits_type::assign( str[strpos], regex_toupper( str[strpos] ) );
|
|
break;
|
|
case LOWER:
|
|
traits_type::assign( str[strpos], regex_tolower( str[strpos] ) );
|
|
break;
|
|
default:
|
|
REGEX_ASSERT(false);
|
|
break;
|
|
}
|
|
next = NIL;
|
|
}
|
|
|
|
strpos += sublen;
|
|
}
|
|
|
|
// If *first* is still true, then we never called str.replace, and the substitution
|
|
// string is empty. Erase the part of the string that the pattern matched.
|
|
if( first )
|
|
str.erase( strpos, strlen );
|
|
|
|
// return length of the substitution
|
|
return strpos - old_strpos;
|
|
}
|
|
|
|
template< typename CharT, typename TraitsT, typename AllocT >
|
|
static size_t _do_subst
|
|
(
|
|
rpattern_type const & pat,
|
|
std::basic_string<CharT, TraitsT, AllocT> & str,
|
|
basic_subst_results<CharT, TraitsT, AllocT> & results,
|
|
size_type pos,
|
|
size_type len
|
|
)
|
|
{
|
|
typedef std::basic_string<CharT, TraitsT, AllocT> string_type;
|
|
typedef typename basic_subst_results<CharT, TraitsT, AllocT>::backref_vector backref_vector;
|
|
|
|
results.m_pbackref_str = pat._save_backrefs() ? &( results.m_backref_str = str ) : &str;
|
|
results.m_ibegin = results.m_pbackref_str->begin();
|
|
|
|
size_t csubst = 0;
|
|
size_type stop_offset = results.m_pbackref_str->size();
|
|
if( len != rpattern_type::npos )
|
|
stop_offset = regex_min( size_t( pos + len ), stop_offset );
|
|
|
|
match_param<IterT> param( results.m_ibegin, results.m_ibegin, results.m_ibegin, 0, 0 );
|
|
|
|
std::advance( param.m_imatchbegin, pos );
|
|
std::advance( param.m_iend, stop_offset );
|
|
param.m_ibufferbegin = param.m_imatchbegin;
|
|
|
|
if( GLOBAL & pat.flags() )
|
|
{
|
|
bool const fAll = ( ALLBACKREFS == ( ALLBACKREFS & pat.flags() ) );
|
|
bool const fFirst = ( FIRSTBACKREFS == ( FIRSTBACKREFS & pat.flags() ) );
|
|
backref_vector rgtempbackrefs( results.m_rgbackrefs.get_allocator() ); // temporary vector used if fsave_backrefs
|
|
|
|
size_type pos_offset = 0; // keep track of how much the backref_str and
|
|
// the current string are out of sync
|
|
|
|
while( _do_try_match( pat, param, results.m_rgbackrefs, false ) )
|
|
{
|
|
backref_type const & br = param.m_prgbackrefs[0];
|
|
++csubst;
|
|
size_type match_length = std::distance( br.first, br.second );
|
|
pos = std::distance( results.m_ibegin, br.first );
|
|
size_type subst_length = _do_subst_internal( str, results, pat, pos + pos_offset, match_length );
|
|
|
|
if( pat._save_backrefs() )
|
|
{
|
|
pos += match_length;
|
|
pos_offset += ( subst_length - match_length );
|
|
|
|
// Handle specially the backref flags
|
|
if( fFirst )
|
|
{
|
|
rgtempbackrefs.push_back( br );
|
|
}
|
|
else if( fAll )
|
|
{
|
|
rgtempbackrefs.insert(
|
|
rgtempbackrefs.end(),
|
|
results.m_rgbackrefs.begin(),
|
|
results.m_rgbackrefs.end() );
|
|
}
|
|
else
|
|
{
|
|
rgtempbackrefs.swap( results.m_rgbackrefs );
|
|
}
|
|
}
|
|
else
|
|
{
|
|
pos += subst_length;
|
|
stop_offset += ( subst_length - match_length );
|
|
results.m_ibegin = results.m_pbackref_str->begin();
|
|
|
|
// we're not saving backref information, so we don't
|
|
// need to do any special backref maintenance here
|
|
}
|
|
|
|
// prevent a pattern that matches 0 characters from matching
|
|
// again at the same point in the string
|
|
param.m_no0len = ( 0 == match_length );
|
|
|
|
param.m_imatchbegin = results.m_ibegin;
|
|
std::advance( param.m_imatchbegin, pos ); // ineffecient for bidirectional iterators.
|
|
|
|
param.m_iend = results.m_ibegin;
|
|
std::advance( param.m_iend, stop_offset ); // ineffecient for bidirectional iterators.
|
|
}
|
|
|
|
// If we did special backref handling, swap the backref vectors
|
|
if( pat._save_backrefs() )
|
|
{
|
|
results.m_rgbackrefs.swap( rgtempbackrefs );
|
|
}
|
|
else if( ! results.m_rgbackrefs[0].matched )
|
|
{
|
|
results.m_rgbackrefs.clear();
|
|
}
|
|
}
|
|
else if( _do_try_match( pat, param, results.m_rgbackrefs, false ) )
|
|
{
|
|
backref_type const & br = param.m_prgbackrefs[0];
|
|
++csubst;
|
|
_do_subst_internal(
|
|
str, results, pat,
|
|
std::distance( results.m_ibegin, br.first ),
|
|
std::distance( br.first, br.second ) );
|
|
results.m_ibegin = results.m_pbackref_str->begin();
|
|
}
|
|
|
|
if( NOBACKREFS == ( pat.flags() & NOBACKREFS ) )
|
|
{
|
|
results.m_rgbackrefs.clear();
|
|
}
|
|
|
|
return csubst;
|
|
}
|
|
|
|
static instantiator instantiate()
|
|
{
|
|
return instantiator_helper
|
|
(
|
|
®ex_access::_do_match_iterative_helper_s,
|
|
®ex_access::_do_match_iterative_helper_c,
|
|
®ex_access::_do_match_recursive_s,
|
|
®ex_access::_do_match_recursive_c,
|
|
®ex_access::_do_match_with_stack,
|
|
®ex_access::_do_match_impl
|
|
);
|
|
}
|
|
};
|
|
|
|
|
|
//
|
|
// Some helper functions needed by process_escapes
|
|
//
|
|
template< typename CharT >
|
|
inline bool regex_isxdigit( CharT ch )
|
|
{
|
|
return ( REGEX_CHAR(CharT,'0') <= ch && REGEX_CHAR(CharT,'9') >= ch )
|
|
|| ( REGEX_CHAR(CharT,'a') <= ch && REGEX_CHAR(CharT,'f') >= ch )
|
|
|| ( REGEX_CHAR(CharT,'A') <= ch && REGEX_CHAR(CharT,'F') >= ch );
|
|
}
|
|
|
|
template< typename CharT >
|
|
inline int regex_xdigit2int( CharT ch )
|
|
{
|
|
if( REGEX_CHAR(CharT,'a') <= ch && REGEX_CHAR(CharT,'f') >= ch )
|
|
return ch - REGEX_CHAR(CharT,'a') + 10;
|
|
if( REGEX_CHAR(CharT,'A') <= ch && REGEX_CHAR(CharT,'F') >= ch )
|
|
return ch - REGEX_CHAR(CharT,'A') + 10;
|
|
return ch - REGEX_CHAR(CharT,'0');
|
|
}
|
|
|
|
} // namespace detail
|
|
|
|
// --------------------------------------------------------------------------
|
|
//
|
|
// Function: process_escapes
|
|
//
|
|
// Description: Turn the escape sequnces \f \n \r \t \v \\ into their
|
|
// ASCII character equivalents. Also, optionally process
|
|
// perl escape sequences.
|
|
//
|
|
// Returns: void
|
|
//
|
|
// Arguments: str - the string to process
|
|
// fPattern - true if the string is to be processed as a regex
|
|
//
|
|
// Notes: When fPattern is true, the perl escape sequences are not
|
|
// processed. If there is an octal or hex excape sequence, we
|
|
// don't want to turn it into a regex metacharacter here. We
|
|
// leave it unescaped so the regex parser correctly interprests
|
|
// it as a character literal.
|
|
//
|
|
// History: 8/1/2001 - ericne - Created
|
|
//
|
|
// --------------------------------------------------------------------------
|
|
template< typename CharT, typename TraitsT, typename AllocT >
|
|
inline void process_escapes( std::basic_string<CharT, TraitsT, AllocT> & str, bool fPattern ) //throw()
|
|
{
|
|
typedef typename std::basic_string<CharT, TraitsT, AllocT>::size_type size_type;
|
|
|
|
size_type i = 0;
|
|
size_type const npos = std::basic_string<CharT, TraitsT, AllocT>::npos;
|
|
|
|
if( str.empty() )
|
|
return;
|
|
|
|
while( npos != ( i = str.find( REGEX_CHAR(CharT,'\\'), i ) ) )
|
|
{
|
|
if( str.size() - 1 == i )
|
|
return;
|
|
|
|
switch( str[i+1] )
|
|
{
|
|
case REGEX_CHAR(CharT,'a'):
|
|
str.replace( i, 2, 1, REGEX_CHAR(CharT,'\a') );
|
|
break;
|
|
case REGEX_CHAR(CharT,'b'):
|
|
if( ! fPattern )
|
|
str.replace( i, 2, 1, REGEX_CHAR(CharT,'\b') );
|
|
else
|
|
++i;
|
|
break;
|
|
case REGEX_CHAR(CharT,'e'):
|
|
str.replace( i, 2, 1, CharT( 27 ) );
|
|
break;
|
|
case REGEX_CHAR(CharT,'f'):
|
|
str.replace( i, 2, 1, REGEX_CHAR(CharT,'\f') );
|
|
break;
|
|
case REGEX_CHAR(CharT,'n'):
|
|
str.replace( i, 2, 1, REGEX_CHAR(CharT,'\n') );
|
|
break;
|
|
case REGEX_CHAR(CharT,'r'):
|
|
str.replace( i, 2, 1, REGEX_CHAR(CharT,'\r') );
|
|
break;
|
|
case REGEX_CHAR(CharT,'t'):
|
|
str.replace( i, 2, 1, REGEX_CHAR(CharT,'\t') );
|
|
break;
|
|
case REGEX_CHAR(CharT,'v'):
|
|
str.replace( i, 2, 1, REGEX_CHAR(CharT,'\v') );
|
|
break;
|
|
case REGEX_CHAR(CharT,'\\'):
|
|
if( fPattern )
|
|
{
|
|
if( i+3 < str.size() && REGEX_CHAR(CharT,'\\') == str[i+2] && REGEX_CHAR(CharT,'\\') == str[i+3] )
|
|
str.erase( i, 2 );
|
|
++i;
|
|
}
|
|
else
|
|
str.erase( i, 1 );
|
|
break;
|
|
case REGEX_CHAR(CharT,'0'): case REGEX_CHAR(CharT,'1'): case REGEX_CHAR(CharT,'2'): case REGEX_CHAR(CharT,'3'):
|
|
case REGEX_CHAR(CharT,'4'): case REGEX_CHAR(CharT,'5'): case REGEX_CHAR(CharT,'6'): case REGEX_CHAR(CharT,'7'):
|
|
if( ! fPattern )
|
|
{
|
|
size_t j=i+2;
|
|
CharT ch = CharT( str[i+1] - REGEX_CHAR(CharT,'0') );
|
|
for( ; j-i < 4 && j < str.size() && REGEX_CHAR(CharT,'0') <= str[j] && REGEX_CHAR(CharT,'7') >= str[j]; ++j )
|
|
ch = CharT( ch * 8 + ( str[j] - REGEX_CHAR(CharT,'0') ) );
|
|
str.replace( i, j-i, 1, ch );
|
|
}
|
|
break;
|
|
case REGEX_CHAR(CharT,'x'):
|
|
if( ! fPattern )
|
|
{
|
|
CharT ch = 0;
|
|
size_t j=i+2;
|
|
for( ; j-i < 4 && j < str.size() && detail::regex_isxdigit( str[j] ); ++j )
|
|
ch = CharT( ch * 16 + detail::regex_xdigit2int( str[j] ) );
|
|
str.replace( i, j-i, 1, ch );
|
|
}
|
|
break;
|
|
case REGEX_CHAR(CharT,'c'):
|
|
if( ! fPattern && i+2 < str.size() )
|
|
{
|
|
CharT ch = str[i+2];
|
|
if( REGEX_CHAR(CharT,'a') <= ch && REGEX_CHAR(CharT,'z') >= ch )
|
|
ch = detail::regex_toupper( ch );
|
|
str.replace( i, 3, 1, CharT( ch ^ 0x40 ) );
|
|
}
|
|
break;
|
|
default:
|
|
if( fPattern )
|
|
++i;
|
|
else
|
|
str.erase( i, 1 );
|
|
break;
|
|
}
|
|
++i;
|
|
if( str.size() <= i )
|
|
return;
|
|
}
|
|
}
|
|
|
|
#endif
|