//+--------------------------------------------------------------------------- // // Copyright ( C ) Microsoft, 1994 - 2002. // // File: reimpl2.h // // Functions: helpers for matching and substituting regular expressions // // Notes: implementation details that really belong in a cpp file, // but can't because of template weirdness // // Author: Eric Niebler ( ericne@microsoft.com ) // // History: 8/15/2001 ericne Created // //---------------------------------------------------------------------------- #ifndef REIMPL_H #define REIMPL_H // // Helper functions for match and substitute // namespace detail { // For use while doing uppercase/lowercase conversions: inline char regex_toupper( char ch ) { using namespace std; return ( char )toupper( ch ); } inline char regex_tolower( char ch ) { using namespace std; return ( char )tolower( ch ); } inline wchar_t regex_toupper( wchar_t ch ) { using namespace std; return ( wchar_t )towupper( ch ); } inline wchar_t regex_tolower( wchar_t ch ) { using namespace std; return ( wchar_t )towlower( ch ); } template< typename IBeginT, typename IEndT > inline void regex_toupper( IBeginT ibegin, IEndT iend ) { typedef typename std::iterator_traits::value_type char_type; typedef std::char_traits traits_type; for( ; iend != ibegin; ++ibegin ) traits_type::assign( *ibegin, regex_toupper( *ibegin ) ); } template< typename IBeginT, typename IEndT > inline void regex_tolower( IBeginT ibegin, IEndT iend ) { typedef typename std::iterator_traits::value_type char_type; typedef std::char_traits traits_type; for( ; iend != ibegin; ++ibegin ) traits_type::assign( *ibegin, regex_tolower( *ibegin ) ); } // // Helper fn for swapping two auto_ptr's // template< typename T > inline void swap_auto_ptr( std::auto_ptr & left, std::auto_ptr & right ) { std::auto_ptr temp( left ); left = right; right = temp; } template< typename T > inline void reset_auto_ptr( std::auto_ptr & left ) { std::auto_ptr temp( 0 ); left = temp; } template< typename T, typename U > inline void reset_auto_ptr( std::auto_ptr & left, U * right ) { std::auto_ptr temp( right ); left = temp; } typedef int instantiator; inline instantiator REGEX_CDECL instantiator_helper( ... ) { return instantiator(); } // -------------------------------------------------------------------------- // // Class: match_param // // Description: Struct that contains the state of the matching operation. // Passed by reference to all recursive_match_all and recursive_match_this routines. // // Methods: match_param - ctor // // Members: ibufferbegin - start of the buffer // ibegin - start of this iteration // iend - end of the string // prgbackrefs - pointer to backref array // // History: 8/14/2000 - ericne - Created // // -------------------------------------------------------------------------- template< typename IterT > struct match_param { typedef backref_tag backref_type; typedef sub_expr_base const * sub_expr_ptr; // for performance reasons, the most frequently used fields // are placed at offsets which are a power of 2 (assuming // a 32-bit architecture, and iterators which are 32 bits). backref_type * m_prgbackrefs; // offsetof == 0 IterT m_iend; // offsetof == 4 IterT m_icur; // offsetof == 8 size_t m_cbackrefs; sub_expr_ptr m_pnext; // offsetof == 16 IterT m_ibufferbegin; IterT m_imatchbegin; sub_expr_ptr m_pfirst; unsafe_stack * m_pstack; // offsetof == 32 bool m_no0len; bool m_reserved; match_param ( IterT ibufferbegin, IterT imatchbegin, IterT iend, backref_type * prgbackrefs, size_t cbackrefs ) : m_prgbackrefs( prgbackrefs ) , m_iend( iend ) , m_icur( imatchbegin ) , m_cbackrefs( cbackrefs ) , m_pnext( 0 ) , m_ibufferbegin( ibufferbegin ) , m_imatchbegin( imatchbegin ) , m_pfirst( 0 ) , m_pstack( 0 ) , m_no0len( false ) , m_reserved( false ) { } }; // -------------------------------------------------------------------------- // // Class: arena_allocator // // Description: A small, fast allocator for speeding up pattern compilation. // Every basic_rpattern object has an arena as a member. // sub_expr objects can only be allocated from this arena. // Memory is alloc'ed in chunks using the underlying allocator. // Chunks are freed en-masse when clear() or finalize() is called. // // History: 8/17/2001 - ericne - Created // // Notes: This is NOT a std-compliant allocator and CANNOT be used with // STL containers. arena_allocator objects maintain state, and // STL containers are allowed to assume their allocators do // not maintain state. In regexpr2.cpp, I define slist<>, a simple // arena-friendly singly-linked list for use with the arena // allocator. // // -------------------------------------------------------------------------- template< typename AllocT = std::allocator > struct pool_impl { typedef typename rebind::type char_allocator_type; struct mem_block { size_t m_offset; size_t m_blocksize; mem_block * m_pnext; unsigned char m_data[ 1 ]; }; #if !defined(_MSC_VER) | 1200 < _MSC_VER struct pool_data : char_allocator_type { pool_data( size_t default_size, char_allocator_type const & alloc ) : char_allocator_type( alloc ) , m_pfirst( 0 ) , m_default_size( default_size ) { } mem_block * m_pfirst; size_t m_default_size; char_allocator_type & get_allocator() { return *this; } } m_data; #else struct pool_data { pool_data( size_t default_size, char_allocator_type const & alloc ) : m_alloc( alloc ) , m_pfirst( 0 ) , m_default_size( default_size ) { } char_allocator_type m_alloc; mem_block * m_pfirst; size_t m_default_size; char_allocator_type & get_allocator() { return m_alloc; } } m_data; #endif void new_block( size_t size ); void clear(); void * allocate( size_t size ); explicit pool_impl( size_t default_size, char_allocator_type const & alloc = char_allocator_type() ); ~pool_impl(); char_allocator_type get_allocator() const { return const_cast(this)->m_data.get_allocator(); } }; template< typename T, typename AllocT = std::allocator > class arena_allocator { public: typedef size_t size_type; typedef ptrdiff_t difference_type; typedef T *pointer; typedef T const *const_pointer; typedef T & reference; typedef T const & const_reference; typedef T value_type; typedef typename rebind::type char_alloc_type; typedef pool_impl pool_impl_t; typedef typename rebind::type pool_alloc_type; explicit arena_allocator( size_t default_size, char_alloc_type const & alloc = char_alloc_type() ) : m_pool( 0 ) { char_alloc_type char_alloc( alloc ); pool_alloc_type pool_alloc( convert_allocator( char_alloc, 0 ) ); m_pool = pool_alloc.allocate( 1, 0 ); pool_alloc.construct( m_pool, pool_impl_t( default_size, char_alloc ) ); // can't throw } #if !defined(_MSC_VER) | 1200 < _MSC_VER arena_allocator( arena_allocator const & that ) : m_pool( that.m_pool ) { } #endif template< typename U > arena_allocator( arena_allocator const & that ) : m_pool( that.m_pool ) { } ~arena_allocator() { // Many arena_allocators may point to m_pool, so don't delete it. } // Rather, wait for someone to call finalize(). pointer allocate( size_type size, void const * =0 ) { return static_cast( m_pool->allocate( size * sizeof(T) ) ); } void deallocate( void *, size_type ) { // no-op. deallocation happens when pool is finalized or cleared. } void construct( pointer p, T const & t ) { new( static_cast(p) ) T( t ); } void destroy( pointer p ) { regex::detail::destroy( p ); } #if !defined(_MSC_VER) | 1200 < _MSC_VER template< typename U > struct rebind { typedef arena_allocator other; }; #endif void clear() { m_pool->clear(); } void finalize() { char_alloc_type char_alloc( m_pool->get_allocator() ); pool_alloc_type pool_alloc( convert_allocator( char_alloc, 0 ) ); pool_alloc.destroy( m_pool ); pool_alloc.deallocate( m_pool, 1 ); m_pool = 0; } void swap( arena_allocator & that ) { using std::swap; swap( m_pool, that.m_pool ); } // the pool lives here pool_impl_t * m_pool; }; // Dummy struct used by the pool allocator to align returned pointers struct not_pod { virtual ~not_pod() {} }; template< typename AllocT > inline pool_impl::pool_impl( size_t default_size, char_allocator_type const & alloc ) : m_data( default_size, alloc ) { } template< typename AllocT > inline pool_impl::~pool_impl() { clear(); } template< typename AllocT > inline void pool_impl::clear() { for( mem_block * pnext; m_data.m_pfirst; m_data.m_pfirst = pnext ) { pnext = m_data.m_pfirst->m_pnext; m_data.get_allocator().deallocate( reinterpret_cast( m_data.m_pfirst ), m_data.m_pfirst->m_blocksize ); } } template< typename AllocT > inline void pool_impl::new_block( size_t size ) { size_t blocksize = regex_max( m_data.m_default_size, size ) + offsetof( mem_block, m_data ); mem_block * pnew = reinterpret_cast( m_data.get_allocator().allocate( blocksize, 0 ) ); if( 0 == pnew ) { throw std::bad_alloc(); } pnew->m_offset = 0; pnew->m_blocksize = blocksize; pnew->m_pnext = m_data.m_pfirst; m_data.m_pfirst = pnew; } template< typename AllocT > inline void * pool_impl::allocate( size_t size ) { if( 0 == size ) size = 1; if( 0 == m_data.m_pfirst || m_data.m_pfirst->m_offset + size > m_data.m_default_size ) new_block( size ); void * pnew = m_data.m_pfirst->m_data + m_data.m_pfirst->m_offset; // ensure returned pointers are always suitably aligned m_data.m_pfirst->m_offset += ( ( size + alignof::value - 1 ) & ~( alignof::value - 1 ) ); return pnew; } // The regex_arena is a basic, vanilla arena_allocator. typedef arena_allocator regex_arena; template< typename T > type_with_size<3> allocator_picker( arena_allocator const &, int ); template<> struct rebind_helper<3> { template< typename, typename ElemT> struct inner { typedef arena_allocator type; }; }; // -------------------------------------------------------------------------- // // Class: sub_expr_base // // Description: patterns are "compiled" into a directed graph of sub_expr_base // structs. Matching is accomplished by traversing this graph. // // Methods: ~sub_expr_base - virt dtor so cleanup happens correctly // recursive_match_all - match this sub-expression and all following // sub-expression // // History: 8/14/2000 - ericne - Created // // -------------------------------------------------------------------------- template< typename IterT > struct sub_expr_base { virtual bool recursive_match_all_s( match_param &, IterT ) const = 0; //throw() (offset 0) virtual bool recursive_match_all_c( match_param &, IterT ) const = 0; //throw() (offset 4) virtual bool iterative_match_this_s( match_param & ) const = 0; //throw() (offset 8) virtual bool iterative_match_this_c( match_param & ) const = 0; //throw() (offset 12) virtual bool iterative_rematch_this_s( match_param & ) const = 0; //throw() (offset 16) virtual bool iterative_rematch_this_c( match_param & ) const = 0; //throw() (offset 20) virtual ~sub_expr_base() = 0; // (offset 24) // Use the regex_arena for memory management static void * operator new( size_t size, regex_arena & arena ) { return arena.allocate( size ); } static void operator delete( void *, regex_arena & ) { } // Invoke the d'tor, but don't bother freeing memory. That will // happen automatically when the arena object gets destroyed. static void operator delete( void * ) { } // For choosing an appropriate virtual function based on a compile time constant bool recursive_match_all( match_param & param, IterT icur, false_t ) const //throw() { return recursive_match_all_s( param, icur ); } bool recursive_match_all( match_param & param, IterT icur, true_t ) const //throw() { return recursive_match_all_c( param, icur ); } bool iterative_match_this( match_param & param, false_t ) const //throw() { return iterative_match_this_s( param ); } bool iterative_match_this( match_param & param, true_t ) const //throw() { return iterative_match_this_c( param ); } bool iterative_rematch_this( match_param & param, false_t ) const //throw() { return iterative_rematch_this_s( param ); } bool iterative_rematch_this( match_param & param, true_t ) const //throw() { return iterative_rematch_this_c( param ); } private: // don't allocate sub-expressions directly on the heap; they should // be allocated from an arena static void * operator new( size_t size ) throw( std::bad_alloc ); // disable all the vector new's and delete's. static void * operator new[]( size_t size, regex_arena & arena ) throw( std::bad_alloc ); static void operator delete[]( void *, regex_arena & ); static void * operator new[]( size_t size ) throw( std::bad_alloc ); static void operator delete[]( void * ); }; template< typename IterT > inline sub_expr_base::~sub_expr_base() { } // -------------------------------------------------------------------------- // // Class: subst_node // // Description: Substitution strings are parsed into an array of these // structures in order to speed up subst operations. // // Members: stype - type of this struct // .m_subst_string - do a string substitution // .m_subst_backref - do a bacref substitution // op - execute an operation // // History: 8/14/2000 - ericne - Created // // -------------------------------------------------------------------------- struct subst_node { enum { PREMATCH = -1, POSTMATCH = -2 }; enum subst_type { SUBST_STRING, SUBST_BACKREF, SUBST_OP }; enum op_type { UPPER_ON = SUBST_UPPER_ON, UPPER_NEXT = SUBST_UPPER_NEXT, LOWER_ON = SUBST_LOWER_ON, LOWER_NEXT = SUBST_LOWER_NEXT, ALL_OFF = SUBST_ALL_OFF }; struct string_offsets { ptrdiff_t m_rstart; ptrdiff_t m_rlength; }; subst_type m_stype; union { string_offsets m_subst_string; size_t m_subst_backref; op_type m_op; }; }; typedef std::list subst_list_type; size_t DEFAULT_BLOCK_SIZE(); template< typename IterT > class boyer_moore; // -------------------------------------------------------------------------- // // Class: basic_rpattern_base_impl // // Description: // // Methods: basic_rpattern_base_impl - ctor // flags - get the state of the flags // uses_backrefs - true if the backrefs are referenced // get_first_subexpression - return ptr to first sub_expr struct // get_width - get min/max nbr chars this pattern can match // loops - if false, we only need to try to match at 1st position // cgroups - number of visible groups // _cgroups_total - total number of groups, including hidden ( ?: ) groups // get_pat - get string representing the pattern // get_subst - get string representing the substitution string // get_subst_list - get the list of subst nodes // _normalize_string - perform character escaping // // Members: m_fuses_backrefs - true if subst string refers to backrefs // m_floop - false if pat only needs to be matched in one place // m_cgroups - total count of groups // m_cgroups_visible - count of visible groups // m_flags - the flags // m_nwidth - width of this pattern // m_pat - pattern string // m_subst - substitution string // m_subst_list - list of substitution nodes // m_pfirst - ptr to first subexpression to match // // Typedefs: char_type - // string_type - // size_type - // // History: 8/14/2000 - ericne - Created // // -------------------------------------------------------------------------- template< typename IterT > class basic_rpattern_base_impl { basic_rpattern_base_impl( basic_rpattern_base_impl const & ); basic_rpattern_base_impl & operator=( basic_rpattern_base_impl const & ); protected: typedef typename std::iterator_traits::value_type char_type; typedef std::char_traits traits_type; typedef std::basic_string string_type; typedef size_t size_type; typedef backref_tag backref_type; typedef std::vector backref_vector; friend struct regex_access; explicit basic_rpattern_base_impl ( REGEX_FLAGS flags = NOFLAGS, REGEX_MODE mode = MODE_DEFAULT, string_type const & pat = string_type(), string_type const & subst = string_type() ) //throw() : m_arena( DEFAULT_BLOCK_SIZE() ) , m_fuses_backrefs( false ) , m_floop( true ) , m_fok_to_recurse( true ) , m_cgroups( 0 ) , m_cgroups_visible( 0 ) , m_flags( flags ) , m_mode( mode ) , m_nwidth( uninit_width() ) , m_pat( new string_type( pat ) ) , m_subst( new string_type( subst ) ) , m_subst_list() , m_pfirst( 0 ) , m_invisible_groups() , m_search( 0 ) { } virtual ~basic_rpattern_base_impl() { // We're not going to be calling destructors because all allocated // memory associated with the parsed pattern resides in the arena. // The memory will be freed when the arena gets destroyed. //delete m_pfirst; reset_auto_ptr( m_pat ); reset_auto_ptr( m_subst ); m_arena.finalize(); } regex_arena m_arena; // The sub_expr arena bool m_fuses_backrefs; // true if the substitution uses backrefs bool m_floop; // false if m_pfirst->recursive_match_all only needs to be called once bool m_fok_to_recurse; // false if the pattern would recurse too deeply size_t m_cgroups; // number of groups ( always at least one ) size_t m_cgroups_visible; // number of visible groups REGEX_FLAGS m_flags; // flags used to customize search/replace REGEX_MODE m_mode; // Used to pick the fast or safe algorithm width_type m_nwidth; // width of the pattern std::auto_ptr m_pat; // contains the unparsed pattern std::auto_ptr m_subst; // contains the unparsed substitution subst_list_type m_subst_list; // used to speed up substitution sub_expr_base const * m_pfirst; // first subexpression in pattern std::list m_invisible_groups; // groups w/o backrefs boyer_moore * m_search; size_t _cgroups_total() const //throw() { return m_cgroups; } bool _loops() const //throw() { return m_floop; } size_t _get_next_group_nbr() { return m_cgroups++; } void _normalize_string( string_type & str ) const //throw() { if( NORMALIZE & flags() ) process_escapes( str, true ); } bool _save_backrefs() const //throw() { return m_fuses_backrefs || ! ( flags() & NOBACKREFS ); } sub_expr_base const * _get_first_subexpression() const //throw() { return m_pfirst; } REGEX_FLAGS flags() const //throw() { return m_flags; } REGEX_MODE mode() const // throw() { return m_mode; } width_type get_width() const //throw() { return m_nwidth; } size_t cgroups() const //throw() { return m_cgroups_visible; } string_type const & get_pat() const //throw() { return *m_pat; } string_type const & get_subst() const //throw() { return *m_subst; } bool _ok_to_recurse() const; //throw(); void swap( basic_rpattern_base_impl & that ); // throw(); enum { npos = static_cast( -1 ) }; static instantiator instantiate() { typedef basic_rpattern_base_impl this_type; return instantiator_helper ( &this_type::_ok_to_recurse, &this_type::swap ); } }; template< typename IterT > struct regex_access { typedef basic_rpattern_base_impl< IterT > rpattern_type; typedef typename rpattern_type::size_type size_type; typedef typename rpattern_type::char_type char_type; typedef typename rpattern_type::traits_type traits_type; typedef typename rpattern_type::backref_type backref_type; static bool _do_match_iterative_helper_s ( sub_expr_base const * expr, match_param & param, IterT icur ); static bool _do_match_iterative_helper_c ( sub_expr_base const * expr, match_param & param, IterT icur ); static bool _do_match_recursive_s ( sub_expr_base const * expr, match_param & param, IterT icur ); static bool _do_match_recursive_c ( sub_expr_base const * expr, match_param & param, IterT icur ); static bool _do_match_impl ( rpattern_type const & pat, match_param & param, bool const use_null ); static bool _do_match_with_stack ( rpattern_type const & pat, match_param & param, bool const use_null ); template< typename Alloc1T, typename Alloc2T > static void _fixup_backrefs ( std::vector & rgbackrefs, std::list const & invisible ) { typedef typename std::list::const_iterator iter_type; // Remove information about the "invisible" groups if( rgbackrefs[0].matched ) { size_t dropped = 0; iter_type const end = invisible.end(); iter_type curr = invisible.begin(), next = invisible.begin(); for( ; end != curr; curr = next, ++dropped ) { if( end == ++next ) { std::copy( rgbackrefs.begin() + *curr + 1, rgbackrefs.end(), rgbackrefs.begin() + *curr - dropped ); } else { std::copy( rgbackrefs.begin() + *curr + 1, rgbackrefs.begin() + *next, rgbackrefs.begin() + *curr - dropped ); } } rgbackrefs.resize( rgbackrefs.size() - dropped ); } else { rgbackrefs.resize( rgbackrefs.size() - invisible.size() ); } } template< typename AllocT > static bool _do_try_match ( rpattern_type const & pat, match_param & param, std::vector & rgbackrefs, bool const use_null ) { bool success; rgbackrefs.resize( pat._cgroups_total() ); param.m_prgbackrefs = & rgbackrefs[0]; param.m_cbackrefs = rgbackrefs.size(); REGEX_SEH_TRY { if( pat._ok_to_recurse() ) { success = _do_match_impl( pat, param, use_null ); } else { success = _do_match_with_stack( pat, param, use_null ); } } REGEX_SEH_EXCEPT( REGEX_SEH_STACK_OVERFLOW == _exception_code() ) { // we have overflowed the stack. reset the guard page. REGEX_RESET_STK_OFLW(); // This match fails silently. for( size_t i=0; i < param.m_cbackrefs; ++i ) { param.m_prgbackrefs[i] = static_init::value; } success = false; } _fixup_backrefs( rgbackrefs, pat.m_invisible_groups ); return success; } template< typename AllocT > static bool _do_match ( rpattern_type const & pat, basic_match_results & results, IterT ibegin, IterT iend, bool use_null ) { typedef typename basic_match_results::backref_vector backref_vector; results.m_ibegin = ibegin; match_param param( ibegin, ibegin, iend, 0, 0 ); if( GLOBAL & pat.flags() ) // do a global find { // The NOBACKREFS flag is ignored in the match method. bool const fAll = ( ALLBACKREFS == ( ALLBACKREFS & pat.flags() ) ); bool const fFirst = ( FIRSTBACKREFS == ( FIRSTBACKREFS & pat.flags() ) ); backref_vector rgtempbackrefs( results.m_rgbackrefs.get_allocator() ); while( _do_try_match( pat, param, results.m_rgbackrefs, use_null ) ) { backref_type const & br = param.m_prgbackrefs[0]; // Handle specially the backref flags if( fFirst ) { rgtempbackrefs.push_back( br ); } else if( fAll ) { rgtempbackrefs.insert( rgtempbackrefs.end(), results.m_rgbackrefs.begin(), results.m_rgbackrefs.end() ); } else { rgtempbackrefs.swap( results.m_rgbackrefs ); } param.m_imatchbegin = br.second; param.m_no0len = ( br.first == br.second ); } // restore the backref vectors results.m_rgbackrefs.swap( rgtempbackrefs ); return ! results.m_rgbackrefs.empty(); } else { return _do_try_match( pat, param, results.m_rgbackrefs, use_null ); } } template< typename AllocT > static bool _do_match_c ( rpattern_type const & pat, basic_match_results & results, char_type const * szbegin ) { if( RIGHTMOST & pat.flags() ) { // We need to know the end of the string if we're doing a // RIGHTMOST match. char_type const * szend = szbegin; std::advance( szend, traits_type::length( szbegin ) ); return _do_match( pat, results, szbegin, szend, false ); } else { return _do_match( pat, results, szbegin, 0, true ); } } static size_t _do_count ( rpattern_type const & pat, IterT ibegin, IterT iend, bool use_null ) { size_t cmatches = 0; std::vector rgbackrefs; // If your compile breaks here, it is because CharT const * is not // convertible to type IterT. Check the declaration of your rpattern object. match_param param( ibegin, ibegin, iend, 0, 0 ); while( _do_try_match( pat, param, rgbackrefs, use_null ) ) { backref_type const & br = param.m_prgbackrefs[0]; ++cmatches; param.m_imatchbegin = br.second; param.m_no0len = ( br.first == br.second ); } return cmatches; } template< typename CharT, typename TraitsT, typename AllocT > static size_t _do_split ( rpattern_type const & pat, basic_split_results & results, IterT ibegin, IterT iend, int limit, bool use_null ) { typedef typename basic_split_results::string_type string_type; typedef typename rebind::type backref_allocator; std::vector rgbackrefs( convert_allocator( results.strings().get_allocator(), 0 ) ); typedef typename rebind::type char_allocator_type; char_allocator_type char_allocator = convert_allocator( results.strings().get_allocator(), 0 ); // reserve some initial space results.strings().clear(); results.strings().reserve( 10 ); match_param param( ibegin, ibegin, iend, 0, 0 ); while( 1 != limit && _do_try_match( pat, param, rgbackrefs, use_null ) ) { backref_type const & br = param.m_prgbackrefs[0]; param.m_no0len = ( br.first == br.second ); // discard zero-width matches at the beginning and end of the buffer if( param.m_no0len ) { // if we're at the beginning, skip if( br.first == param.m_ibufferbegin ) continue; // if we're at the end, break if( use_null ? 0 == *param.m_imatchbegin : param.m_imatchbegin == param.m_iend ) break; } string_type tmp( param.m_imatchbegin, br.first, char_allocator ); results.strings().push_back( tmp ); param.m_imatchbegin = br.second; // add any groups for( size_t i = 1; i < rgbackrefs.size(); ++i ) { backref_type const & br = rgbackrefs[i]; string_type tmp( br.first, br.second, char_allocator ); results.strings().push_back( tmp ); } if( limit > 0 ) --limit; } // append the last string, unless it's empty and limit is 0 if( use_null ) { if( *param.m_imatchbegin || 0 != limit ) results.strings().push_back( string_type( &*param.m_imatchbegin, char_allocator ) ); } else { if( param.m_imatchbegin != param.m_iend || 0 != limit ) results.strings().push_back( string_type( param.m_imatchbegin, param.m_iend, char_allocator ) ); } // remove trailing empty fields if( 0 == limit ) { while( results.size() && results.back().empty() ) { results.strings().pop_back(); } } return results.size(); } template< typename CharT, typename TraitsT, typename AllocT > static size_t _do_subst_internal ( std::basic_string & str, basic_subst_results const & results, rpattern_type const & pat, size_type strpos, size_type strlen ) { typedef subst_list_type::const_iterator iter_type; enum { UPPER = -1, NIL, LOWER } next = NIL, rest = NIL; bool first = true; size_t old_strpos = strpos; typename std::basic_string::iterator itstrlen = str.begin(); std::advance( itstrlen, strpos + strlen ); std::basic_string const & subst = pat.get_subst(); for( iter_type isubst = pat.m_subst_list.begin(); pat.m_subst_list.end() != isubst; ++isubst ) { size_t sublen = 0; typename std::basic_string::const_iterator itsubpos1; // iter into str typename std::basic_string::const_iterator itsublen1; typename std::basic_string::const_iterator itsubpos2; // iter into subst string typename std::basic_string::const_iterator itsublen2; typename std::basic_string::iterator itstrpos = str.begin(); std::advance( itstrpos, strpos ); switch( isubst->m_stype ) { case subst_node::SUBST_STRING: itsubpos2 = subst.begin(); std::advance( itsubpos2, isubst->m_subst_string.m_rstart ); itsublen2 = itsubpos2; std::advance( itsublen2, isubst->m_subst_string.m_rlength ); if( first ) str.replace( itstrpos, itstrlen, itsubpos2, itsublen2 ); else str.insert( itstrpos, itsubpos2, itsublen2 ); sublen = std::distance( itsubpos2, itsublen2 ); break; case subst_node::SUBST_BACKREF: switch( isubst->m_subst_backref ) { case subst_node::PREMATCH: itsubpos1 = results.backref_str().begin(); itsublen1 = itsubpos1; std::advance( itsublen1, sublen = results.rstart() ); break; case subst_node::POSTMATCH: itsubpos1 = results.backref_str().begin(); std::advance( itsubpos1, results.rstart() + results.rlength() ); itsublen1 = results.backref_str().end(); break; default: itsubpos1 = results.backref_str().begin(); std::advance( itsubpos1, results.rstart( isubst->m_subst_backref ) ); itsublen1 = itsubpos1; std::advance( itsublen1, results.rlength( isubst->m_subst_backref ) ); break; } if( first ) str.replace( itstrpos, itstrlen, itsubpos1, itsublen1 ); else str.insert( itstrpos, itsubpos1, itsublen1 ); sublen = std::distance( itsubpos1, itsublen1 ); break; case subst_node::SUBST_OP: switch( isubst->m_op ) { case subst_node::UPPER_ON: rest = UPPER; break; case subst_node::UPPER_NEXT: next = UPPER; break; case subst_node::LOWER_ON: rest = LOWER; break; case subst_node::LOWER_NEXT: next = LOWER; break; case subst_node::ALL_OFF: rest = NIL; break; default: REGEX_ASSERT(false); break; } continue; // jump to the next item in the list default: REGEX_ASSERT(false); break; } first = false; // Are we upper- or lower-casing this string? if( rest ) { typename std::basic_string::iterator ibegin = str.begin(); std::advance( ibegin, strpos ); typename std::basic_string::const_iterator iend = ibegin; std::advance( iend, sublen ); switch( rest ) { case UPPER: regex_toupper( ibegin, iend ); break; case LOWER: regex_tolower( ibegin, iend ); break; default: REGEX_ASSERT(false); break; } } // Are we upper- or lower-casing the next character? if( next ) { switch( next ) { case UPPER: traits_type::assign( str[strpos], regex_toupper( str[strpos] ) ); break; case LOWER: traits_type::assign( str[strpos], regex_tolower( str[strpos] ) ); break; default: REGEX_ASSERT(false); break; } next = NIL; } strpos += sublen; } // If *first* is still true, then we never called str.replace, and the substitution // string is empty. Erase the part of the string that the pattern matched. if( first ) str.erase( strpos, strlen ); // return length of the substitution return strpos - old_strpos; } template< typename CharT, typename TraitsT, typename AllocT > static size_t _do_subst ( rpattern_type const & pat, std::basic_string & str, basic_subst_results & results, size_type pos, size_type len ) { typedef std::basic_string string_type; typedef typename basic_subst_results::backref_vector backref_vector; results.m_pbackref_str = pat._save_backrefs() ? &( results.m_backref_str = str ) : &str; results.m_ibegin = results.m_pbackref_str->begin(); size_t csubst = 0; size_type stop_offset = results.m_pbackref_str->size(); if( len != rpattern_type::npos ) stop_offset = regex_min( size_t( pos + len ), stop_offset ); match_param param( results.m_ibegin, results.m_ibegin, results.m_ibegin, 0, 0 ); std::advance( param.m_imatchbegin, pos ); std::advance( param.m_iend, stop_offset ); param.m_ibufferbegin = param.m_imatchbegin; if( GLOBAL & pat.flags() ) { bool const fAll = ( ALLBACKREFS == ( ALLBACKREFS & pat.flags() ) ); bool const fFirst = ( FIRSTBACKREFS == ( FIRSTBACKREFS & pat.flags() ) ); backref_vector rgtempbackrefs( results.m_rgbackrefs.get_allocator() ); // temporary vector used if fsave_backrefs size_type pos_offset = 0; // keep track of how much the backref_str and // the current string are out of sync while( _do_try_match( pat, param, results.m_rgbackrefs, false ) ) { backref_type const & br = param.m_prgbackrefs[0]; ++csubst; size_type match_length = std::distance( br.first, br.second ); pos = std::distance( results.m_ibegin, br.first ); size_type subst_length = _do_subst_internal( str, results, pat, pos + pos_offset, match_length ); if( pat._save_backrefs() ) { pos += match_length; pos_offset += ( subst_length - match_length ); // Handle specially the backref flags if( fFirst ) { rgtempbackrefs.push_back( br ); } else if( fAll ) { rgtempbackrefs.insert( rgtempbackrefs.end(), results.m_rgbackrefs.begin(), results.m_rgbackrefs.end() ); } else { rgtempbackrefs.swap( results.m_rgbackrefs ); } } else { pos += subst_length; stop_offset += ( subst_length - match_length ); results.m_ibegin = results.m_pbackref_str->begin(); // we're not saving backref information, so we don't // need to do any special backref maintenance here } // prevent a pattern that matches 0 characters from matching // again at the same point in the string param.m_no0len = ( 0 == match_length ); param.m_imatchbegin = results.m_ibegin; std::advance( param.m_imatchbegin, pos ); // ineffecient for bidirectional iterators. param.m_iend = results.m_ibegin; std::advance( param.m_iend, stop_offset ); // ineffecient for bidirectional iterators. } // If we did special backref handling, swap the backref vectors if( pat._save_backrefs() ) { results.m_rgbackrefs.swap( rgtempbackrefs ); } else if( ! results.m_rgbackrefs[0].matched ) { results.m_rgbackrefs.clear(); } } else if( _do_try_match( pat, param, results.m_rgbackrefs, false ) ) { backref_type const & br = param.m_prgbackrefs[0]; ++csubst; _do_subst_internal( str, results, pat, std::distance( results.m_ibegin, br.first ), std::distance( br.first, br.second ) ); results.m_ibegin = results.m_pbackref_str->begin(); } if( NOBACKREFS == ( pat.flags() & NOBACKREFS ) ) { results.m_rgbackrefs.clear(); } return csubst; } static instantiator instantiate() { return instantiator_helper ( ®ex_access::_do_match_iterative_helper_s, ®ex_access::_do_match_iterative_helper_c, ®ex_access::_do_match_recursive_s, ®ex_access::_do_match_recursive_c, ®ex_access::_do_match_with_stack, ®ex_access::_do_match_impl ); } }; // // Some helper functions needed by process_escapes // template< typename CharT > inline bool regex_isxdigit( CharT ch ) { return ( REGEX_CHAR(CharT,'0') <= ch && REGEX_CHAR(CharT,'9') >= ch ) || ( REGEX_CHAR(CharT,'a') <= ch && REGEX_CHAR(CharT,'f') >= ch ) || ( REGEX_CHAR(CharT,'A') <= ch && REGEX_CHAR(CharT,'F') >= ch ); } template< typename CharT > inline int regex_xdigit2int( CharT ch ) { if( REGEX_CHAR(CharT,'a') <= ch && REGEX_CHAR(CharT,'f') >= ch ) return ch - REGEX_CHAR(CharT,'a') + 10; if( REGEX_CHAR(CharT,'A') <= ch && REGEX_CHAR(CharT,'F') >= ch ) return ch - REGEX_CHAR(CharT,'A') + 10; return ch - REGEX_CHAR(CharT,'0'); } } // namespace detail // -------------------------------------------------------------------------- // // Function: process_escapes // // Description: Turn the escape sequnces \f \n \r \t \v \\ into their // ASCII character equivalents. Also, optionally process // perl escape sequences. // // Returns: void // // Arguments: str - the string to process // fPattern - true if the string is to be processed as a regex // // Notes: When fPattern is true, the perl escape sequences are not // processed. If there is an octal or hex excape sequence, we // don't want to turn it into a regex metacharacter here. We // leave it unescaped so the regex parser correctly interprests // it as a character literal. // // History: 8/1/2001 - ericne - Created // // -------------------------------------------------------------------------- template< typename CharT, typename TraitsT, typename AllocT > inline void process_escapes( std::basic_string & str, bool fPattern ) //throw() { typedef typename std::basic_string::size_type size_type; size_type i = 0; size_type const npos = std::basic_string::npos; if( str.empty() ) return; while( npos != ( i = str.find( REGEX_CHAR(CharT,'\\'), i ) ) ) { if( str.size() - 1 == i ) return; switch( str[i+1] ) { case REGEX_CHAR(CharT,'a'): str.replace( i, 2, 1, REGEX_CHAR(CharT,'\a') ); break; case REGEX_CHAR(CharT,'b'): if( ! fPattern ) str.replace( i, 2, 1, REGEX_CHAR(CharT,'\b') ); else ++i; break; case REGEX_CHAR(CharT,'e'): str.replace( i, 2, 1, CharT( 27 ) ); break; case REGEX_CHAR(CharT,'f'): str.replace( i, 2, 1, REGEX_CHAR(CharT,'\f') ); break; case REGEX_CHAR(CharT,'n'): str.replace( i, 2, 1, REGEX_CHAR(CharT,'\n') ); break; case REGEX_CHAR(CharT,'r'): str.replace( i, 2, 1, REGEX_CHAR(CharT,'\r') ); break; case REGEX_CHAR(CharT,'t'): str.replace( i, 2, 1, REGEX_CHAR(CharT,'\t') ); break; case REGEX_CHAR(CharT,'v'): str.replace( i, 2, 1, REGEX_CHAR(CharT,'\v') ); break; case REGEX_CHAR(CharT,'\\'): if( fPattern ) { if( i+3 < str.size() && REGEX_CHAR(CharT,'\\') == str[i+2] && REGEX_CHAR(CharT,'\\') == str[i+3] ) str.erase( i, 2 ); ++i; } else str.erase( i, 1 ); break; case REGEX_CHAR(CharT,'0'): case REGEX_CHAR(CharT,'1'): case REGEX_CHAR(CharT,'2'): case REGEX_CHAR(CharT,'3'): case REGEX_CHAR(CharT,'4'): case REGEX_CHAR(CharT,'5'): case REGEX_CHAR(CharT,'6'): case REGEX_CHAR(CharT,'7'): if( ! fPattern ) { size_t j=i+2; CharT ch = CharT( str[i+1] - REGEX_CHAR(CharT,'0') ); for( ; j-i < 4 && j < str.size() && REGEX_CHAR(CharT,'0') <= str[j] && REGEX_CHAR(CharT,'7') >= str[j]; ++j ) ch = CharT( ch * 8 + ( str[j] - REGEX_CHAR(CharT,'0') ) ); str.replace( i, j-i, 1, ch ); } break; case REGEX_CHAR(CharT,'x'): if( ! fPattern ) { CharT ch = 0; size_t j=i+2; for( ; j-i < 4 && j < str.size() && detail::regex_isxdigit( str[j] ); ++j ) ch = CharT( ch * 16 + detail::regex_xdigit2int( str[j] ) ); str.replace( i, j-i, 1, ch ); } break; case REGEX_CHAR(CharT,'c'): if( ! fPattern && i+2 < str.size() ) { CharT ch = str[i+2]; if( REGEX_CHAR(CharT,'a') <= ch && REGEX_CHAR(CharT,'z') >= ch ) ch = detail::regex_toupper( ch ); str.replace( i, 3, 1, CharT( ch ^ 0x40 ) ); } break; default: if( fPattern ) ++i; else str.erase( i, 1 ); break; } ++i; if( str.size() <= i ) return; } } #endif