# # # delete "tests/syntax_errors_in_.mtn-ignore/ignore.lua" # # patch "Makefile.am" # from [91f52e077decbb6ce022e0d1c0c634bc4d21fd5e] # to [302a257f3be89ef313e76dda8a3e62e0a1afe5fe] # # patch "configure.ac" # from [f257cc5dff1176366eeeb03c38dcb1e189989045] # to [bf67142c868bb70ba1429b49240be849ad315add] # # patch "lua.cc" # from [202bceb895e5daf867e24cde826f86fcb4d83aaf] # to [b020ff7dab1928b9a8981aa85be9323cdea824ac] # # patch "m4/boost.m4" # from [6dba18ce4bf7f711345de1d63bebc0e90c32fa97] # to [f8acc6b6b0859c9ffc31e5e7a37502bc8345176c] # # patch "m4/pcre.m4" # from [806d70df6bad48195a10ab334738542d17eb456c] # to [5f3203a3ad35cad17854a1b3f8c3936a441819bf] # # patch "pcrewrap.cc" # from [dd081644cde0233e19d86e37323b14ff6daf4594] # to [39b0d1504485d4e27219c59702d88f4cb34e4adc] # # patch "pcrewrap.hh" # from [5b3070b3642cc669273a7347bf6feb938f465215] # to [007bcae3d6a968650f662857cf008102d5249944] # # patch "std_hooks.lua" # from [f25782384cad8e2154bd170f51a69bfbdc4c7aa2] # to [361bf7b11b697e89c43c7b011a0b650980164346] # # patch "tests/syntax_errors_in_.mtn-ignore/stderr-ref" # from [6f474b55e26298e81d8a9365fa5bfd99e580e417] # to [81aae288f36ab9e7e65cc454878447c7ce9ad945] # # patch "tests/syntax_errors_in_.mtn-ignore/stdout-ref" # from [73bbb241af4f750ff72b0177441ded3d6cb26be4] # to [98b118d4a2e3bd4f52dcbe7c790a70f1b52aa901] # ============================================================ --- Makefile.am 91f52e077decbb6ce022e0d1c0c634bc4d21fd5e +++ Makefile.am 302a257f3be89ef313e76dda8a3e62e0a1afe5fe @@ -329,7 +329,7 @@ UNIT_TEST_OBJ_SUPPORT = \ mtn-selectors.$(OBJEXT) mtn-specialized_lexical_cast.$(OBJEXT) \ mtn-ssh_agent.$(OBJEXT) mtn-std_hooks.$(OBJEXT) \ mtn-ui.$(OBJEXT) mtn-work.$(OBJEXT) \ - mtn-work_migration.$(OBJEXT) + mtn-work_migration.$(OBJEXT) mtn-pcrewrap.$(OBJEXT) # primaries @@ -426,15 +426,15 @@ if INCLUDED_PCRE if INCLUDED_PCRE lib3rdparty_a_CPPFLAGS += -I$(top_srcdir)/pcre mtn_CPPFLAGS += -I$(top_srcdir)/pcre - unit_tests_CPPFLAGS += -I$(top_srcdir)/pcre + unit_tester_CPPFLAGS += -I$(top_srcdir)/pcre tester_CPPFLAGS += -I$(top_srcdir)/pcre else mtn_CPPFLAGS += $(PCRE_CFLAGS) - unit_tests_CPPFLAGS += $(PCRE_CFLAGS) + unit_tester_CPPFLAGS += $(PCRE_CFLAGS) tester_CPPFLAGS += $(PCRE_CFLAGS) mtn_LDADD += $(PCRE_LIBS) - unit_tests_LDADD += $(PCRE_LIBS) + unit_tester_LDADD += $(PCRE_LIBS) tester_LDADD += $(PCRE_LIBS) endif @@ -781,7 +781,7 @@ package_full_revision.txt: package_full_ # then if that fails fall back on the distributed versions. This # means that 'distclean' should _not_ remove those files, since they # are distributed, yet building the package will generate those files, -# those automake thinks that 'distclean' _should_ remove those files, +# thus automake thinks that 'distclean' _should_ remove those files, # and 'distcheck' gets cranky if we don't. So basically what this # line does is tell 'distcheck' to shut up and ignore those two files. distcleancheck_listfiles = find . -type f -a ! -name package_revision.txt -a ! -name package_full_revision_dist.txt @@ -789,7 +789,7 @@ distcheck-hook: # the distcheck-hook checks for errors in the use of base.hh distcheck-hook: cd $(srcdir) && $(SHELL) audit-includes \ - $(sort $(mtn_SOURCES) $(unit_tests_SOURCES) $(tester_SOURCES) \ + $(sort $(mtn_SOURCES) $(unit_tester_SOURCES) $(tester_SOURCES) \ $(UNIX_PLATFORM_SOURCES) $(WIN32_PLATFORM_SOURCES) \ win32/main.cc unix/main.cc) ============================================================ --- configure.ac f257cc5dff1176366eeeb03c38dcb1e189989045 +++ configure.ac bf67142c868bb70ba1429b49240be849ad315add @@ -59,8 +59,6 @@ AC_PROG_XGETTEXT_FLAG_OPTION AM_GNU_GETTEXT_VERSION(0.11.5) AM_ICONV AC_PROG_XGETTEXT_FLAG_OPTION -# similarly, this calls ACX_PTHREAD, which expects C. -BOOST_THREAD_STUBS # Stick some logic into config.status, after the logic that # AM_GNU_GETTEXT sticks into config.status, to annul the UPDATEPOFILES ============================================================ --- lua.cc 202bceb895e5daf867e24cde826f86fcb4d83aaf +++ lua.cc b020ff7dab1928b9a8981aa85be9323cdea824ac @@ -500,12 +500,9 @@ LUAEXT(search, regex) bool result = false; try { result = pcre::regex(re).match(str); - } catch (pcre::compile_error & e) { + } catch (informative_failure & e) { lua_pushstring(L, e.what()); return lua_error(L); - } catch (pcre::match_error & e) { - lua_pushstring(L, e.what()); - return lua_error(L); } lua_pushboolean(L, result); return 1; ============================================================ --- m4/boost.m4 6dba18ce4bf7f711345de1d63bebc0e90c32fa97 +++ m4/boost.m4 f8acc6b6b0859c9ffc31e5e7a37502bc8345176c @@ -1,26 +1,10 @@ dnl Grab-bag of checks related to boost. dnl Grab-bag of checks related to boost. -# We don't use threads, but some Boost libraries make locking calls -# anyway. So we need to ensure that these symbols are available. -# But it's okay if they're just stubs. This is technically incorrect -# if Boost is for some reason compiled without threads on a system -# that does provide threads; we'll end up compiling monotone with -# thread support for no reason. -AC_DEFUN([BOOST_THREAD_STUBS], -[AC_CHECK_FUNC([pthread_mutex_lock], [], -# But if there isn't even a stub, find the real threading library... - [ACX_PTHREAD( -# ...and if it exists, use it: - [LIBS="$PTHREAD_LIBS $LIBS" - CXXFLAGS="$PTHREAD_CFLAGS $CXXFLAGS"], -# ...but if it doesn't, oh well, maybe things will work out anyway: - [])]) -]) - # Check for suitably new version of boost. AC_DEFUN([BOOST_VERSION_CHECK], [AC_LANG_ASSERT([C++]) - AC_CACHE_CHECK([boost version 1.33.0 or newer], ac_cv_boost_version_least_1_33_0, + AC_CACHE_CHECK([boost version 1.33.0 or newer], + ac_cv_boost_version_least_1_33_0, [ AC_COMPILE_IFELSE( [#include @@ -34,7 +18,7 @@ AC_DEFUN([BOOST_VERSION_CHECK], ac_cv_boost_version_least_1_33_0=no) ]) if test x$ac_cv_boost_version_least_1_33_0 = xno; then - AC_MSG_ERROR([boost 1.33.0 or newer required]) + AC_MSG_ERROR([boost 1.33.0 or newer required]) fi ]) @@ -43,124 +27,3 @@ AC_DEFUN([BOOST_VERSION_SPECIFIC_BUGS], AC_DEFUN([BOOST_VERSION_SPECIFIC_BUGS], [AC_LANG_ASSERT([C++]) ]) - -# Boost libraries have a string suffix that identifies the compiler -# they were built with, among other details. For example, it can be -# '-gcc', '-gcc-mt', '-gcc-mt-1_31', and many other combinations -# depending on the build system. Some systems provide symlinks that -# hide these suffixes, avoiding this mess. However, other systems -# don't; we have to provide a way to let the user manually specify a -# suffix. Guessing can be very difficult, given the variety of -# possibilities. Note that you cannot expand a variable inside the -# second argument to AC_ARG_VAR, so we're stuck listing it twice. -AC_DEFUN([BOOST_SUFFIX_ARG], -[AC_ARG_VAR([BOOST_SUFFIX], - [Space-separated list of suffixes to try appending to the names - of Boost libraries. "none" means no suffix. The default is: - "none -gcc -mipspro -mt -sunpro -sw -mgw -gcc-mt -gcc-mt-s"]) -if test x"$BOOST_SUFFIX" = x; then - BOOST_SUFFIX="none -gcc -mipspro -mt -sunpro -sw -mgw -gcc-mt -gcc-mt-s" -fi -]) - -# Option to link statically against Boost. -# FIXME: Decouple prefix for boost libraries from static/shared linkage? -AC_DEFUN([BOOST_STATIC_LINK_OPTION], -[AC_ARG_ENABLE(static-boost, - AS_HELP_STRING([--enable-static-boost@<:@=prefix@:>@], - [use static libs from boost]), - [], [enable_static_boost=no]) - case "$enable_static_boost" in - # unconfuse emacs: (((( - ""|no) static_boost_prefixes="" ;; - yes) static_boost_prefixes="/ /usr /opt /usr/local" ;; - /*) static_boost_prefixes="$enable_static_boost" ;; - *) AC_MSG_ERROR([prefix argument to --enable-static-boost must be an absolute path]) ;; - esac - if test x"$static_boost_prefixes" != x; then - AC_MSG_CHECKING([location of static boost libraries]) - for i in ${static_boost_prefixes} - do - for s in $BOOST_SUFFIX - do - if test "x$s" = xnone; then - s='' - fi - # FIXME: We should not be hardwiring /lib64/ nor should we be - # unconditionally overriding it with /lib/ (if there are both - # 32- and 64-bit static Boost libraries available, we must - # pick the one that matches the code the compiler generates). - # Fortunately, this is C++, so we'll get link errors if we get - # it wrong. - if test -f $i/lib64/libboost_filesystem${s}.a - then - BOOST_LIBDIR=$i/lib64 - fi - if test -f $i/lib/libboost_filesystem${s}.a - then - BOOST_LIBDIR=$i/lib - fi - done - done - if test "x${BOOST_LIBDIR}" = x - then - AC_MSG_RESULT([not found]) - AC_MSG_ERROR([cannot find boost libraries for static link]) - else - AC_MSG_RESULT([${BOOST_LIBDIR}]) - fi - fi -]) - -# BOOST_LIB_IFELSE(library, testprog, if_found, if_not_found) -# This is tricksome, as we only want to process a list of suffixes -# until we've selected one; once we've done that, it must be used for -# all libraries. (But we need the shell loop in all uses, as previous -# scans might be unsuccessful.) - -AC_DEFUN([BOOST_LIB_IFELSE], -[AC_LANG_ASSERT(C++) - AC_REQUIRE([BOOST_SUFFIX_ARG]) - AC_REQUIRE([BOOST_STATIC_LINK_OPTION]) - found=no - OLD_LIBS="$LIBS" - for s in $BOOST_SUFFIX - do - if test "x$s" = xnone; then - s='' - fi - if test "x${BOOST_LIBDIR}" != x; then - lib="${BOOST_LIBDIR}/libboost_$1${s}.a" - else - lib="-lboost_$1$s" - fi - - LIBS="$lib $OLD_LIBS" - cv=AS_TR_SH([ac_cv_boost_lib_$1${s}_${BOOST_LIBDIR}]) - AC_CACHE_CHECK([for the boost_$1$s library], - $cv, - [AC_LINK_IFELSE([$2], - [eval $cv=yes], - [eval $cv=no])]) - if eval "test \"\${$cv}\" = yes"; then - found=yes - break - fi - done - LIBS="$OLD_LIBS" - AS_IF([test $found = yes], - [BOOST_SUFFIX=${s:-none} - $3], - [$4])]) - -# Checks for specific boost libraries. -# These are named MTN_BOOST_* because their actions are monotone-specific. - -AC_DEFUN([MTN_NEED_BOOST_LIB], -[BOOST_LIB_IFELSE([$1], [$2], - [BOOSTLIBS="$lib $BOOSTLIBS"], - [AC_MSG_FAILURE([the boost_$1 library is required])]) - AC_SUBST(BOOSTLIBS) -]) - - ]])])]) ============================================================ --- m4/pcre.m4 806d70df6bad48195a10ab334738542d17eb456c +++ m4/pcre.m4 5f3203a3ad35cad17854a1b3f8c3936a441819bf @@ -1,17 +1,31 @@ AC_DEFUN([MTN_LIB_PCRE], # Detect libpcre or fall back to our bundled version. AC_DEFUN([MTN_LIB_PCRE], -[AC_ARG_WITH([included-pcre], - AC_HELP_STRING([--with-included-pcre], - [force use of the included copy of libpcre. (To use a specific - installed version, use the environment variables PCRE_CFLAGS and/or - PCRE_LIBS.)]), +[AC_ARG_WITH([system-pcre], + AC_HELP_STRING([--with-system-pcre], + [use a system-provided copy of libpcre instead of the default bundled + copy. (To use a specific installed version, use the environment + variables PCRE_CFLAGS and/or PCRE_LIBS.)]), [case "$withval" in - (""|yes) with_included_pcre=yes ;; - (no) with_included_pcre=no ;; - (*) AC_MSG_ERROR([--with(out)-included-pcre takes no argument]) ;; + (""|yes) with_system_pcre=yes ;; + (no) with_system_pcre=no ;; + (*) AC_MSG_ERROR([--with(out)-system-pcre takes no argument]) ;; esac], - [with_included_pcre=no]) - if test "$with_included_pcre" = no; then + [with_system_pcre=no]) + if test "$with_system_pcre" = yes; then + MTN_FIND_PCRE + fi + if test $with_system_pcre = no; then + AC_MSG_NOTICE([using the bundled copy of PCRE]) + fi + AM_CONDITIONAL([INCLUDED_PCRE], [test $with_system_pcre = no]) + AC_SUBST([PCRE_CFLAGS]) + AC_SUBST([PCRE_LIBS]) +]) + +# This is a separate macro primarily to trick autoconf into not looking +# for pkg-config if we are using the bundled pcre. +AC_DEFUN([MTN_FIND_PCRE], +[ PKG_PROG_PKG_CONFIG # We manually test the variables here because we want them to work # even if pkg-config isn't installed. The use of + instead of :+ is # deliberate; the user should be able to tell us that the empty string @@ -52,7 +66,7 @@ AC_DEFUN([MTN_LIB_PCRE], if test x"$PCRE_LIBS" != x"-lpcre"; then AC_MSG_NOTICE([using PCRE link flags: "$PCRE_LIBS"]) fi - AC_CACHE_CHECK([whether the system libpcre works], ac_cv_lib_pcre_works, + AC_CACHE_CHECK([whether the system libpcre is usable], ac_cv_lib_pcre_works, [save_LIBS="$LIBS" save_CFLAGS="$CFLAGS" LIBS="$LIBS $PCRE_LIBS" @@ -66,16 +80,5 @@ AC_DEFUN([MTN_LIB_PCRE], LIBS="$save_LIBS" CFLAGS="$save_CFLAGS"]) if test $ac_cv_lib_pcre_works = no; then + with_system_pcre=no + fi]) - with_included_pcre=yes - fi - fi - if test $with_included_pcre = yes; then - AC_MSG_NOTICE([using the PCRE library included with the distribution]) - fi - AM_CONDITIONAL([INCLUDED_PCRE], [test $with_included_pcre = yes]) - AC_SUBST([PCRE_CFLAGS]) - AC_SUBST([PCRE_LIBS]) -]) - - - ============================================================ --- pcrewrap.cc dd081644cde0233e19d86e37323b14ff6daf4594 +++ pcrewrap.cc 39b0d1504485d4e27219c59702d88f4cb34e4adc @@ -1,17 +1,29 @@ +// Copyright (C) 2007 Zack Weinberg +// +// This program is made available under the GNU GPL version 2.0 or +// greater. See the accompanying file COPYING for details. +// +// This program is distributed WITHOUT ANY WARRANTY; without even the +// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +// PURPOSE. + +#include "base.hh" #include "pcrewrap.hh" +#include "sanity.hh" #include +// This dirty trick is necessary to prevent the 'pcre' typedef defined by +// pcre.h from colliding with namespace pcre. #define pcre pcre_t #include "pcre.h" #undef pcre using std::string; -using std::runtime_error; -static void pcre_compile_error(char const * err, int erroff, - char const * pattern) NORETURN; -static void pcre_study_error(char const * err) NORETURN; -static void pcre_match_error(int errcode) NORETURN; +static NORETURN(void pcre_compile_error(int errcode, char const * err, + int erroff, char const * pattern)); +static NORETURN(void pcre_study_error(char const * err, char const * pattern)); +static NORETURN(void pcre_match_error(int errcode)); inline unsigned int flags_to_internal(pcre::flags f) @@ -38,24 +50,6 @@ flags_to_internal(pcre::flags f) return i; } -inline std::pair -compile(const char * pattern, pcre::flags options) -{ - int erroff; - char const * err; - pcre_t const * basedat = pcre_compile(pattern, flags_to_internal(options), - &err, &erroff, 0); - if (!basedat) - pcre_compile_error(err, erroff, pattern); - - pcre_extra const * extradat = pcre_study(basedat, 0, &err); - if (err) - pcre_study_error(err); - - return std::make_pair(static_cast(basedat), - static_cast(extradat)); -} - inline unsigned int get_capturecount(void const * bd) { @@ -69,100 +63,64 @@ namespace pcre namespace pcre { + void regex::init(char const * pattern, flags options) + { + int errcode; + int erroff; + char const * err; + basedat = pcre_compile2(pattern, flags_to_internal(options), + &errcode, &err, &erroff, 0); + if (!basedat) + pcre_compile_error(errcode, err, erroff, pattern); + + pcre_extra *ed = pcre_study(basedat, 0, &err); + if (err) + pcre_study_error(err, pattern); + if (!ed) + { + // I resent that C++ requires this cast. + ed = (pcre_extra *)pcre_malloc(sizeof(pcre_extra)); + std::memset(ed, 0, sizeof(pcre_extra)); + } + + // We set a fairly low recursion depth to avoid stack overflow. + // Per pcrestack(3), one should assume 500 bytes per recursion; + // it should be safe to let pcre have a megabyte of stack, so + // that's a depth of 2000, give or take. (For reference, the + // default stack limit on Linux is 8MB.) + ed->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; + ed->match_limit_recursion = 2000; + extradat = ed; + } + regex::regex(char const * pattern, flags options) - : basic_regex(compile(pattern, options)) - {} + { + this->init(pattern, options); + } regex::regex(string const & pattern, flags options) - : basic_regex(compile(pattern.c_str(), options)) - {} + { + this->init(pattern.c_str(), options); + } regex::~regex() { if (basedat) - pcre_free(const_cast(basedat)); + pcre_free(const_cast(basedat)); if (extradat) - pcre_free(const_cast(extradat)); + pcre_free(const_cast(extradat)); } bool - basic_regex::match(string const & subject, matches & result, - string::const_iterator startptr, - flags options) const + regex::match(string const & subject, + string::const_iterator startptr, + flags options) const { - // pcre_exec wants its caller to provide three integer slots per - // capturing paren, plus three more for the whole-pattern match. - // On exit from pcre_exec, the first two-thirds of the vector will be - // pairs of integers representing [start, end) offsets within the - // string. pcre_exec uses the remaining third of the vector for a - // scratchpad. (Why can't it allocate its own damn scratchpad?) - unsigned int capturecount = get_capturecount(basedat); - std::vector ovec((capturecount + 1) * 3); - - // convert the start pointer to an offset within the string (the &* - // converts each iterator to a bare pointer, which can be subtracted -- - // you should be able to subtract random-access iterators directly, - // grumble) int startoffset = 0; if (startptr != string::const_iterator(0)) startoffset = &*startptr - &*subject.data(); - - int rc = pcre_exec(static_cast(basedat), - static_cast(extradat), - subject.data(), subject.size(), - startoffset, - flags_to_internal(options), - &ovec.front(), ovec.size()); // ??? ovec.data() - if (rc >= 0) - { - // If the return value is nonnegative, the pattern matched, - // and rc is one more than the number of pairs of integers in - // ovec that are meaningful. - result.clear(); - result.reserve(capturecount + 1); - for (int i = 0; i < rc * 2; i += 2) - { - if (ovec[i] == -1 && ovec[i+1] == -1) - result.push_back(capture(string::const_iterator(0), - string::const_iterator(0))); - else - { - I(ovec[i] != -1 && ovec[i+1] != -1); - result.push_back(capture(subject.begin() + ovec[i], - subject.begin() + ovec[i+1])); - } - } - for (unsigned int i = rc; i < capturecount + 1; i++) - result.push_back(capture(string::const_iterator(0), - string::const_iterator(0))); - I(result.size() == capturecount + 1); - return true; - } - else if (rc == PCRE_ERROR_NOMATCH) - { - result = matches(capturecount + 1, - capture(string::const_iterator(0), - string::const_iterator(0))); - I(result.size() == capturecount + 1); - return false; - } - else - pcre_match_error(rc); - } - - // This overload is for when you don't care about captures, only - // whether or not it matched. - bool - basic_regex::match(string const & subject, - string::const_iterator startptr, - flags options) const - { - int startoffset = 0; - if (startptr != string::const_iterator(0)) - startoffset = &*startptr - &*subject.data(); - int rc = pcre_exec(static_cast(basedat), - static_cast(extradat), + int rc = pcre_exec(basedat, extradat, subject.data(), subject.size(), startoffset, flags_to_internal(options), 0, 0); if (rc == 0) @@ -174,137 +132,92 @@ namespace pcre } } // namespace pcre -// These functions produce properly translated diagnostics from PCRE -// internal errors. +// When the library returns an error, these functions discriminate between +// bugs in monotone and user errors in regexp writing. static void -pcre_compile_error(char const *err, int erroff, char const * pattern) +pcre_compile_error(int errcode, char const * err, + int erroff, char const * pattern) { - using std::strcmp; - // Special case out-of-memory ... - if (!strcmp(err, "failed to get memory")) - throw std::bad_alloc(); + // One of the more entertaining things about the PCRE API is that + // while the numeric error codes are documented, they do not get + // symbolic names. - // ... and all errors that represent program bugs. - I(strcmp(err, "erroffset passed as NULL")); - I(strcmp(err, "unknown option bit(s) set")); - I(strcmp(err, "this version of PCRE is not compiled with PCRE_UTF8 support")); - I(strcmp(err, "internal error: code overflow")); - I(strcmp(err, "internal error: unexpected repeat")); - I(strcmp(err, "spare error")); - I(strcmp(err, "invalid UTF-8 string")); - I(strcmp(err, "no error")); // because we should never get here with that + switch (errcode) + { + case 21: // failed to get memory + throw std::bad_alloc(); - // PCRE fails to distinguish between errors at no position and errors at - // character offset 0 in the pattern, so in practice we give the - // position-ful variant for all errors, but I'm leaving the == -1 check - // here in case PCRE gets fixed. - if (erroff == -1) - throw pcre::compile_error(F("error in regex \"%s\": %s") - % pattern % gettext(err)); - else - throw pcre::compile_error(F("error near char %d of regex \"%s\": %s") - % (erroff + 1) % pattern % gettext(err)); + case 10: // [code allegedly not in use] + case 11: // internal error: unexpected repeat + case 16: // erroffset passed as NULL + case 17: // unknown option bit(s) set + case 19: // [code allegedly not in use] + case 23: // internal error: code overflow + case 33: // [code allegedly not in use] + case 50: // [code allegedly not in use] + case 52: // internal error: overran compiling workspace + case 53: // internal error: previously-checked referenced subpattern + // not found + throw oops((F("while compiling regex \"%s\": %s") % pattern % err) + .str().c_str()); + + default: + // PCRE fails to distinguish between errors at no position and errors at + // character offset 0 in the pattern, so in practice we give the + // position-ful variant for all errors, but I'm leaving the == -1 check + // here in case PCRE gets fixed. + throw informative_failure((erroff == -1 + ? (F("error in regex \"%s\": %s") + % pattern % err) + : (F("error near char %d of regex \"%s\": %s") + % (erroff + 1) % pattern % err) + ).str().c_str()); + } } static void -pcre_study_error(char const * err) +pcre_study_error(char const * err, char const * pattern) { + // This interface doesn't even *have* error codes. // If the error is not out-of-memory, it's a bug. - I(!std::strcmp(err, "failed to get memory")); - throw std::bad_alloc(); + if (!std::strcmp(err, "failed to get memory")) + throw std::bad_alloc(); + else + throw oops((F("while studying regex \"%s\": %s") % pattern % err) + .str().c_str()); } static void pcre_match_error(int errcode) { - // This one actually has error codes! Almost all of which indicate bugs - // in monotone. + // This interface provides error codes with symbolic constants for them! + // But it doesn't provide string versions of them. As most of them + // indicate bugs in monotone, it's not worth defining our own strings. + switch(errcode) { case PCRE_ERROR_NOMEMORY: throw std::bad_alloc(); case PCRE_ERROR_MATCHLIMIT: - throw pcre::match_error(F("backtrack limit exceeded")); + throw informative_failure + (_("backtrack limit exceeded in regular expression matching")); case PCRE_ERROR_RECURSIONLIMIT: - throw pcre::match_error(F("recursion limit exceeded")); + throw informative_failure + (_("recursion limit exceeded in regular expression matching")); + case PCRE_ERROR_BADUTF8: + case PCRE_ERROR_BADUTF8_OFFSET: + throw informative_failure + (_("invalid UTF-8 sequence found during regular expression matching")); + default: - global_sanity.invariant_failure((FL("pcre_match returned %d") % errcode) - .str().c_str(), __FILE__, __LINE__); + throw oops((F("pcre_match returned %d") % errcode) + .str().c_str()); } } -#ifdef XGETTEXT -// This is a copy of the error message table from pcre_compile.c, with -// N_() applied to all the strings that the user will actually see. -static char const * const error_texts[] = { - "no error", - N_("\\ at end of pattern"), - N_("\\c at end of pattern"), - N_("unrecognized character follows \\"), - N_("numbers out of order in {} quantifier"), - /* 5 */ - N_("number too big in {} quantifier"), - N_("missing terminating ] for character class"), - N_("invalid escape sequence in character class"), - N_("range out of order in character class"), - N_("nothing to repeat"), - /* 10 */ - N_("operand of unlimited repeat could match the empty string"), - "internal error: unexpected repeat", - N_("unrecognized character after (?"), - N_("POSIX named classes are supported only within a class"), - N_("missing )"), - /* 15 */ - N_("reference to non-existent subpattern"), - "erroffset passed as NULL", - "unknown option bit(s) set", - N_("missing ) after comment"), - N_("parentheses nested too deeply"), - /* 20 */ - N_("regular expression too large"), - "failed to get memory", // std::bad_alloc - N_("unmatched parentheses"), - "internal error: code overflow", - N_("unrecognized character after (?<"), - /* 25 */ - N_("lookbehind assertion is not fixed length"), - N_("malformed number or name after (?("), - N_("conditional group contains more than two branches"), - N_("assertion expected after (?("), - N_("(?R or (?digits must be followed by )"), - /* 30 */ - N_("unknown POSIX class name"), - N_("POSIX collating elements are not supported"), - "this version of PCRE is not compiled with PCRE_UTF8 support", - "spare error", - N_("character value in \\x{...} sequence is too large"), - /* 35 */ - N_("invalid condition (?(0)"), - N_("\\C not allowed in lookbehind assertion"), - N_("PCRE does not support \\L, \\l, \\N, \\U, or \\u"), - N_("number after (?C is > 255"), - N_("closing ) for (?C expected"), - /* 40 */ - N_("recursive call could loop indefinitely"), - N_("unrecognized character after (?P"), - N_("syntax error after (?P"), - N_("two named subpatterns have the same name"), - "invalid UTF-8 string", - /* 45 */ - N_("support for \\P, \\p, and \\X has not been compiled"), - N_("malformed \\P or \\p sequence"), - N_("unknown property name after \\P or \\p"), - N_("subpattern name is too long (maximum 32 characters)"), - N_("too many named subpatterns (maximum 10,000)"), - /* 50 */ - N_("repeated subpattern is too long"), - N_("octal value is greater than \\377 (not in UTF-8 mode)"), -}; -#endif - // Local Variables: // mode: C++ // fill-column: 76 ============================================================ --- pcrewrap.hh 5b3070b3642cc669273a7347bf6feb938f465215 +++ pcrewrap.hh 007bcae3d6a968650f662857cf008102d5249944 @@ -1,15 +1,23 @@ +// Copyright (C) 2007 Zack Weinberg +// +// This program is made available under the GNU GPL version 2.0 or +// greater. See the accompanying file COPYING for details. +// +// This program is distributed WITHOUT ANY WARRANTY; without even the +// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +// PURPOSE. + #ifndef _PCREWRAP_HH #define _PCREWRAP_HH -#include -#include -#include -#include "sanity.hh" - // This is a sensible C++ wrapper interface around the bare C API exported // by pcre.h. Note that pcre.h is a very "noisy" header in terms of macro -// definitions and so we don't actually expose it here. +// definitions and so we don't actually expose it here. Unfortunately, this +// means we have to hope this pair of forward declarations will not change... +struct real_pcre; +struct pcre_extra; + namespace pcre { enum flags @@ -38,107 +46,38 @@ namespace pcre // followed with ? (opposite of default) }; - // A capture object is a pair of string iterators, such that - // [C.first, C.second) is the range of characters in the "subject" - // string captured by either a full match or some pair of capturing - // parentheses. If both C.first and C.second are null, then the - // associated part of the regex did not match. It is an invariant - // that either both or neither C.first and C.second are null. The - // object provides a couple of helper operations, matched() and str(), - // for common use cases. - struct capture : public std::pair + // A regex object is the compiled form of a PCRE regular expression. + struct regex { - capture(std::string::const_iterator a, - std::string::const_iterator b) - : std::pair - (a, b) - { I((a == std::string::const_iterator(0) - && b == std::string::const_iterator(0)) - || (a != std::string::const_iterator(0) - && b != std::string::const_iterator(0))); } - - bool matched() { return (this->first != std::string::const_iterator(0)); } - std::string str() { return std::string(this->first, this->second); } - }; - - // A matches object stores the result of a PCRE match operation. It - // is a vector of capture objects (see above) such that element N - // corresponds to capture group N of the regexp. Per usual, - // match[0] encompasses the string matched by the entire regular - // expression. - typedef std::vector matches; - - // A basic_regex object is the compiled form of a PCRE regular expression. - // You never construct this directly. - struct basic_regex - { private: - // disable the default and copy constructors - basic_regex(); - basic_regex(basic_regex const &); - basic_regex & operator=(basic_regex const &); + // disable the default and copy constructors - we never need to copy + // these, and this lets us use bare pointers below instead of + // boost::shared_ptr. + regex(); + regex(regex const &); + regex & operator=(regex const &); - protected: - void const * const basedat; - void const * const extradat; + // data + struct real_pcre const * basedat; + struct pcre_extra const * extradat; - // for use only by subclass constructors - basic_regex(std::pair p) - : basedat(p.first), extradat(p.second) {} + // used by constructors + void init(char const *, pcre::flags); public: - ~basic_regex() {} + regex(char const * pattern, pcre::flags options = DEFAULT); + regex(std::string const & pattern, pcre::flags options = DEFAULT); + ~regex(); - bool match(std::string const & subject, matches & result, - std::string::const_iterator startoffset - = std::string::const_iterator(), - pcre::flags options = DEFAULT) const; - bool match(std::string const & subject, std::string::const_iterator startoffset = std::string::const_iterator(), pcre::flags options = DEFAULT) const; - - - // helper function which starts successive matches at the position - // where the last match left off. - bool nextmatch(std::string const & subject, matches & result, - pcre::flags options = DEFAULT) const - { - std::string::const_iterator startoffset(0); - if (result.size() > 0 && result[0].matched()) - startoffset = result[0].second; - return match(subject, result, startoffset, options); - } }; +} // namespace pcre - // A regex is the class you are intended to use directly, in normal usage. - struct regex : public basic_regex - { - regex(char const * pattern, pcre::flags options = DEFAULT); - regex(std::string const & pattern, pcre::flags options = DEFAULT); - ~regex(); - }; +#endif // pcrewrap.hh - // exceptions thrown for errors from PCRE APIs - struct compile_error : public std::runtime_error - { - explicit compile_error(i18n_format const & e) - : runtime_error(e.str().c_str()) {} - virtual ~compile_error() throw() {} - }; - - struct match_error : public std::runtime_error - { - explicit match_error(i18n_format const & e) - : runtime_error(e.str().c_str()) {} - virtual ~match_error() throw() {} - }; - -} // namespace pcre -#endif - // Local Variables: // mode: C++ // fill-column: 76 ============================================================ --- std_hooks.lua f25782384cad8e2154bd170f51a69bfbdc4c7aa2 +++ std_hooks.lua 361bf7b11b697e89c43c7b011a0b650980164346 @@ -108,7 +108,6 @@ function ignore_file(name) ignored_files = {} local ignfile = io.open(".mtn-ignore", "r") if (ignfile ~= nil) then -<<<<<<< variant A local line = ignfile:read() while (line ~= nil) do if line ~= "" then @@ -117,16 +116,6 @@ function ignore_file(name) line = ignfile:read() end io.close(ignfile) ->>>>>>> variant B - for l in ignfile:lines() do table.insert(ignored_files, l) end -####### Ancestor - local line = ignfile:read() - while (line ~= nil) do - table.insert(ignored_files, line) - line = ignfile:read() - end - io.close(ignfile) -======= end end end ============================================================ --- tests/syntax_errors_in_.mtn-ignore/stderr-ref 6f474b55e26298e81d8a9365fa5bfd99e580e417 +++ tests/syntax_errors_in_.mtn-ignore/stderr-ref 81aae288f36ab9e7e65cc454878447c7ce9ad945 @@ -1,4 +1,4 @@ -mtn: warning: while matching file 'tester.log': +mtn: warning: while matching file 'ts-stdin': .mtn-ignore:1: warning: error near char 2 of regex "\": \ at end of pattern - skipping this regex for all remaining files. .mtn-ignore:2: warning: error near char 1 of regex "\c": \c at end of pattern ============================================================ --- tests/syntax_errors_in_.mtn-ignore/stdout-ref 73bbb241af4f750ff72b0177441ded3d6cb26be4 +++ tests/syntax_errors_in_.mtn-ignore/stdout-ref 98b118d4a2e3bd4f52dcbe7c790a70f1b52aa901 @@ -1,7 +1,6 @@ keys .mtn-ignore dontignoreme keys -keys/address@hidden min_hooks.lua test_hooks.lua tester.log