[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[SCM] GNU M4 source repository branch, argv_ref, updated. branch-cvs-rea
From: |
Eric Blake |
Subject: |
[SCM] GNU M4 source repository branch, argv_ref, updated. branch-cvs-readonly-53-g4504da8 |
Date: |
Tue, 15 Jul 2008 15:37:15 +0000 |
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU M4 source repository".
http://git.sv.gnu.org/gitweb/?p=m4.git;a=commitdiff;h=4504da8cb8c5006044634a7b6a4df7c90487ec16
The branch, argv_ref has been updated
via 4504da8cb8c5006044634a7b6a4df7c90487ec16 (commit)
from ffac5851f2b3a287e0308502e79532432553bcf0 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit 4504da8cb8c5006044634a7b6a4df7c90487ec16
Author: Eric Blake <address@hidden>
Date: Tue Jul 15 09:33:43 2008 -0600
Stage33: make appending to existing macros linear
-----------------------------------------------------------------------
Summary of changes:
doc/m4.texinfo | 12 ++++++
src/builtin.c | 120 +++++++++++++++++++++++++++++++++++++++++---------------
src/m4.h | 9 ++++
src/macro.c | 89 +++++++++++++++++++++++++++++------------
4 files changed, 172 insertions(+), 58 deletions(-)
diff --git a/doc/m4.texinfo b/doc/m4.texinfo
index 755725d..7b34589 100644
--- a/doc/m4.texinfo
+++ b/doc/m4.texinfo
@@ -8116,6 +8116,18 @@ include(`loop.m4')dnl
@result{}10000
@end example
address@hidden appending to macro definition
+
address@hidden examples
address@hidden options: -Dlimit=50000
address@hidden
+$ @kbd {m4 -I examples -Dlimit=50000}
+define(`next', `.')define(`debug', `divert`'len(var)')
address@hidden
+include(`append.m4')dnl
address@hidden
address@hidden example
+
@end ignore
@node Improved m4wrap
diff --git a/src/builtin.c b/src/builtin.c
index 4e29d00..6c7526a 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -424,6 +424,47 @@ set_macro_sequence (const char *regexp)
macro_sequence_inuse = true;
}
+/*----------------------------------------------------------------.
+| Check the contents DEFN of length LEN, of the new definition of |
+| macro NAME of length NAME_LEN, for any match of |
+| --warn-macro-sequence. |
+`----------------------------------------------------------------*/
+static void
+check_macro_sequence (const char *name, size_t name_len, const char *defn,
+ size_t len)
+{
+ if (macro_sequence_inuse && len)
+ {
+ regoff_t offset = 0;
+ struct re_registers *regs = ¯o_sequence_regs;
+
+ while (offset < len
+ && (offset = re_search (¯o_sequence_buf, defn, len, offset,
+ len - offset, regs)) >= 0)
+ {
+ /* Skip empty matches. */
+ if (regs->start[0] == regs->end[0])
+ offset++;
+ else
+ {
+ offset = regs->end[0];
+ /* Safe to use slot 1 since we don't pass a macro name
+ to m4_warn. */
+ m4_warn (0, NULL, _("definition of %s contains sequence %s"),
+ quotearg_style_mem (locale_quoting_style, name,
+ name_len),
+ quotearg_n_style_mem (1, locale_quoting_style,
+ defn + regs->start[0],
+ regs->end[0] - regs->start[0]));
+ }
+ }
+ if (offset == -2)
+ m4_warn (0, NULL,
+ _("problem checking --warn-macro-sequence for macro %s"),
+ quotearg_style_mem (locale_quoting_style, name, name_len));
+ }
+}
+
/*------------------------------------------------------.
| Free dynamic memory utilized by regular expressions. |
`------------------------------------------------------*/
@@ -469,40 +510,10 @@ define_user_macro (const char *name, size_t name_len,
const char *text,
SYMBOL_TYPE (s) = TOKEN_TEXT;
SYMBOL_TEXT (s) = defn;
SYMBOL_TEXT_LEN (s) = len;
+ SYMBOL_TEXT_ALLOCATED (s) = len;
SYMBOL_TEXT_QUOTE_AGE (s) = quote_age;
SYMBOL_MACRO_ARGS (s) = true;
-
- /* Implement --warn-macro-sequence. */
- if (macro_sequence_inuse && text)
- {
- regoff_t offset = 0;
- struct re_registers *regs = ¯o_sequence_regs;
-
- while (offset < len
- && (offset = re_search (¯o_sequence_buf, defn, len, offset,
- len - offset, regs)) >= 0)
- {
- /* Skip empty matches. */
- if (regs->start[0] == regs->end[0])
- offset++;
- else
- {
- offset = regs->end[0];
- /* Safe to use slot 1 since we don't pass a macro name
- to m4_warn. */
- m4_warn (0, NULL, _("definition of %s contains sequence %s"),
- quotearg_style_mem (locale_quoting_style, name,
- name_len),
- quotearg_n_style_mem (1, locale_quoting_style,
- defn + regs->start[0],
- regs->end[0] - regs->start[0]));
- }
- }
- if (offset == -2)
- m4_warn (0, NULL,
- _("problem checking --warn-macro-sequence for macro %s"),
- quotearg_style_mem (locale_quoting_style, name, name_len));
- }
+ check_macro_sequence (name, name_len, defn, len);
}
/*-----------------------------------------------.
@@ -727,6 +738,51 @@ define_macro (int argc, macro_arguments *argv,
symbol_lookup mode)
static void
m4_define (struct obstack *obs, int argc, macro_arguments *argv)
{
+ /* Special case if we detect that the user is appending to an
+ existing text macro: try to copy only the suffix into the
+ over-allocated slop, to avoid quadratic behavior of always
+ copying the prefix on each append.
+
+ Pending expansions of the existing macro must be exactly 1 (the
+ macro's existing definition is in use as the prefix of the
+ argument); if pending expansions is zero, we are not appending to
+ the current macro, and if it is larger than 1, then either the
+ macro's existing definition is in use elsewhere and must not be
+ invalidated, or we are at least doubling the size of the macro's
+ defintion which means the suffix wouldn't fit in the
+ pre-allocated slop anyway. */
+ if (argc > 2 && arg_type (argv, 1) == TOKEN_TEXT
+ && arg_type (argv, 2) == TOKEN_TEXT)
+ {
+ const char *name = ARG (1);
+ size_t name_len = ARG_LEN (1);
+ symbol *s = lookup_symbol (name, name_len, SYMBOL_LOOKUP);
+ const char *tail;
+ if (s && SYMBOL_TYPE (s) == TOKEN_TEXT
+ && SYMBOL_PENDING_EXPANSIONS (s) == 1
+ && (tail = arg_has_prefix (argv, 2, SYMBOL_TEXT (s),
+ SYMBOL_TEXT_LEN (s))))
+ {
+ size_t len = ARG_LEN (2);
+ if (SYMBOL_TEXT_ALLOCATED (s) < len)
+ {
+ size_t new_len = len * 2;
+ char *old_defn = SYMBOL_TEXT (s);
+ if (new_len < len)
+ new_len = len;
+ SYMBOL_TEXT (s) = xcharalloc (new_len);
+ memcpy (SYMBOL_TEXT (s), old_defn, SYMBOL_TEXT_LEN (s));
+ free (old_defn);
+ SYMBOL_TEXT_ALLOCATED (s) = new_len;
+ }
+ memcpy (SYMBOL_TEXT (s) + SYMBOL_TEXT_LEN (s), tail,
+ len - SYMBOL_TEXT_LEN (s));
+ SYMBOL_TEXT_LEN (s) = len;
+ SYMBOL_TEXT_QUOTE_AGE (s) = arg_quote_age (argv);
+ check_macro_sequence (name, name_len, SYMBOL_TEXT (s), len);
+ return;
+ }
+ }
define_macro (argc, argv, SYMBOL_INSERT);
}
diff --git a/src/m4.h b/src/m4.h
index 3305fc2..69234ca 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -431,6 +431,12 @@ struct symbol
char *name;
size_t len;
token_data data; /* Type should be only TOKEN_TEXT or TOKEN_FUNC. */
+ /* In order to make appending an amortized O(n) rather than O(n^2),
+ we must geometrically over-allocate and modify only the suffix
+ when possible, rather than reallocating on every insertion. This
+ is only valid for TOKEN_TEXT, and is at least as large as
+ SYMBOL_TEXT_LEN. */
+ size_t allocated;
};
#define SYMBOL_NEXT(S) ((S)->next)
@@ -447,6 +453,7 @@ struct symbol
/* Only safe when SYMBOL_TYPE(S) == TOKEN_TEXT: */
#define SYMBOL_TEXT(S) (TOKEN_DATA_TEXT (&(S)->data))
#define SYMBOL_TEXT_LEN(S) (TOKEN_DATA_LEN (&(S)->data))
+#define SYMBOL_TEXT_ALLOCATED(S) ((S)->allocated)
#define SYMBOL_TEXT_QUOTE_AGE(S) (TOKEN_DATA_QUOTE_AGE (&(S)->data))
/* Only safe when SYMBOL_TYPE(S) == TOKEN_FUNC: */
@@ -479,6 +486,8 @@ unsigned int arg_quote_age (macro_arguments *);
token_data_type arg_type (macro_arguments *, unsigned int);
const char *arg_text (macro_arguments *, unsigned int, bool);
bool arg_equal (macro_arguments *, unsigned int, unsigned int);
+const char *arg_has_prefix (macro_arguments *, unsigned int, const char *,
+ size_t);
bool arg_empty (macro_arguments *, unsigned int);
size_t arg_len (macro_arguments *, unsigned int, bool);
builtin_func *arg_func (macro_arguments *, unsigned int);
diff --git a/src/macro.c b/src/macro.c
index 4d16f76..00d4e17 100644
--- a/src/macro.c
+++ b/src/macro.c
@@ -912,6 +912,37 @@ arg_type (macro_arguments *argv, unsigned int arg)
return type;
}
+/* Given a CHAIN, convert its links into text on OBS. If FLATTEN,
+ builtins are ignored. */
+static void
+collect_chain (struct obstack *obs, token_chain *chain, bool flatten)
+{
+ while (chain)
+ {
+ switch (chain->type)
+ {
+ case CHAIN_STR:
+ obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len);
+ break;
+ case CHAIN_FUNC:
+ if (flatten)
+ break;
+ assert (!"flatten_chain");
+ abort ();
+ case CHAIN_ARGV:
+ assert (!chain->u.u_a.has_func || flatten);
+ arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index,
+ quote_cache (NULL, chain->quote_age, chain->u.u_a.quotes),
+ flatten || chain->u.u_a.flatten, NULL, NULL, NULL, false);
+ break;
+ default:
+ assert (!"flatten_chain");
+ abort ();
+ }
+ chain = chain->next;
+ }
+}
+
/* Given ARGV, return the text at argument ARG. Abort if the argument
is not text. Arg 0 is always text, and indices beyond argc return
the empty string. If FLATTEN, builtins are ignored. The result is
@@ -939,32 +970,7 @@ arg_text (macro_arguments *argv, unsigned int arg, bool
flatten)
case TOKEN_COMP:
chain = token->u.u_c.chain;
obs = arg_scratch ();
- while (chain)
- {
- switch (chain->type)
- {
- case CHAIN_STR:
- obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len);
- break;
- case CHAIN_FUNC:
- if (flatten)
- break;
- assert (!"arg_text");
- abort ();
- case CHAIN_ARGV:
- assert (!chain->u.u_a.has_func || flatten || argv->flatten);
- arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index,
- quote_cache (NULL, chain->quote_age,
- chain->u.u_a.quotes),
- flatten || argv->flatten || chain->u.u_a.flatten,
- NULL, NULL, NULL, false);
- break;
- default:
- assert (!"arg_text");
- abort ();
- }
- chain = chain->next;
- }
+ collect_chain (obs, chain, flatten || argv->flatten);
obstack_1grow (obs, '\0');
return (char *) obstack_finish (obs);
case TOKEN_FUNC:
@@ -1124,6 +1130,37 @@ arg_equal (macro_arguments *argv, unsigned int indexa,
unsigned int indexb)
return ca == cb;
}
+/* Given index ARG within ARGV, if the argument is determined to be
+ appending text onto the existing definition DEFN of length LEN,
+ then return only the text occurring after LEN. Otherwise, return
+ NULL. This is useful for optimizing the builtin define, making
+ appending O(n) rather than O(n^2). */
+const char *
+arg_has_prefix (macro_arguments *argv, unsigned int arg, const char *defn,
+ size_t len)
+{
+ token_data *token;
+ token_chain *chain;
+ struct obstack *obs; /* Scratch space; cleaned at end of macro_expand. */
+
+ if (arg == 0 || arg >= argv->argc)
+ return NULL;
+ token = arg_token (argv, arg, NULL, false);
+ if (TOKEN_DATA_TYPE (token) != TOKEN_COMP
+ || (!argv->flatten && token->u.u_c.has_func))
+ return NULL;
+ chain = token->u.u_c.chain;
+ assert (chain);
+ if (chain->type == CHAIN_STR && chain->u.u_s.str == defn
+ && chain->u.u_s.len == len)
+ {
+ obs = arg_scratch ();
+ collect_chain (obs, chain->next, argv->flatten);
+ return (char *) obstack_finish (obs);
+ }
+ return NULL;
+}
+
/* Given ARGV, return true if argument ARG is the empty string. This
gives the same result as comparing arg_len against 0, but is often
faster. */
hooks/post-receive
--
GNU M4 source repository
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [SCM] GNU M4 source repository branch, argv_ref, updated. branch-cvs-readonly-53-g4504da8,
Eric Blake <=