[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r22267 - gnunet/src/regex
From: |
gnunet |
Subject: |
[GNUnet-SVN] r22267 - gnunet/src/regex |
Date: |
Mon, 25 Jun 2012 12:05:27 +0200 |
Author: grothoff
Date: 2012-06-25 12:05:27 +0200 (Mon, 25 Jun 2012)
New Revision: 22267
Modified:
gnunet/src/regex/regex.c
gnunet/src/regex/test_regex_eval_api.c
Log:
-optimizing regex
Modified: gnunet/src/regex/regex.c
===================================================================
--- gnunet/src/regex/regex.c 2012-06-25 08:33:13 UTC (rev 22266)
+++ gnunet/src/regex/regex.c 2012-06-25 10:05:27 UTC (rev 22267)
@@ -738,8 +738,7 @@
}
// 3. Rename s1 to {s1,s2}
- new_name = GNUNET_strdup (s1->name);
- GNUNET_free_non_null (s1->name);
+ new_name = s1->name;
GNUNET_asprintf (&s1->name, "{%s,%s}", new_name, s2->name);
GNUNET_free (new_name);
@@ -828,90 +827,72 @@
automaton_state_traverse (cls, a->start, &count, action);
}
+
/**
* Check if the given string 'str' needs parentheses around it when
* using it to generate a regex.
*
+ * Currently only tests for first and last characters being '()' respectively.
+ * FIXME: What about "(ab)|(cd)"?
+ *
* @param str string
*
* @return GNUNET_YES if parentheses are needed, GNUNET_NO otherwise
*/
static int
-needs_parentheses (char *str)
+needs_parentheses (const char *str)
{
- int length;
+ size_t slen;
- if (NULL == str)
+ if ( (NULL == str) ||
+ ((slen = strlen(str)) < 2) ||
+ ( ('(' == str[0]) && (')' == str[slen-1]) ) )
return GNUNET_NO;
-
- length = strlen (str);
-
- if (length < 2)
- return GNUNET_NO;
-
- if (str[0] == '(' && str[strlen (str) - 1] == ')')
- return GNUNET_NO;
-
return GNUNET_YES;
}
+
/**
* Remove parentheses surrounding string 'str'.
* Example: "(a)" becomes "a".
- * You need to GNUNET_free the retunred string.
+ * You need to GNUNET_free the returned string.
*
- * @param str string
+ * Currently only tests for first and last characters being '()' respectively.
+ * FIXME: What about "(ab)|(cd)"?
*
+ * @param str string, free'd or re-used by this function, can be NULL
+ *
* @return string without surrounding parentheses, string 'str' if no preceding
* epsilon could be found, NULL if 'str' was NULL
*/
static char *
remove_parentheses (char *str)
{
- char *new_str;
- int length;
- int i;
- int j;
+ size_t slen;
- if (NULL == str)
- return NULL;
-
- length = strlen (str);
-
- if (str[0] == '(' && str[length - 1] == ')')
- {
- new_str = GNUNET_malloc (length - 1);
- for (j = 0, i = 1; i < length - 1; i++, j++)
- new_str[j] = str[i];
- new_str[j] = '\0';
- }
- else
- new_str = GNUNET_strdup (str);
-
- return new_str;
+ if ( (NULL == str) || ('(' != str[0]) || (str[(slen = strlen(str)) - 1] !=
')') )
+ return str;
+ memmove (str, &str[1], slen - 2);
+ str[slen - 2] = '\0';
+ return str;
}
+
/**
* Check if the string 'str' starts with an epsilon (empty string).
* Example: "(|a)" is starting with an epsilon.
*
- * @param str
+ * @param str string to test
*
- * @return
+ * @return 0 if str has no epsilon, 1 if str starts with '(|' and ends with ')'
*/
static int
-has_epsilon (char *str)
+has_epsilon (const char *str)
{
- int len;
-
- if (NULL == str)
- return 0;
-
- len = strlen (str);
-
- return (0 == strncmp (str, "(|", 2) && str[len - 1] == ')');
+ return (NULL != str) && ('(' == str[0]) && ('|' == str[1]) && (')' ==
str[strlen(str) - 1]);
}
+
/**
* Remove an epsilon from the string str. Where epsilon is an empty string
* Example: str = "(|a|b|c)", result: "a|b|c"
@@ -923,52 +904,47 @@
* could be found, NULL if 'str' was NULL
*/
static char *
-remove_epsilon (char *str)
+remove_epsilon (const char *str)
{
- int len;
- char *new_str;
- int i;
- int j;
+ size_t len;
if (NULL == str)
return NULL;
-
- len = strlen (str);
-
- if (len > 2 && 0 == strncmp (str, "(|", 2) && str[len - 1] == ')')
+ if ( ('(' == str[0]) && ('|' == str[1]) )
{
- new_str = GNUNET_malloc (len - 1);
-
- j = 0;
- for (i = 2; i < len - 1; i++)
- {
- new_str[j] = str[i];
- j++;
- }
- new_str[j] = '\0';
+ len = strlen (str);
+ if (')' == str[len-1])
+ return GNUNET_strndup (&str[2], len - 3);
}
- else
- {
- new_str = GNUNET_strdup (str);
- }
-
- return new_str;
+ return GNUNET_strdup (str);
}
+
static int
-strkcmp (const char *str1, const char *str2, int k)
+strkcmp (const char *str1, const char *str2, size_t k)
{
- const char *new_str1;
-
if (NULL == str1 || NULL == str2)
return -1;
+ return strcmp (&str1[k], str2);
+}
- new_str1 = &str1[k];
- return strcmp (new_str1, str2);
+/**
+ * Compare two strings for equality. If either is NULL (or if both are
+ * NULL), they are not equal.
+ *
+ * @return 0 if the strings are the same, 1 or -1 if not
+ */
+static int
+nullstrcmp (const char *str1, const char *str2)
+{
+ if ( (NULL == str1) || (NULL == str2) )
+ return -1;
+ return strcmp (str1, str2);
}
-void
+
+static void
number_states (void *cls, unsigned int count, struct GNUNET_REGEX_State *s)
{
struct GNUNET_REGEX_State **states;
@@ -1085,55 +1061,20 @@
// Assign R_temp_(ij|ik|kk|kj) to R_last[][] and remove epsilon as well
// as parentheses, so we can better compare the contents
- temp_a = remove_epsilon (R_last[i][j]);
- R_temp_ij = remove_parentheses (temp_a);
- GNUNET_free_non_null (temp_a);
- temp_a = remove_epsilon (R_last[i][k]);
- R_temp_ik = remove_parentheses (temp_a);
- GNUNET_free_non_null (temp_a);
- temp_a = remove_epsilon (R_last[k][k]);
- R_temp_kk = remove_parentheses (temp_a);
- GNUNET_free_non_null (temp_a);
- temp_a = remove_epsilon (R_last[k][j]);
- R_temp_kj = remove_parentheses (temp_a);
- GNUNET_free_non_null (temp_a);
+ R_temp_ij = NULL;
+ R_temp_ik = NULL;
+ R_temp_kk = remove_parentheses (remove_epsilon (R_last[k][k]));
+ R_temp_kj = remove_parentheses (remove_epsilon (R_last[k][j]));
- if (NULL != R_last[i][j] && NULL != R_last[k][j])
- ij_kj_cmp = strcmp (R_last[i][j], R_last[k][j]);
- else
- ij_kj_cmp = -1;
+ // cache results from strcmp, we might need these many times
+ ij_kj_cmp = nullstrcmp (R_last[i][j], R_last[k][j]);
+ ij_ik_cmp = nullstrcmp (R_last[i][j], R_last[i][k]);
+ ik_kk_cmp = nullstrcmp (R_last[i][k], R_last[k][k]);
+ ik_kj_cmp = nullstrcmp (R_last[i][k], R_last[k][j]);
+ kk_kj_cmp = nullstrcmp (R_last[k][k], R_last[k][j]);
+ clean_ik_kk_cmp = nullstrcmp (R_last[i][k], R_temp_kk);
+ clean_kk_kj_cmp = nullstrcmp (R_temp_kk, R_last[k][j]);
- if (NULL != R_last[i][j] && NULL != R_last[i][k])
- ij_ik_cmp = strcmp (R_last[i][j], R_last[i][k]);
- else
- ij_ik_cmp = -1;
-
- if (NULL != R_last[i][k] && NULL != R_last[k][k])
- ik_kk_cmp = strcmp (R_last[i][k], R_last[k][k]);
- else
- ik_kk_cmp = -1;
-
- if (NULL != R_last[i][k] && NULL != R_last[k][j])
- ik_kj_cmp = strcmp (R_last[i][k], R_last[k][j]);
- else
- ik_kj_cmp = -1;
-
- if (NULL != R_last[k][k] && NULL != R_last[k][j])
- kk_kj_cmp = strcmp (R_last[k][k], R_last[k][j]);
- else
- kk_kj_cmp = -1;
-
- if (NULL != R_last[i][k] && NULL != R_temp_kk)
- clean_ik_kk_cmp = strcmp (R_last[i][k], R_temp_kk);
- else
- clean_ik_kk_cmp = 1;
-
- if (NULL != R_temp_kk && NULL != R_last[k][j])
- clean_kk_kj_cmp = strcmp (R_temp_kk, R_last[k][j]);
- else
- clean_kk_kj_cmp = -1;
-
-
// $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk})^*
R^{(k-1)}_{kj}
// With: R_cur[i][j] = R_cur_l | R_cur_r
// Rij(k) = Rij(k-1), because right side (R_cur_r) is empty set (NULL)
@@ -1154,22 +1095,20 @@
/* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, */
/* "R_temp_ij = %s R_temp_ik = %s R_temp_kk = %s R_temp_kj =
%s\n", */
/* R_temp_ij, R_temp_ik, R_temp_kk, R_temp_kj); */
+ R_temp_ik = remove_parentheses (remove_epsilon (R_last[i][k]));
- GNUNET_assert (NULL != R_last[i][k] && NULL != R_last[k][k] &&
- NULL != R_last[k][j]);
- GNUNET_assert (NULL != R_temp_ik && NULL != R_temp_kk &&
- NULL != R_temp_kj);
-
// construct R_cur_l (and, if necessary R_cur_r)
if (NULL != R_last[i][j])
{
+ R_temp_ij = remove_parentheses (remove_epsilon (R_last[i][j]));
+
if (0 == strcmp (R_temp_ij, R_temp_ik) &&
0 == strcmp (R_temp_ik, R_temp_kk) &&
0 == strcmp (R_temp_kk, R_temp_kj))
{
if (0 == strlen (R_temp_ij))
{
- GNUNET_asprintf (&R_cur_r, "");
+ R_cur_r = GNUNET_strdup ("");
}
// a|(e|a)a*(e|a) = a*
// a|(e|a)(e|a)*(e|a) = a*
@@ -1251,10 +1190,9 @@
else
{
/* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "NO SIMPLIFICATION\n");
*/
-
- temp_a = remove_parentheses (R_last[i][j]);
- R_cur_l = GNUNET_strdup (temp_a);
- GNUNET_free (temp_a);
+ temp_a = (NULL == R_last[i][j]) ? NULL : GNUNET_strdup
(R_last[i][j]);
+ temp_a = remove_parentheses (temp_a);
+ R_cur_l = temp_a;
}
}
// we have no left side
@@ -1464,13 +1402,8 @@
for (j = 0; j < n; j++)
{
GNUNET_free_non_null (R_last[i][j]);
-
- if (NULL != R_cur[i][j])
- {
- R_last[i][j] = GNUNET_strdup (R_cur[i][j]);
- GNUNET_free (R_cur[i][j]);
- R_cur[i][j] = NULL;
- }
+ R_last[i][j] = R_cur[i][j];
+ R_cur[i][j] = NULL;
}
}
}
Modified: gnunet/src/regex/test_regex_eval_api.c
===================================================================
--- gnunet/src/regex/test_regex_eval_api.c 2012-06-25 08:33:13 UTC (rev
22266)
+++ gnunet/src/regex/test_regex_eval_api.c 2012-06-25 10:05:27 UTC (rev
22267)
@@ -369,8 +369,8 @@
}
srand (time (NULL));
- /* for (i = 0; i < 50; i++) */
- /* check_rand += test_random (100, 120, 20); */
+ for (i = 0; i < 50; i++)
+ check_rand += test_random (100, 120, 20);
return check_nfa + check_dfa + check_rand;
}
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r22267 - gnunet/src/regex,
gnunet <=