[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Eliot-dev] eliot dic/dic_search.c dic/dic_search.h dic/reg... [multibyte]
From: eliot-dev
Subject: [Eliot-dev] eliot dic/dic_search.c dic/dic_search.h dic/reg... [multibyte]
Date: Wed, 28 Dec 2005 20:02:52 +0000
CVSROOT: /sources/eliot
Module name: eliot
Branch: multibyte
Changes by: Olivier Teulière <address@hidden> 05/12/28 20:02:52
Modified files:
dic : dic_search.c dic_search.h regexpmain.c
utils : eliottxt.cpp
Log message:
Added wchar_t wrappers around Dic_search_* functions
CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/eliot/eliot/dic/dic_search.c.diff?only_with_tag=multibyte&tr1=1.14.2.1&tr2=1.14.2.2&r1=text&r2=text
http://cvs.savannah.gnu.org/viewcvs/eliot/eliot/dic/dic_search.h.diff?only_with_tag=multibyte&tr1=1.10.2.1&tr2=1.10.2.2&r1=text&r2=text
http://cvs.savannah.gnu.org/viewcvs/eliot/eliot/dic/regexpmain.c.diff?only_with_tag=multibyte&tr1=1.10&tr2=1.10.2.1&r1=text&r2=text
http://cvs.savannah.gnu.org/viewcvs/eliot/eliot/utils/eliottxt.cpp.diff?only_with_tag=multibyte&tr1=1.12.2.1&tr2=1.12.2.2&r1=text&r2=text
Patches:
Index: eliot/dic/dic_search.c
diff -u eliot/dic/dic_search.c:1.14.2.1 eliot/dic/dic_search.c:1.14.2.2
--- eliot/dic/dic_search.c:1.14.2.1 Wed Dec 28 16:47:35 2005
+++ eliot/dic/dic_search.c Wed Dec 28 20:02:52 2005
@@ -27,6 +27,7 @@
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
+#include <wchar.h>
#include "dic_internals.h"
#include "dic.h"
@@ -84,25 +85,14 @@
/**
- * This method is a wrapper around the Dic_search_word_inner function.
- * It simply converts the wchar_t* string into a char* one.
- * XXX: This is a temporary hack until the dictionaries can handle multibyte
- * characters properly... the Dic_search_word_inner function should disappear!
+ * Wrapper around Dic_search_word_inner, until we have multibyte support in
+ * the dictionary
*/
int Dic_search_word(const Dictionary dic, const wchar_t* word)
{
int res;
- char *tmp_word;
- size_t len;
-
- // Get the needed length (we _can't_ use wstring::size())
- len = wcstombs(NULL, word, 0);
- if (len == (size_t)-1)
- tmp_word = "";
-
- // Convert the string
- tmp_word = malloc(len + 1);
- len = wcstombs(tmp_word, word, len + 1);
+ char *tmp_word = malloc(wcslen(word) + 1);
+ sprintf(tmp_word, "%ls", word);
// Do the actual work
res = Dic_search_word_inner(dic, tmp_word);
@@ -187,10 +177,10 @@
} while (! (*edgeptr++).last);
}
-void
-Dic_search_7pl1(const Dictionary dic, const char* rack,
- char buff[DIC_LETTERS][RES_7PL1_MAX][DIC_WORD_MAX],
- int joker)
+static void
+Dic_search_7pl1_inner(const Dictionary dic, const char* rack,
+ char buff[DIC_LETTERS][RES_7PL1_MAX][DIC_WORD_MAX],
+ int joker)
{
int i,j,wordlen;
const char* r = rack;
@@ -263,12 +253,41 @@
}
}
+
+/**
+ * Wrapper around Dic_search_7pl1_inner, until we have multibyte support in
+ * the dictionary
+ */
+void
+Dic_search_7pl1(const Dictionary dic, const wchar_t* rack,
+ wchar_t buff[DIC_LETTERS][RES_7PL1_MAX][DIC_WORD_MAX],
+ int joker)
+{
+ int i, j, k;
+ char tmp_buff[DIC_LETTERS][RES_7PL1_MAX][DIC_WORD_MAX];
+ char *tmp_rack = malloc(wcslen(rack) + 1);
+ sprintf(tmp_rack, "%ls", rack);
+ // Do the actual work
+ Dic_search_7pl1_inner(dic, tmp_rack, tmp_buff, joker);
+
+ for (i = 0; i < DIC_LETTERS; i++)
+ {
+ for (j = 0; j < RES_7PL1_MAX; j++)
+ {
+ for (k = 0; k < DIC_WORD_MAX; k++)
+ {
+ buff[i][j][k] = tmp_buff[i][j][k];
+ }
+ }
+ }
+}
+
/****************************************/
/****************************************/
-void
-Dic_search_Racc(const Dictionary dic, const char* word,
- char wordlist[RES_RACC_MAX][DIC_WORD_MAX])
+static void
+Dic_search_Racc_inner(const Dictionary dic, const char* word,
+ char wordlist[RES_RACC_MAX][DIC_WORD_MAX])
{
/* search_racc will try to add a letter in front and at the end of a word */
@@ -316,13 +335,37 @@
}
}
+/**
+ * Wrapper around Dic_search_Racc_inner, until we have multibyte support in
+ * the dictionary
+ */
+void
+Dic_search_Racc(const Dictionary dic, const wchar_t* word,
+ wchar_t wordlist[RES_RACC_MAX][DIC_WORD_MAX])
+{
+ int i, j;
+ char tmp_buff[RES_RACC_MAX][DIC_WORD_MAX];
+ char *tmp_word = malloc(wcslen(word) + 1);
+ sprintf(tmp_word, "%ls", word);
+ // Do the actual work
+ Dic_search_Racc_inner(dic, tmp_word, tmp_buff);
+
+ for (i = 0; i < RES_RACC_MAX; i++)
+ {
+ for (j = 0; j < DIC_WORD_MAX; j++)
+ {
+ wordlist[i][j] = tmp_buff[i][j];
+ }
+ }
+}
+
/****************************************/
/****************************************/
-void
-Dic_search_Benj(const Dictionary dic, const char* word,
- char wordlist[RES_BENJ_MAX][DIC_WORD_MAX])
+static void
+Dic_search_Benj_inner(const Dictionary dic, const char* word,
+ char wordlist[RES_BENJ_MAX][DIC_WORD_MAX])
{
int i,wordlistlen;
char wordtst[DIC_WORD_MAX];
@@ -354,6 +397,30 @@
} while (!(*edge0++).last);
}
+/**
+ * Wrapper around Dic_search_Benj_inner, until we have multibyte support in
+ * the dictionary
+ */
+void
+Dic_search_Benj(const Dictionary dic, const wchar_t* word,
+ wchar_t wordlist[RES_BENJ_MAX][DIC_WORD_MAX])
+{
+ int i, j;
+ char tmp_buff[RES_BENJ_MAX][DIC_WORD_MAX];
+ char *tmp_word = malloc(wcslen(word) + 1);
+ sprintf(tmp_word, "%ls", word);
+ // Do the actual work
+ Dic_search_Benj_inner(dic, tmp_word, tmp_buff);
+
+ for (i = 0; i < RES_BENJ_MAX; i++)
+ {
+ for (j = 0; j < DIC_WORD_MAX; j++)
+ {
+ wordlist[i][j] = tmp_buff[i][j];
+ }
+ }
+}
+
/****************************************/
/****************************************/
@@ -369,8 +436,8 @@
void
Dic_search_cross_rec(struct params_cross_t *params,
- char wordlist[RES_CROS_MAX][DIC_WORD_MAX],
- Dawg_edge *edgeptr)
+ char wordlist[RES_CROS_MAX][DIC_WORD_MAX],
+ Dawg_edge *edgeptr)
{
Dawg_edge *current = params->dic->dawg + edgeptr->ptr;
@@ -408,10 +475,9 @@
}
-
-void
-Dic_search_Cros(const Dictionary dic, const char* mask,
- char wordlist[RES_CROS_MAX][DIC_WORD_MAX])
+static void
+Dic_search_Cros_inner(const Dictionary dic, const char* mask,
+ char wordlist[RES_CROS_MAX][DIC_WORD_MAX])
{
int i;
struct params_cross_t params;
@@ -438,6 +504,31 @@
Dic_search_cross_rec(&params, wordlist, dic->dawg + dic->root);
}
+
+/**
+ * Wrapper around Dic_search_Cros_inner, until we have multibyte support in
+ * the dictionary
+ */
+void
+Dic_search_Cros(const Dictionary dic, const wchar_t* mask,
+ wchar_t wordlist[RES_CROS_MAX][DIC_WORD_MAX])
+{
+ int i, j;
+ char tmp_buff[RES_CROS_MAX][DIC_WORD_MAX];
+ char *tmp_mask = malloc(wcslen(mask) + 1);
+ sprintf(tmp_mask, "%ls", mask);
+ // Do the actual work
+ Dic_search_Cros_inner(dic, tmp_mask, tmp_buff);
+
+ for (i = 0; i < RES_CROS_MAX; i++)
+ {
+ for (j = 0; j < DIC_WORD_MAX; j++)
+ {
+ wordlist[i][j] = tmp_buff[i][j];
+ }
+ }
+}
+
/****************************************/
/****************************************/
@@ -494,13 +585,13 @@
* function prototype for parser generated by bison
*/
int regexpparse(yyscan_t scanner, NODE** root,
- struct search_RegE_list_t *list,
- struct regexp_error_report_t *err);
+ struct search_RegE_list_t *list,
+ struct regexp_error_report_t *err);
void
-Dic_search_RegE(const Dictionary dic, const char* re,
- char wordlist[RES_REGE_MAX][DIC_WORD_MAX],
- struct search_RegE_list_t *list)
+Dic_search_RegE_inner(const Dictionary dic, const char* re,
+ char wordlist[RES_REGE_MAX][DIC_WORD_MAX],
+ struct search_RegE_list_t *list)
{
int i,p,n,value;
int ptl[REGEXP_MAX+1];
@@ -576,6 +667,31 @@
regexp_delete_tree(root);
}
+/**
+ * Wrapper around Dic_search_RegE_inner, until we have multibyte support in
+ * the dictionary
+ */
+void
+Dic_search_RegE(const Dictionary dic, const wchar_t* re,
+ wchar_t wordlist[RES_REGE_MAX][DIC_WORD_MAX],
+ struct search_RegE_list_t *list)
+{
+ int i, j;
+ char tmp_buff[RES_REGE_MAX][DIC_WORD_MAX];
+ char *tmp_re = malloc(wcslen(re) + 1);
+ sprintf(tmp_re, "%ls", re);
+ // Do the actual work
+ Dic_search_RegE_inner(dic, tmp_re, tmp_buff, list);
+
+ for (i = 0; i < RES_REGE_MAX; i++)
+ {
+ for (j = 0; j < DIC_WORD_MAX; j++)
+ {
+ wordlist[i][j] = tmp_buff[i][j];
+ }
+ }
+}
+
/****************************************/
/****************************************/
Index: eliot/dic/dic_search.h
diff -u eliot/dic/dic_search.h:1.10.2.1 eliot/dic/dic_search.h:1.10.2.2
--- eliot/dic/dic_search.h:1.10.2.1 Wed Dec 28 16:47:35 2005
+++ eliot/dic/dic_search.h Wed Dec 28 20:02:52 2005
@@ -62,7 +62,8 @@
* @param path : lookup word
* @return 1 present, 0 error
*/
-int Dic_search_word(Dictionary dic, const wchar_t* path);
+int Dic_search_word(Dictionary dic,
+ const wchar_t* path);
/**
* Search for all feasible word with "rack" plus one letter
@@ -70,7 +71,10 @@
* @param rack : letters
* @param wordlist : results
*/
-void Dic_search_7pl1(Dictionary dic, const char* rack, char wordlist[DIC_LETTERS][RES_7PL1_MAX][DIC_WORD_MAX], int joker);
+void Dic_search_7pl1(Dictionary dic,
+ const wchar_t* rack,
+ wchar_t wordlist[DIC_LETTERS][RES_7PL1_MAX][DIC_WORD_MAX],
+ int joker);
/**
* Search for all feasible word adding a letter in front or at the end
@@ -78,7 +82,9 @@
* @param word : word
* @param wordlist : results
*/
-void Dic_search_Racc(Dictionary dic, const char* word, char wordlist[RES_RACC_MAX][DIC_WORD_MAX]);
+void Dic_search_Racc(Dictionary dic,
+ const wchar_t* word,
+ wchar_t wordlist[RES_RACC_MAX][DIC_WORD_MAX]);
/**
* Search for benjamins
@@ -86,7 +92,9 @@
* @param rack : letters
* @param wordlist : results
*/
-void Dic_search_Benj(Dictionary dic, const char* word, char wordlist[RES_BENJ_MAX][DIC_WORD_MAX]);
+void Dic_search_Benj(Dictionary dic,
+ const wchar_t* word,
+ wchar_t wordlist[RES_BENJ_MAX][DIC_WORD_MAX]);
/**
* Search for crosswords
@@ -94,7 +102,9 @@
* @param rack : letters
* @param wordlist : results
*/
-void Dic_search_Cros(Dictionary dic, const char* mask, char wordlist[RES_CROS_MAX][DIC_WORD_MAX]);
+void Dic_search_Cros(Dictionary dic,
+ const wchar_t* mask,
+ wchar_t wordlist[RES_CROS_MAX][DIC_WORD_MAX]);
/**
* Search for words matching a regular expression
@@ -102,7 +112,10 @@
* @param re : regular expression
* @param wordlist : results
*/
-void Dic_search_RegE(Dictionary dic, const char* re, char wordlist[RES_REGE_MAX][DIC_WORD_MAX], struct search_RegE_list_t *list);
+void Dic_search_RegE(Dictionary dic,
+ const wchar_t* re,
+ wchar_t wordlist[RES_REGE_MAX][DIC_WORD_MAX],
+ struct search_RegE_list_t *list);
#if defined(__cplusplus)
}
Index: eliot/dic/regexpmain.c
diff -u /dev/null eliot/dic/regexpmain.c:1.10.2.1
--- /dev/null Wed Dec 28 20:02:52 2005
+++ eliot/dic/regexpmain.c Wed Dec 28 20:02:52 2005
@@ -0,0 +1,138 @@
+/* Eliot */
+/* Copyright (C) 1999 Antoine Fraboulet */
+/* */
+/* This file is part of Eliot. */
+/* */
+/* Eliot is free software; you can redistribute it and/or modify */
+/* it under the terms of the GNU General Public License as published by */
+/* the Free Software Foundation; either version 2 of the License, or */
+/* (at your option) any later version. */
+/* */
+/* Elit is distributed in the hope that it will be useful, */
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
+/* GNU General Public License for more details. */
+/* */
+/* You should have received a copy of the GNU General Public License */
+/* along with this program; if not, write to the Free Software */
+/* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+/**
+ * \file regexpmain.c
+ * \brief Program used to test regexp
+ * \author Antoine Fraboulet
+ * \date 2005
+ */
+
+#include "config.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "dic.h"
+#include "regexp.h"
+#include "dic_search.h"
+
+/********************************************************/
+/********************************************************/
+/********************************************************/
+
+const unsigned int all_letter[DIC_LETTERS] =
+ {
+ /* 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 */
+ /* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 */
+ /* x A B C D E F G H I J K L M N O P Q R S T U V W X Y Z */
+ 0,1,1,1,1, 1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1, 1, 1, 1, 1
+ };
+
+const unsigned int vowels[DIC_LETTERS] =
+ {
+ /* x A B C D E F G H I J K L M N O P Q R S T U V W X Y Z */
+ 0,1,0,0,0, 1,0,0,0,1,0, 0,0,0,0,1,0,0,0,0,0,1,0, 0, 0, 1, 0
+ };
+
+const unsigned int consonants[DIC_LETTERS] =
+ {
+ /* x A B C D E F G H I J K L M N O P Q R S T U V W X Y Z */
+ 0,0,1,1,1, 0,1,1,1,0,1, 1,1,1,1,0,1,1,1,1,1,0,1, 1, 1, 1, 1
+ };
+
+void init_letter_lists(struct search_RegE_list_t *list)
+{
+ int i;
+ memset (list,0,sizeof(*list));
+ list->minlength = 1;
+ list->maxlength = 15;
+ list->valid[0] = 1; // all letters
+ list->symbl[0] = RE_ALL_MATCH;
+ list->valid[1] = 1; // vowels
+ list->symbl[1] = RE_VOWL_MATCH;
+ list->valid[2] = 1; // consonants
+ list->symbl[2] = RE_CONS_MATCH;
+ for(i=0; i < DIC_LETTERS; i++)
+ {
+ list->letters[0][i] = all_letter[i];
+ list->letters[1][i] = vowels[i];
+ list->letters[2][i] = consonants[i];
+ }
+ list->valid[3] = 0; // user defined list 1
+ list->symbl[3] = RE_USR1_MATCH;
+ list->valid[4] = 0; // user defined list 2
+ list->symbl[4] = RE_USR2_MATCH;
+}
+
+/********************************************************/
+/********************************************************/
+/********************************************************/
+void
+usage(int argc, char* argv[])
+{
+ fprintf(stderr,"usage: %s dictionary\n",argv[0]);
+ fprintf(stderr," dictionary : path to dawg eliot dictionary\n");
+}
+
+int main(int argc, char* argv[])
+{
+ int i;
+ Dictionary dic;
+ char wordlist[RES_REGE_MAX][DIC_WORD_MAX];
+ char er[200];
+ strcpy(er,".");
+ struct search_RegE_list_t list;
+
+ if (argc < 2)
+ {
+ usage(argc,argv);
+ }
+
+ if (Dic_load(&dic,argv[1]))
+ {
+ fprintf(stdout,"impossible de lire le dictionnaire\n");
+ return 1;
+ }
+
+ while (strcmp(er,""))
+ {
+ fprintf(stdout,"**************************************************************\n");
+ fprintf(stdout,"**************************************************************\n");
+ fprintf(stdout,"entrer une ER:\n");
+ fgets(er,sizeof(er),stdin);
+ /* strip \n */
+ er[strlen(er) - 1] = '\0';
+ if (strcmp(er,"") == 0)
+ break;
+
+ /* automaton */
+ init_letter_lists(&list);
+ Dic_search_RegE_inner(dic,er,wordlist,&list);
+
+ fprintf(stdout,"résultat:\n");
+ for(i=0; i<RES_REGE_MAX && wordlist[i][0]; i++)
+ {
+ fprintf(stderr,"%s\n",wordlist[i]);
+ }
+ }
+
+ Dic_destroy(dic);
+ return 0;
+}
Index: eliot/utils/eliottxt.cpp
diff -u eliot/utils/eliottxt.cpp:1.12.2.1 eliot/utils/eliottxt.cpp:1.12.2.2
--- eliot/utils/eliottxt.cpp:1.12.2.1 Wed Dec 28 16:47:35 2005
+++ eliot/utils/eliottxt.cpp Wed Dec 28 20:02:52 2005
@@ -170,15 +170,12 @@
void eliottxt_get_cross(const Dictionary &iDic, wchar_t *cros)
{
- // TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO
-#if 0
wchar_t wordlist[RES_CROS_MAX][DIC_WORD_MAX];
Dic_search_Cros(iDic, cros, wordlist);
for (int i = 0; i < RES_CROS_MAX && wordlist[i][0]; i++)
{
printf(" %s\n", convertToMb(wordlist[i]).c_str());
}
-#endif
}
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Eliot-dev] eliot dic/dic_search.c dic/dic_search.h dic/reg... [multibyte],
eliot-dev <=