[Eliot-dev] eliot dic/compdic.cpp dic/encoding.cpp dic/head...

eliot-dev
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Eliot-dev] eliot dic/compdic.cpp dic/encoding.cpp dic/head...

From:	Olivier Teulière
Subject:	[Eliot-dev] eliot dic/compdic.cpp dic/encoding.cpp dic/head...
Date:	Sun, 28 Jun 2009 10:55:24 +0000
CVSROOT:        /cvsroot/eliot
Module name:    eliot
Changes by:     Olivier Teulière <ipkiss>       09/06/28 10:55:24

Modified files:
        dic            : compdic.cpp encoding.cpp header.cpp header.h 
        game           : cross.h 

Log message:
        Support display and input strings in the dictionary header

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/compdic.cpp?cvsroot=eliot&r1=1.11&r2=1.12
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/encoding.cpp?cvsroot=eliot&r1=1.8&r2=1.9
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/header.cpp?cvsroot=eliot&r1=1.10&r2=1.11
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/header.h?cvsroot=eliot&r1=1.8&r2=1.9
http://cvs.savannah.gnu.org/viewcvs/eliot/game/cross.h?cvsroot=eliot&r1=1.10&r2=1.11

Patches:
Index: dic/compdic.cpp
===================================================================
RCS file: /cvsroot/eliot/eliot/dic/compdic.cpp,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -b -r1.11 -r1.12
--- dic/compdic.cpp     21 Jun 2009 12:32:25 -0000      1.11
+++ dic/compdic.cpp     28 Jun 2009 10:55:24 -0000      1.12
@@ -73,6 +73,14 @@
 #define CHECK_RECURSION
 
 
+unsigned int getFileSize(const string &iFileName)
+{
+    struct stat stat_buf;
+    if (stat(iFileName.c_str(), &stat_buf) < 0)
+        throw DicException(_("Cannot stat file ") + iFileName);
+    return (unsigned int)stat_buf.st_size;
+}
+
 const wchar_t* load_uncompressed(const string &iFileName, unsigned int 
&ioDicSize)
 {
     ifstream file(iFileName.c_str(), ios::in | ios::binary);
@@ -88,11 +96,10 @@
     // Buffer for the wide characters (it will use at most as many characters
     // as the utf-8 version)
     wchar_t *wideBuf = new wchar_t[ioDicSize];
-    unsigned int number;
 
     try
     {
-        number = readFromUTF8(wideBuf, ioDicSize, &buffer.front(),
+        unsigned int number = readFromUTF8(wideBuf, ioDicSize, &buffer.front(),
                               ioDicSize, "load_uncompressed");
         ioDicSize = number;
         return wideBuf;
@@ -106,14 +113,16 @@
 }
 
 
-void readLetters(const char *iFileName, DictHeaderInfo &ioHeaderInfo)
+void readLetters(const string &iFileName, DictHeaderInfo &ioHeaderInfo)
 {
-    ifstream in(iFileName);
+    ifstream in(iFileName.c_str());
     if (!in.is_open())
         throw DicException("Could not open file " + string(iFileName));
 
     // Use a more friendly type name
-    typedef boost::tokenizer<boost::char_separator<char> > Tokenizer;
+    typedef boost::tokenizer<boost::char_separator<wchar_t>,
+            std::wstring::const_iterator,
+            std::wstring> Tokenizer;
 
     int lineNb = 1;
     string line;
@@ -123,32 +132,25 @@
         if (line == "" || line == "\r" || line == "\n")
             continue;
 
+        // Convert the line to a wstring
+        const wstring &wline = readFromUTF8(line.c_str(), line.size(), 
"readLetters (1)");
         // Split the lines on space characters
-        vector<string> tokens;
-        boost::char_separator<char> sep(" ");
-        Tokenizer tok(line, sep);
+        boost::char_separator<wchar_t> sep(L" ");
+        Tokenizer tok(wline, sep);
         Tokenizer::iterator it;
-        for (it = tok.begin(); it != tok.end(); ++it)
-        {
-            tokens.push_back(*it);
-        }
+        vector<wstring> tokens(tok.begin(), tok.end());
 
-        // We expect 5 fields on the line, and the first one is a letter, so
-        // it cannot exceed 4 bytes
-        if (tokens.size() != 5 || tokens[0].size() > 4)
+        // We expect at least 5 fields on the line
+        if (tokens.size() < 5)
         {
             ostringstream ss;
-            ss << "readLetters: Invalid line in " << iFileName;
+            ss << "readLetters: Not enough fields in " << iFileName;
             ss << " (line " << lineNb << ")";
             throw DicException(ss.str());
         }
 
-#define MAX_SIZE 4
-        char buff[MAX_SIZE];
-        strncpy(buff, tokens[0].c_str(), MAX_SIZE);
-
-        wstring letter = readFromUTF8(buff, tokens[0].size(), "readLetters");
-
+        // The first field is a single character
+        wstring letter = tokens[0];
         if (letter.size() != 1)
         {
             // On the first line, there could be the BOM...
@@ -164,17 +166,24 @@
             {
                 ostringstream ss;
                 ss << "readLetters: Invalid letter at line " << lineNb;
+                ss << " (only one character allowed)";
                 throw DicException(ss.str());
             }
         }
-#undef MAX_SIZE
 
-        ioHeaderInfo.letters += towupper(letter[0]);
+        wchar_t upChar = towupper(letter[0]);
+        ioHeaderInfo.letters += upChar;
 
-        ioHeaderInfo.points.push_back(atoi(tokens[1].c_str()));
-        ioHeaderInfo.frequency.push_back(atoi(tokens[2].c_str()));
-        ioHeaderInfo.vowels.push_back(atoi(tokens[3].c_str()));
-        ioHeaderInfo.consonants.push_back(atoi(tokens[4].c_str()));
+        ioHeaderInfo.points.push_back(_wtoi(tokens[1].c_str()));
+        ioHeaderInfo.frequency.push_back(_wtoi(tokens[2].c_str()));
+        ioHeaderInfo.vowels.push_back(_wtoi(tokens[3].c_str()));
+        ioHeaderInfo.consonants.push_back(_wtoi(tokens[4].c_str()));
+
+        if (tokens.size() > 5)
+        {
+            ioHeaderInfo.displayInputData[upChar] =
+                vector<wstring>(tokens.begin() + 5, tokens.end());
+        }
 
         ++lineNb;
     }
@@ -416,18 +425,31 @@
          << "  " << iBinaryName << _(" -d 'ODS 5.0' -l letters.txt -i ods5.txt 
-o ods5.dawg") << endl
          << endl
          << _("The file containing the letters (--letters switch) must be 
UTF-8 encoded.") << endl
-         << _("Each line corresponds to one letter, and must contain 5 fields 
separated with ") << endl
-         << _("one or more space(s).") << endl
-         << _(" - 1st field: the letter itself") << endl
+         << _("Each line corresponds to one letter, and must contain at least 
5 fields separated with "
+              "one or more space(s).") << endl
+         << _(" - 1st field: the letter itself, as stored in the input file 
(single character)") << endl
          << _(" - 2nd field: the points of the letter") << endl
          << _(" - 3rd field: the frequency of the letter (how many letters of 
this kind in the game)") << endl
          << _(" - 4th field: 1 if the letter is considered as a vowel in 
Scrabble game, 0 otherwise") << endl
          << _(" - 5th field: 1 if the letter is considered as a consonant in 
Scrabble game, 0 otherwise") << endl
+         << _(" - 6th field (optional): display string for the letter 
(default: the letter itself)") << endl
+         << _(" - other fields (optional): input strings for the letter, in 
addition to the display string") << endl
+         << endl
          << _("Example for french:") << endl
-         << _("A 1 9 1 0") << endl
-         << _("[...]") << endl
-         << _("Z 10 1 0 1") << endl
-         << _("? 0 2 1 1") << endl;
+         << "A 1 9 1 0" << endl
+         << "[...]" << endl
+         << "Z 10 1 0 1" << endl
+         << "? 0 2 1 1" << endl
+         << endl
+         << _("Example for catalan:") << endl
+         << "A 1 12 1 0" << endl
+         << "[...]" << endl
+         // Translators: the first "L.L" must be translated "L·L",
+         // and the last one translated "ĿL"
+         << _("W 10 1 0 1 L.L L.L L-L L.L") << endl
+         << "X 10 1 0 1" << endl
+         << "Y 10 1 0 1 NY" << endl
+         << "[...]" << endl;
 }
 
 
@@ -513,13 +535,7 @@
             exit(1);
         }
 
-        struct stat stat_buf;
-        if (stat(inFileName.c_str(), &stat_buf) < 0)
-        {
-            cerr << _("Cannot stat uncompressed dictionary ") << inFileName << 
endl;
-            exit(1);
-        }
-        unsigned int dicsize = (unsigned int)stat_buf.st_size;
+        unsigned int dicSize = getFileSize(inFileName);
 
         ofstream outfile(outFileName.c_str(), ios::out | ios::binary | 
ios::trunc);
         if (!outfile.is_open())
@@ -530,11 +546,11 @@
 
         clock_t startLoadTime = clock();
         // FIXME: not exception safe
-        const wchar_t *uncompressed = load_uncompressed(inFileName, dicsize);
+        const wchar_t *uncompressed = load_uncompressed(inFileName, dicSize);
         clock_t endLoadTime = clock();
 
         global_input = uncompressed;
-        global_endofinput = global_input + dicsize;
+        global_endofinput = global_input + dicSize;
 
         headerInfo.dawg = true;
         Header tempHeader = skip_init_header(outfile, headerInfo);

Index: dic/encoding.cpp
===================================================================
RCS file: /cvsroot/eliot/eliot/dic/encoding.cpp,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -b -r1.8 -r1.9
--- dic/encoding.cpp    23 Jun 2009 21:36:33 -0000      1.8
+++ dic/encoding.cpp    28 Jun 2009 10:55:24 -0000      1.9
@@ -304,10 +304,17 @@
         // Padding is needed
         string s((iLength - width) / 2, c);
         string res = s + convertToMb(iWstr) + s;
-        // If the string cannot be centered perfectly, pad again on the right
-        // (arbitrary; if needed, we could take the iLeftPad argument)
-        if (res.size() != iLength)
-            s.append(1, c);
+        // If the string cannot be centered perfectly, pad again
+        // (on the left if iLength is even, on the right otherwise:
+        //  this tends to align numbers of 1 or 2 digits in a nice way)
+        // Note: if needed, we could add the iLeftPad argument
+        if ((iLength - width) % 2)
+        {
+            if (iLength % 2)
+                res.append(1, c);
+            else
+                res.insert(res.begin(), c);
+        }
         return res;
     }
 }

Index: dic/header.cpp
===================================================================
RCS file: /cvsroot/eliot/eliot/dic/header.cpp,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -b -r1.10 -r1.11
--- dic/header.cpp      27 Jun 2009 18:09:44 -0000      1.10
+++ dic/header.cpp      28 Jun 2009 10:55:24 -0000      1.11
@@ -24,6 +24,9 @@
 #include <string>
 #include <sstream>
 #include <iostream>
+#include <boost/foreach.hpp>
+#include <boost/format.hpp>
+#include <boost/tokenizer.hpp>
 
 // For ntohl & Co.
 #ifdef WIN32
@@ -48,6 +51,9 @@
 #include "encoding.h"
 #include "dic_exception.h"
 
+using boost::format;
+using boost::wformat;
+
 
 #if defined(WORDS_BIGENDIAN)
 // Nothing to do on big-endian machines
@@ -79,7 +85,7 @@
     char ident[sizeof(_COMPIL_KEYWORD_)];
     /// Version of the serialization format
     uint8_t version;
-    /// Unused at the moment, reserved for further use
+    /// Unused at the moment, reserved for future use
     char unused;
     uint32_t root;
     uint32_t nwords;
@@ -98,8 +104,9 @@
 #define _MAX_DIC_NAME_SIZE_ 30
 #define _MAX_LETTERS_NB_ 63
 #define _MAX_LETTERS_SIZE_ 80
+#define _MAX_DISPLAY_INPUT_SIZE_ 112
 
-/** Extension of the old format (used in version 1)*/
+/** Extension of the old format (used in version 1) */
 struct Dict_header_ext
 {
     // Time when the dictionary was compressed
@@ -152,6 +159,18 @@
     // --- we have a multiple of 64 bytes here
 };
 
+/** Extension of the extension :-) (used in version 2) */
+struct Dict_header_ext_2
+{
+    // Additional information concerning the display strings and the
+    // alternative input strings of the letters
+    char displayAndInput[_MAX_DISPLAY_INPUT_SIZE_];
+    // Size taken by the display/input data
+    uint16_t displayAndInputSize;
+
+    // --- we have a multiple of 64 bytes here
+};
+
 
 Header::Header(istream &iStream)
     : m_root(0), m_nbWords(0), m_nodesUsed(0), m_edgesUsed(0),
@@ -161,14 +180,14 @@
     // The code is not moved here because I find it more natural to have a
     // read() method symmetrical to the write() one
     read(iStream);
-    buildMapCodeFromChar();
+    buildCaches();
 }
 
 
 Header::Header(const DictHeaderInfo &iInfo)
 {
     // Use the latest serialization format
-    m_version = 1;
+    m_version = 2;
 
     // Sanity checks
     if (iInfo.letters.size() > _MAX_LETTERS_NB_)
@@ -211,25 +230,78 @@
     m_type = iInfo.dawg ? kDAWG : kGADDAG;
     m_dicName = iInfo.dicName;
     m_letters = iInfo.letters;
-    // FIXME: it should be more than that!
-    m_inputChars = iInfo.letters + L"<>|";
     m_points = iInfo.points;
     m_frequency = iInfo.frequency;
     m_vowels = iInfo.vowels;
     m_consonants = iInfo.consonants;
+    m_displayAndInputData = iInfo.displayInputData;
 
-    buildMapCodeFromChar();
+    buildCaches();
 }
 
 
-void Header::buildMapCodeFromChar()
+void Header::buildCaches()
 {
+    // Build the char --> code mapping
     for (unsigned int i = 0; i < m_letters.size(); ++i)
     {
         // We don't differentiate uppercase and lowercase letters
         m_mapCodeFromChar[towlower(m_letters[i])] = i + 1;
         m_mapCodeFromChar[towupper(m_letters[i])] = i + 1;
     }
+
+    // Build the cache for the convertToDisplay() and convertFromInput()
+    // methods. Also ensure that the strings in m_displayAndInputData
+    // are all in uppercase.
+    map<wchar_t, vector<wstring> >::iterator it;
+    for (it = m_displayAndInputData.begin();
+         it != m_displayAndInputData.end(); ++it)
+    {
+        BOOST_FOREACH(wstring &str, it->second)
+        {
+            // Make sure the string is in uppercase
+            std::transform(str.begin(), str.end(), str.begin(), towupper);
+            // Make a lowercase copy
+            wstring lower = str;
+            std::transform(lower.begin(), lower.end(), lower.begin(), 
towlower);
+            // Fill the cache
+            m_displayInputCache[it->first].push_back(str);
+            m_displayInputCache[towlower(it->first)].push_back(lower);
+        }
+    }
+
+    // Build the display strings cache
+    m_displayCache.assign(m_letters.size() + 1, L"");
+    for (unsigned int i = 0; i < m_letters.size(); ++i)
+    {
+        map<wchar_t, vector<wstring> >::const_iterator it =
+            m_displayAndInputData.find(m_letters[i]);
+        if (it == m_displayAndInputData.end())
+            m_displayCache[i + 1].append(1, m_letters[i]);
+        else
+            m_displayCache[i + 1] = it->second[0];
+    }
+
+    // Create a string with all the characters possibly used
+    m_inputChars.reserve(m_letters.size());
+    BOOST_FOREACH(wchar_t wch, m_letters)
+    {
+        map<wchar_t, vector<wstring> >::const_iterator it =
+            m_displayAndInputData.find(wch);
+        if (it == m_displayAndInputData.end())
+            m_inputChars.append(1, wch);
+        else
+        {
+            BOOST_FOREACH(const wstring &str, it->second)
+            {
+                BOOST_FOREACH(wchar_t chr, str)
+                {
+                    if (m_inputChars.find(towupper(chr)) == string::npos)
+                        m_inputChars.append(1, towupper(chr));
+                }
+            }
+        }
+    }
 }
 
 
@@ -261,35 +333,39 @@
 }
 
 
-wdstring Header::getDisplayStr(unsigned int iCode) const
+const wdstring & Header::getDisplayStr(unsigned int iCode) const
 {
     // Safety check
-    if (iCode == 0 || iCode > m_letters.size())
+    if (iCode == 0 || iCode > m_displayCache.size())
     {
         ostringstream oss;
         oss << iCode;
         throw DicException("Header::getDisplayStr: No code for letter '" + 
oss.str());
     }
-    // TODO: return a const wstring & instead of a wstring
-    return wstring(1, m_letters[iCode - 1]);
+    return m_displayCache[iCode];
 }
 
 
 wdstring Header::convertToDisplay(const wstring &iWord) const
 {
-    // TODO: if we had a flag saying that the current dictionary is
-    // such that all the display strings are equal to the internal
-    // characters themselves (which would be the case for most languages),
-    // we could simply return the given string without further processing.
-    wdstring dispStr;
-    dispStr.reserve(iWord.size());
-    // TODO: change the implementation, to avoid throwing an exception
-    // if there is a character not part of the dictionary (this can happen
-    // with regular expressions, at least...)
-    for (unsigned int i = 0; i < iWord.size(); ++i)
+    // Optimization for dictionaries without display nor input chars,
+    // which is the case in most languages.
+    if (m_displayInputCache.empty())
+        return iWord;
+
+    wdstring dispStr = iWord;
+    map<wchar_t, vector<wstring> >::const_iterator it;
+    for (it = m_displayInputCache.begin();
+         it != m_displayInputCache.end(); ++it)
+    {
+        const wstring &disp = it->second[0];
+        string::size_type pos = 0;
+        while (pos < dispStr.size() &&
+               (pos = dispStr.find(it->first, pos)) != string::npos)
     {
-        const wdstring &chr = getDisplayStr(getCodeFromChar(iWord[i]));
-        dispStr += chr;
+            dispStr.replace(pos, 1, disp);
+            pos += disp.size();
+        }
     }
     return dispStr;
 }
@@ -297,8 +373,28 @@
 
 wstring Header::convertFromInput(const wistring &iWord) const
 {
-    // TODO: do something useful
+    // Optimization for dictionaries without display nor input chars,
+    // which is the case in most languages.
+    if (m_displayInputCache.empty())
     return iWord;
+
+    wstring str = iWord;
+    map<wchar_t, vector<wstring> >::const_iterator it;
+    for (it = m_displayInputCache.begin();
+         it != m_displayInputCache.end(); ++it)
+    {
+        BOOST_FOREACH(const wstring &input, it->second)
+        {
+            string::size_type pos = 0;
+            while (pos < str.size() &&
+                   (pos = str.find(input, pos)) != string::npos)
+            {
+                str.replace(pos, input.size(), wstring(1, it->first));
+                pos += input.size();
+            }
+        }
+    }
+    return str;
 }
 
 
@@ -374,8 +470,6 @@
     {
         throw DicException("Header::read: inconsistent header");
     }
-    // FIXME: it should be more than that!
-    m_inputChars = m_letters + L"<>|";
 
     // Letters points and frequency
     for (unsigned int i = 0; i < m_letters.size(); ++i)
@@ -390,6 +484,26 @@
         m_vowels.push_back(aHeaderExt.vowels & (1 << i));
         m_consonants.push_back(aHeaderExt.consonants & (1 << i));
     }
+
+    // Read the additional display/input data
+    if (m_version >= 2)
+    {
+        // Read the extension of the extension...
+        Dict_header_ext_2 aHeaderExt2;
+        iStream.read((char*)&aHeaderExt2, sizeof(Dict_header_ext_2));
+        if (iStream.gcount() != sizeof(Dict_header_ext_2))
+            throw DicException("Header::read: expected to read more bytes 
(ext2)");
+
+        // Handle endianness
+        aHeaderExt2.displayAndInputSize = 
ntohs(aHeaderExt2.displayAndInputSize);
+
+        // Convert the dictionary letters from UTF-8 to wchar_t*
+        wstring serialized = readFromUTF8(aHeaderExt2.displayAndInput,
+                                          aHeaderExt2.displayAndInputSize,
+                                          "display and input data");
+        // Parse this string and structure the data
+        readDisplayAndInput(serialized);
+    }
 }
 
 
@@ -459,34 +573,158 @@
     oStream.write((char*)&aHeaderExt, sizeof(Dict_header_ext));
     if (!oStream.good())
         throw DicException("Header::write: error when writing to file");
+
+    // Write the second extension
+    Dict_header_ext_2 aHeaderExt2;
+    const wstring &serialized = writeDisplayAndInput();
+
+    // Convert the serialized data to UTF-8
+    aHeaderExt2.displayAndInputSize =
+        writeInUTF8(serialized, aHeaderExt2.displayAndInput,
+                    _MAX_DISPLAY_INPUT_SIZE_, "display and input data");
+
+    // Handle endianness
+    aHeaderExt2.displayAndInputSize = htons(aHeaderExt2.displayAndInputSize);
+
+    // Write the extension
+    oStream.write((char*)&aHeaderExt2, sizeof(Dict_header_ext_2));
+    if (!oStream.good())
+        throw DicException("Header::write: error when writing to file (ext2)");
+}
+
+
+void Header::readDisplayAndInput(const wstring &serialized)
+{
+    // The format is the following:
+    // "X|DISPX|INP1|INP2|INP3 Y|DISPY Z|DISPZ|INP4|INP5"
+    // where X, Y and Z are internal chars (i.e. chars of m_letters),
+    // DISPX, DISPY and DISPZ are the corresponding display strings,
+    // and the INP* are input strings (in addition to the display string,
+    // which is always considered as an input string)
+
+    // Use a more friendly type name
+    typedef boost::tokenizer<boost::char_separator<wchar_t>,
+            std::wstring::const_iterator,
+            std::wstring> Tokenizer;
+
+    // Split the string on double spaces
+    static const boost::char_separator<wchar_t> sep1(L" ");
+    static const boost::char_separator<wchar_t> sep2(L"|");
+    Tokenizer tok(serialized, sep1);
+    Tokenizer::iterator it;
+    for (it = tok.begin(); it != tok.end(); ++it)
+    {
+        // Split the token on single space
+        Tokenizer tok2(*it, sep2);
+        vector<wstring> pieces(tok2.begin(), tok2.end());
+        // Some sanity checks...
+        if (pieces.size() < 2)
+            throw DicException("Header::readDisplayAndInput: no display "
+                               "string. Corrupted dictionary?");
+        // The first piece must be a single char, present in m_letters
+        if (pieces[0].size() != 1 ||
+            m_letters.find(pieces[0][0]) == wstring::npos)
+        {
+            throw DicException("Header::readDisplayAndInput: invalid internal"
+                               " letter. Corrupted dictionary?");
+        }
+        wchar_t chr = pieces[0][0];
+        if (m_displayAndInputData.find(chr) != m_displayAndInputData.end())
+        {
+            throw DicException("Header::readDisplayAndInput: found 2 display"
+                               " data sections for the same letter. Corrupted"
+                               " dictionary?");
+        }
+        // OK, save the data
+        pieces.erase(pieces.begin());
+        m_displayAndInputData[chr] = pieces;
+    }
+}
+
+
+wstring Header::writeDisplayAndInput() const
+{
+    wstring serialized;
+    bool first = true;
+    map<wchar_t, vector<wstring> >::const_iterator it;
+    for (it = m_displayAndInputData.begin();
+         it != m_displayAndInputData.end(); ++it)
+    {
+        if (first)
+            first = false;
+        else
+            serialized += L" ";
+        serialized.append(1, it->first);
+        BOOST_FOREACH(const wstring &str, it->second)
+        {
+            // Make sure the string is uppercase
+            wstring upStr = str;
+            std::transform(upStr.begin(), upStr.end(), upStr.begin(), 
towupper);
+            serialized += L"|" + upStr;
+        }
+    }
+    return serialized;
 }
 
 
 void Header::print() const
 {
-    printf(_("dictionary name: %s\n"), convertToMb(m_dicName).c_str());
-    char buf[50];
+#define fmt(x) format(_(x))
+    cout << fmt("Dictionary name: %1%") % convertToMb(m_dicName) << endl;
+    char buf[150];
     strftime(buf, sizeof(buf), "%c", gmtime(&m_compressDate));
-    printf(_("compressed on: %s\n"), buf);
-    printf(_("compressed using a binary compiled by: %s\n"), 
convertToMb(m_userHost).c_str());
-    printf(_("dictionary type: %s\n"), m_type == kDAWG ? "DAWG" : "GADDAG");
-    printf(_("letters: %s\n"), convertToMb(m_letters).c_str());
-    printf(_("number of letters: %lu\n"), (long unsigned int)m_letters.size());
-    printf(_("number of words: %d\n"), m_nbWords);
-    long unsigned int size = sizeof(Dict_header_old) + sizeof(Dict_header_ext);
-    printf(_("header size: %lu bytes\n"), size);
-    printf(_("root: %d (edge)\n"), m_root);
-    printf(_("nodes: %d used + %d saved\n"), m_nodesUsed, m_nodesSaved);
-    printf(_("edges: %d used + %d saved\n"), m_edgesUsed, m_edgesSaved);
-    printf("===============================================\n");
-    printf(_("letter | points | frequency | vowel | consonant\n"));
-    printf("-------+--------+-----------+-------+----------\n");
+    cout << fmt("Compressed on: %1%") % buf << endl;
+    cout << fmt("Compressed using a binary compiled by: %1%") % 
convertToMb(m_userHost) << endl;
+    cout << fmt("Dictionary type: %1%") % (m_type == kDAWG ? "DAWG" : 
"GADDAG") << endl;
+    cout << fmt("Letters: %1%") % convertToMb(m_letters) << endl;
+    cout << fmt("Number of letters: %1%") % m_letters.size() << endl;
+    cout << fmt("Number of words: %1%") % m_nbWords << endl;
+    long unsigned int size = sizeof(Dict_header_old) +
+        sizeof(Dict_header_ext) + sizeof(Dict_header_ext_2);
+    cout << fmt("Header size: %1% bytes") % size << endl;
+    cout << fmt("Root: %1% (edge)") % m_root << endl;
+    cout << fmt("Nodes: %1% used + %2% saved") % m_nodesUsed % m_nodesSaved << 
endl;
+    cout << fmt("Edges: %1% used + %2% saved") % m_edgesUsed % m_edgesSaved << 
endl;
+#undef fmt
+    cout << "===============================================================" 
<< endl;
+    cout << format("%1% | %2% | %3% | %4% | %5% | %6% | %7%")
+        % _("letter") % _("points") % _("frequency") % _("vowel")
+        % _("consonant") % _("disp.") % _("input") << endl;
+    cout << "-------+--------+-----------+-------+-----------+-------+------" 
<< endl;
+#define sz(x) strlen(_(x))
     for (unsigned int i = 0; i < m_letters.size(); ++i)
     {
-        printf("  %s   |   %2d   |    %2d     |   %d   |    %d\n",
-               padAndConvert(wstring(1, m_letters[i]), 2).c_str(),
-               m_points[i], m_frequency[i], m_vowels[i], m_consonants[i]);
+        format fmter("%1% | %2% | %3% | %4% | %5% | %6% | %7%");
+        fmter % centerAndConvert(wstring(1, m_letters[i]), sz("letter"));
+        fmter % centerAndConvert(str(wformat(L"%1%") % m_points[i]), 
sz("points"));
+        fmter % centerAndConvert(str(wformat(L"%1%") % m_frequency[i]), 
sz("frequency"));
+        fmter % centerAndConvert(str(wformat(L"%1%") % m_vowels[i]), 
sz("vowel"));
+        fmter % centerAndConvert(str(wformat(L"%1%") % m_consonants[i]), 
sz("consonant"));
+        map<wchar_t, vector<wstring> >::const_iterator it =
+            m_displayAndInputData.find(m_letters[i]);
+        if (it != m_displayAndInputData.end())
+        {
+            const vector<wstring> &inputs = it->second;
+            fmter % centerAndConvert(str(wformat(L"%1%") % inputs[0]), 
sz("disp."));
+            bool first = true;
+            string s;
+            for (uint8_t j = 1; j < inputs.size(); ++j)
+            {
+                if (first)
+                    first = false;
+                else
+                    s += " ";
+                s += convertToMb(inputs[j]);
+            }
+            fmter % s;
+        }
+        else
+        {
+            fmter % string(sz("disp."), ' ') % string(sz("input"), ' ');
+        }
+        cout << fmter.str() << endl;
     }
-    printf("===============================================\n");
+#undef sz
+    cout << "===============================================================" 
<< endl;
 }
 

Index: dic/header.h
===================================================================
RCS file: /cvsroot/eliot/eliot/dic/header.h,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -b -r1.8 -r1.9
--- dic/header.h        27 Jun 2009 18:09:44 -0000      1.8
+++ dic/header.h        28 Jun 2009 10:55:24 -0000      1.9
@@ -53,6 +53,7 @@
     vector<uint8_t> frequency;
     vector<bool> vowels;
     vector<bool> consonants;
+    map<wchar_t, vector<wstring> > displayInputData;
 };
 
 
@@ -129,7 +130,7 @@
     /**
      * Return the display string corresponding to the given code
      */
-    wdstring getDisplayStr(unsigned int iCode) const;
+    const wdstring & getDisplayStr(unsigned int iCode) const;
 
     /**
      * Convert the given string (made of internal characters)
@@ -196,8 +197,18 @@
     /// Consonants
     vector<bool> m_consonants;
 
+    /// Additional display and input strings for some letters
+    map<wchar_t, vector<wstring> > m_displayAndInputData;
+
+    /// Cache for the char --> code associations
     map<wchar_t, unsigned int> m_mapCodeFromChar;
 
+    /// Cache for the display string of each code
+    vector<wdstring> m_displayCache;
+
+    /// Same as m_displayAndInputData, but also contains lowercase mappings
+    map<wchar_t, vector<wstring> > m_displayInputCache;
+
     /**
      * Load the header from a file
      * @param iStream: Input stream where to read the header
@@ -205,8 +216,20 @@
      */
     void read(istream &iStream);
 
-    /** Build m_mapCodeFromChar */
-    void buildMapCodeFromChar();
+    /** Build various caches */
+    void buildCaches();
+
+    /**
+     * Fill the m_displayAndInputData field from the serialized data
+     * of the given string
+     */
+    void readDisplayAndInput(const wstring &serialized);
+
+    /**
+     * Return a serialized version of the data contained in the
+     * m_displayAndInputData field
+     */
+    wstring writeDisplayAndInput() const;
 };
 
 #endif /* _HEADER_H */

Index: game/cross.h
===================================================================
RCS file: /cvsroot/eliot/eliot/game/cross.h,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -b -r1.10 -r1.11
--- game/cross.h        22 Nov 2008 13:09:30 -0000      1.10
+++ game/cross.h        28 Jun 2009 10:55:24 -0000      1.11
@@ -32,6 +32,7 @@
  *
  *************************/
 
+// TODO: implement using the bitset class
 class Cross
 {
 public:
[Prev in Thread]
Current Thread
[Next in Thread]
[Eliot-dev] eliot dic/compdic.cpp dic/encoding.cpp dic/head..., Olivier Teulière <=
Prev by Date: [Eliot-dev] eliot dic/dic.cpp dic/dic.h dic/header.cpp dic/...
Next by Date: [Eliot-dev] eliot/qt/ui dic_tools_widget.ui
Previous by thread: [Eliot-dev] eliot dic/dic.cpp dic/dic.h dic/header.cpp dic/...
Next by thread: [Eliot-dev] eliot/qt/ui dic_tools_widget.ui
Index(es):
- Date
- Thread