groff
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Groff] unicode support, part 6: public glyph to name/number API


From: Bruno Haible
Subject: [Groff] unicode support, part 6: public glyph to name/number API
Date: Wed, 15 Feb 2006 13:23:00 +0100
User-agent: KMail/1.5

Hi,

Here is a patch that makes the glyph to name or number API available
(for the Unicode font implementation) without the ugly hacks of the
previous attempt. It makes the variants in nametoindex.cpp and
troff/input.cpp more similar, and actually simplifies things:

  - Where character_indexer::numbered_char_index used a fake glyph name
    starting with a blank, it now uses a glyph with no name. For the
    lookup, a hash table indexed by the number is now used - like the
    numbered_charinfo_dictionary in troff/input.cpp.

  - Where character_indexer::index_as_numbered_char used to call strtol()
    to convert the fake name to an integer, the same is now achieved by
    storing the number together with the index in the hash table - like
    in troff/charinfo.h.

  - 'struct glyph' is reduced from a two-element struct to a single pointer.

As a result, the glyph to name/number conversions are now one-liners.

One trick in all this is to create a new class 'glyphinfo' that becomes
a superclass of 'charinfo' and collects all information about a glyph
that is common to both cases. Another trick is to use a hash table with
an int as key: this is the natural data structure for the numbered glyphs.


2006-02-15  Bruno Haible  <address@hidden>

        * src/include/font.h (struct glyphinfo): New class.
        (struct glyph): Change internal representation. Change constructor
        signature. New method glyph::glyph_number().
        (glyph::glyph, glyph::undefined_glyph, glyph::glyph_index,
        glyph::operator==, glyph::operator!=): Update.
        (font::index_to_name, font::index_to_number): New functions.
        * src/include/itable.h: New file, based on src/include/ptable.h.
        * src/libs/libgroff/nametoindex.cpp: Include itable.h.
        (class charinfo): New class.
        (class character_indexer): Change table result type from 'int' to
        'class charinfo'. Add table with integer key.
        (character_indexer::character_indexer): Update.
        (character_indexer::ascii_char_index): Update.
        (character_indexer::numbered_char_index): Use NULL as name, not a
        string starting with a space.
        (character_indexer::named_char_index): Update.
        (font::number_to_index, font::name_to_index): Remove no-op cast.
        (glyph::glyph_name): New method.
        * src/roff/troff/charinfo.h (class charinfo): Inherit from class
        glyphinfo.
        (NUMBERED): Remove flag bit.
        (charinfo::numbered, charinfo::get_index): Update.
        * src/roff/troff/input.cpp (charinfo::charinfo): Update.
        (charinfo::set_number, charinfo::get_number): Update.
        (glyph::glyph_name): New method.

diff -r -c3 groff-20060213.orig/src/include/font.h groff-20060213/src/include/font.h
--- groff-20060213.orig/src/include/font.h      2006-02-13 13:00:19.000000000 +0100
+++ groff-20060213/src/include/font.h   2006-02-15 01:22:09.000000000 +0100
@@ -38,57 +38,69 @@
 //
 //   - those with a number, referring to the the font-dependent glyph with
 //     the given number.
+
+// The statically allocated information about a glyph.
+// This is an abstract class; only its subclass charinfo is instantiated.
+// This subclass `charinfo' exists in two versions: one in
+// roff/troff/input.cpp for troff, and one in libs/libgroff/nametoindex.cpp
+// for the preprocessors and the postprocessors.
+struct glyphinfo {
+  int index;           // A font-independent integer value.
+  int number;          // Glyph number or -1.
+  friend class character_indexer;
+};
+
 struct glyph {
 private:
-  int index;                   // A font-independent integer value.
-  const char *name;            // Glyph name, statically allocated.
+  glyphinfo *ptr;      // Pointer to the complete information.
   friend class font;
   friend class character_indexer;
   friend class charinfo;
-  glyph(int, const char *);    // Glyph with given index and name.
+  glyph(glyphinfo *);  // Glyph with given complete information.
 public:
-  glyph();                     // Uninitialized glyph.
+  glyph();             // Uninitialized glyph.
   static glyph undefined_glyph();      // Undefined glyph.
   int glyph_index();
-  const char *glyph_name();
+  const char *glyph_name();    // Return the glyph name or NULL.
+  int glyph_number();          // Return the glyph number or -1.
   int operator==(const glyph &) const;
   int operator!=(const glyph &) const;
 };
 
-inline glyph::glyph(int idx, const char *nm)
-: index (idx), name (nm)
+inline glyph::glyph(glyphinfo *p)
+: ptr (p)
 {
 }
 
 inline glyph::glyph()
-: index (0xdeadbeef), name (NULL)
+: ptr ((glyphinfo *) 0xdeadbeef)
 {
 }
 
 inline glyph glyph::undefined_glyph()
 {
-  return glyph(-1, NULL);
+  return glyph(NULL);
 }
 #define UNDEFINED_GLYPH glyph::undefined_glyph()
 
 inline int glyph::glyph_index()
 {
-  return index;
+  return ptr->index;
 }
 
-inline const char *glyph::glyph_name()
+inline int glyph::glyph_number()
 {
-  return name;
+  return ptr->number;
 }
 
 inline int glyph::operator==(const glyph &other) const
 {
-  return index == other.index;
+  return ptr == other.ptr;
 }
 
 inline int glyph::operator!=(const glyph &other) const
 {
-  return index != other.index;
+  return ptr != other.ptr;
 }
 
 // Types used in non-public members of `class font'.
@@ -228,7 +240,7 @@
                        // info from there.
 
   // The next two functions exist in two versions: one in
-  // roff/troff/input.cpp for troff, and one for
+  // roff/troff/input.cpp for troff, and one in
   // libs/libgroff/nametoindex.cpp for the preprocessors and the
   // postprocessors.
   static glyph name_to_index(const char *);    // Convert the glyph with
@@ -241,6 +253,12 @@
                        // object.  This has the same semantics as the groff
                        // escape sequence \N'number'.  If such a `glyph'
                        // object does not yet exist, a new one is allocated.
+  static const char *index_to_name(glyph);     // Convert the given glyph
+                       // back to its name.  Return NULL if the glyph
+                       // doesn't have a name.
+  static int index_to_number(glyph);   // Convert the given glyph back to
+                       // its number.  Return -1 if it does not designate
+                       // a numbered character.
 
   static FONT_COMMAND_HANDLER
     set_unknown_desc_command_handler(FONT_COMMAND_HANDLER);    // Register
@@ -355,4 +373,14 @@
                        // case of failure.
 };
 
+inline const char *font::index_to_name(glyph g)
+{
+  return g.glyph_name();
+}
+
+inline int font::index_to_number(glyph g)
+{
+  return g.glyph_number();
+}
+
 // end of font.h
--- /dev/null   2003-09-23 19:59:22.000000000 +0200
+++ groff-20060213/src/include/itable.h 2006-02-15 01:14:31.000000000 +0100
@@ -0,0 +1,197 @@
+// -*- C++ -*-
+/* Copyright (C) 1989, 1990, 1991, 1992, 2003, 2004, 2006
+   Free Software Foundation, Inc.
+     Written by James Clark (address@hidden)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License along
+with groff; see the file COPYING.  If not, write to the Free Software
+Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#include <assert.h>
+
+// name2(a,b) concatenates two C identifiers.
+#ifdef TRADITIONAL_CPP
+# define name2(a,b) a/**/b
+#else /* not TRADITIONAL_CPP */
+# define name2(a,b) name2x(a,b)
+# define name2x(a,b) a ## b
+#endif /* not TRADITIONAL_CPP */
+
+// `class ITABLE(T)' is the type of a hash table mapping an integer (int >= 0)
+// to an object of type T.
+//
+// `struct IASSOC(T)' is the type of an association (pair) between an integer
+// (int >= 0) and an object of type T.
+//
+// `class ITABLE_ITERATOR(T)' is the type of an iterator iterating through a
+// `class ITABLE(T)'.
+//
+// Nowadays one would use templates for this; this code predates the addition
+// of templates to C++.
+#define ITABLE(T) name2(T,_itable)
+#define IASSOC(T) name2(T,_iassoc)
+#define ITABLE_ITERATOR(T) name2(T,_itable_iterator)
+
+extern unsigned next_ptable_size(unsigned);    // Return the first suitable
+                               // hash table size greater than the given
+                               // value.
+
+// Declare the types `class ITABLE(T)', `struct IASSOC(T)', and `class
+// ITABLE_ITERATOR(T)' for the type `T'.
+#define declare_itable(T)                                                    \
+                                                                             \
+struct IASSOC(T) {                                                           \
+  int key;                                                                   \
+  T *val;                                                                    \
+  IASSOC(T)();                                                               \
+};                                                                           \
+                                                                             \
+class ITABLE(T);                                                             \
+                                                                             \
+class ITABLE_ITERATOR(T) {                                                   \
+  ITABLE(T) *p;                                                              \
+  unsigned i;                                                                \
+public:                                                                      \
+  ITABLE_ITERATOR(T)(ITABLE(T) *);     /* Initialize an iterator running     \
+                                          through the given table.  */       \
+  int next(int *, T **);               /* Fetch the next pair, store the key \
+                                          and value in arg1 and arg2,        \
+                                          respectively, and return 1.  If    \
+                                          there is no more pair in the       \
+                                          table, return 0.  */               \
+};                                                                           \
+                                                                             \
+class ITABLE(T) {                                                            \
+  IASSOC(T) *v;                                                              \
+  unsigned size;                                                             \
+  unsigned used;                                                             \
+  enum {                                                                     \
+    FULL_NUM = 2,                                                            \
+    FULL_DEN = 3,                                                            \
+    INITIAL_SIZE = 17                                                        \
+  };                                                                         \
+public:                                                                      \
+  ITABLE(T)();                         /* Create an empty table.  */         \
+  ~ITABLE(T)();                        /* Delete a table, including its      \
+                                          values.  */                        \
+  void define(int, T *);               /* Define the value (arg2) for a key  \
+                                          (arg1).  */                        \
+  T *lookup(int);                      /* Return a pointer to the value of   \
+                                          the given key, if found in the     \
+                                          table, or NULL otherwise.  */      \
+  friend class ITABLE_ITERATOR(T);                                           \
+};
+
+
+// Values must be allocated by the caller (always using new[], not new)
+// and are freed by ITABLE.
+
+// Define the implementations of the members of the types `class ITABLE(T)',
+// `struct IASSOC(T)', `class ITABLE_ITERATOR(T)' for the type `T'.
+#define implement_itable(T)                                                  \
+                                                                             \
+IASSOC(T)::IASSOC(T)()                                                       \
+: key(-1), val(0)                                                            \
+{                                                                            \
+}                                                                            \
+                                                                             \
+ITABLE(T)::ITABLE(T)()                                                       \
+{                                                                            \
+  v = new IASSOC(T)[size = INITIAL_SIZE];                                    \
+  used = 0;                                                                  \
+}                                                                            \
+                                                                             \
+ITABLE(T)::~ITABLE(T)()                                                      \
+{                                                                            \
+  for (unsigned i = 0; i < size; i++) {                                      \
+    a_delete v[i].val;                                                       \
+  }                                                                          \
+  a_delete v;                                                                \
+}                                                                            \
+                                                                             \
+void ITABLE(T)::define(int key, T *val)                                      \
+{                                                                            \
+  assert(key >= 0);                                                          \
+  unsigned int h = (unsigned int)(key);                                      \
+  unsigned n;                                                                \
+  for (n = unsigned(h % size);                                               \
+       v[n].key >= 0;                                                        \
+       n = (n == 0 ? size - 1 : n - 1))                                      \
+    if (v[n].key == key) {                                                   \
+      a_delete v[n].val;                                                     \
+      v[n].val = val;                                                        \
+      return;                                                                \
+    }                                                                        \
+  if (val == 0)                                                              \
+    return;                                                                  \
+  if (used*FULL_DEN >= size*FULL_NUM) {                                      \
+    IASSOC(T) *oldv = v;                                                     \
+    unsigned old_size = size;                                                \
+    size = next_ptable_size(size);                                           \
+    v = new IASSOC(T)[size];                                                 \
+    for (unsigned i = 0; i < old_size; i++)                                  \
+      if (oldv[i].key >= 0) {                                                \
+       if (oldv[i].val != 0) {                                               \
+         unsigned j;                                                         \
+         for (j = (unsigned int)(oldv[i].key) % size;                        \
+              v[j].key >= 0;                                                 \
+              j = (j == 0 ? size - 1 : j - 1))                               \
+                ;                                                            \
+         v[j].key = oldv[i].key;                                             \
+         v[j].val = oldv[i].val;                                             \
+       }                                                                     \
+      }                                                                      \
+    for (n = unsigned(h % size);                                             \
+        v[n].key >= 0;                                                       \
+        n = (n == 0 ? size - 1 : n - 1))                                     \
+      ;                                                                      \
+    a_delete oldv;                                                           \
+  }                                                                          \
+  v[n].key = key;                                                            \
+  v[n].val = val;                                                            \
+  used++;                                                                    \
+}                                                                            \
+                                                                             \
+T *ITABLE(T)::lookup(int key)                                                \
+{                                                                            \
+  assert(key >= 0);                                                          \
+  for (unsigned n = (unsigned int)key % size;                                \
+       v[n].key >= 0;                                                        \
+       n = (n == 0 ? size - 1 : n - 1))                                      \
+    if (v[n].key == key)                                                     \
+      return v[n].val;                                                       \
+  return 0;                                                                  \
+}                                                                            \
+                                                                             \
+ITABLE_ITERATOR(T)::ITABLE_ITERATOR(T)(ITABLE(T) *t)                         \
+: p(t), i(0)                                                                 \
+{                                                                            \
+}                                                                            \
+                                                                             \
+int ITABLE_ITERATOR(T)::next(int *keyp, T **valp)                            \
+{                                                                            \
+  unsigned size = p->size;                                                   \
+  IASSOC(T) *v = p->v;                                                       \
+  for (; i < size; i++)                                                      \
+    if (v[i].key >= 0) {                                                     \
+      *keyp = v[i].key;                                                      \
+      *valp = v[i].val;                                                      \
+      i++;                                                                   \
+      return 1;                                                              \
+    }                                                                        \
+  return 0;                                                                  \
+}
+
+// end of itable.h
diff -r -c3 groff-20060213.orig/src/libs/libgroff/nametoindex.cpp groff-20060213/src/libs/libgroff/nametoindex.cpp
--- groff-20060213.orig/src/libs/libgroff/nametoindex.cpp       2006-02-13 13:00:19.000000000 +0100
+++ groff-20060213/src/libs/libgroff/nametoindex.cpp    2006-02-15 02:10:17.000000000 +0100
@@ -28,9 +28,22 @@
 #include "error.h"
 #include "font.h"
 #include "ptable.h"
+#include "itable.h"
 
-declare_ptable(int)
-implement_ptable(int)
+// Every glyphinfo is actually a charinfo.
+class charinfo : glyphinfo {
+private:
+  const char *name;    // The glyph name, or NULL.
+public:
+  friend class character_indexer;
+  friend class glyph;
+};
+
+declare_ptable(charinfo)
+implement_ptable(charinfo)
+
+declare_itable(charinfo)
+implement_itable(charinfo)
 
 class character_indexer {
 public:
@@ -44,7 +57,8 @@
   int next_index;
   glyph ascii_index[256];
   glyph small_number_index[NSMALL];
-  PTABLE(int) table;
+  PTABLE(charinfo) table;
+  ITABLE(charinfo) ntable;
 };
 
 character_indexer::character_indexer()
@@ -52,9 +66,9 @@
 {
   int i;
   for (i = 0; i < 256; i++)
-    ascii_index[i] = glyph(-1, NULL);
+    ascii_index[i] = UNDEFINED_GLYPH;
   for (i = 0; i < NSMALL; i++)
-    small_number_index[i] = glyph(-1, NULL);
+    small_number_index[i] = UNDEFINED_GLYPH;
 }
 
 character_indexer::~character_indexer()
@@ -63,11 +77,15 @@
 
 glyph character_indexer::ascii_char_index(unsigned char c)
 {
-  if (ascii_index[c].index < 0) {
+  if (ascii_index[c] == UNDEFINED_GLYPH) {
     char buf[4+3+1];
     memcpy(buf, "char", 4);
     strcpy(buf + 4, i_to_a(c));
-    ascii_index[c] = glyph(next_index++, strsave(buf));
+    charinfo *ci = new charinfo;
+    ci->index = next_index++;
+    ci->number = -1;
+    ci->name = strsave(buf);
+    ascii_index[c] = glyph(ci);
   }
   return ascii_index[c];
 }
@@ -75,52 +93,62 @@
 glyph character_indexer::numbered_char_index(int n)
 {
   if (n >= 0 && n < NSMALL) {
-    if (small_number_index[n].index < 0)
-      small_number_index[n] = glyph(next_index++, NULL);
+    if (small_number_index[n] == UNDEFINED_GLYPH) {
+      charinfo *ci = new charinfo;
+      ci->index = next_index++;
+      ci->number = n;
+      ci->name = NULL;
+      small_number_index[n] = glyph(ci);
+    }
     return small_number_index[n];
   }
-  // Not the most efficient possible implementation.
-  char buf[1 + 1 + INT_DIGITS + 1];
-  buf[0] = ' ';
-  strcpy(buf + 1, i_to_a(n));
-  int *np = table.lookup(buf);
-  if (!np) {
-    np = new int[1];
-    *np = next_index++;
-    table.define(buf, np);
+  charinfo *ci = ntable.lookup(n);
+  if (ci == NULL) {
+    ci = new charinfo[1];
+    ci->index = next_index++;
+    ci->number = n;
+    ci->name = NULL;
+    ntable.define(n, ci);
   }
-  return glyph(*np, NULL);
+  return glyph(ci);
 }
 
 glyph character_indexer::named_char_index(const char *s)
 {
-  int *np = table.lookupassoc(&s);
-  if (!np) {
-    np = new int[1];
-    *np = next_index++;
-    s = table.define(s, np);
+  charinfo *ci = table.lookupassoc(&s);
+  if (ci == NULL) {
+    ci = new charinfo[1];
+    ci->index = next_index++;
+    ci->number = -1;
+    ci->name = table.define(s, ci);
   }
-  return glyph(*np, s);
+  return glyph(ci);
 }
 
 static character_indexer indexer;
 
 glyph font::number_to_index(int n)
 {
-  return glyph(indexer.numbered_char_index(n));
+  return indexer.numbered_char_index(n);
 }
 
 glyph font::name_to_index(const char *s)
 {
   assert(s != 0 && s[0] != '\0' && s[0] != ' ');
   if (s[1] == '\0')
-    return glyph(indexer.ascii_char_index(s[0]));
+    return indexer.ascii_char_index(s[0]);
   /* char128 and \200 are synonyms */
   if (s[0] == 'c' && s[1] == 'h' && s[2] == 'a' && s[3] == 'r') {
     char *val;
     long n = strtol(s + 4, &val, 10);
     if (val != s + 4 && *val == '\0' && n >= 0 && n < 256)
-      return glyph(indexer.ascii_char_index((unsigned char)n));
+      return indexer.ascii_char_index((unsigned char)n);
   }
-  return glyph(indexer.named_char_index(s));
+  return indexer.named_char_index(s);
+}
+
+const char *glyph::glyph_name()
+{
+  charinfo *ci = (charinfo *)ptr; // Every glyphinfo is actually a charinfo.
+  return ci->name;
 }
diff -r -c3 groff-20060213.orig/src/roff/troff/charinfo.h groff-20060213/src/roff/troff/charinfo.h
--- groff-20060213.orig/src/roff/troff/charinfo.h       2006-02-13 13:00:20.000000000 +0100
+++ groff-20060213/src/roff/troff/charinfo.h    2006-02-15 01:31:02.000000000 +0100
@@ -21,11 +21,9 @@
 
 class macro;
 
-class charinfo {
+class charinfo : glyphinfo {
   static int next_index;
   charinfo *translation;
-  glyph index;
-  int number;
   macro *mac;
   unsigned char special_translation;
   unsigned char hyphenation_code;
@@ -39,14 +37,14 @@
                                // active for .asciify (set by .trin)
   char_mode mode;
 public:
-  enum { 
+  enum {               // Values for the flags bitmask.  See groff
+                       // manual, description of the `.cflags' request.
     ENDS_SENTENCE = 1,
     BREAK_BEFORE = 2,
     BREAK_AFTER = 4,
     OVERLAPS_HORIZONTALLY = 8,
     OVERLAPS_VERTICALLY = 16,
-    TRANSPARENT = 32,
-    NUMBERED = 64
+    TRANSPARENT = 32
   };
   enum {
     TRANSLATE_NONE,
@@ -126,7 +124,7 @@
 
 inline int charinfo::numbered()
 {
-  return flags & NUMBERED;
+  return number >= 0;
 }
 
 inline int charinfo::is_normal()
@@ -173,7 +171,7 @@
 
 inline glyph charinfo::get_index()
 {
-  return index;
+  return glyph(this);
 }
 
 inline void charinfo::set_translation_input()
diff -r -c3 groff-20060213.orig/src/roff/troff/input.cpp groff-20060213/src/roff/troff/input.cpp
--- groff-20060213.orig/src/roff/troff/input.cpp        2006-02-13 13:00:20.000000000 +0100
+++ groff-20060213/src/roff/troff/input.cpp     2006-02-14 04:04:26.000000000 +0100
@@ -8094,7 +8094,8 @@
   not_found(0), transparent_translate(1), translate_input(0),
   mode(CHAR_NORMAL), nm(s)
 {
-  index = glyph(next_index++, s.contents());
+  index = next_index++;
+  number = -1;
 }
 
 void charinfo::set_hyphenation_code(unsigned char c)
@@ -8152,13 +8153,13 @@
 
 void charinfo::set_number(int n)
 {
+  assert(n >= 0);
   number = n;
-  flags |= NUMBERED;
 }
 
 int charinfo::get_number()
 {
-  assert(flags & NUMBERED);
+  assert(number >= 0);
   return number;
 }
 
@@ -8213,3 +8214,9 @@
 {
   return get_charinfo_by_number(n)->get_index();
 }
+
+const char *glyph::glyph_name()
+{
+  charinfo *ci = (charinfo *)ptr; // Every glyphinfo is actually a charinfo.
+  return (ci->nm != UNNAMED_SYMBOL ? ci->nm.contents() : NULL);
+}





reply via email to

[Prev in Thread] Current Thread [Next in Thread]