diff -ur groff.old/src/include/font.h groff/src/include/font.h --- groff.old/src/include/font.h 2007-11-19 15:43:33.000000000 +0000 +++ groff/src/include/font.h 2007-12-10 01:27:34.000000000 +0000 @@ -19,6 +19,12 @@ with groff; see the file COPYING. If not, write to the Free Software Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */ +#include +#include +#include + +class char_class; + // A function of this type can be registered to define the semantics of // arbitrary commands in a font DESC file. typedef void (*FONT_COMMAND_HANDLER)(const char *, // command @@ -268,6 +274,8 @@ // upper1, ... lowerN, upperN, 0 }. private: + std::map > class_map; + // A map of names to class objects. unsigned ligatures; // Bit mask of available ligatures. Used by // has_ligature(). font_kern_list **kern_hash_table; // Hash table of kerning pairs. @@ -309,6 +317,11 @@ void extend_ch(); void compact(); + // These methods add glyphs to character classes. + void add_class(const char *, glyph *); + void add_class(const char *, glyph *, glyph *); + void add_class(const char *, const char *); + void add_kern(glyph *, glyph *, int); // Add to the kerning table a // kerning amount (arg3) between two given glyphs // (arg1 and arg2). diff -ur groff.old/src/libs/libgroff/font.cpp groff/src/libs/libgroff/font.cpp --- groff.old/src/libs/libgroff/font.cpp 2007-11-19 15:43:33.000000000 +0000 +++ groff/src/libs/libgroff/font.cpp 2007-12-10 01:28:05.000000000 +0000 @@ -32,6 +32,7 @@ #include "font.h" #include "unicode.h" #include "paper.h" +#include "classes.h" const char *const WS = " \t\n\r"; @@ -779,6 +780,38 @@ return 0; } +void font::add_class(const char *name, glyph *g) +{ + int num = glyph_to_number(g); + + if (num == -1) + return; + + single_char_class *ref = new single_char_class(num); + class_map[name].push_back(ref); +} + +void font::add_class(const char *name, glyph *g1, glyph *g2) +{ + int num1 = glyph_to_number(g1); + int num2 = glyph_to_number(g2); + + if ((num1 == -1) || (num2 == -1)) + return; + + range_char_class *ref = new range_char_class(num1, num2); + class_map[name].push_back(ref); +} + +void font::add_class(const char *name, const char *oname) +{ + std::vector *vec = &class_map[oname]; + int nelems = vec->size(); + for (int i = 0; i < nelems; i++) { + class_map[name].push_back((*vec)[i]); + } +} + // If the font can't be found, then if not_found is non-NULL, it will be set // to 1 otherwise a message will be printed. @@ -1003,12 +1036,62 @@ copy_entry(number_to_glyph(metric.code), last_glyph); } } + } if (last_glyph == NULL) { t.error("I didn't seem to find any characters"); return 0; } } + else if (strcmp(command, "classes") == 0) { + if (head_only) + return 1; + for (;;) { + if (!t.next()) { + command = 0; + break; + } + char *cname = strtok(t.buf, WS); + if (cname == 0) + continue; + char *equals = strtok(0, WS); + if (equals == 0) { + command = cname; + break; + } + p = strtok(0, WS); + if (p == 0) { + t.error("empty character classes not allowed"); + return 0; + } + glyph *g1 = 0, *g2 = 0; + while (p != 0) { + if ((g1 != 0) && (p[0] == '-')) { + p = strtok(0, WS); + if (p == 0) { + t.error("incomplete range in class definition"); + return 0; + } + g2 = name_to_glyph(p); + add_class(cname, g1, g2); + g1 = g2 = 0; + } + else if (g1 != 0) { + add_class(cname, g1); + g1 = 0; + } + if ((p[0] == '<') && (p[strlen(p)-1] == '>')) { + add_class(cname, p); + } + else if (p[0] != '-') { + g1 = name_to_glyph(p); + } + p = strtok(0, WS); + } + if (g1 != 0) + add_class(cname, g1); + } + } else { t.error("unrecognised command `%1' " "after `kernpairs' or `charset' command", diff -ur groff.old/src/libs/libgroff/Makefile.sub groff/src/libs/libgroff/Makefile.sub --- groff.old/src/libs/libgroff/Makefile.sub 2007-11-19 15:43:33.000000000 +0000 +++ groff/src/libs/libgroff/Makefile.sub 2007-12-10 01:01:26.000000000 +0000 @@ -5,6 +5,7 @@ OBJS=\ assert.$(OBJEXT) \ change_lf.$(OBJEXT) \ + classes.$(OBJEXT) \ cmap.$(OBJEXT) \ color.$(OBJEXT) \ cset.$(OBJEXT) \ @@ -55,6 +56,7 @@ CCSRCS=\ $(srcdir)/assert.cpp \ $(srcdir)/change_lf.cpp \ + $(srcdir)/classes.cpp \ $(srcdir)/cmap.cpp \ $(srcdir)/color.cpp \ $(srcdir)/cset.cpp \ diff -ur groff.old/src/roff/troff/charinfo.h groff/src/roff/troff/charinfo.h --- groff.old/src/roff/troff/charinfo.h 2007-11-19 15:43:33.000000000 +0000 +++ groff/src/roff/troff/charinfo.h 2007-11-20 21:50:09.000000000 +0000 @@ -20,6 +20,7 @@ Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */ class macro; +class char_class; class charinfo : glyph { static int next_index; @@ -91,7 +92,7 @@ }; charinfo *get_charinfo(symbol); -extern charinfo *charset_table[]; +extern char_class *charset_table[]; charinfo *get_charinfo_by_number(int); inline int charinfo::overlaps_horizontally() diff -ur groff.old/src/roff/troff/env.cpp groff/src/roff/troff/env.cpp --- groff.old/src/roff/troff/env.cpp 2007-11-19 15:43:33.000000000 +0000 +++ groff/src/roff/troff/env.cpp 2007-11-20 21:59:39.000000000 +0000 @@ -33,6 +33,7 @@ #include "reg.h" #include "font.h" #include "charinfo.h" +#include "classes.h" #include "macropath.h" #include "input.h" #include @@ -657,7 +658,7 @@ current_tab(TAB_NONE), leader_node(0), tab_char(0), - leader_char(charset_table['.']), + leader_char(charset_table['.']->get_charinfo()), current_field(0), discarding(0), spread_flag(0), @@ -1583,7 +1584,7 @@ if (has_arg()) { node *nd = 0; for (int i = '9'; i >= '0'; i--) { - node *tem = make_node(charset_table[i], curenv); + node *tem = make_node(charset_table[i]->get_charinfo(), curenv); if (!tem) { skip_line(); return; diff -ur groff.old/src/roff/troff/input.cpp groff/src/roff/troff/input.cpp --- groff.old/src/roff/troff/input.cpp 2007-11-19 15:43:33.000000000 +0000 +++ groff/src/roff/troff/input.cpp 2007-11-20 21:58:36.000000000 +0000 @@ -35,6 +35,7 @@ #include "reg.h" #include "font.h" #include "charinfo.h" +#include "classes.h" #include "macropath.h" #include "input.h" #include "defs.h" @@ -89,7 +90,9 @@ #ifndef POPEN_MISSING char *pipe_command = 0; #endif -charinfo *charset_table[256]; +// The first 256 entries here are the first 256 characters; after that, they are +// sorted only. +char_class *charset_table[1024]; unsigned char hpf_code_table[256]; static int warning_mask = DEFAULT_WARNING_MASK; @@ -2618,7 +2621,7 @@ static int transparent_translate(int cc) { if (!invalid_input_char(cc)) { - charinfo *ci = charset_table[cc]; + charinfo *ci = charset_table[cc]->get_charinfo(); switch (ci->get_special_translation(1)) { case charinfo::TRANSLATE_SPACE: return ' '; @@ -2804,7 +2807,7 @@ fprintf(stderr, "found [%c]\n", ch); fflush(stderr); } #endif - curenv->add_char(charset_table[ch]); + curenv->add_char(charset_table[ch]->get_charinfo()); tok.next(); if (tok.type != token::TOKEN_CHAR) break; @@ -6438,22 +6441,24 @@ { char buf[16]; strcpy(buf, "char"); + memset(charset_table, 0, sizeof(charset_table)); for (int i = 0; i < 256; i++) { strcpy(buf + 4, i_to_a(i)); - charset_table[i] = get_charinfo(symbol(buf)); - charset_table[i]->set_ascii_code(i); - if (csalpha(i)) - charset_table[i]->set_hyphenation_code(cmlower(i)); - } - charset_table['.']->set_flags(charinfo::ENDS_SENTENCE); - charset_table['?']->set_flags(charinfo::ENDS_SENTENCE); - charset_table['!']->set_flags(charinfo::ENDS_SENTENCE); - charset_table['-']->set_flags(charinfo::BREAK_AFTER); - charset_table['"']->set_flags(charinfo::TRANSPARENT); - charset_table['\'']->set_flags(charinfo::TRANSPARENT); - charset_table[')']->set_flags(charinfo::TRANSPARENT); - charset_table[']']->set_flags(charinfo::TRANSPARENT); - charset_table['*']->set_flags(charinfo::TRANSPARENT); + charset_table[i] = new single_char_class(i); + charset_table[i]->set_charinfo(get_charinfo(symbol(buf))); + charset_table[i]->get_charinfo()->set_ascii_code(i); + if (csalpha(i)) + charset_table[i]->get_charinfo()->set_hyphenation_code(cmlower(i)); + } + charset_table['.']->get_charinfo()->set_flags(charinfo::ENDS_SENTENCE); + charset_table['?']->get_charinfo()->set_flags(charinfo::ENDS_SENTENCE); + charset_table['!']->get_charinfo()->set_flags(charinfo::ENDS_SENTENCE); + charset_table['-']->get_charinfo()->set_flags(charinfo::BREAK_AFTER); + charset_table['"']->get_charinfo()->set_flags(charinfo::TRANSPARENT); + charset_table['\'']->get_charinfo()->set_flags(charinfo::TRANSPARENT); + charset_table[')']->get_charinfo()->set_flags(charinfo::TRANSPARENT); + charset_table[']']->get_charinfo()->set_flags(charinfo::TRANSPARENT); + charset_table['*']->get_charinfo()->set_flags(charinfo::TRANSPARENT); get_charinfo(symbol("dg"))->set_flags(charinfo::TRANSPARENT); get_charinfo(symbol("rq"))->set_flags(charinfo::TRANSPARENT); get_charinfo(symbol("em"))->set_flags(charinfo::BREAK_AFTER); @@ -6464,7 +6469,7 @@ get_charinfo(symbol("sqrtex"))->set_flags(charinfo::OVERLAPS_HORIZONTALLY); get_charinfo(symbol("ru"))->set_flags(charinfo::OVERLAPS_HORIZONTALLY); get_charinfo(symbol("br"))->set_flags(charinfo::OVERLAPS_VERTICALLY); - page_character = charset_table['%']; + page_character = charset_table['%']->get_charinfo(); } static void init_hpf_code_table() @@ -6611,14 +6616,14 @@ charinfo *token::get_char(int required) { if (type == TOKEN_CHAR) - return charset_table[c]; + return charset_table[c]->get_charinfo(); if (type == TOKEN_SPECIAL) return get_charinfo(nm); if (type == TOKEN_NUMBERED_CHAR) return get_charinfo_by_number(val); if (type == TOKEN_ESCAPE) { if (escape_char != 0) - return charset_table[escape_char]; + return charset_table[escape_char]->get_charinfo(); else { error("`\\e' used while no current escape character"); return 0; @@ -6662,14 +6667,15 @@ node *n = 0; switch (type) { case TOKEN_CHAR: - *pp = (*pp)->add_char(charset_table[c], curenv, &w, &s); + *pp = (*pp)->add_char(charset_table[c]->get_charinfo(), curenv, &w, &s); break; case TOKEN_DUMMY: n = new dummy_node; break; case TOKEN_ESCAPE: if (escape_char != 0) - *pp = (*pp)->add_char(charset_table[escape_char], curenv, &w, &s); + *pp = (*pp)->add_char(charset_table[escape_char]->get_charinfo(), curenv, + &w, &s); break; case TOKEN_HYPHEN_INDICATOR: *pp = (*pp)->add_discretionary_hyphen(); @@ -6734,7 +6740,7 @@ curenv->get_fill_color())); break; case TOKEN_CHAR: - curenv->add_char(charset_table[c]); + curenv->add_char(charset_table[c]->get_charinfo()); break; case TOKEN_DUMMY: curenv->add_node(new dummy_node); @@ -6747,7 +6753,7 @@ break; case TOKEN_ESCAPE: if (escape_char != 0) - curenv->add_char(charset_table[escape_char]); + curenv->add_char(charset_table[escape_char]->get_charinfo()); break; case TOKEN_BEGIN_TRAP: case TOKEN_END_TRAP: @@ -8330,7 +8336,7 @@ { charinfo *ci; if (nm[1] == 0) - ci = charset_table[nm[0] & 0xff]; + ci = charset_table[nm[0] & 0xff]->get_charinfo(); else if (nm[0] == '\\' && nm[2] == 0) ci = get_charinfo(symbol(nm + 1)); else diff -ur groff.old/src/roff/troff/node.cpp groff/src/roff/troff/node.cpp --- groff.old/src/roff/troff/node.cpp 2007-11-19 15:43:33.000000000 +0000 +++ groff/src/roff/troff/node.cpp 2007-11-20 22:00:47.000000000 +0000 @@ -40,6 +40,7 @@ #include "reg.h" #include "font.h" #include "charinfo.h" +#include "classes.h" #include "input.h" #include "geometry.h" @@ -6292,7 +6293,7 @@ hunits env_digit_width(environment *env) { - node *n = make_glyph_node(charset_table['0'], env); + node *n = make_glyph_node(charset_table['0']->get_charinfo(), env); if (n) { hunits x = n->width(); delete n; --- groff.old/src/include/classes.h 1970-01-01 00:00:00.000000000 +0000 +++ groff/src/include/classes.h 2007-12-10 01:14:58.000000000 +0000 @@ -0,0 +1,62 @@ +/* This file is in the public domain. */ + +class charinfo; + +class char_class +{ + public: + virtual bool is_in_class(int c); + virtual int lookup_char(int c) = 0; + charinfo *get_charinfo(); + void set_charinfo(charinfo *); + protected: + private: + charinfo *ci; +}; + +class single_char_class : public char_class +{ + public: + single_char_class(int c); + int lookup_char(int c); + protected: + private: + int ch; +}; + +class range_char_class : public char_class +{ + public: + range_char_class(int low, int high); + int lookup_char(int c); + protected: + private: + int lo, hi; +}; + +class ref_char_class : public char_class +{ + public: + ref_char_class(char_class *klass); + int lookup_char(int c); + char_class *get_class(); + protected: + private: + char_class *ref; +}; + +inline bool char_class::is_in_class(int c) +{ + return lookup_char(c) == 0; +} + +inline charinfo *char_class::get_charinfo() +{ + return ci; +} + +inline void char_class::set_charinfo(charinfo *cis) +{ + ci = cis; +} + --- groff.old/src/libs/libgroff/classes.cpp 1970-01-01 00:00:00.000000000 +0000 +++ groff/src/libs/libgroff/classes.cpp 2007-12-10 01:14:43.000000000 +0000 @@ -0,0 +1,48 @@ +/* This file is in the public domain. */ + +#include "classes.h" + +single_char_class::single_char_class(int c) : + ch(c) +{ +} + +int single_char_class::lookup_char(int c) +{ + if (c < ch) + return -1; + else if (c > ch) + return 1; + else + return 0; +} + +range_char_class::range_char_class(int low, int high) : + lo(low), hi(high) +{ +} + +int range_char_class::lookup_char(int c) +{ + if (c < lo) + return -1; + else if (c > hi) + return 1; + else + return 0; +} + +ref_char_class::ref_char_class(char_class *klass) : + ref(klass) +{ +} + +int ref_char_class::lookup_char(int c) +{ + return ref->lookup_char(c); +} + +char_class *ref_char_class::get_class() +{ + return ref; +}