gawk-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[SCM] gawk branch, feature/minrx, updated. gawk-4.1.0-5956-g1847a420


From: Arnold Robbins
Subject: [SCM] gawk branch, feature/minrx, updated. gawk-4.1.0-5956-g1847a420
Date: Mon, 10 Feb 2025 01:44:06 -0500 (EST)

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, feature/minrx has been updated
       via  1847a420bba46a8082148161335e943d7cdf5867 (commit)
      from  e96b3ac2e2a6f8419b7868c581fad28effac2285 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=1847a420bba46a8082148161335e943d7cdf5867

commit 1847a420bba46a8082148161335e943d7cdf5867
Author: Arnold D. Robbins <arnold@skeeve.com>
Date:   Mon Feb 10 08:43:41 2025 +0200

    Updates to minrx and charset.

diff --git a/support/ChangeLog b/support/ChangeLog
index d055ae9e..d5959951 100644
--- a/support/ChangeLog
+++ b/support/ChangeLog
@@ -1,3 +1,9 @@
+2025-02-10         Arnold D. Robbins     <arnold@skeeve.com>
+
+       * minrx.cpp: Updated with firstcset version that supports
+       charset.
+       * charset.h, charset.c: Updated.
+
 2025-02-06         Arnold D. Robbins     <arnold@skeeve.com>
 
        * minrx.cpp: Update again. More speedups.
diff --git a/support/charset.c b/support/charset.c
index 41a4ebc6..5671bce6 100644
--- a/support/charset.c
+++ b/support/charset.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2023, 2024, Arnold David Robbins.
+ * Copyright (C) 2023, 2024, 2025, Arnold David Robbins.
  * 
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -32,6 +32,7 @@
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
+#include <ctype.h>
 #include <wctype.h>
 #include <wchar.h>
 #include <locale.h>
@@ -41,35 +42,3465 @@
 #define MAX_CODE_POINT 0x10ffff        // max Unicode code point
 
 typedef struct set_item {
-       enum set_item_type {
-               CTYPE_ITEM,
-               RANGE_ITEM,
-       } item_type;
-       union {
-               struct _ctype {
-                       wctype_t    wtype;
-                       const char *type_name;
-               } c;
-               struct _range {
-                       int32_t start, end;
-               } r;
-       } u;
+       int32_t start, end;
 } set_item;
-#define wtype          u.c.wtype
-#define type_name      u.c.type_name
-#define start          u.r.start
-#define end                    u.r.end
 struct _charset {
-       bool     complemented;      // For [^...] sets
        bool     no_newlines;       // If \n can't be in the set
        bool     finalized;         // No more changes possible
+       bool     is_utf8;                       // True if using a UTF-8 character set
+       int              mb_cur_max;            // Value of MB_CUR_MAX when the charset was created
        size_t   nchars_inuse;      // Number of characters used
        size_t   nchars_allocated;  // Number of characters allocated
        int32_t  *chars;            // Characters added to the set
        size_t   nelems;            // Number of elements (items) in use
        size_t   allocated;         // Number allocated
+       size_t   nelems8bit;            // Number of elements covering 0-255
        set_item *items;            // Array of items
 };
+
+static const set_item digit[] = {
+       { 48, 57 },
+       { -1, -1 },     // end markers
+};
+static const set_item cntrl[] = {
+       { 0, 31 },
+       { 127, 159 },
+       { 8232, 8233 },
+       { -1, -1 },     // end markers
+};
+static const set_item xdigit[] = {
+       { 48, 57 },
+       { 65, 70 },
+       { 97, 102 },
+       { -1, -1 },     // end markers
+};
+static const set_item print_additional[] = {
+       { 32, 126 },
+       { 5120, 5788 },
+       { 8192, 8231 },
+       { 8234, 8292 },
+       { 12288, 12351 },
+       { -1, -1 },     // end markers
+};
+static const set_item blank[] = {
+       { 9, 9 },
+       { 32, 32 },
+       { 5760, 5760 },
+       { 8192, 8198 },
+       { 8200, 8202 },
+       { 8287, 8287 },
+       { 12288, 12288 },
+       { -1, -1 },     // end markers
+};
+static const set_item graph_additional[] = {
+       { 33, 126 },
+       { 5120, 5759 },
+       { 5761, 5788 },
+       { 8199, 8199 },
+       { 8203, 8231 },
+       { 8234, 8286 },
+       { 8288, 8292 },
+       { 12289, 12351 },
+       { -1, -1 },     // end markers
+};
+static const set_item space[] = {
+       { 9, 13 },
+       { 32, 32 },
+       { 5760, 5760 },
+       { 8192, 8198 },
+       { 8200, 8202 },
+       { 8232, 8233 },
+       { 8287, 8287 },
+       { 12288, 12288 },
+       { -1, -1 },     // end markers
+};
+static const set_item punct[] = {
+       { 33, 47 },
+       { 58, 64 },
+       { 91, 96 },
+       { 123, 126 },
+       { 160, 169 },
+       { 171, 180 },
+       { 182, 185 },
+       { 187, 191 },
+       { 215, 215 },
+       { 247, 247 },
+       { 706, 709 },
+       { 722, 735 },
+       { 741, 747 },
+       { 749, 749 },
+       { 751, 836 },
+       { 838, 879 },
+       { 885, 885 },
+       { 894, 894 },
+       { 900, 901 },
+       { 903, 903 },
+       { 1014, 1014 },
+       { 1154, 1161 },
+       { 1370, 1375 },
+       { 1417, 1418 },
+       { 1421, 1423 },
+       { 1425, 1455 },
+       { 1470, 1470 },
+       { 1472, 1472 },
+       { 1475, 1475 },
+       { 1478, 1478 },
+       { 1523, 1524 },
+       { 1536, 1551 },
+       { 1563, 1567 },
+       { 1624, 1624 },
+       { 1642, 1645 },
+       { 1748, 1748 },
+       { 1757, 1760 },
+       { 1769, 1772 },
+       { 1789, 1790 },
+       { 1792, 1805 },
+       { 1807, 1807 },
+       { 1856, 1866 },
+       { 2027, 2035 },
+       { 2038, 2041 },
+       { 2045, 2047 },
+       { 2072, 2073 },
+       { 2093, 2093 },
+       { 2096, 2110 },
+       { 2137, 2139 },
+       { 2142, 2142 },
+       { 2184, 2184 },
+       { 2192, 2193 },
+       { 2200, 2207 },
+       { 2250, 2259 },
+       { 2272, 2274 },
+       { 2282, 2287 },
+       { 2364, 2364 },
+       { 2381, 2381 },
+       { 2385, 2388 },
+       { 2404, 2405 },
+       { 2416, 2416 },
+       { 2492, 2492 },
+       { 2509, 2509 },
+       { 2546, 2555 },
+       { 2557, 2558 },
+       { 2620, 2620 },
+       { 2637, 2637 },
+       { 2678, 2678 },
+       { 2748, 2748 },
+       { 2765, 2765 },
+       { 2800, 2801 },
+       { 2813, 2815 },
+       { 2876, 2876 },
+       { 2893, 2893 },
+       { 2901, 2901 },
+       { 2928, 2928 },
+       { 2930, 2935 },
+       { 3021, 3021 },
+       { 3056, 3066 },
+       { 3076, 3076 },
+       { 3132, 3132 },
+       { 3149, 3149 },
+       { 3191, 3199 },
+       { 3204, 3204 },
+       { 3260, 3260 },
+       { 3277, 3277 },
+       { 3387, 3388 },
+       { 3405, 3405 },
+       { 3407, 3407 },
+       { 3416, 3422 },
+       { 3440, 3449 },
+       { 3530, 3530 },
+       { 3572, 3572 },
+       { 3647, 3647 },
+       { 3655, 3660 },
+       { 3662, 3663 },
+       { 3674, 3675 },
+       { 3770, 3770 },
+       { 3784, 3788 },
+       { 3841, 3871 },
+       { 3882, 3903 },
+       { 3970, 3975 },
+       { 4030, 4044 },
+       { 4046, 4058 },
+       { 4151, 4151 },
+       { 4153, 4154 },
+       { 4170, 4175 },
+       { 4254, 4255 },
+       { 4347, 4347 },
+       { 4957, 4988 },
+       { 5008, 5017 },
+       { 5120, 5120 },
+       { 5741, 5742 },
+       { 5787, 5788 },
+       { 5867, 5869 },
+       { 5908, 5909 },
+       { 5940, 5942 },
+       { 6068, 6069 },
+       { 6089, 6102 },
+       { 6104, 6107 },
+       { 6109, 6109 },
+       { 6128, 6137 },
+       { 6144, 6159 },
+       { 6457, 6459 },
+       { 6464, 6464 },
+       { 6468, 6469 },
+       { 6618, 6618 },
+       { 6622, 6655 },
+       { 6686, 6687 },
+       { 6752, 6752 },
+       { 6773, 6780 },
+       { 6783, 6783 },
+       { 6816, 6822 },
+       { 6824, 6829 },
+       { 6832, 6846 },
+       { 6849, 6859 },
+       { 6964, 6964 },
+       { 6980, 6980 },
+       { 7002, 7038 },
+       { 7082, 7083 },
+       { 7142, 7142 },
+       { 7154, 7155 },
+       { 7164, 7167 },
+       { 7223, 7223 },
+       { 7227, 7231 },
+       { 7294, 7295 },
+       { 7360, 7367 },
+       { 7376, 7400 },
+       { 7405, 7405 },
+       { 7412, 7412 },
+       { 7415, 7417 },
+       { 7616, 7654 },
+       { 7669, 7679 },
+       { 8125, 8125 },
+       { 8127, 8129 },
+       { 8141, 8143 },
+       { 8157, 8159 },
+       { 8173, 8175 },
+       { 8189, 8190 },
+       { 8199, 8199 },
+       { 8203, 8231 },
+       { 8234, 8286 },
+       { 8288, 8292 },
+       { 8294, 8304 },
+       { 8308, 8318 },
+       { 8320, 8334 },
+       { 8352, 8384 },
+       { 8400, 8432 },
+       { 8448, 8449 },
+       { 8451, 8454 },
+       { 8456, 8457 },
+       { 8468, 8468 },
+       { 8470, 8472 },
+       { 8478, 8483 },
+       { 8485, 8485 },
+       { 8487, 8487 },
+       { 8489, 8489 },
+       { 8494, 8494 },
+       { 8506, 8507 },
+       { 8512, 8516 },
+       { 8522, 8525 },
+       { 8527, 8543 },
+       { 8585, 8587 },
+       { 8592, 9254 },
+       { 9280, 9290 },
+       { 9312, 9397 },
+       { 9450, 11123 },
+       { 11126, 11157 },
+       { 11159, 11263 },
+       { 11493, 11498 },
+       { 11503, 11505 },
+       { 11513, 11519 },
+       { 11632, 11632 },
+       { 11647, 11647 },
+       { 11776, 11822 },
+       { 11824, 11869 },
+       { 11904, 11929 },
+       { 11931, 12019 },
+       { 12032, 12245 },
+       { 12272, 12283 },
+       { 12289, 12292 },
+       { 12296, 12320 },
+       { 12330, 12336 },
+       { 12342, 12343 },
+       { 12349, 12351 },
+       { 12441, 12444 },
+       { 12448, 12448 },
+       { 12539, 12539 },
+       { 12688, 12703 },
+       { 12736, 12771 },
+       { 12800, 12830 },
+       { 12832, 13311 },
+       { 19904, 19967 },
+       { 42128, 42182 },
+       { 42238, 42239 },
+       { 42509, 42511 },
+       { 42607, 42611 },
+       { 42620, 42622 },
+       { 42736, 42743 },
+       { 42752, 42774 },
+       { 42784, 42785 },
+       { 42889, 42890 },
+       { 43014, 43014 },
+       { 43048, 43052 },
+       { 43056, 43065 },
+       { 43124, 43127 },
+       { 43204, 43204 },
+       { 43214, 43215 },
+       { 43232, 43249 },
+       { 43256, 43258 },
+       { 43260, 43260 },
+       { 43307, 43311 },
+       { 43347, 43347 },
+       { 43359, 43359 },
+       { 43443, 43443 },
+       { 43456, 43469 },
+       { 43486, 43487 },
+       { 43612, 43615 },
+       { 43639, 43641 },
+       { 43711, 43711 },
+       { 43713, 43713 },
+       { 43742, 43743 },
+       { 43760, 43761 },
+       { 43766, 43766 },
+       { 43867, 43867 },
+       { 43882, 43883 },
+       { 44011, 44013 },
+       { 57344, 63743 },
+       { 64297, 64297 },
+       { 64434, 64450 },
+       { 64830, 64847 },
+       { 64975, 64975 },
+       { 65020, 65049 },
+       { 65056, 65106 },
+       { 65108, 65126 },
+       { 65128, 65131 },
+       { 65279, 65279 },
+       { 65281, 65295 },
+       { 65306, 65312 },
+       { 65339, 65344 },
+       { 65371, 65381 },
+       { 65504, 65510 },
+       { 65512, 65518 },
+       { 65529, 65533 },
+       { 65792, 65794 },
+       { 65799, 65843 },
+       { 65847, 65855 },
+       { 65909, 65934 },
+       { 65936, 65948 },
+       { 65952, 65952 },
+       { 66000, 66045 },
+       { 66272, 66299 },
+       { 66336, 66339 },
+       { 66463, 66463 },
+       { 66512, 66512 },
+       { 66927, 66927 },
+       { 67671, 67679 },
+       { 67703, 67711 },
+       { 67751, 67759 },
+       { 67835, 67839 },
+       { 67862, 67867 },
+       { 67871, 67871 },
+       { 67903, 67903 },
+       { 68028, 68029 },
+       { 68032, 68047 },
+       { 68050, 68095 },
+       { 68152, 68154 },
+       { 68159, 68168 },
+       { 68176, 68184 },
+       { 68221, 68223 },
+       { 68253, 68255 },
+       { 68296, 68296 },
+       { 68325, 68326 },
+       { 68331, 68342 },
+       { 68409, 68415 },
+       { 68440, 68447 },
+       { 68472, 68479 },
+       { 68505, 68508 },
+       { 68521, 68527 },
+       { 68858, 68863 },
+       { 69216, 69246 },
+       { 69293, 69293 },
+       { 69405, 69414 },
+       { 69446, 69465 },
+       { 69506, 69513 },
+       { 69573, 69579 },
+       { 69702, 69709 },
+       { 69714, 69733 },
+       { 69744, 69744 },
+       { 69759, 69761 },
+       { 69817, 69825 },
+       { 69837, 69837 },
+       { 69939, 69940 },
+       { 69952, 69955 },
+       { 70003, 70005 },
+       { 70080, 70080 },
+       { 70085, 70093 },
+       { 70107, 70107 },
+       { 70109, 70111 },
+       { 70113, 70132 },
+       { 70197, 70198 },
+       { 70200, 70205 },
+       { 70313, 70313 },
+       { 70377, 70378 },
+       { 70459, 70460 },
+       { 70477, 70477 },
+       { 70502, 70508 },
+       { 70512, 70516 },
+       { 70722, 70722 },
+       { 70726, 70726 },
+       { 70731, 70735 },
+       { 70746, 70747 },
+       { 70749, 70750 },
+       { 70850, 70851 },
+       { 70854, 70854 },
+       { 71103, 71127 },
+       { 71231, 71231 },
+       { 71233, 71235 },
+       { 71264, 71276 },
+       { 71350, 71351 },
+       { 71353, 71353 },
+       { 71467, 71467 },
+       { 71482, 71487 },
+       { 71737, 71739 },
+       { 71914, 71922 },
+       { 71997, 71998 },
+       { 72003, 72006 },
+       { 72160, 72160 },
+       { 72162, 72162 },
+       { 72243, 72244 },
+       { 72255, 72263 },
+       { 72344, 72348 },
+       { 72350, 72354 },
+       { 72767, 72767 },
+       { 72769, 72773 },
+       { 72794, 72812 },
+       { 72816, 72817 },
+       { 73026, 73026 },
+       { 73028, 73029 },
+       { 73111, 73111 },
+       { 73463, 73464 },
+       { 73664, 73713 },
+       { 73727, 73727 },
+       { 74864, 74868 },
+       { 77809, 77810 },
+       { 78896, 78904 },
+       { 92782, 92783 },
+       { 92912, 92917 },
+       { 92976, 92991 },
+       { 92996, 92997 },
+       { 93019, 93025 },
+       { 93824, 93850 },
+       { 94178, 94178 },
+       { 94180, 94180 },
+       { 113820, 113821 },
+       { 113823, 113827 },
+       { 118528, 118573 },
+       { 118576, 118598 },
+       { 118608, 118723 },
+       { 118784, 119029 },
+       { 119040, 119078 },
+       { 119081, 119274 },
+       { 119296, 119365 },
+       { 119520, 119539 },
+       { 119552, 119638 },
+       { 119648, 119672 },
+       { 120513, 120513 },
+       { 120539, 120539 },
+       { 120571, 120571 },
+       { 120597, 120597 },
+       { 120629, 120629 },
+       { 120655, 120655 },
+       { 120687, 120687 },
+       { 120713, 120713 },
+       { 120745, 120745 },
+       { 120771, 120771 },
+       { 120832, 121483 },
+       { 121499, 121503 },
+       { 121505, 121519 },
+       { 123184, 123190 },
+       { 123215, 123215 },
+       { 123566, 123566 },
+       { 123628, 123631 },
+       { 123647, 123647 },
+       { 125127, 125142 },
+       { 125252, 125254 },
+       { 125256, 125258 },
+       { 125278, 125279 },
+       { 126065, 126132 },
+       { 126209, 126269 },
+       { 126704, 126705 },
+       { 126976, 127019 },
+       { 127024, 127123 },
+       { 127136, 127150 },
+       { 127153, 127167 },
+       { 127169, 127183 },
+       { 127185, 127221 },
+       { 127232, 127279 },
+       { 127306, 127311 },
+       { 127338, 127343 },
+       { 127370, 127405 },
+       { 127462, 127490 },
+       { 127504, 127547 },
+       { 127552, 127560 },
+       { 127568, 127569 },
+       { 127584, 127589 },
+       { 127744, 128727 },
+       { 128733, 128748 },
+       { 128752, 128764 },
+       { 128768, 128883 },
+       { 128896, 128984 },
+       { 128992, 129003 },
+       { 129008, 129008 },
+       { 129024, 129035 },
+       { 129040, 129095 },
+       { 129104, 129113 },
+       { 129120, 129159 },
+       { 129168, 129197 },
+       { 129200, 129201 },
+       { 129280, 129619 },
+       { 129632, 129645 },
+       { 129648, 129652 },
+       { 129656, 129660 },
+       { 129664, 129670 },
+       { 129680, 129708 },
+       { 129712, 129722 },
+       { 129728, 129733 },
+       { 129744, 129753 },
+       { 129760, 129767 },
+       { 129776, 129782 },
+       { 129792, 129938 },
+       { 129940, 129994 },
+       { 917505, 917505 },
+       { 917536, 917631 },
+       { 917760, 917999 },
+       { 983040, 1048573 },
+       { 1048576, 1114109 },
+       { -1, -1 },     // end markers
+};
+static const set_item upper[] = {
+       { 65, 90 },
+       { 192, 214 },
+       { 216, 222 },
+       { 256, 256 },
+       { 258, 258 },
+       { 260, 260 },
+       { 262, 262 },
+       { 264, 264 },
+       { 266, 266 },
+       { 268, 268 },
+       { 270, 270 },
+       { 272, 272 },
+       { 274, 274 },
+       { 276, 276 },
+       { 278, 278 },
+       { 280, 280 },
+       { 282, 282 },
+       { 284, 284 },
+       { 286, 286 },
+       { 288, 288 },
+       { 290, 290 },
+       { 292, 292 },
+       { 294, 294 },
+       { 296, 296 },
+       { 298, 298 },
+       { 300, 300 },
+       { 302, 302 },
+       { 304, 304 },
+       { 306, 306 },
+       { 308, 308 },
+       { 310, 310 },
+       { 313, 313 },
+       { 315, 315 },
+       { 317, 317 },
+       { 319, 319 },
+       { 321, 321 },
+       { 323, 323 },
+       { 325, 325 },
+       { 327, 327 },
+       { 330, 330 },
+       { 332, 332 },
+       { 334, 334 },
+       { 336, 336 },
+       { 338, 338 },
+       { 340, 340 },
+       { 342, 342 },
+       { 344, 344 },
+       { 346, 346 },
+       { 348, 348 },
+       { 350, 350 },
+       { 352, 352 },
+       { 354, 354 },
+       { 356, 356 },
+       { 358, 358 },
+       { 360, 360 },
+       { 362, 362 },
+       { 364, 364 },
+       { 366, 366 },
+       { 368, 368 },
+       { 370, 370 },
+       { 372, 372 },
+       { 374, 374 },
+       { 376, 377 },
+       { 379, 379 },
+       { 381, 381 },
+       { 385, 386 },
+       { 388, 388 },
+       { 390, 391 },
+       { 393, 395 },
+       { 398, 401 },
+       { 403, 404 },
+       { 406, 408 },
+       { 412, 413 },
+       { 415, 416 },
+       { 418, 418 },
+       { 420, 420 },
+       { 422, 423 },
+       { 425, 425 },
+       { 428, 428 },
+       { 430, 431 },
+       { 433, 435 },
+       { 437, 437 },
+       { 439, 440 },
+       { 444, 444 },
+       { 452, 453 },
+       { 455, 456 },
+       { 458, 459 },
+       { 461, 461 },
+       { 463, 463 },
+       { 465, 465 },
+       { 467, 467 },
+       { 469, 469 },
+       { 471, 471 },
+       { 473, 473 },
+       { 475, 475 },
+       { 478, 478 },
+       { 480, 480 },
+       { 482, 482 },
+       { 484, 484 },
+       { 486, 486 },
+       { 488, 488 },
+       { 490, 490 },
+       { 492, 492 },
+       { 494, 494 },
+       { 497, 498 },
+       { 500, 500 },
+       { 502, 504 },
+       { 506, 506 },
+       { 508, 508 },
+       { 510, 510 },
+       { 512, 512 },
+       { 514, 514 },
+       { 516, 516 },
+       { 518, 518 },
+       { 520, 520 },
+       { 522, 522 },
+       { 524, 524 },
+       { 526, 526 },
+       { 528, 528 },
+       { 530, 530 },
+       { 532, 532 },
+       { 534, 534 },
+       { 536, 536 },
+       { 538, 538 },
+       { 540, 540 },
+       { 542, 542 },
+       { 544, 544 },
+       { 546, 546 },
+       { 548, 548 },
+       { 550, 550 },
+       { 552, 552 },
+       { 554, 554 },
+       { 556, 556 },
+       { 558, 558 },
+       { 560, 560 },
+       { 562, 562 },
+       { 570, 571 },
+       { 573, 574 },
+       { 577, 577 },
+       { 579, 582 },
+       { 584, 584 },
+       { 586, 586 },
+       { 588, 588 },
+       { 590, 590 },
+       { 880, 880 },
+       { 882, 882 },
+       { 886, 886 },
+       { 895, 895 },
+       { 902, 902 },
+       { 904, 906 },
+       { 908, 908 },
+       { 910, 911 },
+       { 913, 929 },
+       { 931, 939 },
+       { 975, 975 },
+       { 978, 980 },
+       { 984, 984 },
+       { 986, 986 },
+       { 988, 988 },
+       { 990, 990 },
+       { 992, 992 },
+       { 994, 994 },
+       { 996, 996 },
+       { 998, 998 },
+       { 1000, 1000 },
+       { 1002, 1002 },
+       { 1004, 1004 },
+       { 1006, 1006 },
+       { 1012, 1012 },
+       { 1015, 1015 },
+       { 1017, 1018 },
+       { 1021, 1071 },
+       { 1120, 1120 },
+       { 1122, 1122 },
+       { 1124, 1124 },
+       { 1126, 1126 },
+       { 1128, 1128 },
+       { 1130, 1130 },
+       { 1132, 1132 },
+       { 1134, 1134 },
+       { 1136, 1136 },
+       { 1138, 1138 },
+       { 1140, 1140 },
+       { 1142, 1142 },
+       { 1144, 1144 },
+       { 1146, 1146 },
+       { 1148, 1148 },
+       { 1150, 1150 },
+       { 1152, 1152 },
+       { 1162, 1162 },
+       { 1164, 1164 },
+       { 1166, 1166 },
+       { 1168, 1168 },
+       { 1170, 1170 },
+       { 1172, 1172 },
+       { 1174, 1174 },
+       { 1176, 1176 },
+       { 1178, 1178 },
+       { 1180, 1180 },
+       { 1182, 1182 },
+       { 1184, 1184 },
+       { 1186, 1186 },
+       { 1188, 1188 },
+       { 1190, 1190 },
+       { 1192, 1192 },
+       { 1194, 1194 },
+       { 1196, 1196 },
+       { 1198, 1198 },
+       { 1200, 1200 },
+       { 1202, 1202 },
+       { 1204, 1204 },
+       { 1206, 1206 },
+       { 1208, 1208 },
+       { 1210, 1210 },
+       { 1212, 1212 },
+       { 1214, 1214 },
+       { 1216, 1217 },
+       { 1219, 1219 },
+       { 1221, 1221 },
+       { 1223, 1223 },
+       { 1225, 1225 },
+       { 1227, 1227 },
+       { 1229, 1229 },
+       { 1232, 1232 },
+       { 1234, 1234 },
+       { 1236, 1236 },
+       { 1238, 1238 },
+       { 1240, 1240 },
+       { 1242, 1242 },
+       { 1244, 1244 },
+       { 1246, 1246 },
+       { 1248, 1248 },
+       { 1250, 1250 },
+       { 1252, 1252 },
+       { 1254, 1254 },
+       { 1256, 1256 },
+       { 1258, 1258 },
+       { 1260, 1260 },
+       { 1262, 1262 },
+       { 1264, 1264 },
+       { 1266, 1266 },
+       { 1268, 1268 },
+       { 1270, 1270 },
+       { 1272, 1272 },
+       { 1274, 1274 },
+       { 1276, 1276 },
+       { 1278, 1278 },
+       { 1280, 1280 },
+       { 1282, 1282 },
+       { 1284, 1284 },
+       { 1286, 1286 },
+       { 1288, 1288 },
+       { 1290, 1290 },
+       { 1292, 1292 },
+       { 1294, 1294 },
+       { 1296, 1296 },
+       { 1298, 1298 },
+       { 1300, 1300 },
+       { 1302, 1302 },
+       { 1304, 1304 },
+       { 1306, 1306 },
+       { 1308, 1308 },
+       { 1310, 1310 },
+       { 1312, 1312 },
+       { 1314, 1314 },
+       { 1316, 1316 },
+       { 1318, 1318 },
+       { 1320, 1320 },
+       { 1322, 1322 },
+       { 1324, 1324 },
+       { 1326, 1326 },
+       { 1329, 1366 },
+       { 4256, 4293 },
+       { 4295, 4295 },
+       { 4301, 4301 },
+       { 5024, 5109 },
+       { 7312, 7354 },
+       { 7357, 7359 },
+       { 7680, 7680 },
+       { 7682, 7682 },
+       { 7684, 7684 },
+       { 7686, 7686 },
+       { 7688, 7688 },
+       { 7690, 7690 },
+       { 7692, 7692 },
+       { 7694, 7694 },
+       { 7696, 7696 },
+       { 7698, 7698 },
+       { 7700, 7700 },
+       { 7702, 7702 },
+       { 7704, 7704 },
+       { 7706, 7706 },
+       { 7708, 7708 },
+       { 7710, 7710 },
+       { 7712, 7712 },
+       { 7714, 7714 },
+       { 7716, 7716 },
+       { 7718, 7718 },
+       { 7720, 7720 },
+       { 7722, 7722 },
+       { 7724, 7724 },
+       { 7726, 7726 },
+       { 7728, 7728 },
+       { 7730, 7730 },
+       { 7732, 7732 },
+       { 7734, 7734 },
+       { 7736, 7736 },
+       { 7738, 7738 },
+       { 7740, 7740 },
+       { 7742, 7742 },
+       { 7744, 7744 },
+       { 7746, 7746 },
+       { 7748, 7748 },
+       { 7750, 7750 },
+       { 7752, 7752 },
+       { 7754, 7754 },
+       { 7756, 7756 },
+       { 7758, 7758 },
+       { 7760, 7760 },
+       { 7762, 7762 },
+       { 7764, 7764 },
+       { 7766, 7766 },
+       { 7768, 7768 },
+       { 7770, 7770 },
+       { 7772, 7772 },
+       { 7774, 7774 },
+       { 7776, 7776 },
+       { 7778, 7778 },
+       { 7780, 7780 },
+       { 7782, 7782 },
+       { 7784, 7784 },
+       { 7786, 7786 },
+       { 7788, 7788 },
+       { 7790, 7790 },
+       { 7792, 7792 },
+       { 7794, 7794 },
+       { 7796, 7796 },
+       { 7798, 7798 },
+       { 7800, 7800 },
+       { 7802, 7802 },
+       { 7804, 7804 },
+       { 7806, 7806 },
+       { 7808, 7808 },
+       { 7810, 7810 },
+       { 7812, 7812 },
+       { 7814, 7814 },
+       { 7816, 7816 },
+       { 7818, 7818 },
+       { 7820, 7820 },
+       { 7822, 7822 },
+       { 7824, 7824 },
+       { 7826, 7826 },
+       { 7828, 7828 },
+       { 7838, 7838 },
+       { 7840, 7840 },
+       { 7842, 7842 },
+       { 7844, 7844 },
+       { 7846, 7846 },
+       { 7848, 7848 },
+       { 7850, 7850 },
+       { 7852, 7852 },
+       { 7854, 7854 },
+       { 7856, 7856 },
+       { 7858, 7858 },
+       { 7860, 7860 },
+       { 7862, 7862 },
+       { 7864, 7864 },
+       { 7866, 7866 },
+       { 7868, 7868 },
+       { 7870, 7870 },
+       { 7872, 7872 },
+       { 7874, 7874 },
+       { 7876, 7876 },
+       { 7878, 7878 },
+       { 7880, 7880 },
+       { 7882, 7882 },
+       { 7884, 7884 },
+       { 7886, 7886 },
+       { 7888, 7888 },
+       { 7890, 7890 },
+       { 7892, 7892 },
+       { 7894, 7894 },
+       { 7896, 7896 },
+       { 7898, 7898 },
+       { 7900, 7900 },
+       { 7902, 7902 },
+       { 7904, 7904 },
+       { 7906, 7906 },
+       { 7908, 7908 },
+       { 7910, 7910 },
+       { 7912, 7912 },
+       { 7914, 7914 },
+       { 7916, 7916 },
+       { 7918, 7918 },
+       { 7920, 7920 },
+       { 7922, 7922 },
+       { 7924, 7924 },
+       { 7926, 7926 },
+       { 7928, 7928 },
+       { 7930, 7930 },
+       { 7932, 7932 },
+       { 7934, 7934 },
+       { 7944, 7951 },
+       { 7960, 7965 },
+       { 7976, 7983 },
+       { 7992, 7999 },
+       { 8008, 8013 },
+       { 8025, 8025 },
+       { 8027, 8027 },
+       { 8029, 8029 },
+       { 8031, 8031 },
+       { 8040, 8047 },
+       { 8072, 8079 },
+       { 8088, 8095 },
+       { 8104, 8111 },
+       { 8120, 8124 },
+       { 8136, 8140 },
+       { 8152, 8155 },
+       { 8168, 8172 },
+       { 8184, 8188 },
+       { 8450, 8450 },
+       { 8455, 8455 },
+       { 8459, 8461 },
+       { 8464, 8466 },
+       { 8469, 8469 },
+       { 8473, 8477 },
+       { 8484, 8484 },
+       { 8486, 8486 },
+       { 8488, 8488 },
+       { 8490, 8493 },
+       { 8496, 8499 },
+       { 8510, 8511 },
+       { 8517, 8517 },
+       { 8544, 8559 },
+       { 8579, 8579 },
+       { 9398, 9423 },
+       { 11264, 11311 },
+       { 11360, 11360 },
+       { 11362, 11364 },
+       { 11367, 11367 },
+       { 11369, 11369 },
+       { 11371, 11371 },
+       { 11373, 11376 },
+       { 11378, 11378 },
+       { 11381, 11381 },
+       { 11390, 11392 },
+       { 11394, 11394 },
+       { 11396, 11396 },
+       { 11398, 11398 },
+       { 11400, 11400 },
+       { 11402, 11402 },
+       { 11404, 11404 },
+       { 11406, 11406 },
+       { 11408, 11408 },
+       { 11410, 11410 },
+       { 11412, 11412 },
+       { 11414, 11414 },
+       { 11416, 11416 },
+       { 11418, 11418 },
+       { 11420, 11420 },
+       { 11422, 11422 },
+       { 11424, 11424 },
+       { 11426, 11426 },
+       { 11428, 11428 },
+       { 11430, 11430 },
+       { 11432, 11432 },
+       { 11434, 11434 },
+       { 11436, 11436 },
+       { 11438, 11438 },
+       { 11440, 11440 },
+       { 11442, 11442 },
+       { 11444, 11444 },
+       { 11446, 11446 },
+       { 11448, 11448 },
+       { 11450, 11450 },
+       { 11452, 11452 },
+       { 11454, 11454 },
+       { 11456, 11456 },
+       { 11458, 11458 },
+       { 11460, 11460 },
+       { 11462, 11462 },
+       { 11464, 11464 },
+       { 11466, 11466 },
+       { 11468, 11468 },
+       { 11470, 11470 },
+       { 11472, 11472 },
+       { 11474, 11474 },
+       { 11476, 11476 },
+       { 11478, 11478 },
+       { 11480, 11480 },
+       { 11482, 11482 },
+       { 11484, 11484 },
+       { 11486, 11486 },
+       { 11488, 11488 },
+       { 11490, 11490 },
+       { 11499, 11499 },
+       { 11501, 11501 },
+       { 11506, 11506 },
+       { 42560, 42560 },
+       { 42562, 42562 },
+       { 42564, 42564 },
+       { 42566, 42566 },
+       { 42568, 42568 },
+       { 42570, 42570 },
+       { 42572, 42572 },
+       { 42574, 42574 },
+       { 42576, 42576 },
+       { 42578, 42578 },
+       { 42580, 42580 },
+       { 42582, 42582 },
+       { 42584, 42584 },
+       { 42586, 42586 },
+       { 42588, 42588 },
+       { 42590, 42590 },
+       { 42592, 42592 },
+       { 42594, 42594 },
+       { 42596, 42596 },
+       { 42598, 42598 },
+       { 42600, 42600 },
+       { 42602, 42602 },
+       { 42604, 42604 },
+       { 42624, 42624 },
+       { 42626, 42626 },
+       { 42628, 42628 },
+       { 42630, 42630 },
+       { 42632, 42632 },
+       { 42634, 42634 },
+       { 42636, 42636 },
+       { 42638, 42638 },
+       { 42640, 42640 },
+       { 42642, 42642 },
+       { 42644, 42644 },
+       { 42646, 42646 },
+       { 42648, 42648 },
+       { 42650, 42650 },
+       { 42786, 42786 },
+       { 42788, 42788 },
+       { 42790, 42790 },
+       { 42792, 42792 },
+       { 42794, 42794 },
+       { 42796, 42796 },
+       { 42798, 42798 },
+       { 42802, 42802 },
+       { 42804, 42804 },
+       { 42806, 42806 },
+       { 42808, 42808 },
+       { 42810, 42810 },
+       { 42812, 42812 },
+       { 42814, 42814 },
+       { 42816, 42816 },
+       { 42818, 42818 },
+       { 42820, 42820 },
+       { 42822, 42822 },
+       { 42824, 42824 },
+       { 42826, 42826 },
+       { 42828, 42828 },
+       { 42830, 42830 },
+       { 42832, 42832 },
+       { 42834, 42834 },
+       { 42836, 42836 },
+       { 42838, 42838 },
+       { 42840, 42840 },
+       { 42842, 42842 },
+       { 42844, 42844 },
+       { 42846, 42846 },
+       { 42848, 42848 },
+       { 42850, 42850 },
+       { 42852, 42852 },
+       { 42854, 42854 },
+       { 42856, 42856 },
+       { 42858, 42858 },
+       { 42860, 42860 },
+       { 42862, 42862 },
+       { 42873, 42873 },
+       { 42875, 42875 },
+       { 42877, 42878 },
+       { 42880, 42880 },
+       { 42882, 42882 },
+       { 42884, 42884 },
+       { 42886, 42886 },
+       { 42891, 42891 },
+       { 42893, 42893 },
+       { 42896, 42896 },
+       { 42898, 42898 },
+       { 42902, 42902 },
+       { 42904, 42904 },
+       { 42906, 42906 },
+       { 42908, 42908 },
+       { 42910, 42910 },
+       { 42912, 42912 },
+       { 42914, 42914 },
+       { 42916, 42916 },
+       { 42918, 42918 },
+       { 42920, 42920 },
+       { 42922, 42926 },
+       { 42928, 42932 },
+       { 42934, 42934 },
+       { 42936, 42936 },
+       { 42938, 42938 },
+       { 42940, 42940 },
+       { 42942, 42942 },
+       { 42944, 42944 },
+       { 42946, 42946 },
+       { 42948, 42951 },
+       { 42953, 42953 },
+       { 42960, 42960 },
+       { 42966, 42966 },
+       { 42968, 42968 },
+       { 42997, 42997 },
+       { 65313, 65338 },
+       { 66560, 66599 },
+       { 66736, 66771 },
+       { 66928, 66938 },
+       { 66940, 66954 },
+       { 66956, 66962 },
+       { 66964, 66965 },
+       { 68736, 68786 },
+       { 71840, 71871 },
+       { 93760, 93791 },
+       { 119808, 119833 },
+       { 119860, 119885 },
+       { 119912, 119937 },
+       { 119964, 119964 },
+       { 119966, 119967 },
+       { 119970, 119970 },
+       { 119973, 119974 },
+       { 119977, 119980 },
+       { 119982, 119989 },
+       { 120016, 120041 },
+       { 120068, 120069 },
+       { 120071, 120074 },
+       { 120077, 120084 },
+       { 120086, 120092 },
+       { 120120, 120121 },
+       { 120123, 120126 },
+       { 120128, 120132 },
+       { 120134, 120134 },
+       { 120138, 120144 },
+       { 120172, 120197 },
+       { 120224, 120249 },
+       { 120276, 120301 },
+       { 120328, 120353 },
+       { 120380, 120405 },
+       { 120432, 120457 },
+       { 120488, 120512 },
+       { 120546, 120570 },
+       { 120604, 120628 },
+       { 120662, 120686 },
+       { 120720, 120744 },
+       { 120778, 120778 },
+       { 125184, 125217 },
+       { 127280, 127305 },
+       { 127312, 127337 },
+       { 127344, 127369 },
+       { -1, -1 },     // end markers
+};
+static const set_item lower[] = {
+       { 97, 122 },
+       { 170, 170 },
+       { 181, 181 },
+       { 186, 186 },
+       { 223, 246 },
+       { 248, 255 },
+       { 257, 257 },
+       { 259, 259 },
+       { 261, 261 },
+       { 263, 263 },
+       { 265, 265 },
+       { 267, 267 },
+       { 269, 269 },
+       { 271, 271 },
+       { 273, 273 },
+       { 275, 275 },
+       { 277, 277 },
+       { 279, 279 },
+       { 281, 281 },
+       { 283, 283 },
+       { 285, 285 },
+       { 287, 287 },
+       { 289, 289 },
+       { 291, 291 },
+       { 293, 293 },
+       { 295, 295 },
+       { 297, 297 },
+       { 299, 299 },
+       { 301, 301 },
+       { 303, 303 },
+       { 305, 305 },
+       { 307, 307 },
+       { 309, 309 },
+       { 311, 312 },
+       { 314, 314 },
+       { 316, 316 },
+       { 318, 318 },
+       { 320, 320 },
+       { 322, 322 },
+       { 324, 324 },
+       { 326, 326 },
+       { 328, 329 },
+       { 331, 331 },
+       { 333, 333 },
+       { 335, 335 },
+       { 337, 337 },
+       { 339, 339 },
+       { 341, 341 },
+       { 343, 343 },
+       { 345, 345 },
+       { 347, 347 },
+       { 349, 349 },
+       { 351, 351 },
+       { 353, 353 },
+       { 355, 355 },
+       { 357, 357 },
+       { 359, 359 },
+       { 361, 361 },
+       { 363, 363 },
+       { 365, 365 },
+       { 367, 367 },
+       { 369, 369 },
+       { 371, 371 },
+       { 373, 373 },
+       { 375, 375 },
+       { 378, 378 },
+       { 380, 380 },
+       { 382, 384 },
+       { 387, 387 },
+       { 389, 389 },
+       { 392, 392 },
+       { 396, 397 },
+       { 402, 402 },
+       { 405, 405 },
+       { 409, 411 },
+       { 414, 414 },
+       { 417, 417 },
+       { 419, 419 },
+       { 421, 421 },
+       { 424, 424 },
+       { 426, 427 },
+       { 429, 429 },
+       { 432, 432 },
+       { 436, 436 },
+       { 438, 438 },
+       { 441, 442 },
+       { 445, 447 },
+       { 453, 454 },
+       { 456, 457 },
+       { 459, 460 },
+       { 462, 462 },
+       { 464, 464 },
+       { 466, 466 },
+       { 468, 468 },
+       { 470, 470 },
+       { 472, 472 },
+       { 474, 474 },
+       { 476, 477 },
+       { 479, 479 },
+       { 481, 481 },
+       { 483, 483 },
+       { 485, 485 },
+       { 487, 487 },
+       { 489, 489 },
+       { 491, 491 },
+       { 493, 493 },
+       { 495, 496 },
+       { 498, 499 },
+       { 501, 501 },
+       { 505, 505 },
+       { 507, 507 },
+       { 509, 509 },
+       { 511, 511 },
+       { 513, 513 },
+       { 515, 515 },
+       { 517, 517 },
+       { 519, 519 },
+       { 521, 521 },
+       { 523, 523 },
+       { 525, 525 },
+       { 527, 527 },
+       { 529, 529 },
+       { 531, 531 },
+       { 533, 533 },
+       { 535, 535 },
+       { 537, 537 },
+       { 539, 539 },
+       { 541, 541 },
+       { 543, 543 },
+       { 545, 545 },
+       { 547, 547 },
+       { 549, 549 },
+       { 551, 551 },
+       { 553, 553 },
+       { 555, 555 },
+       { 557, 557 },
+       { 559, 559 },
+       { 561, 561 },
+       { 563, 569 },
+       { 572, 572 },
+       { 575, 576 },
+       { 578, 578 },
+       { 583, 583 },
+       { 585, 585 },
+       { 587, 587 },
+       { 589, 589 },
+       { 591, 659 },
+       { 661, 696 },
+       { 704, 705 },
+       { 736, 740 },
+       { 837, 837 },
+       { 881, 881 },
+       { 883, 883 },
+       { 887, 887 },
+       { 890, 893 },
+       { 912, 912 },
+       { 940, 974 },
+       { 976, 977 },
+       { 981, 983 },
+       { 985, 985 },
+       { 987, 987 },
+       { 989, 989 },
+       { 991, 991 },
+       { 993, 993 },
+       { 995, 995 },
+       { 997, 997 },
+       { 999, 999 },
+       { 1001, 1001 },
+       { 1003, 1003 },
+       { 1005, 1005 },
+       { 1007, 1011 },
+       { 1013, 1013 },
+       { 1016, 1016 },
+       { 1019, 1020 },
+       { 1072, 1119 },
+       { 1121, 1121 },
+       { 1123, 1123 },
+       { 1125, 1125 },
+       { 1127, 1127 },
+       { 1129, 1129 },
+       { 1131, 1131 },
+       { 1133, 1133 },
+       { 1135, 1135 },
+       { 1137, 1137 },
+       { 1139, 1139 },
+       { 1141, 1141 },
+       { 1143, 1143 },
+       { 1145, 1145 },
+       { 1147, 1147 },
+       { 1149, 1149 },
+       { 1151, 1151 },
+       { 1153, 1153 },
+       { 1163, 1163 },
+       { 1165, 1165 },
+       { 1167, 1167 },
+       { 1169, 1169 },
+       { 1171, 1171 },
+       { 1173, 1173 },
+       { 1175, 1175 },
+       { 1177, 1177 },
+       { 1179, 1179 },
+       { 1181, 1181 },
+       { 1183, 1183 },
+       { 1185, 1185 },
+       { 1187, 1187 },
+       { 1189, 1189 },
+       { 1191, 1191 },
+       { 1193, 1193 },
+       { 1195, 1195 },
+       { 1197, 1197 },
+       { 1199, 1199 },
+       { 1201, 1201 },
+       { 1203, 1203 },
+       { 1205, 1205 },
+       { 1207, 1207 },
+       { 1209, 1209 },
+       { 1211, 1211 },
+       { 1213, 1213 },
+       { 1215, 1215 },
+       { 1218, 1218 },
+       { 1220, 1220 },
+       { 1222, 1222 },
+       { 1224, 1224 },
+       { 1226, 1226 },
+       { 1228, 1228 },
+       { 1230, 1231 },
+       { 1233, 1233 },
+       { 1235, 1235 },
+       { 1237, 1237 },
+       { 1239, 1239 },
+       { 1241, 1241 },
+       { 1243, 1243 },
+       { 1245, 1245 },
+       { 1247, 1247 },
+       { 1249, 1249 },
+       { 1251, 1251 },
+       { 1253, 1253 },
+       { 1255, 1255 },
+       { 1257, 1257 },
+       { 1259, 1259 },
+       { 1261, 1261 },
+       { 1263, 1263 },
+       { 1265, 1265 },
+       { 1267, 1267 },
+       { 1269, 1269 },
+       { 1271, 1271 },
+       { 1273, 1273 },
+       { 1275, 1275 },
+       { 1277, 1277 },
+       { 1279, 1279 },
+       { 1281, 1281 },
+       { 1283, 1283 },
+       { 1285, 1285 },
+       { 1287, 1287 },
+       { 1289, 1289 },
+       { 1291, 1291 },
+       { 1293, 1293 },
+       { 1295, 1295 },
+       { 1297, 1297 },
+       { 1299, 1299 },
+       { 1301, 1301 },
+       { 1303, 1303 },
+       { 1305, 1305 },
+       { 1307, 1307 },
+       { 1309, 1309 },
+       { 1311, 1311 },
+       { 1313, 1313 },
+       { 1315, 1315 },
+       { 1317, 1317 },
+       { 1319, 1319 },
+       { 1321, 1321 },
+       { 1323, 1323 },
+       { 1325, 1325 },
+       { 1327, 1327 },
+       { 1376, 1416 },
+       { 4304, 4346 },
+       { 4349, 4351 },
+       { 5112, 5117 },
+       { 7296, 7304 },
+       { 7424, 7615 },
+       { 7681, 7681 },
+       { 7683, 7683 },
+       { 7685, 7685 },
+       { 7687, 7687 },
+       { 7689, 7689 },
+       { 7691, 7691 },
+       { 7693, 7693 },
+       { 7695, 7695 },
+       { 7697, 7697 },
+       { 7699, 7699 },
+       { 7701, 7701 },
+       { 7703, 7703 },
+       { 7705, 7705 },
+       { 7707, 7707 },
+       { 7709, 7709 },
+       { 7711, 7711 },
+       { 7713, 7713 },
+       { 7715, 7715 },
+       { 7717, 7717 },
+       { 7719, 7719 },
+       { 7721, 7721 },
+       { 7723, 7723 },
+       { 7725, 7725 },
+       { 7727, 7727 },
+       { 7729, 7729 },
+       { 7731, 7731 },
+       { 7733, 7733 },
+       { 7735, 7735 },
+       { 7737, 7737 },
+       { 7739, 7739 },
+       { 7741, 7741 },
+       { 7743, 7743 },
+       { 7745, 7745 },
+       { 7747, 7747 },
+       { 7749, 7749 },
+       { 7751, 7751 },
+       { 7753, 7753 },
+       { 7755, 7755 },
+       { 7757, 7757 },
+       { 7759, 7759 },
+       { 7761, 7761 },
+       { 7763, 7763 },
+       { 7765, 7765 },
+       { 7767, 7767 },
+       { 7769, 7769 },
+       { 7771, 7771 },
+       { 7773, 7773 },
+       { 7775, 7775 },
+       { 7777, 7777 },
+       { 7779, 7779 },
+       { 7781, 7781 },
+       { 7783, 7783 },
+       { 7785, 7785 },
+       { 7787, 7787 },
+       { 7789, 7789 },
+       { 7791, 7791 },
+       { 7793, 7793 },
+       { 7795, 7795 },
+       { 7797, 7797 },
+       { 7799, 7799 },
+       { 7801, 7801 },
+       { 7803, 7803 },
+       { 7805, 7805 },
+       { 7807, 7807 },
+       { 7809, 7809 },
+       { 7811, 7811 },
+       { 7813, 7813 },
+       { 7815, 7815 },
+       { 7817, 7817 },
+       { 7819, 7819 },
+       { 7821, 7821 },
+       { 7823, 7823 },
+       { 7825, 7825 },
+       { 7827, 7827 },
+       { 7829, 7837 },
+       { 7839, 7839 },
+       { 7841, 7841 },
+       { 7843, 7843 },
+       { 7845, 7845 },
+       { 7847, 7847 },
+       { 7849, 7849 },
+       { 7851, 7851 },
+       { 7853, 7853 },
+       { 7855, 7855 },
+       { 7857, 7857 },
+       { 7859, 7859 },
+       { 7861, 7861 },
+       { 7863, 7863 },
+       { 7865, 7865 },
+       { 7867, 7867 },
+       { 7869, 7869 },
+       { 7871, 7871 },
+       { 7873, 7873 },
+       { 7875, 7875 },
+       { 7877, 7877 },
+       { 7879, 7879 },
+       { 7881, 7881 },
+       { 7883, 7883 },
+       { 7885, 7885 },
+       { 7887, 7887 },
+       { 7889, 7889 },
+       { 7891, 7891 },
+       { 7893, 7893 },
+       { 7895, 7895 },
+       { 7897, 7897 },
+       { 7899, 7899 },
+       { 7901, 7901 },
+       { 7903, 7903 },
+       { 7905, 7905 },
+       { 7907, 7907 },
+       { 7909, 7909 },
+       { 7911, 7911 },
+       { 7913, 7913 },
+       { 7915, 7915 },
+       { 7917, 7917 },
+       { 7919, 7919 },
+       { 7921, 7921 },
+       { 7923, 7923 },
+       { 7925, 7925 },
+       { 7927, 7927 },
+       { 7929, 7929 },
+       { 7931, 7931 },
+       { 7933, 7933 },
+       { 7935, 7943 },
+       { 7952, 7957 },
+       { 7968, 7975 },
+       { 7984, 7991 },
+       { 8000, 8005 },
+       { 8016, 8023 },
+       { 8032, 8039 },
+       { 8048, 8061 },
+       { 8064, 8071 },
+       { 8080, 8087 },
+       { 8096, 8103 },
+       { 8112, 8116 },
+       { 8118, 8119 },
+       { 8126, 8126 },
+       { 8130, 8132 },
+       { 8134, 8135 },
+       { 8144, 8147 },
+       { 8150, 8151 },
+       { 8160, 8167 },
+       { 8178, 8180 },
+       { 8182, 8183 },
+       { 8305, 8305 },
+       { 8319, 8319 },
+       { 8336, 8348 },
+       { 8458, 8458 },
+       { 8462, 8463 },
+       { 8467, 8467 },
+       { 8495, 8495 },
+       { 8500, 8500 },
+       { 8505, 8505 },
+       { 8508, 8509 },
+       { 8518, 8521 },
+       { 8526, 8526 },
+       { 8560, 8575 },
+       { 8580, 8580 },
+       { 9424, 9449 },
+       { 11312, 11359 },
+       { 11361, 11361 },
+       { 11365, 11366 },
+       { 11368, 11368 },
+       { 11370, 11370 },
+       { 11372, 11372 },
+       { 11377, 11377 },
+       { 11379, 11380 },
+       { 11382, 11389 },
+       { 11393, 11393 },
+       { 11395, 11395 },
+       { 11397, 11397 },
+       { 11399, 11399 },
+       { 11401, 11401 },
+       { 11403, 11403 },
+       { 11405, 11405 },
+       { 11407, 11407 },
+       { 11409, 11409 },
+       { 11411, 11411 },
+       { 11413, 11413 },
+       { 11415, 11415 },
+       { 11417, 11417 },
+       { 11419, 11419 },
+       { 11421, 11421 },
+       { 11423, 11423 },
+       { 11425, 11425 },
+       { 11427, 11427 },
+       { 11429, 11429 },
+       { 11431, 11431 },
+       { 11433, 11433 },
+       { 11435, 11435 },
+       { 11437, 11437 },
+       { 11439, 11439 },
+       { 11441, 11441 },
+       { 11443, 11443 },
+       { 11445, 11445 },
+       { 11447, 11447 },
+       { 11449, 11449 },
+       { 11451, 11451 },
+       { 11453, 11453 },
+       { 11455, 11455 },
+       { 11457, 11457 },
+       { 11459, 11459 },
+       { 11461, 11461 },
+       { 11463, 11463 },
+       { 11465, 11465 },
+       { 11467, 11467 },
+       { 11469, 11469 },
+       { 11471, 11471 },
+       { 11473, 11473 },
+       { 11475, 11475 },
+       { 11477, 11477 },
+       { 11479, 11479 },
+       { 11481, 11481 },
+       { 11483, 11483 },
+       { 11485, 11485 },
+       { 11487, 11487 },
+       { 11489, 11489 },
+       { 11491, 11492 },
+       { 11500, 11500 },
+       { 11502, 11502 },
+       { 11507, 11507 },
+       { 11520, 11557 },
+       { 11559, 11559 },
+       { 11565, 11565 },
+       { 42561, 42561 },
+       { 42563, 42563 },
+       { 42565, 42565 },
+       { 42567, 42567 },
+       { 42569, 42569 },
+       { 42571, 42571 },
+       { 42573, 42573 },
+       { 42575, 42575 },
+       { 42577, 42577 },
+       { 42579, 42579 },
+       { 42581, 42581 },
+       { 42583, 42583 },
+       { 42585, 42585 },
+       { 42587, 42587 },
+       { 42589, 42589 },
+       { 42591, 42591 },
+       { 42593, 42593 },
+       { 42595, 42595 },
+       { 42597, 42597 },
+       { 42599, 42599 },
+       { 42601, 42601 },
+       { 42603, 42603 },
+       { 42605, 42605 },
+       { 42625, 42625 },
+       { 42627, 42627 },
+       { 42629, 42629 },
+       { 42631, 42631 },
+       { 42633, 42633 },
+       { 42635, 42635 },
+       { 42637, 42637 },
+       { 42639, 42639 },
+       { 42641, 42641 },
+       { 42643, 42643 },
+       { 42645, 42645 },
+       { 42647, 42647 },
+       { 42649, 42649 },
+       { 42651, 42653 },
+       { 42787, 42787 },
+       { 42789, 42789 },
+       { 42791, 42791 },
+       { 42793, 42793 },
+       { 42795, 42795 },
+       { 42797, 42797 },
+       { 42799, 42801 },
+       { 42803, 42803 },
+       { 42805, 42805 },
+       { 42807, 42807 },
+       { 42809, 42809 },
+       { 42811, 42811 },
+       { 42813, 42813 },
+       { 42815, 42815 },
+       { 42817, 42817 },
+       { 42819, 42819 },
+       { 42821, 42821 },
+       { 42823, 42823 },
+       { 42825, 42825 },
+       { 42827, 42827 },
+       { 42829, 42829 },
+       { 42831, 42831 },
+       { 42833, 42833 },
+       { 42835, 42835 },
+       { 42837, 42837 },
+       { 42839, 42839 },
+       { 42841, 42841 },
+       { 42843, 42843 },
+       { 42845, 42845 },
+       { 42847, 42847 },
+       { 42849, 42849 },
+       { 42851, 42851 },
+       { 42853, 42853 },
+       { 42855, 42855 },
+       { 42857, 42857 },
+       { 42859, 42859 },
+       { 42861, 42861 },
+       { 42863, 42872 },
+       { 42874, 42874 },
+       { 42876, 42876 },
+       { 42879, 42879 },
+       { 42881, 42881 },
+       { 42883, 42883 },
+       { 42885, 42885 },
+       { 42887, 42887 },
+       { 42892, 42892 },
+       { 42894, 42894 },
+       { 42897, 42897 },
+       { 42899, 42901 },
+       { 42903, 42903 },
+       { 42905, 42905 },
+       { 42907, 42907 },
+       { 42909, 42909 },
+       { 42911, 42911 },
+       { 42913, 42913 },
+       { 42915, 42915 },
+       { 42917, 42917 },
+       { 42919, 42919 },
+       { 42921, 42921 },
+       { 42927, 42927 },
+       { 42933, 42933 },
+       { 42935, 42935 },
+       { 42937, 42937 },
+       { 42939, 42939 },
+       { 42941, 42941 },
+       { 42943, 42943 },
+       { 42945, 42945 },
+       { 42947, 42947 },
+       { 42952, 42952 },
+       { 42954, 42954 },
+       { 42961, 42961 },
+       { 42963, 42963 },
+       { 42965, 42965 },
+       { 42967, 42967 },
+       { 42969, 42969 },
+       { 42998, 42998 },
+       { 43000, 43002 },
+       { 43824, 43866 },
+       { 43868, 43880 },
+       { 43888, 43967 },
+       { 64256, 64262 },
+       { 64275, 64279 },
+       { 65345, 65370 },
+       { 66600, 66639 },
+       { 66776, 66811 },
+       { 66967, 66977 },
+       { 66979, 66993 },
+       { 66995, 67001 },
+       { 67003, 67004 },
+       { 67456, 67456 },
+       { 67459, 67461 },
+       { 67463, 67504 },
+       { 67506, 67514 },
+       { 68800, 68850 },
+       { 71872, 71903 },
+       { 93792, 93823 },
+       { 119834, 119859 },
+       { 119886, 119892 },
+       { 119894, 119911 },
+       { 119938, 119963 },
+       { 119990, 119993 },
+       { 119995, 119995 },
+       { 119997, 120003 },
+       { 120005, 120015 },
+       { 120042, 120067 },
+       { 120094, 120119 },
+       { 120146, 120171 },
+       { 120198, 120223 },
+       { 120250, 120275 },
+       { 120302, 120327 },
+       { 120354, 120379 },
+       { 120406, 120431 },
+       { 120458, 120485 },
+       { 120514, 120538 },
+       { 120540, 120545 },
+       { 120572, 120596 },
+       { 120598, 120603 },
+       { 120630, 120654 },
+       { 120656, 120661 },
+       { 120688, 120712 },
+       { 120714, 120719 },
+       { 120746, 120770 },
+       { 120772, 120777 },
+       { 120779, 120779 },
+       { 122624, 122633 },
+       { 122635, 122654 },
+       { 125218, 125251 },
+       { -1, -1 },     // end markers
+};
+static const set_item print_graph_common[] = {
+       { 160, 887 },
+       { 890, 895 },
+       { 900, 906 },
+       { 908, 908 },
+       { 910, 929 },
+       { 931, 1327 },
+       { 1329, 1366 },
+       { 1369, 1418 },
+       { 1421, 1423 },
+       { 1425, 1479 },
+       { 1488, 1514 },
+       { 1519, 1524 },
+       { 1536, 1805 },
+       { 1807, 1866 },
+       { 1869, 1969 },
+       { 1984, 2042 },
+       { 2045, 2093 },
+       { 2096, 2110 },
+       { 2112, 2139 },
+       { 2142, 2142 },
+       { 2144, 2154 },
+       { 2160, 2190 },
+       { 2192, 2193 },
+       { 2200, 2435 },
+       { 2437, 2444 },
+       { 2447, 2448 },
+       { 2451, 2472 },
+       { 2474, 2480 },
+       { 2482, 2482 },
+       { 2486, 2489 },
+       { 2492, 2500 },
+       { 2503, 2504 },
+       { 2507, 2510 },
+       { 2519, 2519 },
+       { 2524, 2525 },
+       { 2527, 2531 },
+       { 2534, 2558 },
+       { 2561, 2563 },
+       { 2565, 2570 },
+       { 2575, 2576 },
+       { 2579, 2600 },
+       { 2602, 2608 },
+       { 2610, 2611 },
+       { 2613, 2614 },
+       { 2616, 2617 },
+       { 2620, 2620 },
+       { 2622, 2626 },
+       { 2631, 2632 },
+       { 2635, 2637 },
+       { 2641, 2641 },
+       { 2649, 2652 },
+       { 2654, 2654 },
+       { 2662, 2678 },
+       { 2689, 2691 },
+       { 2693, 2701 },
+       { 2703, 2705 },
+       { 2707, 2728 },
+       { 2730, 2736 },
+       { 2738, 2739 },
+       { 2741, 2745 },
+       { 2748, 2757 },
+       { 2759, 2761 },
+       { 2763, 2765 },
+       { 2768, 2768 },
+       { 2784, 2787 },
+       { 2790, 2801 },
+       { 2809, 2815 },
+       { 2817, 2819 },
+       { 2821, 2828 },
+       { 2831, 2832 },
+       { 2835, 2856 },
+       { 2858, 2864 },
+       { 2866, 2867 },
+       { 2869, 2873 },
+       { 2876, 2884 },
+       { 2887, 2888 },
+       { 2891, 2893 },
+       { 2901, 2903 },
+       { 2908, 2909 },
+       { 2911, 2915 },
+       { 2918, 2935 },
+       { 2946, 2947 },
+       { 2949, 2954 },
+       { 2958, 2960 },
+       { 2962, 2965 },
+       { 2969, 2970 },
+       { 2972, 2972 },
+       { 2974, 2975 },
+       { 2979, 2980 },
+       { 2984, 2986 },
+       { 2990, 3001 },
+       { 3006, 3010 },
+       { 3014, 3016 },
+       { 3018, 3021 },
+       { 3024, 3024 },
+       { 3031, 3031 },
+       { 3046, 3066 },
+       { 3072, 3084 },
+       { 3086, 3088 },
+       { 3090, 3112 },
+       { 3114, 3129 },
+       { 3132, 3140 },
+       { 3142, 3144 },
+       { 3146, 3149 },
+       { 3157, 3158 },
+       { 3160, 3162 },
+       { 3165, 3165 },
+       { 3168, 3171 },
+       { 3174, 3183 },
+       { 3191, 3212 },
+       { 3214, 3216 },
+       { 3218, 3240 },
+       { 3242, 3251 },
+       { 3253, 3257 },
+       { 3260, 3268 },
+       { 3270, 3272 },
+       { 3274, 3277 },
+       { 3285, 3286 },
+       { 3293, 3294 },
+       { 3296, 3299 },
+       { 3302, 3311 },
+       { 3313, 3314 },
+       { 3328, 3340 },
+       { 3342, 3344 },
+       { 3346, 3396 },
+       { 3398, 3400 },
+       { 3402, 3407 },
+       { 3412, 3427 },
+       { 3430, 3455 },
+       { 3457, 3459 },
+       { 3461, 3478 },
+       { 3482, 3505 },
+       { 3507, 3515 },
+       { 3517, 3517 },
+       { 3520, 3526 },
+       { 3530, 3530 },
+       { 3535, 3540 },
+       { 3542, 3542 },
+       { 3544, 3551 },
+       { 3558, 3567 },
+       { 3570, 3572 },
+       { 3585, 3642 },
+       { 3647, 3675 },
+       { 3713, 3714 },
+       { 3716, 3716 },
+       { 3718, 3722 },
+       { 3724, 3747 },
+       { 3749, 3749 },
+       { 3751, 3773 },
+       { 3776, 3780 },
+       { 3782, 3782 },
+       { 3784, 3789 },
+       { 3792, 3801 },
+       { 3804, 3807 },
+       { 3840, 3911 },
+       { 3913, 3948 },
+       { 3953, 3991 },
+       { 3993, 4028 },
+       { 4030, 4044 },
+       { 4046, 4058 },
+       { 4096, 4293 },
+       { 4295, 4295 },
+       { 4301, 4301 },
+       { 4304, 4680 },
+       { 4682, 4685 },
+       { 4688, 4694 },
+       { 4696, 4696 },
+       { 4698, 4701 },
+       { 4704, 4744 },
+       { 4746, 4749 },
+       { 4752, 4784 },
+       { 4786, 4789 },
+       { 4792, 4798 },
+       { 4800, 4800 },
+       { 4802, 4805 },
+       { 4808, 4822 },
+       { 4824, 4880 },
+       { 4882, 4885 },
+       { 4888, 4954 },
+       { 4957, 4988 },
+       { 4992, 5017 },
+       { 5024, 5109 },
+       { 5112, 5117 },
+       { 5792, 5880 },
+       { 5888, 5909 },
+       { 5919, 5942 },
+       { 5952, 5971 },
+       { 5984, 5996 },
+       { 5998, 6000 },
+       { 6002, 6003 },
+       { 6016, 6109 },
+       { 6112, 6121 },
+       { 6128, 6137 },
+       { 6144, 6169 },
+       { 6176, 6264 },
+       { 6272, 6314 },
+       { 6320, 6389 },
+       { 6400, 6430 },
+       { 6432, 6443 },
+       { 6448, 6459 },
+       { 6464, 6464 },
+       { 6468, 6509 },
+       { 6512, 6516 },
+       { 6528, 6571 },
+       { 6576, 6601 },
+       { 6608, 6618 },
+       { 6622, 6683 },
+       { 6686, 6750 },
+       { 6752, 6780 },
+       { 6783, 6793 },
+       { 6800, 6809 },
+       { 6816, 6829 },
+       { 6832, 6862 },
+       { 6912, 6988 },
+       { 6992, 7038 },
+       { 7040, 7155 },
+       { 7164, 7223 },
+       { 7227, 7241 },
+       { 7245, 7304 },
+       { 7312, 7354 },
+       { 7357, 7367 },
+       { 7376, 7418 },
+       { 7424, 7957 },
+       { 7960, 7965 },
+       { 7968, 8005 },
+       { 8008, 8013 },
+       { 8016, 8023 },
+       { 8025, 8025 },
+       { 8027, 8027 },
+       { 8029, 8029 },
+       { 8031, 8061 },
+       { 8064, 8116 },
+       { 8118, 8132 },
+       { 8134, 8147 },
+       { 8150, 8155 },
+       { 8157, 8175 },
+       { 8178, 8180 },
+       { 8182, 8190 },
+       { 8294, 8305 },
+       { 8308, 8334 },
+       { 8336, 8348 },
+       { 8352, 8384 },
+       { 8400, 8432 },
+       { 8448, 8587 },
+       { 8592, 9254 },
+       { 9280, 9290 },
+       { 9312, 11123 },
+       { 11126, 11157 },
+       { 11159, 11507 },
+       { 11513, 11557 },
+       { 11559, 11559 },
+       { 11565, 11565 },
+       { 11568, 11623 },
+       { 11631, 11632 },
+       { 11647, 11670 },
+       { 11680, 11686 },
+       { 11688, 11694 },
+       { 11696, 11702 },
+       { 11704, 11710 },
+       { 11712, 11718 },
+       { 11720, 11726 },
+       { 11728, 11734 },
+       { 11736, 11742 },
+       { 11744, 11869 },
+       { 11904, 11929 },
+       { 11931, 12019 },
+       { 12032, 12245 },
+       { 12272, 12283 },
+       { 12353, 12438 },
+       { 12441, 12543 },
+       { 12549, 12591 },
+       { 12593, 12686 },
+       { 12688, 12771 },
+       { 12784, 12830 },
+       { 12832, 42124 },
+       { 42128, 42182 },
+       { 42192, 42539 },
+       { 42560, 42743 },
+       { 42752, 42954 },
+       { 42960, 42961 },
+       { 42963, 42963 },
+       { 42965, 42969 },
+       { 42994, 43052 },
+       { 43056, 43065 },
+       { 43072, 43127 },
+       { 43136, 43205 },
+       { 43214, 43225 },
+       { 43232, 43347 },
+       { 43359, 43388 },
+       { 43392, 43469 },
+       { 43471, 43481 },
+       { 43486, 43518 },
+       { 43520, 43574 },
+       { 43584, 43597 },
+       { 43600, 43609 },
+       { 43612, 43714 },
+       { 43739, 43766 },
+       { 43777, 43782 },
+       { 43785, 43790 },
+       { 43793, 43798 },
+       { 43808, 43814 },
+       { 43816, 43822 },
+       { 43824, 43883 },
+       { 43888, 44013 },
+       { 44016, 44025 },
+       { 44032, 55203 },
+       { 55216, 55238 },
+       { 55243, 55291 },
+       { 57344, 64109 },
+       { 64112, 64217 },
+       { 64256, 64262 },
+       { 64275, 64279 },
+       { 64285, 64310 },
+       { 64312, 64316 },
+       { 64318, 64318 },
+       { 64320, 64321 },
+       { 64323, 64324 },
+       { 64326, 64450 },
+       { 64467, 64911 },
+       { 64914, 64967 },
+       { 64975, 64975 },
+       { 65008, 65049 },
+       { 65056, 65106 },
+       { 65108, 65126 },
+       { 65128, 65131 },
+       { 65136, 65140 },
+       { 65142, 65276 },
+       { 65279, 65279 },
+       { 65281, 65470 },
+       { 65474, 65479 },
+       { 65482, 65487 },
+       { 65490, 65495 },
+       { 65498, 65500 },
+       { 65504, 65510 },
+       { 65512, 65518 },
+       { 65529, 65533 },
+       { 65536, 65547 },
+       { 65549, 65574 },
+       { 65576, 65594 },
+       { 65596, 65597 },
+       { 65599, 65613 },
+       { 65616, 65629 },
+       { 65664, 65786 },
+       { 65792, 65794 },
+       { 65799, 65843 },
+       { 65847, 65934 },
+       { 65936, 65948 },
+       { 65952, 65952 },
+       { 66000, 66045 },
+       { 66176, 66204 },
+       { 66208, 66256 },
+       { 66272, 66299 },
+       { 66304, 66339 },
+       { 66349, 66378 },
+       { 66384, 66426 },
+       { 66432, 66461 },
+       { 66463, 66499 },
+       { 66504, 66517 },
+       { 66560, 66717 },
+       { 66720, 66729 },
+       { 66736, 66771 },
+       { 66776, 66811 },
+       { 66816, 66855 },
+       { 66864, 66915 },
+       { 66927, 66938 },
+       { 66940, 66954 },
+       { 66956, 66962 },
+       { 66964, 66965 },
+       { 66967, 66977 },
+       { 66979, 66993 },
+       { 66995, 67001 },
+       { 67003, 67004 },
+       { 67072, 67382 },
+       { 67392, 67413 },
+       { 67424, 67431 },
+       { 67456, 67461 },
+       { 67463, 67504 },
+       { 67506, 67514 },
+       { 67584, 67589 },
+       { 67592, 67592 },
+       { 67594, 67637 },
+       { 67639, 67640 },
+       { 67644, 67644 },
+       { 67647, 67669 },
+       { 67671, 67742 },
+       { 67751, 67759 },
+       { 67808, 67826 },
+       { 67828, 67829 },
+       { 67835, 67867 },
+       { 67871, 67897 },
+       { 67903, 67903 },
+       { 67968, 68023 },
+       { 68028, 68047 },
+       { 68050, 68099 },
+       { 68101, 68102 },
+       { 68108, 68115 },
+       { 68117, 68119 },
+       { 68121, 68149 },
+       { 68152, 68154 },
+       { 68159, 68168 },
+       { 68176, 68184 },
+       { 68192, 68255 },
+       { 68288, 68326 },
+       { 68331, 68342 },
+       { 68352, 68405 },
+       { 68409, 68437 },
+       { 68440, 68466 },
+       { 68472, 68497 },
+       { 68505, 68508 },
+       { 68521, 68527 },
+       { 68608, 68680 },
+       { 68736, 68786 },
+       { 68800, 68850 },
+       { 68858, 68903 },
+       { 68912, 68921 },
+       { 69216, 69246 },
+       { 69248, 69289 },
+       { 69291, 69293 },
+       { 69296, 69297 },
+       { 69376, 69415 },
+       { 69424, 69465 },
+       { 69488, 69513 },
+       { 69552, 69579 },
+       { 69600, 69622 },
+       { 69632, 69709 },
+       { 69714, 69749 },
+       { 69759, 69826 },
+       { 69837, 69837 },
+       { 69840, 69864 },
+       { 69872, 69881 },
+       { 69888, 69940 },
+       { 69942, 69959 },
+       { 69968, 70006 },
+       { 70016, 70111 },
+       { 70113, 70132 },
+       { 70144, 70161 },
+       { 70163, 70206 },
+       { 70272, 70278 },
+       { 70280, 70280 },
+       { 70282, 70285 },
+       { 70287, 70301 },
+       { 70303, 70313 },
+       { 70320, 70378 },
+       { 70384, 70393 },
+       { 70400, 70403 },
+       { 70405, 70412 },
+       { 70415, 70416 },
+       { 70419, 70440 },
+       { 70442, 70448 },
+       { 70450, 70451 },
+       { 70453, 70457 },
+       { 70459, 70468 },
+       { 70471, 70472 },
+       { 70475, 70477 },
+       { 70480, 70480 },
+       { 70487, 70487 },
+       { 70493, 70499 },
+       { 70502, 70508 },
+       { 70512, 70516 },
+       { 70656, 70747 },
+       { 70749, 70753 },
+       { 70784, 70855 },
+       { 70864, 70873 },
+       { 71040, 71093 },
+       { 71096, 71133 },
+       { 71168, 71236 },
+       { 71248, 71257 },
+       { 71264, 71276 },
+       { 71296, 71353 },
+       { 71360, 71369 },
+       { 71424, 71450 },
+       { 71453, 71467 },
+       { 71472, 71494 },
+       { 71680, 71739 },
+       { 71840, 71922 },
+       { 71935, 71942 },
+       { 71945, 71945 },
+       { 71948, 71955 },
+       { 71957, 71958 },
+       { 71960, 71989 },
+       { 71991, 71992 },
+       { 71995, 72006 },
+       { 72016, 72025 },
+       { 72096, 72103 },
+       { 72106, 72151 },
+       { 72154, 72164 },
+       { 72192, 72263 },
+       { 72272, 72354 },
+       { 72368, 72440 },
+       { 72704, 72712 },
+       { 72714, 72758 },
+       { 72760, 72773 },
+       { 72784, 72812 },
+       { 72816, 72847 },
+       { 72850, 72871 },
+       { 72873, 72886 },
+       { 72960, 72966 },
+       { 72968, 72969 },
+       { 72971, 73014 },
+       { 73018, 73018 },
+       { 73020, 73021 },
+       { 73023, 73031 },
+       { 73040, 73049 },
+       { 73056, 73061 },
+       { 73063, 73064 },
+       { 73066, 73102 },
+       { 73104, 73105 },
+       { 73107, 73112 },
+       { 73120, 73129 },
+       { 73440, 73464 },
+       { 73648, 73648 },
+       { 73664, 73713 },
+       { 73727, 74649 },
+       { 74752, 74862 },
+       { 74864, 74868 },
+       { 74880, 75075 },
+       { 77712, 77810 },
+       { 77824, 78894 },
+       { 78896, 78904 },
+       { 82944, 83526 },
+       { 92160, 92728 },
+       { 92736, 92766 },
+       { 92768, 92777 },
+       { 92782, 92862 },
+       { 92864, 92873 },
+       { 92880, 92909 },
+       { 92912, 92917 },
+       { 92928, 92997 },
+       { 93008, 93017 },
+       { 93019, 93025 },
+       { 93027, 93047 },
+       { 93053, 93071 },
+       { 93760, 93850 },
+       { 93952, 94026 },
+       { 94031, 94087 },
+       { 94095, 94111 },
+       { 94176, 94180 },
+       { 94192, 94193 },
+       { 94208, 100343 },
+       { 100352, 101589 },
+       { 101632, 101640 },
+       { 110576, 110579 },
+       { 110581, 110587 },
+       { 110589, 110590 },
+       { 110592, 110882 },
+       { 110928, 110930 },
+       { 110948, 110951 },
+       { 110960, 111355 },
+       { 113664, 113770 },
+       { 113776, 113788 },
+       { 113792, 113800 },
+       { 113808, 113817 },
+       { 113820, 113827 },
+       { 118528, 118573 },
+       { 118576, 118598 },
+       { 118608, 118723 },
+       { 118784, 119029 },
+       { 119040, 119078 },
+       { 119081, 119274 },
+       { 119296, 119365 },
+       { 119520, 119539 },
+       { 119552, 119638 },
+       { 119648, 119672 },
+       { 119808, 119892 },
+       { 119894, 119964 },
+       { 119966, 119967 },
+       { 119970, 119970 },
+       { 119973, 119974 },
+       { 119977, 119980 },
+       { 119982, 119993 },
+       { 119995, 119995 },
+       { 119997, 120003 },
+       { 120005, 120069 },
+       { 120071, 120074 },
+       { 120077, 120084 },
+       { 120086, 120092 },
+       { 120094, 120121 },
+       { 120123, 120126 },
+       { 120128, 120132 },
+       { 120134, 120134 },
+       { 120138, 120144 },
+       { 120146, 120485 },
+       { 120488, 120779 },
+       { 120782, 121483 },
+       { 121499, 121503 },
+       { 121505, 121519 },
+       { 122624, 122654 },
+       { 122880, 122886 },
+       { 122888, 122904 },
+       { 122907, 122913 },
+       { 122915, 122916 },
+       { 122918, 122922 },
+       { 123136, 123180 },
+       { 123184, 123197 },
+       { 123200, 123209 },
+       { 123214, 123215 },
+       { 123536, 123566 },
+       { 123584, 123641 },
+       { 123647, 123647 },
+       { 124896, 124902 },
+       { 124904, 124907 },
+       { 124909, 124910 },
+       { 124912, 124926 },
+       { 124928, 125124 },
+       { 125127, 125142 },
+       { 125184, 125259 },
+       { 125264, 125273 },
+       { 125278, 125279 },
+       { 126065, 126132 },
+       { 126209, 126269 },
+       { 126464, 126467 },
+       { 126469, 126495 },
+       { 126497, 126498 },
+       { 126500, 126500 },
+       { 126503, 126503 },
+       { 126505, 126514 },
+       { 126516, 126519 },
+       { 126521, 126521 },
+       { 126523, 126523 },
+       { 126530, 126530 },
+       { 126535, 126535 },
+       { 126537, 126537 },
+       { 126539, 126539 },
+       { 126541, 126543 },
+       { 126545, 126546 },
+       { 126548, 126548 },
+       { 126551, 126551 },
+       { 126553, 126553 },
+       { 126555, 126555 },
+       { 126557, 126557 },
+       { 126559, 126559 },
+       { 126561, 126562 },
+       { 126564, 126564 },
+       { 126567, 126570 },
+       { 126572, 126578 },
+       { 126580, 126583 },
+       { 126585, 126588 },
+       { 126590, 126590 },
+       { 126592, 126601 },
+       { 126603, 126619 },
+       { 126625, 126627 },
+       { 126629, 126633 },
+       { 126635, 126651 },
+       { 126704, 126705 },
+       { 126976, 127019 },
+       { 127024, 127123 },
+       { 127136, 127150 },
+       { 127153, 127167 },
+       { 127169, 127183 },
+       { 127185, 127221 },
+       { 127232, 127405 },
+       { 127462, 127490 },
+       { 127504, 127547 },
+       { 127552, 127560 },
+       { 127568, 127569 },
+       { 127584, 127589 },
+       { 127744, 128727 },
+       { 128733, 128748 },
+       { 128752, 128764 },
+       { 128768, 128883 },
+       { 128896, 128984 },
+       { 128992, 129003 },
+       { 129008, 129008 },
+       { 129024, 129035 },
+       { 129040, 129095 },
+       { 129104, 129113 },
+       { 129120, 129159 },
+       { 129168, 129197 },
+       { 129200, 129201 },
+       { 129280, 129619 },
+       { 129632, 129645 },
+       { 129648, 129652 },
+       { 129656, 129660 },
+       { 129664, 129670 },
+       { 129680, 129708 },
+       { 129712, 129722 },
+       { 129728, 129733 },
+       { 129744, 129753 },
+       { 129760, 129767 },
+       { 129776, 129782 },
+       { 129792, 129938 },
+       { 129940, 129994 },
+       { 130032, 130041 },
+       { 131072, 173791 },
+       { 173824, 177976 },
+       { 177984, 178205 },
+       { 178208, 183969 },
+       { 183984, 191456 },
+       { 194560, 195101 },
+       { 196608, 201546 },
+       { 917505, 917505 },
+       { 917536, 917631 },
+       { 917760, 917999 },
+       { 983040, 1048573 },
+       { 1048576, 1114109 },
+       { -1, -1 },     // end markers
+};
+static const set_item alpha[] = {      // ascending { start, end } ranges used for the "alpha" and "alnum" classes; list stops at { -1, -1 }
+       { 65, 90 },
+       { 97, 122 },
+       { 170, 170 },
+       { 181, 181 },
+       { 186, 186 },
+       { 192, 214 },
+       { 216, 246 },
+       { 248, 705 },
+       { 710, 721 },
+       { 736, 740 },
+       { 748, 748 },
+       { 750, 750 },
+       { 837, 837 },
+       { 880, 884 },
+       { 886, 887 },
+       { 890, 893 },
+       { 895, 895 },
+       { 902, 902 },
+       { 904, 906 },
+       { 908, 908 },
+       { 910, 929 },
+       { 931, 1013 },
+       { 1015, 1153 },
+       { 1162, 1327 },
+       { 1329, 1366 },
+       { 1369, 1369 },
+       { 1376, 1416 },
+       { 1456, 1469 },
+       { 1471, 1471 },
+       { 1473, 1474 },
+       { 1476, 1477 },
+       { 1479, 1479 },
+       { 1488, 1514 },
+       { 1519, 1522 },
+       { 1552, 1562 },
+       { 1568, 1623 },
+       { 1625, 1641 },
+       { 1646, 1747 },
+       { 1749, 1756 },
+       { 1761, 1768 },
+       { 1773, 1788 },
+       { 1791, 1791 },
+       { 1808, 1855 },
+       { 1869, 1969 },
+       { 1984, 2026 },
+       { 2036, 2037 },
+       { 2042, 2042 },
+       { 2048, 2071 },
+       { 2074, 2092 },
+       { 2112, 2136 },
+       { 2144, 2154 },
+       { 2160, 2183 },
+       { 2185, 2190 },
+       { 2208, 2249 },
+       { 2260, 2271 },
+       { 2275, 2281 },
+       { 2288, 2363 },
+       { 2365, 2380 },
+       { 2382, 2384 },
+       { 2389, 2403 },
+       { 2406, 2415 },
+       { 2417, 2435 },
+       { 2437, 2444 },
+       { 2447, 2448 },
+       { 2451, 2472 },
+       { 2474, 2480 },
+       { 2482, 2482 },
+       { 2486, 2489 },
+       { 2493, 2500 },
+       { 2503, 2504 },
+       { 2507, 2508 },
+       { 2510, 2510 },
+       { 2519, 2519 },
+       { 2524, 2525 },
+       { 2527, 2531 },
+       { 2534, 2545 },
+       { 2556, 2556 },
+       { 2561, 2563 },
+       { 2565, 2570 },
+       { 2575, 2576 },
+       { 2579, 2600 },
+       { 2602, 2608 },
+       { 2610, 2611 },
+       { 2613, 2614 },
+       { 2616, 2617 },
+       { 2622, 2626 },
+       { 2631, 2632 },
+       { 2635, 2636 },
+       { 2641, 2641 },
+       { 2649, 2652 },
+       { 2654, 2654 },
+       { 2662, 2677 },
+       { 2689, 2691 },
+       { 2693, 2701 },
+       { 2703, 2705 },
+       { 2707, 2728 },
+       { 2730, 2736 },
+       { 2738, 2739 },
+       { 2741, 2745 },
+       { 2749, 2757 },
+       { 2759, 2761 },
+       { 2763, 2764 },
+       { 2768, 2768 },
+       { 2784, 2787 },
+       { 2790, 2799 },
+       { 2809, 2812 },
+       { 2817, 2819 },
+       { 2821, 2828 },
+       { 2831, 2832 },
+       { 2835, 2856 },
+       { 2858, 2864 },
+       { 2866, 2867 },
+       { 2869, 2873 },
+       { 2877, 2884 },
+       { 2887, 2888 },
+       { 2891, 2892 },
+       { 2902, 2903 },
+       { 2908, 2909 },
+       { 2911, 2915 },
+       { 2918, 2927 },
+       { 2929, 2929 },
+       { 2946, 2947 },
+       { 2949, 2954 },
+       { 2958, 2960 },
+       { 2962, 2965 },
+       { 2969, 2970 },
+       { 2972, 2972 },
+       { 2974, 2975 },
+       { 2979, 2980 },
+       { 2984, 2986 },
+       { 2990, 3001 },
+       { 3006, 3010 },
+       { 3014, 3016 },
+       { 3018, 3020 },
+       { 3024, 3024 },
+       { 3031, 3031 },
+       { 3046, 3055 },
+       { 3072, 3075 },
+       { 3077, 3084 },
+       { 3086, 3088 },
+       { 3090, 3112 },
+       { 3114, 3129 },
+       { 3133, 3140 },
+       { 3142, 3144 },
+       { 3146, 3148 },
+       { 3157, 3158 },
+       { 3160, 3162 },
+       { 3165, 3165 },
+       { 3168, 3171 },
+       { 3174, 3183 },
+       { 3200, 3203 },
+       { 3205, 3212 },
+       { 3214, 3216 },
+       { 3218, 3240 },
+       { 3242, 3251 },
+       { 3253, 3257 },
+       { 3261, 3268 },
+       { 3270, 3272 },
+       { 3274, 3276 },
+       { 3285, 3286 },
+       { 3293, 3294 },
+       { 3296, 3299 },
+       { 3302, 3311 },
+       { 3313, 3314 },
+       { 3328, 3340 },
+       { 3342, 3344 },
+       { 3346, 3386 },
+       { 3389, 3396 },
+       { 3398, 3400 },
+       { 3402, 3404 },
+       { 3406, 3406 },
+       { 3412, 3415 },
+       { 3423, 3427 },
+       { 3430, 3439 },
+       { 3450, 3455 },
+       { 3457, 3459 },
+       { 3461, 3478 },
+       { 3482, 3505 },
+       { 3507, 3515 },
+       { 3517, 3517 },
+       { 3520, 3526 },
+       { 3535, 3540 },
+       { 3542, 3542 },
+       { 3544, 3551 },
+       { 3558, 3567 },
+       { 3570, 3571 },
+       { 3585, 3642 },
+       { 3648, 3654 },
+       { 3661, 3661 },
+       { 3664, 3673 },
+       { 3713, 3714 },
+       { 3716, 3716 },
+       { 3718, 3722 },
+       { 3724, 3747 },
+       { 3749, 3749 },
+       { 3751, 3769 },
+       { 3771, 3773 },
+       { 3776, 3780 },
+       { 3782, 3782 },
+       { 3789, 3789 },
+       { 3792, 3801 },
+       { 3804, 3807 },
+       { 3840, 3840 },
+       { 3872, 3881 },
+       { 3904, 3911 },
+       { 3913, 3948 },
+       { 3953, 3969 },
+       { 3976, 3991 },
+       { 3993, 4028 },
+       { 4096, 4150 },
+       { 4152, 4152 },
+       { 4155, 4169 },
+       { 4176, 4253 },
+       { 4256, 4293 },
+       { 4295, 4295 },
+       { 4301, 4301 },
+       { 4304, 4346 },
+       { 4348, 4680 },
+       { 4682, 4685 },
+       { 4688, 4694 },
+       { 4696, 4696 },
+       { 4698, 4701 },
+       { 4704, 4744 },
+       { 4746, 4749 },
+       { 4752, 4784 },
+       { 4786, 4789 },
+       { 4792, 4798 },
+       { 4800, 4800 },
+       { 4802, 4805 },
+       { 4808, 4822 },
+       { 4824, 4880 },
+       { 4882, 4885 },
+       { 4888, 4954 },
+       { 4992, 5007 },
+       { 5024, 5109 },
+       { 5112, 5117 },
+       { 5121, 5740 },
+       { 5743, 5759 },
+       { 5761, 5786 },
+       { 5792, 5866 },
+       { 5870, 5880 },
+       { 5888, 5907 },
+       { 5919, 5939 },
+       { 5952, 5971 },
+       { 5984, 5996 },
+       { 5998, 6000 },
+       { 6002, 6003 },
+       { 6016, 6067 },
+       { 6070, 6088 },
+       { 6103, 6103 },
+       { 6108, 6108 },
+       { 6112, 6121 },
+       { 6160, 6169 },
+       { 6176, 6264 },
+       { 6272, 6314 },
+       { 6320, 6389 },
+       { 6400, 6430 },
+       { 6432, 6443 },
+       { 6448, 6456 },
+       { 6470, 6509 },
+       { 6512, 6516 },
+       { 6528, 6571 },
+       { 6576, 6601 },
+       { 6608, 6617 },
+       { 6656, 6683 },
+       { 6688, 6750 },
+       { 6753, 6772 },
+       { 6784, 6793 },
+       { 6800, 6809 },
+       { 6823, 6823 },
+       { 6847, 6848 },
+       { 6860, 6862 },
+       { 6912, 6963 },
+       { 6965, 6979 },
+       { 6981, 6988 },
+       { 6992, 7001 },
+       { 7040, 7081 },
+       { 7084, 7141 },
+       { 7143, 7153 },
+       { 7168, 7222 },
+       { 7232, 7241 },
+       { 7245, 7293 },
+       { 7296, 7304 },
+       { 7312, 7354 },
+       { 7357, 7359 },
+       { 7401, 7404 },
+       { 7406, 7411 },
+       { 7413, 7414 },
+       { 7418, 7418 },
+       { 7424, 7615 },
+       { 7655, 7668 },
+       { 7680, 7957 },
+       { 7960, 7965 },
+       { 7968, 8005 },
+       { 8008, 8013 },
+       { 8016, 8023 },
+       { 8025, 8025 },
+       { 8027, 8027 },
+       { 8029, 8029 },
+       { 8031, 8061 },
+       { 8064, 8116 },
+       { 8118, 8124 },
+       { 8126, 8126 },
+       { 8130, 8132 },
+       { 8134, 8140 },
+       { 8144, 8147 },
+       { 8150, 8155 },
+       { 8160, 8172 },
+       { 8178, 8180 },
+       { 8182, 8188 },
+       { 8305, 8305 },
+       { 8319, 8319 },
+       { 8336, 8348 },
+       { 8450, 8450 },
+       { 8455, 8455 },
+       { 8458, 8467 },
+       { 8469, 8469 },
+       { 8473, 8477 },
+       { 8484, 8484 },
+       { 8486, 8486 },
+       { 8488, 8488 },
+       { 8490, 8493 },
+       { 8495, 8505 },
+       { 8508, 8511 },
+       { 8517, 8521 },
+       { 8526, 8526 },
+       { 8544, 8584 },
+       { 9398, 9449 },
+       { 11264, 11492 },
+       { 11499, 11502 },
+       { 11506, 11507 },
+       { 11520, 11557 },
+       { 11559, 11559 },
+       { 11565, 11565 },
+       { 11568, 11623 },
+       { 11631, 11631 },
+       { 11648, 11670 },
+       { 11680, 11686 },
+       { 11688, 11694 },
+       { 11696, 11702 },
+       { 11704, 11710 },
+       { 11712, 11718 },
+       { 11720, 11726 },
+       { 11728, 11734 },
+       { 11736, 11742 },
+       { 11744, 11775 },
+       { 11823, 11823 },
+       { 12293, 12295 },
+       { 12321, 12329 },
+       { 12337, 12341 },
+       { 12344, 12348 },
+       { 12353, 12438 },
+       { 12445, 12447 },
+       { 12449, 12538 },
+       { 12540, 12543 },
+       { 12549, 12591 },
+       { 12593, 12686 },
+       { 12704, 12735 },
+       { 12784, 12799 },
+       { 13312, 19903 },
+       { 19968, 42124 },
+       { 42192, 42237 },
+       { 42240, 42508 },
+       { 42512, 42539 },
+       { 42560, 42606 },
+       { 42612, 42619 },
+       { 42623, 42735 },
+       { 42775, 42783 },
+       { 42786, 42888 },
+       { 42891, 42954 },
+       { 42960, 42961 },
+       { 42963, 42963 },
+       { 42965, 42969 },
+       { 42994, 43013 },
+       { 43015, 43047 },
+       { 43072, 43123 },
+       { 43136, 43203 },
+       { 43205, 43205 },
+       { 43216, 43225 },
+       { 43250, 43255 },
+       { 43259, 43259 },
+       { 43261, 43306 },
+       { 43312, 43346 },
+       { 43360, 43388 },
+       { 43392, 43442 },
+       { 43444, 43455 },
+       { 43471, 43481 },
+       { 43488, 43518 },
+       { 43520, 43574 },
+       { 43584, 43597 },
+       { 43600, 43609 },
+       { 43616, 43638 },
+       { 43642, 43710 },
+       { 43712, 43712 },
+       { 43714, 43714 },
+       { 43739, 43741 },
+       { 43744, 43759 },
+       { 43762, 43765 },
+       { 43777, 43782 },
+       { 43785, 43790 },
+       { 43793, 43798 },
+       { 43808, 43814 },
+       { 43816, 43822 },
+       { 43824, 43866 },
+       { 43868, 43881 },
+       { 43888, 44010 },
+       { 44016, 44025 },
+       { 44032, 55203 },
+       { 55216, 55238 },
+       { 55243, 55291 },
+       { 63744, 64109 },
+       { 64112, 64217 },
+       { 64256, 64262 },
+       { 64275, 64279 },
+       { 64285, 64296 },
+       { 64298, 64310 },
+       { 64312, 64316 },
+       { 64318, 64318 },
+       { 64320, 64321 },
+       { 64323, 64324 },
+       { 64326, 64433 },
+       { 64467, 64829 },
+       { 64848, 64911 },
+       { 64914, 64967 },
+       { 65008, 65019 },
+       { 65136, 65140 },
+       { 65142, 65276 },
+       { 65296, 65305 },
+       { 65313, 65338 },
+       { 65345, 65370 },
+       { 65382, 65470 },
+       { 65474, 65479 },
+       { 65482, 65487 },
+       { 65490, 65495 },
+       { 65498, 65500 },
+       { 65536, 65547 },
+       { 65549, 65574 },
+       { 65576, 65594 },
+       { 65596, 65597 },
+       { 65599, 65613 },
+       { 65616, 65629 },
+       { 65664, 65786 },
+       { 65856, 65908 },
+       { 66176, 66204 },
+       { 66208, 66256 },
+       { 66304, 66335 },
+       { 66349, 66378 },
+       { 66384, 66426 },
+       { 66432, 66461 },
+       { 66464, 66499 },
+       { 66504, 66511 },
+       { 66513, 66517 },
+       { 66560, 66717 },
+       { 66720, 66729 },
+       { 66736, 66771 },
+       { 66776, 66811 },
+       { 66816, 66855 },
+       { 66864, 66915 },
+       { 66928, 66938 },
+       { 66940, 66954 },
+       { 66956, 66962 },
+       { 66964, 66965 },
+       { 66967, 66977 },
+       { 66979, 66993 },
+       { 66995, 67001 },
+       { 67003, 67004 },
+       { 67072, 67382 },
+       { 67392, 67413 },
+       { 67424, 67431 },
+       { 67456, 67461 },
+       { 67463, 67504 },
+       { 67506, 67514 },
+       { 67584, 67589 },
+       { 67592, 67592 },
+       { 67594, 67637 },
+       { 67639, 67640 },
+       { 67644, 67644 },
+       { 67647, 67669 },
+       { 67680, 67702 },
+       { 67712, 67742 },
+       { 67808, 67826 },
+       { 67828, 67829 },
+       { 67840, 67861 },
+       { 67872, 67897 },
+       { 67968, 68023 },
+       { 68030, 68031 },
+       { 68096, 68099 },
+       { 68101, 68102 },
+       { 68108, 68115 },
+       { 68117, 68119 },
+       { 68121, 68149 },
+       { 68192, 68220 },
+       { 68224, 68252 },
+       { 68288, 68295 },
+       { 68297, 68324 },
+       { 68352, 68405 },
+       { 68416, 68437 },
+       { 68448, 68466 },
+       { 68480, 68497 },
+       { 68608, 68680 },
+       { 68736, 68786 },
+       { 68800, 68850 },
+       { 68864, 68903 },
+       { 68912, 68921 },
+       { 69248, 69289 },
+       { 69291, 69292 },
+       { 69296, 69297 },
+       { 69376, 69404 },
+       { 69415, 69415 },
+       { 69424, 69445 },
+       { 69488, 69505 },
+       { 69552, 69572 },
+       { 69600, 69622 },
+       { 69632, 69701 },
+       { 69734, 69743 },
+       { 69745, 69749 },
+       { 69762, 69816 },
+       { 69826, 69826 },
+       { 69840, 69864 },
+       { 69872, 69881 },
+       { 69888, 69938 },
+       { 69942, 69951 },
+       { 69956, 69959 },
+       { 69968, 70002 },
+       { 70006, 70006 },
+       { 70016, 70079 },
+       { 70081, 70084 },
+       { 70094, 70106 },
+       { 70108, 70108 },
+       { 70144, 70161 },
+       { 70163, 70196 },
+       { 70199, 70199 },
+       { 70206, 70206 },
+       { 70272, 70278 },
+       { 70280, 70280 },
+       { 70282, 70285 },
+       { 70287, 70301 },
+       { 70303, 70312 },
+       { 70320, 70376 },
+       { 70384, 70393 },
+       { 70400, 70403 },
+       { 70405, 70412 },
+       { 70415, 70416 },
+       { 70419, 70440 },
+       { 70442, 70448 },
+       { 70450, 70451 },
+       { 70453, 70457 },
+       { 70461, 70468 },
+       { 70471, 70472 },
+       { 70475, 70476 },
+       { 70480, 70480 },
+       { 70487, 70487 },
+       { 70493, 70499 },
+       { 70656, 70721 },
+       { 70723, 70725 },
+       { 70727, 70730 },
+       { 70736, 70745 },
+       { 70751, 70753 },
+       { 70784, 70849 },
+       { 70852, 70853 },
+       { 70855, 70855 },
+       { 70864, 70873 },
+       { 71040, 71093 },
+       { 71096, 71102 },
+       { 71128, 71133 },
+       { 71168, 71230 },
+       { 71232, 71232 },
+       { 71236, 71236 },
+       { 71248, 71257 },
+       { 71296, 71349 },
+       { 71352, 71352 },
+       { 71360, 71369 },
+       { 71424, 71450 },
+       { 71453, 71466 },
+       { 71472, 71481 },
+       { 71488, 71494 },
+       { 71680, 71736 },
+       { 71840, 71913 },
+       { 71935, 71942 },
+       { 71945, 71945 },
+       { 71948, 71955 },
+       { 71957, 71958 },
+       { 71960, 71989 },
+       { 71991, 71992 },
+       { 71995, 71996 },
+       { 71999, 72002 },
+       { 72016, 72025 },
+       { 72096, 72103 },
+       { 72106, 72151 },
+       { 72154, 72159 },
+       { 72161, 72161 },
+       { 72163, 72164 },
+       { 72192, 72242 },
+       { 72245, 72254 },
+       { 72272, 72343 },
+       { 72349, 72349 },
+       { 72368, 72440 },
+       { 72704, 72712 },
+       { 72714, 72758 },
+       { 72760, 72766 },
+       { 72768, 72768 },
+       { 72784, 72793 },
+       { 72818, 72847 },
+       { 72850, 72871 },
+       { 72873, 72886 },
+       { 72960, 72966 },
+       { 72968, 72969 },
+       { 72971, 73014 },
+       { 73018, 73018 },
+       { 73020, 73021 },
+       { 73023, 73025 },
+       { 73027, 73027 },
+       { 73030, 73031 },
+       { 73040, 73049 },
+       { 73056, 73061 },
+       { 73063, 73064 },
+       { 73066, 73102 },
+       { 73104, 73105 },
+       { 73107, 73110 },
+       { 73112, 73112 },
+       { 73120, 73129 },
+       { 73440, 73462 },
+       { 73648, 73648 },
+       { 73728, 74649 },
+       { 74752, 74862 },
+       { 74880, 75075 },
+       { 77712, 77808 },
+       { 77824, 78894 },
+       { 82944, 83526 },
+       { 92160, 92728 },
+       { 92736, 92766 },
+       { 92768, 92777 },
+       { 92784, 92862 },
+       { 92864, 92873 },
+       { 92880, 92909 },
+       { 92928, 92975 },
+       { 92992, 92995 },
+       { 93008, 93017 },
+       { 93027, 93047 },
+       { 93053, 93071 },
+       { 93760, 93823 },
+       { 93952, 94026 },
+       { 94031, 94087 },
+       { 94095, 94111 },
+       { 94176, 94177 },
+       { 94179, 94179 },
+       { 94192, 94193 },
+       { 94208, 100343 },
+       { 100352, 101589 },
+       { 101632, 101640 },
+       { 110576, 110579 },
+       { 110581, 110587 },
+       { 110589, 110590 },
+       { 110592, 110882 },
+       { 110928, 110930 },
+       { 110948, 110951 },
+       { 110960, 111355 },
+       { 113664, 113770 },
+       { 113776, 113788 },
+       { 113792, 113800 },
+       { 113808, 113817 },
+       { 113822, 113822 },
+       { 119808, 119892 },
+       { 119894, 119964 },
+       { 119966, 119967 },
+       { 119970, 119970 },
+       { 119973, 119974 },
+       { 119977, 119980 },
+       { 119982, 119993 },
+       { 119995, 119995 },
+       { 119997, 120003 },
+       { 120005, 120069 },
+       { 120071, 120074 },
+       { 120077, 120084 },
+       { 120086, 120092 },
+       { 120094, 120121 },
+       { 120123, 120126 },
+       { 120128, 120132 },
+       { 120134, 120134 },
+       { 120138, 120144 },
+       { 120146, 120485 },
+       { 120488, 120512 },
+       { 120514, 120538 },
+       { 120540, 120570 },
+       { 120572, 120596 },
+       { 120598, 120628 },
+       { 120630, 120654 },
+       { 120656, 120686 },
+       { 120688, 120712 },
+       { 120714, 120744 },
+       { 120746, 120770 },
+       { 120772, 120779 },
+       { 120782, 120831 },
+       { 122624, 122654 },
+       { 122880, 122886 },
+       { 122888, 122904 },
+       { 122907, 122913 },
+       { 122915, 122916 },
+       { 122918, 122922 },
+       { 123136, 123180 },
+       { 123191, 123197 },
+       { 123200, 123209 },
+       { 123214, 123214 },
+       { 123536, 123565 },
+       { 123584, 123627 },
+       { 123632, 123641 },
+       { 124896, 124902 },
+       { 124904, 124907 },
+       { 124909, 124910 },
+       { 124912, 124926 },
+       { 124928, 125124 },
+       { 125184, 125251 },
+       { 125255, 125255 },
+       { 125259, 125259 },
+       { 125264, 125273 },
+       { 126464, 126467 },
+       { 126469, 126495 },
+       { 126497, 126498 },
+       { 126500, 126500 },
+       { 126503, 126503 },
+       { 126505, 126514 },
+       { 126516, 126519 },
+       { 126521, 126521 },
+       { 126523, 126523 },
+       { 126530, 126530 },
+       { 126535, 126535 },
+       { 126537, 126537 },
+       { 126539, 126539 },
+       { 126541, 126543 },
+       { 126545, 126546 },
+       { 126548, 126548 },
+       { 126551, 126551 },
+       { 126553, 126553 },
+       { 126555, 126555 },
+       { 126557, 126557 },
+       { 126559, 126559 },
+       { 126561, 126562 },
+       { 126564, 126564 },
+       { 126567, 126570 },
+       { 126572, 126578 },
+       { 126580, 126583 },
+       { 126585, 126588 },
+       { 126590, 126590 },
+       { 126592, 126601 },
+       { 126603, 126619 },
+       { 126625, 126627 },
+       { 126629, 126633 },
+       { 126635, 126651 },
+       { 127280, 127305 },
+       { 127312, 127337 },
+       { 127344, 127369 },
+       { 130032, 130041 },
+       { 131072, 173791 },
+       { 173824, 177976 },
+       { 177984, 178205 },
+       { 178208, 183969 },
+       { 183984, 191456 },
+       { 194560, 195101 },
+       { 196608, 201546 },
+       { -1, -1 },     // end markers
+       };
+/*
+ * class_data --- table of the named character classes.
+ *
+ * Each entry pairs a class name with its <ctype.h> predicate and with up
+ * to two static range tables; the second slot is NULL when one table is
+ * enough.  Every entry starts with wctype set to 0.  The table ends with
+ * an entry whose name is NULL.
+ */
+static struct _class_data {
+       const char *name;                               // e.g., "alpha"
+       int (*charcheckfunc)(int c);    // function pointer, e.g., isalpha
+       wctype_t        wctype;                         // from wctype("alpha")
+       const set_item  *data[2];               // alpha, NULL
+} class_data[] = {
+       { "alnum",      isalnum,        0,      { digit, alpha } },
+       { "alpha",      isalpha,        0,      { alpha, NULL } },
+       { "blank",      isblank,        0,      { blank, NULL } },
+       { "cntrl",      iscntrl,        0,      { cntrl, NULL } },
+       { "digit",      isdigit,        0,      { digit, NULL } },
+       { "graph",      isgraph,        0,
+               { print_graph_common, graph_additional } },
+       { "lower",      islower,        0,      { lower, NULL } },
+       { "print",      isprint,        0,
+               { print_graph_common, print_additional } },
+       { "punct",      ispunct,        0,      { punct, NULL } },
+       { "space",      isspace,        0,      { space, NULL } },
+       { "upper",      isupper,        0,      { upper, NULL } },
+       { "xdigit",     isxdigit,       0,      { xdigit, NULL } },
+       { NULL, NULL, 0, { NULL, NULL } },      // end marker
+};
+static struct _class_cache {
+       const char *name;       // cache key: "<locale>+<class>", malloc'ed by find_class_in_cache
+       charset_t *set; // the set stored for that key
+       struct _class_cache *next;      // linked list
+} *class_cache[53];    // buckets 0-51: key starts with an ASCII letter; bucket 52: anything else
+/* find_class --- search class data for a known character class */
+
+/*
+ * Returns the index of the class_data[] entry whose name equals cclass,
+ * or -1 when no entry matches.
+ */
+static int
+find_class(const char *cclass)
+{
+       const struct _class_data *entry;
+
+       // walk the table up to its NULL-name end marker
+       for (entry = class_data; entry->name != NULL; entry++) {
+               if (strcmp(entry->name, cclass) == 0)
+                       return (int) (entry - class_data);
+       }
+
+       return -1;
+}
+/* find_class_in_cache --- lookup and/or create locale + class in the table */
+
+/*
+ * The cache key is the string "<LC_CTYPE locale>+<class name>".  Entries
+ * live in class_cache[], bucketed by the key's first character: one bucket
+ * per ASCII letter, and bucket 52 for anything else.
+ *
+ * Cache hit:  *is_new is set to false and the cached set is returned.
+ * Cache miss: a set obtained from charset_create() is pushed onto the
+ *             front of its bucket and returned; *is_new stays true.
+ *
+ * On return *errcode is CSET_SUCCESS, unless an allocation failed, in
+ * which case it is CSET_ESPACE and the result is NULL.  On failure every
+ * temporary allocation is released and the cache is left unchanged.
+ */
+static charset_t *
+find_class_in_cache(charset_t *set, const char *cclass, int *errcode,
+               bool *is_new)
+{
+       assert(set != NULL && cclass != NULL && errcode != NULL
+                       && is_new != NULL);
+       *is_new = true;
+
+       const char *locale = setlocale(LC_CTYPE, NULL); // query locale
+       char *key = (char *) malloc(strlen(locale) + 1 + strlen(cclass) + 1);
+       if (key == NULL) {
+               *errcode = CSET_ESPACE;
+               return NULL;
+       }
+
+       // the buffer was sized for exactly this string
+       sprintf(key, "%s+%s", locale, cclass);
+
+       // choose the bucket from the first character of the key
+       static const char letters[] =
+               "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+       const char *cp = strchr(letters, key[0]);
+       int index = (cp != NULL) ? (int) (cp - letters) : 52;
+
+       // look for an existing entry on the bucket's chain
+       struct _class_cache *pcache = class_cache[index];
+       while (pcache != NULL && strcmp(pcache->name, key) != 0)
+               pcache = pcache->next;
+
+       if (pcache != NULL) {   // found it
+               free(key);
+               *is_new = false;
+               *errcode = CSET_SUCCESS;
+               return pcache->set;
+       }
+
+       // not cached yet: build a new entry
+       pcache = (struct _class_cache *) malloc(sizeof(struct _class_cache));
+       if (pcache == NULL) {
+               free(key);      // the key must not outlive a failed insert
+               *errcode = CSET_ESPACE;
+               return NULL;
+       }
+
+       charset_t *newset = charset_create(errcode, set->mb_cur_max,
+                       set->is_utf8);
+       if (newset == NULL) {
+               free(pcache);   // release the node as well as the key
+               free(key);
+               *errcode = CSET_ESPACE;
+               return NULL;
+       }
+
+       pcache->name = key;
+       pcache->set = newset;
+       pcache->next = class_cache[index];      // push onto front of the chain
+       class_cache[index] = pcache;
+
+       *errcode = CSET_SUCCESS;
+       return newset;
+}
+/*
+ * wide_char_range_loop --- loop over all possible characters to match a
+ * ctype, and cache it
+ */
+
+/*
+ * Fetches the cached set for the current locale + cclass, filling it on
+ * first use by testing each code point with iswctype(), then merges it
+ * into set.  Returns the error code from the failing step, or the result
+ * of charset_merge().
+ *
+ * NOTE(review): if charset_add_char() fails part way through, the
+ * partially filled set stays in the cache and later calls will treat it
+ * as complete --- confirm whether that needs handling.
+ */
+static int
+wide_char_range_loop(charset_t *set, const char *cclass, wctype_t ctype)
+{
+       // FIXME: Need to wrap this function in a mutex
+       int errcode = 0;
+       bool is_new = false;
+       charset_t *cached;
+
+       cached = find_class_in_cache(set, cclass, & errcode, & is_new);
+       if (cached == NULL)
+               return errcode;
+
+       if (! is_new)
+               return charset_merge(set, cached);      // cache hit
+
+       // first request for this locale + class: fill the new set
+       // NOTE(review): MAX_CODE_POINT itself is not tested --- confirm
+       int32_t c = 0;
+       while (c < MAX_CODE_POINT) {
+               if (iswctype(c, ctype)) {
+                       int ret = charset_add_char(cached, c);
+                       if (ret != CSET_SUCCESS)
+                               return ret;
+               }
+               c++;
+       }
+       charset_finalize(cached);
+
+       return charset_merge(set, cached);
+}
 /* item_compare_for_searching --- compare two set_items */
 
 static int
@@ -78,8 +3509,6 @@ item_compare_for_searching(const void *k, const void *e)
        set_item *thekey = (set_item *) k;
        set_item *elem = (set_item *) e;
 
-       assert(thekey->item_type == RANGE_ITEM && elem->item_type == 
RANGE_ITEM);
-
        if (elem->start <= thekey->start && thekey->start <= elem->end)
                return 0;       // found it
        else if (thekey->end < elem->start)
@@ -89,13 +3518,13 @@ item_compare_for_searching(const void *k, const void *e)
                return 1;
        }
 }
-/* wint_compare --- compare two wint values for qsort */
+/* int32_t_compare --- compare two int32_t values for qsort (ascending order) */
 
 static int
-wint_compare(const void *l, const void *r)
+int32_t_compare(const void *l, const void *r)
 {
-       wint_t *left = (wint_t *) l;
-       wint_t *right = (wint_t *) r;
+       int32_t *left = (int32_t *) l;  // both arguments point at int32_t elements
+       int32_t *right = (int32_t *) r; // NOTE(review): the result is *left - *right, which assumes the difference fits in an int
 
        return *left - *right;
 }
@@ -107,16 +3536,7 @@ item_compare_for_sorting(const void *l, const void *r)
        set_item *left = (set_item *) l;
        set_item *right = (set_item *) r;
 
-       if (left->item_type == CTYPE_ITEM && right->item_type == CTYPE_ITEM) {
-               return left->wtype - right->wtype;
-       } else if (left->item_type == CTYPE_ITEM && right->item_type == 
RANGE_ITEM) {
-               return -1;
-       } else if (left->item_type == RANGE_ITEM && right->item_type == 
CTYPE_ITEM) {
-               return +1;
-       } else {
-               assert(left->item_type == RANGE_ITEM && right->item_type == 
RANGE_ITEM);
-               return left->start - right->start;
-       }
+       return left->start - right->start;
 }
 
 /* is_found --- return true if the character is found */
@@ -124,48 +3544,48 @@ item_compare_for_sorting(const void *l, const void *r)
 static bool
 is_found(const charset_t *set, int32_t the_char)
 {
-       set_item *items = set->items;
-       int i;
-
        if (set->items == NULL)         // empty set, can't match
                return false;
-       if (set->nelems == 1 && set->items[0].item_type == RANGE_ITEM) {
+       if (set->nelems == 1)   // a single range: test it directly, no search needed
            return (set->items[0].start <= the_char && the_char <= 
set->items[0].end);
-       }
-       for (i = 0; i < set->nelems; i++) {
-               // linear search of ctype items
-               if (items[i].item_type == RANGE_ITEM)
-                       break;
-       
-               assert(items[i].item_type == CTYPE_ITEM);
-               if (iswctype(the_char, items[i].wtype))
-                       return true;
-       }
-       
-       if (i >= set->nelems)
-               return false;
-       assert(items[i].item_type == RANGE_ITEM);
-       
        // binary search to see if we have it
        set_item *found;
        set_item key;
-       key.item_type = RANGE_ITEM;
        key.start = key.end = the_char;
        
-       found = bsearch(& key, set->items + i, set->nelems - i,
+       size_t nelems = set->nelems;    // by default search every item
+       if (the_char < 128 && set->nelems8bit < set->nelems)    // below 128, only the first nelems8bit items are searched
+               nelems = set->nelems8bit;       // NOTE(review): presumably the leading items covering the low code points --- confirm where nelems8bit is set
+       
+       found = bsearch(& key, set->items, nelems,      // items are all ranges now, so the search starts at items[0]
                                        sizeof(set_item), 
item_compare_for_searching);
        
        return found != NULL;
 }
-/* finalize --- condense all the info into the final data structure */
+/* utfprefix --- return the correct UTF-8 first byte for the wide character */
 
-static void
-finalize(charset_t *set)
+static unsigned int
+utfprefix(int32_t wc)
+{
+       if (wc < 0x80)
+               return wc;      // values below 0x80 are returned unchanged
+       if (wc < 0x800)
+               return 0xC0 + (wc >> 6);        // two-byte form: 0xC0 plus the bits above the low 6
+       if (wc < 0x10000)
+               return 0xE0 + (wc >> 12);       // three-byte form: 0xE0 plus the bits above the low 12
+       if (wc < 0x100000)
+               return 0xF0 + (wc >> 18);       // four-byte form: yields 0xF0..0xF3
+       return 0xF4;    // everything from 0x100000 up gets lead byte 0xF4
+}
+/* charset_finalize --- condense all the info into the final data structure */
+
+void
+charset_finalize(charset_t *set)
 {
        assert(set != NULL);
        int result = 0;
 
-       qsort(set->chars, set->nchars_inuse, sizeof(wint_t), wint_compare);
+       qsort(set->chars, set->nchars_inuse, sizeof(int32_t), int32_t_compare);
        size_t i, j;
        for (i = 0, j = 1; j < set->nchars_inuse; i++, j++) {
                if (set->chars[i] == set->chars[j]) {
@@ -212,42 +3632,35 @@ finalize(charset_t *set)
        // condense it
        set_item *items = set->items;
        for (i = 0, j = 1; j < set->nelems; i++, j++) {
-               if (   items[i].item_type == CTYPE_ITEM
-                       && items[j].item_type == CTYPE_ITEM
-                       && items[i].wtype == items[j].wtype) {
-                       free((void *) items[j].type_name);
+               bool need_shift = false;
+               if (items[i].start == items[j].start && items[i].end == items[j].end) {
+                       need_shift = true;
+               } else if (items[i].end + 1 == items[j].start) {
+                       items[i].end = items[j].end;
+                       need_shift = true;
+               } else if (items[i].start < items[j].start && items[i].end > items[j].end) {
+                       need_shift = true;
+               } else if (   items[i].start <= items[j].start
+                          && items[i].end > items[j].start
+                          && items[j].end >= items[i].end) {
+                       items[i].end = items[j].end;
+                       need_shift = true;
+               }
+               if (need_shift) {
                        for (int k = j + 1; k < set->nelems; j++, k++)
                                items[j] = items[k];
                        
                        set->nelems--;
                        i--;    // compensate for loop, continue checking at current position
                        j = i + 1;
-               } else if (items[i].item_type != items[j].item_type) {
-                       continue;
-               } else if (items[i].item_type == RANGE_ITEM) {
-                       bool need_shift = false;
-                       if (items[i].start == items[j].start && items[i].end == items[j].end) {
-                               need_shift = true;
-                       } else if (items[i].end + 1 == items[j].start) {
-                               items[i].end = items[j].end;
-                               need_shift = true;
-                       } else if (items[i].start < items[j].start && items[i].end > items[j].end) {
-                               need_shift = true;
-                       } else if (   items[i].start <= items[j].start
-                                  && items[i].end > items[j].start
-                                  && items[j].end >= items[i].end) {
-                               items[i].end = items[j].end;
-                               need_shift = true;
-                       }
-                       if (need_shift) {
-                               for (int k = j + 1; k < set->nelems; j++, k++)
-                                       items[j] = items[k];
-                               
-                               set->nelems--;
-                               i--;    // compensate for loop, continue checking at current position
-                               j = i + 1;
-                       }
-                       // otherwise, just continue around the loop
+               }
+               // otherwise, just continue around the loop
+       }
+       set->nelems8bit = set->nelems;
+       for (int i = 0; i < set->nelems; i++) {
+               if (set->items[i].start >= 128 && i > 0) {
+                       set->nelems8bit = i;    // this is a count, not an index
+                       break;
                }
        }
        set->finalized = true;
@@ -255,7 +3668,7 @@ finalize(charset_t *set)
 /* charset_create --- make a new charset_t and initialize it */
 
 charset_t *
-charset_create(int *errcode)
+charset_create(int *errcode, int mb_cur_max, bool is_utf8)
 {
        if (errcode == NULL)
                return NULL;
@@ -267,6 +3680,8 @@ charset_create(int *errcode)
        }
 
        memset(set, 0, sizeof(charset_t));
+       set->mb_cur_max = mb_cur_max;
+       set->is_utf8 = is_utf8;
 
        *errcode = CSET_SUCCESS;
        return set;
@@ -341,25 +3756,71 @@ charset_add_range(charset_t *set, int32_t first, int32_t last)
        }
 
        set_item new_item;
-       new_item.item_type = RANGE_ITEM;
        new_item.start = first;
        new_item.end = last;
        set->items[set->nelems++] = new_item;
 
        return CSET_SUCCESS;
 }
-/* charset_invert --- mark charset to return success if requested character not found */
+/* charset_invert --- invert the ranges in set and return a new set */
 
-int
-charset_invert(charset_t *set)
+charset_t *
+charset_invert(charset_t *set, int *errcode)
 {
-       if (set == NULL)
-               return CSET_EBADPTR;
-       if (set->finalized)
-               return CSET_EFROZEN;
+       int ret = CSET_SUCCESS;
 
-       set->complemented = true;
-       return CSET_SUCCESS;
+       if (errcode == NULL)
+               return NULL;
+
+       if (set == NULL) {
+               *errcode = CSET_EBADPTR;
+               return NULL;
+       }
+
+       if (! set->finalized) {
+               charset_finalize(set);
+
+               if (! set->finalized) {
+                       *errcode = CSET_ESPACE; // make a guess
+                       return NULL;
+               }
+       }
+
+       charset_t *newset = charset_create(errcode, set->mb_cur_max, set->is_utf8);
+       if (newset == NULL)
+               return NULL;
+
+       newset->no_newlines = set->no_newlines;
+
+       if (set->nelems == 0) { // was empty
+               ret = charset_add_range(newset, 0, MAX_CODE_POINT);
+               if (ret == CSET_SUCCESS)
+                       goto done;
+               else
+                       goto fail;
+       }
+
+       int low = 0;
+
+       for (int i = 0; i < set->nelems; i++) {
+               if (low < set->items[i].start) {
+                       if ((ret = charset_add_range(newset, low, set->items[i].start - 1)) != CSET_SUCCESS)
+                               goto fail;
+               }
+               low = set->items[i].end + 1;
+       }
+       if (low <= MAX_CODE_POINT) {
+                       if ((ret = charset_add_range(newset, low, MAX_CODE_POINT)) != CSET_SUCCESS)
+                               goto fail;
+       }
+
+done:
+       *errcode = CSET_SUCCESS;
+       return newset;
+fail:
+       *errcode = ret;
+       charset_free(newset);
+       return NULL;
 }
 /* charset_set_no_newline --- set the value of the "no newlines" flag */
 
@@ -373,52 +3834,6 @@ int charset_set_no_newlines(charset_t *set, bool no_newlines)
        set->no_newlines = no_newlines;
        return CSET_SUCCESS;
 }
-/* charset_add_cclass --- add a character class, like "alnum" */
-
-int
-charset_add_cclass(charset_t *set, const char *cclass)
-{
-       if (set == NULL)
-               return CSET_EBADPTR;
-       if (set->finalized)
-               return CSET_EFROZEN;
-
-       if (set->items == NULL) {
-               set->items = (set_item *) malloc(sizeof(set_item) * INITIAL_ALLOCATION);
-               if (set->items == NULL)
-                       return CSET_ESPACE;
-       
-               set->allocated = INITIAL_ALLOCATION;
-               set->nelems = 0;
-       } else if (set->nelems + 1 >= set->allocated) {
-               int new_amount = set->allocated * 2;
-               set_item *new_data = (set_item *) realloc(set->items, new_amount * sizeof(set_item));
-       
-               if (new_data == NULL)
-                       return CSET_ESPACE;
-       
-               memset(new_data + set->allocated, 0, set->allocated * sizeof(set_item));
-               set->allocated = new_amount;
-               set->items = new_data;
-       }
-
-       wctype_t the_type = wctype(cclass);
-       if (the_type == 0)      // not a known class name
-               return CSET_ECTYPE;
-
-       const char *class_name = strdup(cclass);
-       if (class_name == NULL)
-               return CSET_ESPACE;
-
-       set_item new_item;
-       new_item.item_type = CTYPE_ITEM;
-
-       new_item.wtype = the_type;
-       new_item.type_name = class_name;
-       set->items[set->nelems++] = new_item;
-
-       return CSET_SUCCESS;
-}
 /* charset_add_equiv --- add an equivalence class */
 
 int
@@ -468,6 +3883,175 @@ charset_add_collate(charset_t *set, const int32_t *collate)
 
        return charset_add_char(set, collate[0]);
 }
+/* charset_add_cclass --- add a character class, like "alnum" */
+
+int
+charset_add_cclass(charset_t *set, const char *cclass)
+{
+       if (set == NULL)
+               return CSET_EBADPTR;
+       if (set->finalized)
+               return CSET_EFROZEN;
+
+       int index = find_class(cclass);
+
+       if (index == -1) {
+               if (set->mb_cur_max == 1)
+                       return CSET_ECTYPE;
+               else {
+                       // maybe it's locale-specific
+                       wctype_t ctype = wctype(cclass);        // look it up
+                       if (ctype == 0) // it's invalid
+                               return CSET_ECTYPE;
+                       
+                       // this saves the locale + cclass info for possible reuse
+                       return wide_char_range_loop(set, cclass, ctype);
+               }
+       }
+
+       // we have a standard cclass
+       if (set->mb_cur_max == 1) {
+               int (*charcheckfunc)(int c) = class_data[index].charcheckfunc;
+               for (int32_t i = 0; i < 256; i++) {
+                       if (charcheckfunc(i)) {
+                               int ret = charset_add_char(set, i);
+                               if (ret != CSET_SUCCESS)
+                                       return ret;
+                       }
+               }
+       } else if (set->is_utf8) {
+               const set_item *data[2];
+               
+               data[0] = class_data[index].data[0];
+               data[1] = class_data[index].data[1];
+               
+               for (int i = 0; i < 2; i++) {
+                       if (data[i] == NULL)
+                               break;
+                       for (int j = 0; data[i][j].start != -1; j++) {
+                               int ret = charset_add_range(set, data[i][j].start, data[i][j].end);
+                               if (ret != CSET_SUCCESS)
+                                       return ret;
+                       }
+               }
+       } else {
+               wctype_t ctype = class_data[index].wctype;
+               if (ctype == 0) {       // haven't checked it yet
+                       ctype = wctype(cclass);
+               
+                       if (ctype == 0) // bad class, should not happen for standard classes
+                               return CSET_ECTYPE;
+               }
+               
+               // all ok..
+               class_data[index].wctype = ctype;       // save for next time
+               // this saves the locale + cclass info for possible reuse
+               return wide_char_range_loop(set, cclass, ctype);
+       }
+
+       return CSET_SUCCESS;
+}
+/* charset_copy --- create a new charset that is copy of the original */
+
+charset_t *
+charset_copy(charset_t *set, int *errcode)
+{
+       if (errcode == NULL)
+               return NULL;
+
+       if (set == NULL) {
+               *errcode = CSET_EBADPTR;
+               return NULL;
+       }
+
+       charset_t *newset = charset_create(errcode, set->mb_cur_max, set->is_utf8);
+       if (newset == NULL)
+               return NULL;
+
+       *newset = *set;
+       if (newset->nchars_allocated > 0) {
+               newset->chars = (int32_t *) malloc(newset->nchars_allocated * sizeof(int32_t));
+               if (newset->chars == NULL) {
+                       *errcode = CSET_ESPACE;
+                       free((void *) newset);
+                       return NULL;
+               } else
+                       memcpy(newset->chars, set->chars, newset->nchars_allocated * sizeof(int32_t));
+       }
+
+       if (newset->allocated > 0) {
+               newset->items = (set_item *) malloc(newset->allocated * sizeof(set_item));
+               if (newset->items == NULL) {
+                       *errcode = CSET_ESPACE;
+                       free((void *) newset->chars);
+                       free((void *) newset);
+                       return NULL;
+               } else
+                       memcpy(newset->items, set->items, newset->allocated * sizeof(set_item));
+       }
+
+       *errcode = CSET_SUCCESS;
+       return newset;
+}
+int
+charset_merge(charset_t *dest, charset_t *src)
+{
+       charset_t *set = dest;
+       if (set == NULL)
+               return CSET_EBADPTR;
+       if (src == NULL)
+               return CSET_EBADPTR;
+       if (set->finalized)
+               return CSET_EFROZEN;
+
+       if (src->nelems == 0 && src->nchars_inuse == 0)
+               return CSET_SUCCESS;    // nothing to do
+
+       int new_char_count = set->nchars_inuse + src->nchars_inuse + 1;
+       
+       int32_t *new_chars = NULL;
+       if (new_char_count > 0) {
+               new_chars = (int32_t *) malloc(new_char_count * sizeof(int32_t));
+               if (new_chars == NULL)
+                       return CSET_ESPACE;
+       
+               // allocated the space ok, now copy all the stuff in
+               if (set->nchars_inuse > 0)
+                       memcpy(new_chars, set->chars, set->nchars_inuse * sizeof(int32_t));
+       
+               memcpy(new_chars + set->nchars_inuse, src->chars, src->nchars_inuse * sizeof(int32_t));
+               new_chars[new_char_count-1] = L'\0';
+       
+               // now update dest
+               if (set->chars != NULL)
+                       free((void *) set->chars);
+               set->chars = new_chars;
+               set->nchars_inuse = new_char_count - 1;
+               set->nchars_allocated = new_char_count;
+       }
+       int new_item_count = set->nelems + src->nelems;
+       // could only be copying characters when merging,
+       // so make sure there are items to copy.
+       if (new_item_count > 0) {
+               set_item *new_items = (set_item *) malloc(new_item_count * sizeof(set_item));
+               if (new_items == NULL)
+                       return CSET_ESPACE;
+       
+               // allocated the space ok, now copy all the stuff in
+               if (set->nelems > 0)
+                       memcpy(new_items, set->items, set->nelems * sizeof(set_item));
+       
+               memcpy(new_items + set->nelems, src->items, src->nelems * sizeof(set_item));
+       
+               // now update dest
+               if (set->items != NULL)
+                       free((void *) set->items);
+               set->items = new_items;
+               set->nelems = set->allocated = new_item_count;
+       }
+
+       return CSET_SUCCESS;
+}
 /* charset_in_set --- see if a character is in the set */
 
 bool
@@ -477,18 +4061,16 @@ charset_in_set(const charset_t *set, int32_t the_char)
                return false;
 
        if (! set->finalized) {
-               finalize((charset_t *) set);
+               charset_finalize((charset_t *) set);
 
                if (! set->finalized)   // finalize() failed
                        return false;
        }
 
-       if (the_char == L'\n' && set->no_newlines && set->complemented)
+       if (the_char == L'\n' && set->no_newlines)      // FIXME: is this still right?
                return false;
 
        bool found = is_found(set, the_char);
-       if (set->complemented)
-               found = ! found;                // reverse sense of the match
 
        return found;
 }
@@ -501,15 +4083,8 @@ charset_free(const charset_t *set)
                return CSET_EBADPTR;
        // no need to check for finalized
 
-       if (set->items != NULL) {
-               for (int i = 0; i < set->nelems; i++) {
-                       if (set->items[i].item_type == CTYPE_ITEM)
-                               free((void *) set->items[i].type_name);
-                       else
-                               break;
-               }
+       if (set->items != NULL)
                free((void *) set->items);
-       }
 
        if (set->chars != NULL)
                free((void *) set->chars);
@@ -518,10 +4093,61 @@ charset_free(const charset_t *set)
 
        return CSET_SUCCESS;
 }
+/* charset_firstbytes --- return the set of prefix bytes for the range */
+
+charset_firstbytes_t
+charset_firstbytes(charset_t *set, int *errcode)
+{
+       charset_firstbytes_t result;
+       memset(& result, 0, sizeof(result));
+
+       if (errcode == NULL)
+               goto done;
+
+       if (set == NULL) {
+               *errcode = CSET_EBADPTR;
+               goto done;
+       }
+
+       if (! set->finalized) {
+               charset_finalize(set);
+
+               if (! set->finalized) {
+                       *errcode = CSET_ESPACE; // guess...
+                       goto done;
+               }
+       }
+#define min(x, y) ((x) < (y) ? (x) : (y))
+
+       if (set->mb_cur_max == 1) {
+               for (int i = 0; i < set->nelems; i++) {
+                       if (set->items[i].start > 255)
+                               break;
+                       uint32_t low = set->items[i].start;
+                       uint32_t high = min(set->items[i].end, 255);
+
+                       for (int b = low; b <= high; b++)
+                               result.bytes[b] = true;
+               }
+       } else if (set->is_utf8) {
+               for (int i = 0; i < set->nelems; i++) {
+                       uint32_t low = utfprefix(set->items[i].start);
+                       uint32_t high = utfprefix(set->items[i].end);
+
+                       for (int b = low; b <= high; b++)
+                               result.bytes[b] = true;
+               }
+       }
+#undef min
+       *errcode = CSET_SUCCESS;
+
+done:
+       return result;
+}
 /* charset_dump --- dump out the data structures */
 
 void
-charset_dump(const charset_t *set, FILE *fp)
+charset_dump(const charset_t *set, FILE *fp, bool use_c_format)
 {
        static const char *boolval[] = {
                "false",
@@ -531,19 +4157,24 @@ charset_dump(const charset_t *set, FILE *fp)
        if (set == NULL || fp == NULL)
                return;
 
-       fprintf(fp, "complemented = %s\n", boolval[!! set->complemented]);
-       fprintf(fp, "no_newlines = %s\n", boolval[!! set->no_newlines]);
-       fprintf(fp, "finalized = %s\n", boolval[!! set->finalized]);
-
        set_item *items = set->items;
-       for (int i = 0; i < set->nelems; i++) {
-               if (items[i].item_type == CTYPE_ITEM) {
-                       fprintf(fp, "%3d. CTYPE: [:%s:]\n", i, items[i].type_name);
-                       continue;
+       if (use_c_format) {
+               for (int i = 0; i < set->nelems; i++) {
+                       fprintf(fp, "\t{ %d, %d },\n", items[i].start, items[i].end);
+               }
+       } else {
+               fprintf(fp, "no_newlines = %s\n", boolval[!! set->no_newlines]);
+               fprintf(fp, "finalized = %s\n", boolval[!! set->finalized]);
+               fprintf(fp, "is_utf8 = %s\n", boolval[!! set->is_utf8]);
+               fprintf(fp, "mb_cur_max = %d\n", set->mb_cur_max);
+               fprintf(fp, "nchars_inuse = %zd\n", set->nchars_inuse);
+               fprintf(fp, "nelems = %zd\n", set->nelems);
+               fprintf(fp, "nelems8bit = %zd\n", set->nelems8bit);
+               
+               for (int i = 0; i < set->nelems; i++) {
+                       fprintf(fp, "%3d. RANGE: start = L'%lc' (%d), end = L'%lc' (%d)\n",
+                               i, items[i].start, items[i].start, items[i].end, items[i].end);
                }
-               assert(items[i].item_type == RANGE_ITEM);
-               fprintf(fp, "%3d. RANGE: start = L'%lc', end = L'%lc'\n",
-                       i, items[i].start, items[i].end);
        }
        fflush(fp);
 }
diff --git a/support/charset.h b/support/charset.h
index 34d78d47..fa3533c1 100644
--- a/support/charset.h
+++ b/support/charset.h
@@ -2,7 +2,7 @@
 #define CHARSET_H 1
 
 /*
- * Copyright (C) 2023, 2024, Arnold David Robbins.
+ * Copyright (C) 2023, 2024, 2025, Arnold David Robbins.
  * 
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -48,17 +48,26 @@ enum {
        CSET_ESPACE,            // Corresponds to REG_ESPACE
        CSET_ERANGE,            // Corresponds to REG_ERANGE
 };
-charset_t *charset_create(int *errcode);
+charset_t *charset_create(int *errcode, int mb_cur_max, bool is_utf8);
 int charset_add_char(charset_t *set, int32_t wc);
 int charset_add_range(charset_t *set, int32_t first, int32_t last);
-int charset_invert(charset_t *set);
+charset_t *charset_invert(charset_t *set, int *errcode);
 int charset_set_no_newlines(charset_t *set, bool no_newlines);
-int charset_add_cclass(charset_t *set, const char *cclass);
 int charset_add_equiv(charset_t *set, int32_t equiv);
 int charset_add_collate(charset_t *set, const int32_t *collate);
+int charset_add_cclass(charset_t *set, const char *cclass);
+charset_t *charset_copy(charset_t *set, int *errcode);
+int charset_merge(charset_t *dest, charset_t *src);
 bool charset_in_set(const charset_t *set, int32_t the_char);
 int charset_free(const charset_t *set);
-void charset_dump(const charset_t *set, FILE *fp);
+#define MAX_FIRSTBYTES 256
+typedef struct {
+       bool bytes[MAX_FIRSTBYTES];
+} charset_firstbytes_t;
+
+charset_firstbytes_t charset_firstbytes(charset_t *set, int *errcode);
+void charset_dump(const charset_t *set, FILE *fp, bool use_c_format);
+void charset_finalize(charset_t *set);
 
 #ifdef __cplusplus
 }
diff --git a/support/minrx.cpp b/support/minrx.cpp
index af54e9ea..0d8b3826 100644
--- a/support/minrx.cpp
+++ b/support/minrx.cpp
@@ -46,6 +46,7 @@
 #include <string>
 #include <tuple>
 #include <vector>
+#define CHARSET        1
 #ifdef CHARSET
 #include <memory>
 #include "charset.h"
@@ -390,9 +391,9 @@ WConv &(WConv::*const WConv::nextfns[3])() = { &WConv::nextbyte, &WConv::nextmbt
 struct CSet {
 #ifdef CHARSET
        charset_t *charset = nullptr;
-       CSet() {
+       CSet(WConv::Encoding enc) {
                int errcode = 0;
-               charset = charset_create(& errcode);
+               charset = charset_create(& errcode, MB_CUR_MAX, enc == WConv::Encoding::UTF8);
                // FIXME: Throw error if charset == nullptr
        }
        CSet(const CSet &) = delete;
@@ -400,6 +401,10 @@ struct CSet {
        CSet(CSet &&cs): charset(cs.charset) { cs.charset = nullptr; }
        CSet &operator=(CSet &&cs) { charset = cs.charset; cs.charset = nullptr; return *this; }
        ~CSet() { if (charset) { charset_free(charset); charset = nullptr; } }
+       CSet &operator|=(const CSet &cs) {
+               charset_merge(charset, cs.charset);
+               return *this;
+       }
 #else
        static std::map<std::string, CSet> cclmemo;
        static std::mutex cclmutex;
@@ -416,10 +421,14 @@ struct CSet {
                        set(e.min, e.max);
                return *this;
        }
+       CSet(WConv::Encoding) { }
 #endif
        CSet &invert() {
 #ifdef CHARSET
-               charset_invert(charset); // FIXME: no error checking
+               int errcode = 0;
+               charset_t *newset = charset_invert(charset, &errcode); // FIXME: no error checking
+               charset_free(charset);
+               charset = newset;
 #else
                std::set<Range> nranges;
                WChar lo = 0;
@@ -692,6 +701,14 @@ struct CSet {
                };
                switch (e) {
                case WConv::Encoding::Byte:
+#ifdef CHARSET
+               {
+                       int errcode = 0;
+                       charset_firstbytes_t bytes = charset_firstbytes(charset, &errcode);
+                       for (int i = 0; i < MAX_FIRSTBYTES; i++)
+                               fb[i] = bytes.bytes[i];
+               }
+#else
                        for (const auto &r : ranges) {
                                if (r.min > 255)
                                        break;
@@ -699,13 +716,23 @@ struct CSet {
                                for (auto b = lo; b <= hi; b++)
                                        fb[b] = true;
                        }
+#endif
                        return {fb, firstunique(fb)};
                case WConv::Encoding::UTF8:
+#ifdef CHARSET
+               {
+                       int errcode = 0;
+                       charset_firstbytes_t bytes = charset_firstbytes(charset, &errcode);
+                       for (int i = 0; i < MAX_FIRSTBYTES; i++)
+                               fb[i] = bytes.bytes[i];
+               }
+#else
                        for (const auto &r : ranges) {
                                auto lo = utfprefix(r.min), hi = utfprefix(r.max);
                                for (auto b = lo; b <= hi; b++)
                                        fb[b] = true;
                        }
+#endif
                        return {fb, firstunique(fb)};
                default:
                        return {{}, {}};
@@ -990,7 +1017,7 @@ struct Compile {
                                        auto key = std::min(wc, std::min(wcl, wcu));
                                        if (icmap.find(key) == icmap.end()) {
                                                icmap.emplace(key, csets.size());
-                                               csets.emplace_back();
+                                               csets.emplace_back(enc);
                                                csets.back().set(wc);
                                                csets.back().set(wcl);
                                                csets.back().set(wcu);
@@ -1015,13 +1042,13 @@ struct Compile {
                case L'[':
                        lhmaxstk = nstk;
                        lhs.push_back({Node::CSet, {csets.size(), 0}, nstk});
-                       if (auto err = csets.emplace_back().parse(flags, enc, wconv))
+                       if (auto err = csets.emplace_back(enc).parse(flags, enc, wconv))
                                return {{}, 0, err};
                        break;
                case L'.':
                        if (!dot.has_value()) {
                                dot = csets.size();
-                               csets.emplace_back();
+                               csets.emplace_back(enc);
                                if ((flags & MINRX_REG_NEWLINE) != 0)
                                        csets.back().set(L'\n');
                                csets.back().invert();
@@ -1080,7 +1107,7 @@ struct Compile {
                                if (!esc_s.has_value()) {
                                        esc_s = csets.size();
                                        WConv wc(enc, "[[:space:]]");
-                                       csets.emplace_back().parse(flags, enc, wc.nextchr());
+                                       csets.emplace_back(enc).parse(flags, enc, wc.nextchr());
                                }
                                lhs.push_back({Node::CSet, {*esc_s, 0}, nstk});
                                break;
@@ -1090,7 +1117,7 @@ struct Compile {
                                if (!esc_S.has_value()) {
                                        esc_S = csets.size();
                                        WConv wc(enc, "[^[:space:]]");
-                                       csets.emplace_back().parse(flags, enc, wc.nextchr());
+                                       csets.emplace_back(enc).parse(flags, enc, wc.nextchr());
                                }
                                lhs.push_back({Node::CSet, {*esc_S, 0}, nstk});
                                break;
@@ -1100,7 +1127,7 @@ struct Compile {
                                if (!esc_w.has_value()) {
                                        esc_w = csets.size();
                                        WConv wc(enc, "[[:alnum:]_]");
-                                       csets.emplace_back().parse(flags, enc, wc.nextchr());
+                                       csets.emplace_back(enc).parse(flags, enc, wc.nextchr());
                                }
                                lhs.push_back({Node::CSet, {*esc_w, 0}, nstk});
                                break;
@@ -1110,7 +1137,7 @@ struct Compile {
                                if (!esc_W.has_value()) {
                                        esc_W = csets.size();
                                        WConv wc(enc, "[^[:alnum:]_]");
-                                       csets.emplace_back().parse(flags, enc, wc.nextchr());
+                                       csets.emplace_back(enc).parse(flags, enc, wc.nextchr());
                                }
                                lhs.push_back({Node::CSet, {*esc_W, 0}, nstk});
                                break;
@@ -1185,7 +1212,7 @@ struct Compile {
                                        break;
                                }
                } while (!epsq.empty());
-               CSet cs;
+               CSet cs(enc);
                while (!firsts.empty()) {
                        auto k = firsts.remove();
                        auto t = nodes[k].type;

-----------------------------------------------------------------------

Summary of changes:
 support/ChangeLog |    6 +
 support/charset.c | 3973 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 support/charset.h |   19 +-
 support/minrx.cpp |   49 +-
 4 files changed, 3860 insertions(+), 187 deletions(-)


hooks/post-receive
-- 
gawk



reply via email to

[Prev in Thread] Current Thread [Next in Thread]