[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[SCM] gawk branch, feature/minrx, updated. gawk-4.1.0-5956-g1847a420
From: |
Arnold Robbins |
Subject: |
[SCM] gawk branch, feature/minrx, updated. gawk-4.1.0-5956-g1847a420 |
Date: |
Mon, 10 Feb 2025 01:44:06 -0500 (EST) |
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".
The branch, feature/minrx has been updated
via 1847a420bba46a8082148161335e943d7cdf5867 (commit)
from e96b3ac2e2a6f8419b7868c581fad28effac2285 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=1847a420bba46a8082148161335e943d7cdf5867
commit 1847a420bba46a8082148161335e943d7cdf5867
Author: Arnold D. Robbins <arnold@skeeve.com>
Date: Mon Feb 10 08:43:41 2025 +0200
Updates to minrx and charset.
diff --git a/support/ChangeLog b/support/ChangeLog
index d055ae9e..d5959951 100644
--- a/support/ChangeLog
+++ b/support/ChangeLog
@@ -1,3 +1,9 @@
+2025-02-10 Arnold D. Robbins <arnold@skeeve.com>
+
+ * minrx.cpp: Updated with firstcset version that supports
+ charset.
+ * charset.h, charset.c: Updated.
+
2025-02-06 Arnold D. Robbins <arnold@skeeve.com>
* minrx.cpp: Update again. More speedups.
diff --git a/support/charset.c b/support/charset.c
index 41a4ebc6..5671bce6 100644
--- a/support/charset.c
+++ b/support/charset.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2023, 2024, Arnold David Robbins.
+ * Copyright (C) 2023, 2024, 2025, Arnold David Robbins.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -32,6 +32,7 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
+#include <ctype.h>
#include <wctype.h>
#include <wchar.h>
#include <locale.h>
@@ -41,35 +42,3465 @@
#define MAX_CODE_POINT 0x10ffff // max Unicode code point
typedef struct set_item {
- enum set_item_type {
- CTYPE_ITEM,
- RANGE_ITEM,
- } item_type;
- union {
- struct _ctype {
- wctype_t wtype;
- const char *type_name;
- } c;
- struct _range {
- int32_t start, end;
- } r;
- } u;
+ int32_t start, end; // first and last value of one range; a single value is written with start == end (e.g. { 9, 9 })
} set_item;
-#define wtype u.c.wtype
-#define type_name u.c.type_name
-#define start u.r.start
-#define end u.r.end
struct _charset {
- bool complemented; // For [^...] sets
bool no_newlines; // If \n can't be in the set
bool finalized; // No more changes possible
+ bool is_utf8; // True if using a UTF-8 character set
+ int mb_cur_max; // Value of MB_CUR_MAX when the charset was created
size_t nchars_inuse; // Number of characters used
size_t nchars_allocated; // Number of characters allocated
int32_t *chars; // Characters added to the set
size_t nelems; // Number of elements (items) in use
size_t allocated; // Number allocated
+ size_t nelems8bit; // Number of elements covering 0-255
set_item *items; // Array of items
};
+
+static const set_item digit[] = { // range table named "digit"; values are decimal character codes
+ { 48, 57 }, // '0'..'9'
+ { -1, -1 }, // end markers
+};
+static const set_item cntrl[] = { // range table named "cntrl"; values are decimal character codes
+ { 0, 31 }, // 0x00..0x1F
+ { 127, 159 }, // 0x7F..0x9F
+ { 8232, 8233 }, // 0x2028..0x2029
+ { -1, -1 }, // end markers
+};
+static const set_item xdigit[] = { // range table named "xdigit"; values are decimal character codes
+ { 48, 57 }, // '0'..'9'
+ { 65, 70 }, // 'A'..'F'
+ { 97, 102 }, // 'a'..'f'
+ { -1, -1 }, // end markers
+};
+static const set_item print_additional[] = { // NOTE(review): "additional" suggests these ranges supplement another source of printable characters - confirm against the code that reads this table
+ { 32, 126 }, // ' '..'~'
+ { 5120, 5788 }, // 0x1400..0x169C
+ { 8192, 8231 }, // 0x2000..0x2027
+ { 8234, 8292 }, // 0x202A..0x2064 (8232-8233 are listed in cntrl[] instead)
+ { 12288, 12351 }, // 0x3000..0x303F
+ { -1, -1 }, // end markers
+};
+static const set_item blank[] = { // range table named "blank"; values are decimal character codes
+ { 9, 9 }, // '\t'
+ { 32, 32 }, // ' '
+ { 5760, 5760 }, // 0x1680
+ { 8192, 8198 }, // 0x2000..0x2006
+ { 8200, 8202 }, // 0x2008..0x200A (8199 / 0x2007 is deliberately absent; it appears in graph_additional[])
+ { 8287, 8287 }, // 0x205F
+ { 12288, 12288 }, // 0x3000
+ { -1, -1 }, // end markers
+};
+static const set_item graph_additional[] = { // same coverage as print_additional[] minus the values listed in blank[]
+ { 33, 126 }, // '!'..'~' (32, the space, is left out)
+ { 5120, 5759 }, // 0x1400..0x167F
+ { 5761, 5788 }, // 0x1681..0x169C (skips 5760)
+ { 8199, 8199 }, // 0x2007
+ { 8203, 8231 }, // 0x200B..0x2027
+ { 8234, 8286 }, // 0x202A..0x205E (skips 8287)
+ { 8288, 8292 }, // 0x2060..0x2064
+ { 12289, 12351 }, // 0x3001..0x303F (skips 12288)
+ { -1, -1 }, // end markers
+};
+static const set_item space[] = { // range table named "space"; the blank[] entries plus 10-13 and 8232-8233
+ { 9, 13 }, // '\t', '\n', '\v', '\f', '\r'
+ { 32, 32 }, // ' '
+ { 5760, 5760 }, // 0x1680
+ { 8192, 8198 }, // 0x2000..0x2006
+ { 8200, 8202 }, // 0x2008..0x200A
+ { 8232, 8233 }, // 0x2028..0x2029 (also listed in cntrl[])
+ { 8287, 8287 }, // 0x205F
+ { 12288, 12288 }, // 0x3000
+ { -1, -1 }, // end markers
+};
+static const set_item punct[] = { // range table named "punct"; entries are in ascending order and end with { -1, -1 }
+ { 33, 47 }, // '!'..'/'
+ { 58, 64 }, // ':'..'@'
+ { 91, 96 }, // '['..'`'
+ { 123, 126 }, // '{'..'~'
+ { 160, 169 },
+ { 171, 180 },
+ { 182, 185 },
+ { 187, 191 },
+ { 215, 215 },
+ { 247, 247 },
+ { 706, 709 },
+ { 722, 735 },
+ { 741, 747 },
+ { 749, 749 },
+ { 751, 836 },
+ { 838, 879 },
+ { 885, 885 },
+ { 894, 894 },
+ { 900, 901 },
+ { 903, 903 },
+ { 1014, 1014 },
+ { 1154, 1161 },
+ { 1370, 1375 },
+ { 1417, 1418 },
+ { 1421, 1423 },
+ { 1425, 1455 },
+ { 1470, 1470 },
+ { 1472, 1472 },
+ { 1475, 1475 },
+ { 1478, 1478 },
+ { 1523, 1524 },
+ { 1536, 1551 },
+ { 1563, 1567 },
+ { 1624, 1624 },
+ { 1642, 1645 },
+ { 1748, 1748 },
+ { 1757, 1760 },
+ { 1769, 1772 },
+ { 1789, 1790 },
+ { 1792, 1805 },
+ { 1807, 1807 },
+ { 1856, 1866 },
+ { 2027, 2035 },
+ { 2038, 2041 },
+ { 2045, 2047 },
+ { 2072, 2073 },
+ { 2093, 2093 },
+ { 2096, 2110 },
+ { 2137, 2139 },
+ { 2142, 2142 },
+ { 2184, 2184 },
+ { 2192, 2193 },
+ { 2200, 2207 },
+ { 2250, 2259 },
+ { 2272, 2274 },
+ { 2282, 2287 },
+ { 2364, 2364 },
+ { 2381, 2381 },
+ { 2385, 2388 },
+ { 2404, 2405 },
+ { 2416, 2416 },
+ { 2492, 2492 },
+ { 2509, 2509 },
+ { 2546, 2555 },
+ { 2557, 2558 },
+ { 2620, 2620 },
+ { 2637, 2637 },
+ { 2678, 2678 },
+ { 2748, 2748 },
+ { 2765, 2765 },
+ { 2800, 2801 },
+ { 2813, 2815 },
+ { 2876, 2876 },
+ { 2893, 2893 },
+ { 2901, 2901 },
+ { 2928, 2928 },
+ { 2930, 2935 },
+ { 3021, 3021 },
+ { 3056, 3066 },
+ { 3076, 3076 },
+ { 3132, 3132 },
+ { 3149, 3149 },
+ { 3191, 3199 },
+ { 3204, 3204 },
+ { 3260, 3260 },
+ { 3277, 3277 },
+ { 3387, 3388 },
+ { 3405, 3405 },
+ { 3407, 3407 },
+ { 3416, 3422 },
+ { 3440, 3449 },
+ { 3530, 3530 },
+ { 3572, 3572 },
+ { 3647, 3647 },
+ { 3655, 3660 },
+ { 3662, 3663 },
+ { 3674, 3675 },
+ { 3770, 3770 },
+ { 3784, 3788 },
+ { 3841, 3871 },
+ { 3882, 3903 },
+ { 3970, 3975 },
+ { 4030, 4044 },
+ { 4046, 4058 },
+ { 4151, 4151 },
+ { 4153, 4154 },
+ { 4170, 4175 },
+ { 4254, 4255 },
+ { 4347, 4347 },
+ { 4957, 4988 },
+ { 5008, 5017 },
+ { 5120, 5120 },
+ { 5741, 5742 },
+ { 5787, 5788 },
+ { 5867, 5869 },
+ { 5908, 5909 },
+ { 5940, 5942 },
+ { 6068, 6069 },
+ { 6089, 6102 },
+ { 6104, 6107 },
+ { 6109, 6109 },
+ { 6128, 6137 },
+ { 6144, 6159 },
+ { 6457, 6459 },
+ { 6464, 6464 },
+ { 6468, 6469 },
+ { 6618, 6618 },
+ { 6622, 6655 },
+ { 6686, 6687 },
+ { 6752, 6752 },
+ { 6773, 6780 },
+ { 6783, 6783 },
+ { 6816, 6822 },
+ { 6824, 6829 },
+ { 6832, 6846 },
+ { 6849, 6859 },
+ { 6964, 6964 },
+ { 6980, 6980 },
+ { 7002, 7038 },
+ { 7082, 7083 },
+ { 7142, 7142 },
+ { 7154, 7155 },
+ { 7164, 7167 },
+ { 7223, 7223 },
+ { 7227, 7231 },
+ { 7294, 7295 },
+ { 7360, 7367 },
+ { 7376, 7400 },
+ { 7405, 7405 },
+ { 7412, 7412 },
+ { 7415, 7417 },
+ { 7616, 7654 },
+ { 7669, 7679 },
+ { 8125, 8125 },
+ { 8127, 8129 },
+ { 8141, 8143 },
+ { 8157, 8159 },
+ { 8173, 8175 },
+ { 8189, 8190 },
+ { 8199, 8199 },
+ { 8203, 8231 },
+ { 8234, 8286 },
+ { 8288, 8292 },
+ { 8294, 8304 },
+ { 8308, 8318 },
+ { 8320, 8334 },
+ { 8352, 8384 },
+ { 8400, 8432 },
+ { 8448, 8449 },
+ { 8451, 8454 },
+ { 8456, 8457 },
+ { 8468, 8468 },
+ { 8470, 8472 },
+ { 8478, 8483 },
+ { 8485, 8485 },
+ { 8487, 8487 },
+ { 8489, 8489 },
+ { 8494, 8494 },
+ { 8506, 8507 },
+ { 8512, 8516 },
+ { 8522, 8525 },
+ { 8527, 8543 },
+ { 8585, 8587 },
+ { 8592, 9254 },
+ { 9280, 9290 },
+ { 9312, 9397 },
+ { 9450, 11123 },
+ { 11126, 11157 },
+ { 11159, 11263 },
+ { 11493, 11498 },
+ { 11503, 11505 },
+ { 11513, 11519 },
+ { 11632, 11632 },
+ { 11647, 11647 },
+ { 11776, 11822 },
+ { 11824, 11869 },
+ { 11904, 11929 },
+ { 11931, 12019 },
+ { 12032, 12245 },
+ { 12272, 12283 },
+ { 12289, 12292 },
+ { 12296, 12320 },
+ { 12330, 12336 },
+ { 12342, 12343 },
+ { 12349, 12351 },
+ { 12441, 12444 },
+ { 12448, 12448 },
+ { 12539, 12539 },
+ { 12688, 12703 },
+ { 12736, 12771 },
+ { 12800, 12830 },
+ { 12832, 13311 },
+ { 19904, 19967 },
+ { 42128, 42182 },
+ { 42238, 42239 },
+ { 42509, 42511 },
+ { 42607, 42611 },
+ { 42620, 42622 },
+ { 42736, 42743 },
+ { 42752, 42774 },
+ { 42784, 42785 },
+ { 42889, 42890 },
+ { 43014, 43014 },
+ { 43048, 43052 },
+ { 43056, 43065 },
+ { 43124, 43127 },
+ { 43204, 43204 },
+ { 43214, 43215 },
+ { 43232, 43249 },
+ { 43256, 43258 },
+ { 43260, 43260 },
+ { 43307, 43311 },
+ { 43347, 43347 },
+ { 43359, 43359 },
+ { 43443, 43443 },
+ { 43456, 43469 },
+ { 43486, 43487 },
+ { 43612, 43615 },
+ { 43639, 43641 },
+ { 43711, 43711 },
+ { 43713, 43713 },
+ { 43742, 43743 },
+ { 43760, 43761 },
+ { 43766, 43766 },
+ { 43867, 43867 },
+ { 43882, 43883 },
+ { 44011, 44013 },
+ { 57344, 63743 }, // 0xE000..0xF8FF
+ { 64297, 64297 },
+ { 64434, 64450 },
+ { 64830, 64847 },
+ { 64975, 64975 },
+ { 65020, 65049 },
+ { 65056, 65106 },
+ { 65108, 65126 },
+ { 65128, 65131 },
+ { 65279, 65279 },
+ { 65281, 65295 },
+ { 65306, 65312 },
+ { 65339, 65344 },
+ { 65371, 65381 },
+ { 65504, 65510 },
+ { 65512, 65518 },
+ { 65529, 65533 },
+ { 65792, 65794 },
+ { 65799, 65843 },
+ { 65847, 65855 },
+ { 65909, 65934 },
+ { 65936, 65948 },
+ { 65952, 65952 },
+ { 66000, 66045 },
+ { 66272, 66299 },
+ { 66336, 66339 },
+ { 66463, 66463 },
+ { 66512, 66512 },
+ { 66927, 66927 },
+ { 67671, 67679 },
+ { 67703, 67711 },
+ { 67751, 67759 },
+ { 67835, 67839 },
+ { 67862, 67867 },
+ { 67871, 67871 },
+ { 67903, 67903 },
+ { 68028, 68029 },
+ { 68032, 68047 },
+ { 68050, 68095 },
+ { 68152, 68154 },
+ { 68159, 68168 },
+ { 68176, 68184 },
+ { 68221, 68223 },
+ { 68253, 68255 },
+ { 68296, 68296 },
+ { 68325, 68326 },
+ { 68331, 68342 },
+ { 68409, 68415 },
+ { 68440, 68447 },
+ { 68472, 68479 },
+ { 68505, 68508 },
+ { 68521, 68527 },
+ { 68858, 68863 },
+ { 69216, 69246 },
+ { 69293, 69293 },
+ { 69405, 69414 },
+ { 69446, 69465 },
+ { 69506, 69513 },
+ { 69573, 69579 },
+ { 69702, 69709 },
+ { 69714, 69733 },
+ { 69744, 69744 },
+ { 69759, 69761 },
+ { 69817, 69825 },
+ { 69837, 69837 },
+ { 69939, 69940 },
+ { 69952, 69955 },
+ { 70003, 70005 },
+ { 70080, 70080 },
+ { 70085, 70093 },
+ { 70107, 70107 },
+ { 70109, 70111 },
+ { 70113, 70132 },
+ { 70197, 70198 },
+ { 70200, 70205 },
+ { 70313, 70313 },
+ { 70377, 70378 },
+ { 70459, 70460 },
+ { 70477, 70477 },
+ { 70502, 70508 },
+ { 70512, 70516 },
+ { 70722, 70722 },
+ { 70726, 70726 },
+ { 70731, 70735 },
+ { 70746, 70747 },
+ { 70749, 70750 },
+ { 70850, 70851 },
+ { 70854, 70854 },
+ { 71103, 71127 },
+ { 71231, 71231 },
+ { 71233, 71235 },
+ { 71264, 71276 },
+ { 71350, 71351 },
+ { 71353, 71353 },
+ { 71467, 71467 },
+ { 71482, 71487 },
+ { 71737, 71739 },
+ { 71914, 71922 },
+ { 71997, 71998 },
+ { 72003, 72006 },
+ { 72160, 72160 },
+ { 72162, 72162 },
+ { 72243, 72244 },
+ { 72255, 72263 },
+ { 72344, 72348 },
+ { 72350, 72354 },
+ { 72767, 72767 },
+ { 72769, 72773 },
+ { 72794, 72812 },
+ { 72816, 72817 },
+ { 73026, 73026 },
+ { 73028, 73029 },
+ { 73111, 73111 },
+ { 73463, 73464 },
+ { 73664, 73713 },
+ { 73727, 73727 },
+ { 74864, 74868 },
+ { 77809, 77810 },
+ { 78896, 78904 },
+ { 92782, 92783 },
+ { 92912, 92917 },
+ { 92976, 92991 },
+ { 92996, 92997 },
+ { 93019, 93025 },
+ { 93824, 93850 },
+ { 94178, 94178 },
+ { 94180, 94180 },
+ { 113820, 113821 },
+ { 113823, 113827 },
+ { 118528, 118573 },
+ { 118576, 118598 },
+ { 118608, 118723 },
+ { 118784, 119029 },
+ { 119040, 119078 },
+ { 119081, 119274 },
+ { 119296, 119365 },
+ { 119520, 119539 },
+ { 119552, 119638 },
+ { 119648, 119672 },
+ { 120513, 120513 },
+ { 120539, 120539 },
+ { 120571, 120571 },
+ { 120597, 120597 },
+ { 120629, 120629 },
+ { 120655, 120655 },
+ { 120687, 120687 },
+ { 120713, 120713 },
+ { 120745, 120745 },
+ { 120771, 120771 },
+ { 120832, 121483 },
+ { 121499, 121503 },
+ { 121505, 121519 },
+ { 123184, 123190 },
+ { 123215, 123215 },
+ { 123566, 123566 },
+ { 123628, 123631 },
+ { 123647, 123647 },
+ { 125127, 125142 },
+ { 125252, 125254 },
+ { 125256, 125258 },
+ { 125278, 125279 },
+ { 126065, 126132 },
+ { 126209, 126269 },
+ { 126704, 126705 },
+ { 126976, 127019 },
+ { 127024, 127123 },
+ { 127136, 127150 },
+ { 127153, 127167 },
+ { 127169, 127183 },
+ { 127185, 127221 },
+ { 127232, 127279 },
+ { 127306, 127311 },
+ { 127338, 127343 },
+ { 127370, 127405 },
+ { 127462, 127490 },
+ { 127504, 127547 },
+ { 127552, 127560 },
+ { 127568, 127569 },
+ { 127584, 127589 },
+ { 127744, 128727 },
+ { 128733, 128748 },
+ { 128752, 128764 },
+ { 128768, 128883 },
+ { 128896, 128984 },
+ { 128992, 129003 },
+ { 129008, 129008 },
+ { 129024, 129035 },
+ { 129040, 129095 },
+ { 129104, 129113 },
+ { 129120, 129159 },
+ { 129168, 129197 },
+ { 129200, 129201 },
+ { 129280, 129619 },
+ { 129632, 129645 },
+ { 129648, 129652 },
+ { 129656, 129660 },
+ { 129664, 129670 },
+ { 129680, 129708 },
+ { 129712, 129722 },
+ { 129728, 129733 },
+ { 129744, 129753 },
+ { 129760, 129767 },
+ { 129776, 129782 },
+ { 129792, 129938 },
+ { 129940, 129994 },
+ { 917505, 917505 },
+ { 917536, 917631 },
+ { 917760, 917999 },
+ { 983040, 1048573 }, // 0xF0000..0xFFFFD
+ { 1048576, 1114109 }, // 0x100000..0x10FFFD
+ { -1, -1 }, // end markers
+};
+static const set_item upper[] = { // range table named "upper"; entries are in ascending order and end with { -1, -1 }
+ { 65, 90 }, // 'A'..'Z'
+ { 192, 214 }, // 0xC0..0xD6
+ { 216, 222 }, // 0xD8..0xDE (215 / 0xD7 is listed in punct[] instead)
+ { 256, 256 },
+ { 258, 258 },
+ { 260, 260 },
+ { 262, 262 },
+ { 264, 264 },
+ { 266, 266 },
+ { 268, 268 },
+ { 270, 270 },
+ { 272, 272 },
+ { 274, 274 },
+ { 276, 276 },
+ { 278, 278 },
+ { 280, 280 },
+ { 282, 282 },
+ { 284, 284 },
+ { 286, 286 },
+ { 288, 288 },
+ { 290, 290 },
+ { 292, 292 },
+ { 294, 294 },
+ { 296, 296 },
+ { 298, 298 },
+ { 300, 300 },
+ { 302, 302 },
+ { 304, 304 },
+ { 306, 306 },
+ { 308, 308 },
+ { 310, 310 },
+ { 313, 313 },
+ { 315, 315 },
+ { 317, 317 },
+ { 319, 319 },
+ { 321, 321 },
+ { 323, 323 },
+ { 325, 325 },
+ { 327, 327 },
+ { 330, 330 },
+ { 332, 332 },
+ { 334, 334 },
+ { 336, 336 },
+ { 338, 338 },
+ { 340, 340 },
+ { 342, 342 },
+ { 344, 344 },
+ { 346, 346 },
+ { 348, 348 },
+ { 350, 350 },
+ { 352, 352 },
+ { 354, 354 },
+ { 356, 356 },
+ { 358, 358 },
+ { 360, 360 },
+ { 362, 362 },
+ { 364, 364 },
+ { 366, 366 },
+ { 368, 368 },
+ { 370, 370 },
+ { 372, 372 },
+ { 374, 374 },
+ { 376, 377 },
+ { 379, 379 },
+ { 381, 381 },
+ { 385, 386 },
+ { 388, 388 },
+ { 390, 391 },
+ { 393, 395 },
+ { 398, 401 },
+ { 403, 404 },
+ { 406, 408 },
+ { 412, 413 },
+ { 415, 416 },
+ { 418, 418 },
+ { 420, 420 },
+ { 422, 423 },
+ { 425, 425 },
+ { 428, 428 },
+ { 430, 431 },
+ { 433, 435 },
+ { 437, 437 },
+ { 439, 440 },
+ { 444, 444 },
+ { 452, 453 },
+ { 455, 456 },
+ { 458, 459 },
+ { 461, 461 },
+ { 463, 463 },
+ { 465, 465 },
+ { 467, 467 },
+ { 469, 469 },
+ { 471, 471 },
+ { 473, 473 },
+ { 475, 475 },
+ { 478, 478 },
+ { 480, 480 },
+ { 482, 482 },
+ { 484, 484 },
+ { 486, 486 },
+ { 488, 488 },
+ { 490, 490 },
+ { 492, 492 },
+ { 494, 494 },
+ { 497, 498 },
+ { 500, 500 },
+ { 502, 504 },
+ { 506, 506 },
+ { 508, 508 },
+ { 510, 510 },
+ { 512, 512 },
+ { 514, 514 },
+ { 516, 516 },
+ { 518, 518 },
+ { 520, 520 },
+ { 522, 522 },
+ { 524, 524 },
+ { 526, 526 },
+ { 528, 528 },
+ { 530, 530 },
+ { 532, 532 },
+ { 534, 534 },
+ { 536, 536 },
+ { 538, 538 },
+ { 540, 540 },
+ { 542, 542 },
+ { 544, 544 },
+ { 546, 546 },
+ { 548, 548 },
+ { 550, 550 },
+ { 552, 552 },
+ { 554, 554 },
+ { 556, 556 },
+ { 558, 558 },
+ { 560, 560 },
+ { 562, 562 },
+ { 570, 571 },
+ { 573, 574 },
+ { 577, 577 },
+ { 579, 582 },
+ { 584, 584 },
+ { 586, 586 },
+ { 588, 588 },
+ { 590, 590 },
+ { 880, 880 },
+ { 882, 882 },
+ { 886, 886 },
+ { 895, 895 },
+ { 902, 902 },
+ { 904, 906 },
+ { 908, 908 },
+ { 910, 911 },
+ { 913, 929 },
+ { 931, 939 },
+ { 975, 975 },
+ { 978, 980 },
+ { 984, 984 },
+ { 986, 986 },
+ { 988, 988 },
+ { 990, 990 },
+ { 992, 992 },
+ { 994, 994 },
+ { 996, 996 },
+ { 998, 998 },
+ { 1000, 1000 },
+ { 1002, 1002 },
+ { 1004, 1004 },
+ { 1006, 1006 },
+ { 1012, 1012 },
+ { 1015, 1015 },
+ { 1017, 1018 },
+ { 1021, 1071 },
+ { 1120, 1120 },
+ { 1122, 1122 },
+ { 1124, 1124 },
+ { 1126, 1126 },
+ { 1128, 1128 },
+ { 1130, 1130 },
+ { 1132, 1132 },
+ { 1134, 1134 },
+ { 1136, 1136 },
+ { 1138, 1138 },
+ { 1140, 1140 },
+ { 1142, 1142 },
+ { 1144, 1144 },
+ { 1146, 1146 },
+ { 1148, 1148 },
+ { 1150, 1150 },
+ { 1152, 1152 },
+ { 1162, 1162 },
+ { 1164, 1164 },
+ { 1166, 1166 },
+ { 1168, 1168 },
+ { 1170, 1170 },
+ { 1172, 1172 },
+ { 1174, 1174 },
+ { 1176, 1176 },
+ { 1178, 1178 },
+ { 1180, 1180 },
+ { 1182, 1182 },
+ { 1184, 1184 },
+ { 1186, 1186 },
+ { 1188, 1188 },
+ { 1190, 1190 },
+ { 1192, 1192 },
+ { 1194, 1194 },
+ { 1196, 1196 },
+ { 1198, 1198 },
+ { 1200, 1200 },
+ { 1202, 1202 },
+ { 1204, 1204 },
+ { 1206, 1206 },
+ { 1208, 1208 },
+ { 1210, 1210 },
+ { 1212, 1212 },
+ { 1214, 1214 },
+ { 1216, 1217 },
+ { 1219, 1219 },
+ { 1221, 1221 },
+ { 1223, 1223 },
+ { 1225, 1225 },
+ { 1227, 1227 },
+ { 1229, 1229 },
+ { 1232, 1232 },
+ { 1234, 1234 },
+ { 1236, 1236 },
+ { 1238, 1238 },
+ { 1240, 1240 },
+ { 1242, 1242 },
+ { 1244, 1244 },
+ { 1246, 1246 },
+ { 1248, 1248 },
+ { 1250, 1250 },
+ { 1252, 1252 },
+ { 1254, 1254 },
+ { 1256, 1256 },
+ { 1258, 1258 },
+ { 1260, 1260 },
+ { 1262, 1262 },
+ { 1264, 1264 },
+ { 1266, 1266 },
+ { 1268, 1268 },
+ { 1270, 1270 },
+ { 1272, 1272 },
+ { 1274, 1274 },
+ { 1276, 1276 },
+ { 1278, 1278 },
+ { 1280, 1280 },
+ { 1282, 1282 },
+ { 1284, 1284 },
+ { 1286, 1286 },
+ { 1288, 1288 },
+ { 1290, 1290 },
+ { 1292, 1292 },
+ { 1294, 1294 },
+ { 1296, 1296 },
+ { 1298, 1298 },
+ { 1300, 1300 },
+ { 1302, 1302 },
+ { 1304, 1304 },
+ { 1306, 1306 },
+ { 1308, 1308 },
+ { 1310, 1310 },
+ { 1312, 1312 },
+ { 1314, 1314 },
+ { 1316, 1316 },
+ { 1318, 1318 },
+ { 1320, 1320 },
+ { 1322, 1322 },
+ { 1324, 1324 },
+ { 1326, 1326 },
+ { 1329, 1366 },
+ { 4256, 4293 },
+ { 4295, 4295 },
+ { 4301, 4301 },
+ { 5024, 5109 },
+ { 7312, 7354 },
+ { 7357, 7359 },
+ { 7680, 7680 },
+ { 7682, 7682 },
+ { 7684, 7684 },
+ { 7686, 7686 },
+ { 7688, 7688 },
+ { 7690, 7690 },
+ { 7692, 7692 },
+ { 7694, 7694 },
+ { 7696, 7696 },
+ { 7698, 7698 },
+ { 7700, 7700 },
+ { 7702, 7702 },
+ { 7704, 7704 },
+ { 7706, 7706 },
+ { 7708, 7708 },
+ { 7710, 7710 },
+ { 7712, 7712 },
+ { 7714, 7714 },
+ { 7716, 7716 },
+ { 7718, 7718 },
+ { 7720, 7720 },
+ { 7722, 7722 },
+ { 7724, 7724 },
+ { 7726, 7726 },
+ { 7728, 7728 },
+ { 7730, 7730 },
+ { 7732, 7732 },
+ { 7734, 7734 },
+ { 7736, 7736 },
+ { 7738, 7738 },
+ { 7740, 7740 },
+ { 7742, 7742 },
+ { 7744, 7744 },
+ { 7746, 7746 },
+ { 7748, 7748 },
+ { 7750, 7750 },
+ { 7752, 7752 },
+ { 7754, 7754 },
+ { 7756, 7756 },
+ { 7758, 7758 },
+ { 7760, 7760 },
+ { 7762, 7762 },
+ { 7764, 7764 },
+ { 7766, 7766 },
+ { 7768, 7768 },
+ { 7770, 7770 },
+ { 7772, 7772 },
+ { 7774, 7774 },
+ { 7776, 7776 },
+ { 7778, 7778 },
+ { 7780, 7780 },
+ { 7782, 7782 },
+ { 7784, 7784 },
+ { 7786, 7786 },
+ { 7788, 7788 },
+ { 7790, 7790 },
+ { 7792, 7792 },
+ { 7794, 7794 },
+ { 7796, 7796 },
+ { 7798, 7798 },
+ { 7800, 7800 },
+ { 7802, 7802 },
+ { 7804, 7804 },
+ { 7806, 7806 },
+ { 7808, 7808 },
+ { 7810, 7810 },
+ { 7812, 7812 },
+ { 7814, 7814 },
+ { 7816, 7816 },
+ { 7818, 7818 },
+ { 7820, 7820 },
+ { 7822, 7822 },
+ { 7824, 7824 },
+ { 7826, 7826 },
+ { 7828, 7828 },
+ { 7838, 7838 },
+ { 7840, 7840 },
+ { 7842, 7842 },
+ { 7844, 7844 },
+ { 7846, 7846 },
+ { 7848, 7848 },
+ { 7850, 7850 },
+ { 7852, 7852 },
+ { 7854, 7854 },
+ { 7856, 7856 },
+ { 7858, 7858 },
+ { 7860, 7860 },
+ { 7862, 7862 },
+ { 7864, 7864 },
+ { 7866, 7866 },
+ { 7868, 7868 },
+ { 7870, 7870 },
+ { 7872, 7872 },
+ { 7874, 7874 },
+ { 7876, 7876 },
+ { 7878, 7878 },
+ { 7880, 7880 },
+ { 7882, 7882 },
+ { 7884, 7884 },
+ { 7886, 7886 },
+ { 7888, 7888 },
+ { 7890, 7890 },
+ { 7892, 7892 },
+ { 7894, 7894 },
+ { 7896, 7896 },
+ { 7898, 7898 },
+ { 7900, 7900 },
+ { 7902, 7902 },
+ { 7904, 7904 },
+ { 7906, 7906 },
+ { 7908, 7908 },
+ { 7910, 7910 },
+ { 7912, 7912 },
+ { 7914, 7914 },
+ { 7916, 7916 },
+ { 7918, 7918 },
+ { 7920, 7920 },
+ { 7922, 7922 },
+ { 7924, 7924 },
+ { 7926, 7926 },
+ { 7928, 7928 },
+ { 7930, 7930 },
+ { 7932, 7932 },
+ { 7934, 7934 },
+ { 7944, 7951 },
+ { 7960, 7965 },
+ { 7976, 7983 },
+ { 7992, 7999 },
+ { 8008, 8013 },
+ { 8025, 8025 },
+ { 8027, 8027 },
+ { 8029, 8029 },
+ { 8031, 8031 },
+ { 8040, 8047 },
+ { 8072, 8079 },
+ { 8088, 8095 },
+ { 8104, 8111 },
+ { 8120, 8124 },
+ { 8136, 8140 },
+ { 8152, 8155 },
+ { 8168, 8172 },
+ { 8184, 8188 },
+ { 8450, 8450 },
+ { 8455, 8455 },
+ { 8459, 8461 },
+ { 8464, 8466 },
+ { 8469, 8469 },
+ { 8473, 8477 },
+ { 8484, 8484 },
+ { 8486, 8486 },
+ { 8488, 8488 },
+ { 8490, 8493 },
+ { 8496, 8499 },
+ { 8510, 8511 },
+ { 8517, 8517 },
+ { 8544, 8559 },
+ { 8579, 8579 },
+ { 9398, 9423 },
+ { 11264, 11311 },
+ { 11360, 11360 },
+ { 11362, 11364 },
+ { 11367, 11367 },
+ { 11369, 11369 },
+ { 11371, 11371 },
+ { 11373, 11376 },
+ { 11378, 11378 },
+ { 11381, 11381 },
+ { 11390, 11392 },
+ { 11394, 11394 },
+ { 11396, 11396 },
+ { 11398, 11398 },
+ { 11400, 11400 },
+ { 11402, 11402 },
+ { 11404, 11404 },
+ { 11406, 11406 },
+ { 11408, 11408 },
+ { 11410, 11410 },
+ { 11412, 11412 },
+ { 11414, 11414 },
+ { 11416, 11416 },
+ { 11418, 11418 },
+ { 11420, 11420 },
+ { 11422, 11422 },
+ { 11424, 11424 },
+ { 11426, 11426 },
+ { 11428, 11428 },
+ { 11430, 11430 },
+ { 11432, 11432 },
+ { 11434, 11434 },
+ { 11436, 11436 },
+ { 11438, 11438 },
+ { 11440, 11440 },
+ { 11442, 11442 },
+ { 11444, 11444 },
+ { 11446, 11446 },
+ { 11448, 11448 },
+ { 11450, 11450 },
+ { 11452, 11452 },
+ { 11454, 11454 },
+ { 11456, 11456 },
+ { 11458, 11458 },
+ { 11460, 11460 },
+ { 11462, 11462 },
+ { 11464, 11464 },
+ { 11466, 11466 },
+ { 11468, 11468 },
+ { 11470, 11470 },
+ { 11472, 11472 },
+ { 11474, 11474 },
+ { 11476, 11476 },
+ { 11478, 11478 },
+ { 11480, 11480 },
+ { 11482, 11482 },
+ { 11484, 11484 },
+ { 11486, 11486 },
+ { 11488, 11488 },
+ { 11490, 11490 },
+ { 11499, 11499 },
+ { 11501, 11501 },
+ { 11506, 11506 },
+ { 42560, 42560 },
+ { 42562, 42562 },
+ { 42564, 42564 },
+ { 42566, 42566 },
+ { 42568, 42568 },
+ { 42570, 42570 },
+ { 42572, 42572 },
+ { 42574, 42574 },
+ { 42576, 42576 },
+ { 42578, 42578 },
+ { 42580, 42580 },
+ { 42582, 42582 },
+ { 42584, 42584 },
+ { 42586, 42586 },
+ { 42588, 42588 },
+ { 42590, 42590 },
+ { 42592, 42592 },
+ { 42594, 42594 },
+ { 42596, 42596 },
+ { 42598, 42598 },
+ { 42600, 42600 },
+ { 42602, 42602 },
+ { 42604, 42604 },
+ { 42624, 42624 },
+ { 42626, 42626 },
+ { 42628, 42628 },
+ { 42630, 42630 },
+ { 42632, 42632 },
+ { 42634, 42634 },
+ { 42636, 42636 },
+ { 42638, 42638 },
+ { 42640, 42640 },
+ { 42642, 42642 },
+ { 42644, 42644 },
+ { 42646, 42646 },
+ { 42648, 42648 },
+ { 42650, 42650 },
+ { 42786, 42786 },
+ { 42788, 42788 },
+ { 42790, 42790 },
+ { 42792, 42792 },
+ { 42794, 42794 },
+ { 42796, 42796 },
+ { 42798, 42798 },
+ { 42802, 42802 },
+ { 42804, 42804 },
+ { 42806, 42806 },
+ { 42808, 42808 },
+ { 42810, 42810 },
+ { 42812, 42812 },
+ { 42814, 42814 },
+ { 42816, 42816 },
+ { 42818, 42818 },
+ { 42820, 42820 },
+ { 42822, 42822 },
+ { 42824, 42824 },
+ { 42826, 42826 },
+ { 42828, 42828 },
+ { 42830, 42830 },
+ { 42832, 42832 },
+ { 42834, 42834 },
+ { 42836, 42836 },
+ { 42838, 42838 },
+ { 42840, 42840 },
+ { 42842, 42842 },
+ { 42844, 42844 },
+ { 42846, 42846 },
+ { 42848, 42848 },
+ { 42850, 42850 },
+ { 42852, 42852 },
+ { 42854, 42854 },
+ { 42856, 42856 },
+ { 42858, 42858 },
+ { 42860, 42860 },
+ { 42862, 42862 },
+ { 42873, 42873 },
+ { 42875, 42875 },
+ { 42877, 42878 },
+ { 42880, 42880 },
+ { 42882, 42882 },
+ { 42884, 42884 },
+ { 42886, 42886 },
+ { 42891, 42891 },
+ { 42893, 42893 },
+ { 42896, 42896 },
+ { 42898, 42898 },
+ { 42902, 42902 },
+ { 42904, 42904 },
+ { 42906, 42906 },
+ { 42908, 42908 },
+ { 42910, 42910 },
+ { 42912, 42912 },
+ { 42914, 42914 },
+ { 42916, 42916 },
+ { 42918, 42918 },
+ { 42920, 42920 },
+ { 42922, 42926 },
+ { 42928, 42932 },
+ { 42934, 42934 },
+ { 42936, 42936 },
+ { 42938, 42938 },
+ { 42940, 42940 },
+ { 42942, 42942 },
+ { 42944, 42944 },
+ { 42946, 42946 },
+ { 42948, 42951 },
+ { 42953, 42953 },
+ { 42960, 42960 },
+ { 42966, 42966 },
+ { 42968, 42968 },
+ { 42997, 42997 },
+ { 65313, 65338 },
+ { 66560, 66599 },
+ { 66736, 66771 },
+ { 66928, 66938 },
+ { 66940, 66954 },
+ { 66956, 66962 },
+ { 66964, 66965 },
+ { 68736, 68786 },
+ { 71840, 71871 },
+ { 93760, 93791 },
+ { 119808, 119833 },
+ { 119860, 119885 },
+ { 119912, 119937 },
+ { 119964, 119964 },
+ { 119966, 119967 },
+ { 119970, 119970 },
+ { 119973, 119974 },
+ { 119977, 119980 },
+ { 119982, 119989 },
+ { 120016, 120041 },
+ { 120068, 120069 },
+ { 120071, 120074 },
+ { 120077, 120084 },
+ { 120086, 120092 },
+ { 120120, 120121 },
+ { 120123, 120126 },
+ { 120128, 120132 },
+ { 120134, 120134 },
+ { 120138, 120144 },
+ { 120172, 120197 },
+ { 120224, 120249 },
+ { 120276, 120301 },
+ { 120328, 120353 },
+ { 120380, 120405 },
+ { 120432, 120457 },
+ { 120488, 120512 },
+ { 120546, 120570 },
+ { 120604, 120628 },
+ { 120662, 120686 },
+ { 120720, 120744 },
+ { 120778, 120778 },
+ { 125184, 125217 },
+ { 127280, 127305 },
+ { 127312, 127337 },
+ { 127344, 127369 },
+ { -1, -1 }, // end markers
+};
+static const set_item lower[] = {
+ { 97, 122 },
+ { 170, 170 },
+ { 181, 181 },
+ { 186, 186 },
+ { 223, 246 },
+ { 248, 255 },
+ { 257, 257 },
+ { 259, 259 },
+ { 261, 261 },
+ { 263, 263 },
+ { 265, 265 },
+ { 267, 267 },
+ { 269, 269 },
+ { 271, 271 },
+ { 273, 273 },
+ { 275, 275 },
+ { 277, 277 },
+ { 279, 279 },
+ { 281, 281 },
+ { 283, 283 },
+ { 285, 285 },
+ { 287, 287 },
+ { 289, 289 },
+ { 291, 291 },
+ { 293, 293 },
+ { 295, 295 },
+ { 297, 297 },
+ { 299, 299 },
+ { 301, 301 },
+ { 303, 303 },
+ { 305, 305 },
+ { 307, 307 },
+ { 309, 309 },
+ { 311, 312 },
+ { 314, 314 },
+ { 316, 316 },
+ { 318, 318 },
+ { 320, 320 },
+ { 322, 322 },
+ { 324, 324 },
+ { 326, 326 },
+ { 328, 329 },
+ { 331, 331 },
+ { 333, 333 },
+ { 335, 335 },
+ { 337, 337 },
+ { 339, 339 },
+ { 341, 341 },
+ { 343, 343 },
+ { 345, 345 },
+ { 347, 347 },
+ { 349, 349 },
+ { 351, 351 },
+ { 353, 353 },
+ { 355, 355 },
+ { 357, 357 },
+ { 359, 359 },
+ { 361, 361 },
+ { 363, 363 },
+ { 365, 365 },
+ { 367, 367 },
+ { 369, 369 },
+ { 371, 371 },
+ { 373, 373 },
+ { 375, 375 },
+ { 378, 378 },
+ { 380, 380 },
+ { 382, 384 },
+ { 387, 387 },
+ { 389, 389 },
+ { 392, 392 },
+ { 396, 397 },
+ { 402, 402 },
+ { 405, 405 },
+ { 409, 411 },
+ { 414, 414 },
+ { 417, 417 },
+ { 419, 419 },
+ { 421, 421 },
+ { 424, 424 },
+ { 426, 427 },
+ { 429, 429 },
+ { 432, 432 },
+ { 436, 436 },
+ { 438, 438 },
+ { 441, 442 },
+ { 445, 447 },
+ { 453, 454 },
+ { 456, 457 },
+ { 459, 460 },
+ { 462, 462 },
+ { 464, 464 },
+ { 466, 466 },
+ { 468, 468 },
+ { 470, 470 },
+ { 472, 472 },
+ { 474, 474 },
+ { 476, 477 },
+ { 479, 479 },
+ { 481, 481 },
+ { 483, 483 },
+ { 485, 485 },
+ { 487, 487 },
+ { 489, 489 },
+ { 491, 491 },
+ { 493, 493 },
+ { 495, 496 },
+ { 498, 499 },
+ { 501, 501 },
+ { 505, 505 },
+ { 507, 507 },
+ { 509, 509 },
+ { 511, 511 },
+ { 513, 513 },
+ { 515, 515 },
+ { 517, 517 },
+ { 519, 519 },
+ { 521, 521 },
+ { 523, 523 },
+ { 525, 525 },
+ { 527, 527 },
+ { 529, 529 },
+ { 531, 531 },
+ { 533, 533 },
+ { 535, 535 },
+ { 537, 537 },
+ { 539, 539 },
+ { 541, 541 },
+ { 543, 543 },
+ { 545, 545 },
+ { 547, 547 },
+ { 549, 549 },
+ { 551, 551 },
+ { 553, 553 },
+ { 555, 555 },
+ { 557, 557 },
+ { 559, 559 },
+ { 561, 561 },
+ { 563, 569 },
+ { 572, 572 },
+ { 575, 576 },
+ { 578, 578 },
+ { 583, 583 },
+ { 585, 585 },
+ { 587, 587 },
+ { 589, 589 },
+ { 591, 659 },
+ { 661, 696 },
+ { 704, 705 },
+ { 736, 740 },
+ { 837, 837 },
+ { 881, 881 },
+ { 883, 883 },
+ { 887, 887 },
+ { 890, 893 },
+ { 912, 912 },
+ { 940, 974 },
+ { 976, 977 },
+ { 981, 983 },
+ { 985, 985 },
+ { 987, 987 },
+ { 989, 989 },
+ { 991, 991 },
+ { 993, 993 },
+ { 995, 995 },
+ { 997, 997 },
+ { 999, 999 },
+ { 1001, 1001 },
+ { 1003, 1003 },
+ { 1005, 1005 },
+ { 1007, 1011 },
+ { 1013, 1013 },
+ { 1016, 1016 },
+ { 1019, 1020 },
+ { 1072, 1119 },
+ { 1121, 1121 },
+ { 1123, 1123 },
+ { 1125, 1125 },
+ { 1127, 1127 },
+ { 1129, 1129 },
+ { 1131, 1131 },
+ { 1133, 1133 },
+ { 1135, 1135 },
+ { 1137, 1137 },
+ { 1139, 1139 },
+ { 1141, 1141 },
+ { 1143, 1143 },
+ { 1145, 1145 },
+ { 1147, 1147 },
+ { 1149, 1149 },
+ { 1151, 1151 },
+ { 1153, 1153 },
+ { 1163, 1163 },
+ { 1165, 1165 },
+ { 1167, 1167 },
+ { 1169, 1169 },
+ { 1171, 1171 },
+ { 1173, 1173 },
+ { 1175, 1175 },
+ { 1177, 1177 },
+ { 1179, 1179 },
+ { 1181, 1181 },
+ { 1183, 1183 },
+ { 1185, 1185 },
+ { 1187, 1187 },
+ { 1189, 1189 },
+ { 1191, 1191 },
+ { 1193, 1193 },
+ { 1195, 1195 },
+ { 1197, 1197 },
+ { 1199, 1199 },
+ { 1201, 1201 },
+ { 1203, 1203 },
+ { 1205, 1205 },
+ { 1207, 1207 },
+ { 1209, 1209 },
+ { 1211, 1211 },
+ { 1213, 1213 },
+ { 1215, 1215 },
+ { 1218, 1218 },
+ { 1220, 1220 },
+ { 1222, 1222 },
+ { 1224, 1224 },
+ { 1226, 1226 },
+ { 1228, 1228 },
+ { 1230, 1231 },
+ { 1233, 1233 },
+ { 1235, 1235 },
+ { 1237, 1237 },
+ { 1239, 1239 },
+ { 1241, 1241 },
+ { 1243, 1243 },
+ { 1245, 1245 },
+ { 1247, 1247 },
+ { 1249, 1249 },
+ { 1251, 1251 },
+ { 1253, 1253 },
+ { 1255, 1255 },
+ { 1257, 1257 },
+ { 1259, 1259 },
+ { 1261, 1261 },
+ { 1263, 1263 },
+ { 1265, 1265 },
+ { 1267, 1267 },
+ { 1269, 1269 },
+ { 1271, 1271 },
+ { 1273, 1273 },
+ { 1275, 1275 },
+ { 1277, 1277 },
+ { 1279, 1279 },
+ { 1281, 1281 },
+ { 1283, 1283 },
+ { 1285, 1285 },
+ { 1287, 1287 },
+ { 1289, 1289 },
+ { 1291, 1291 },
+ { 1293, 1293 },
+ { 1295, 1295 },
+ { 1297, 1297 },
+ { 1299, 1299 },
+ { 1301, 1301 },
+ { 1303, 1303 },
+ { 1305, 1305 },
+ { 1307, 1307 },
+ { 1309, 1309 },
+ { 1311, 1311 },
+ { 1313, 1313 },
+ { 1315, 1315 },
+ { 1317, 1317 },
+ { 1319, 1319 },
+ { 1321, 1321 },
+ { 1323, 1323 },
+ { 1325, 1325 },
+ { 1327, 1327 },
+ { 1376, 1416 },
+ { 4304, 4346 },
+ { 4349, 4351 },
+ { 5112, 5117 },
+ { 7296, 7304 },
+ { 7424, 7615 },
+ { 7681, 7681 },
+ { 7683, 7683 },
+ { 7685, 7685 },
+ { 7687, 7687 },
+ { 7689, 7689 },
+ { 7691, 7691 },
+ { 7693, 7693 },
+ { 7695, 7695 },
+ { 7697, 7697 },
+ { 7699, 7699 },
+ { 7701, 7701 },
+ { 7703, 7703 },
+ { 7705, 7705 },
+ { 7707, 7707 },
+ { 7709, 7709 },
+ { 7711, 7711 },
+ { 7713, 7713 },
+ { 7715, 7715 },
+ { 7717, 7717 },
+ { 7719, 7719 },
+ { 7721, 7721 },
+ { 7723, 7723 },
+ { 7725, 7725 },
+ { 7727, 7727 },
+ { 7729, 7729 },
+ { 7731, 7731 },
+ { 7733, 7733 },
+ { 7735, 7735 },
+ { 7737, 7737 },
+ { 7739, 7739 },
+ { 7741, 7741 },
+ { 7743, 7743 },
+ { 7745, 7745 },
+ { 7747, 7747 },
+ { 7749, 7749 },
+ { 7751, 7751 },
+ { 7753, 7753 },
+ { 7755, 7755 },
+ { 7757, 7757 },
+ { 7759, 7759 },
+ { 7761, 7761 },
+ { 7763, 7763 },
+ { 7765, 7765 },
+ { 7767, 7767 },
+ { 7769, 7769 },
+ { 7771, 7771 },
+ { 7773, 7773 },
+ { 7775, 7775 },
+ { 7777, 7777 },
+ { 7779, 7779 },
+ { 7781, 7781 },
+ { 7783, 7783 },
+ { 7785, 7785 },
+ { 7787, 7787 },
+ { 7789, 7789 },
+ { 7791, 7791 },
+ { 7793, 7793 },
+ { 7795, 7795 },
+ { 7797, 7797 },
+ { 7799, 7799 },
+ { 7801, 7801 },
+ { 7803, 7803 },
+ { 7805, 7805 },
+ { 7807, 7807 },
+ { 7809, 7809 },
+ { 7811, 7811 },
+ { 7813, 7813 },
+ { 7815, 7815 },
+ { 7817, 7817 },
+ { 7819, 7819 },
+ { 7821, 7821 },
+ { 7823, 7823 },
+ { 7825, 7825 },
+ { 7827, 7827 },
+ { 7829, 7837 },
+ { 7839, 7839 },
+ { 7841, 7841 },
+ { 7843, 7843 },
+ { 7845, 7845 },
+ { 7847, 7847 },
+ { 7849, 7849 },
+ { 7851, 7851 },
+ { 7853, 7853 },
+ { 7855, 7855 },
+ { 7857, 7857 },
+ { 7859, 7859 },
+ { 7861, 7861 },
+ { 7863, 7863 },
+ { 7865, 7865 },
+ { 7867, 7867 },
+ { 7869, 7869 },
+ { 7871, 7871 },
+ { 7873, 7873 },
+ { 7875, 7875 },
+ { 7877, 7877 },
+ { 7879, 7879 },
+ { 7881, 7881 },
+ { 7883, 7883 },
+ { 7885, 7885 },
+ { 7887, 7887 },
+ { 7889, 7889 },
+ { 7891, 7891 },
+ { 7893, 7893 },
+ { 7895, 7895 },
+ { 7897, 7897 },
+ { 7899, 7899 },
+ { 7901, 7901 },
+ { 7903, 7903 },
+ { 7905, 7905 },
+ { 7907, 7907 },
+ { 7909, 7909 },
+ { 7911, 7911 },
+ { 7913, 7913 },
+ { 7915, 7915 },
+ { 7917, 7917 },
+ { 7919, 7919 },
+ { 7921, 7921 },
+ { 7923, 7923 },
+ { 7925, 7925 },
+ { 7927, 7927 },
+ { 7929, 7929 },
+ { 7931, 7931 },
+ { 7933, 7933 },
+ { 7935, 7943 },
+ { 7952, 7957 },
+ { 7968, 7975 },
+ { 7984, 7991 },
+ { 8000, 8005 },
+ { 8016, 8023 },
+ { 8032, 8039 },
+ { 8048, 8061 },
+ { 8064, 8071 },
+ { 8080, 8087 },
+ { 8096, 8103 },
+ { 8112, 8116 },
+ { 8118, 8119 },
+ { 8126, 8126 },
+ { 8130, 8132 },
+ { 8134, 8135 },
+ { 8144, 8147 },
+ { 8150, 8151 },
+ { 8160, 8167 },
+ { 8178, 8180 },
+ { 8182, 8183 },
+ { 8305, 8305 },
+ { 8319, 8319 },
+ { 8336, 8348 },
+ { 8458, 8458 },
+ { 8462, 8463 },
+ { 8467, 8467 },
+ { 8495, 8495 },
+ { 8500, 8500 },
+ { 8505, 8505 },
+ { 8508, 8509 },
+ { 8518, 8521 },
+ { 8526, 8526 },
+ { 8560, 8575 },
+ { 8580, 8580 },
+ { 9424, 9449 },
+ { 11312, 11359 },
+ { 11361, 11361 },
+ { 11365, 11366 },
+ { 11368, 11368 },
+ { 11370, 11370 },
+ { 11372, 11372 },
+ { 11377, 11377 },
+ { 11379, 11380 },
+ { 11382, 11389 },
+ { 11393, 11393 },
+ { 11395, 11395 },
+ { 11397, 11397 },
+ { 11399, 11399 },
+ { 11401, 11401 },
+ { 11403, 11403 },
+ { 11405, 11405 },
+ { 11407, 11407 },
+ { 11409, 11409 },
+ { 11411, 11411 },
+ { 11413, 11413 },
+ { 11415, 11415 },
+ { 11417, 11417 },
+ { 11419, 11419 },
+ { 11421, 11421 },
+ { 11423, 11423 },
+ { 11425, 11425 },
+ { 11427, 11427 },
+ { 11429, 11429 },
+ { 11431, 11431 },
+ { 11433, 11433 },
+ { 11435, 11435 },
+ { 11437, 11437 },
+ { 11439, 11439 },
+ { 11441, 11441 },
+ { 11443, 11443 },
+ { 11445, 11445 },
+ { 11447, 11447 },
+ { 11449, 11449 },
+ { 11451, 11451 },
+ { 11453, 11453 },
+ { 11455, 11455 },
+ { 11457, 11457 },
+ { 11459, 11459 },
+ { 11461, 11461 },
+ { 11463, 11463 },
+ { 11465, 11465 },
+ { 11467, 11467 },
+ { 11469, 11469 },
+ { 11471, 11471 },
+ { 11473, 11473 },
+ { 11475, 11475 },
+ { 11477, 11477 },
+ { 11479, 11479 },
+ { 11481, 11481 },
+ { 11483, 11483 },
+ { 11485, 11485 },
+ { 11487, 11487 },
+ { 11489, 11489 },
+ { 11491, 11492 },
+ { 11500, 11500 },
+ { 11502, 11502 },
+ { 11507, 11507 },
+ { 11520, 11557 },
+ { 11559, 11559 },
+ { 11565, 11565 },
+ { 42561, 42561 },
+ { 42563, 42563 },
+ { 42565, 42565 },
+ { 42567, 42567 },
+ { 42569, 42569 },
+ { 42571, 42571 },
+ { 42573, 42573 },
+ { 42575, 42575 },
+ { 42577, 42577 },
+ { 42579, 42579 },
+ { 42581, 42581 },
+ { 42583, 42583 },
+ { 42585, 42585 },
+ { 42587, 42587 },
+ { 42589, 42589 },
+ { 42591, 42591 },
+ { 42593, 42593 },
+ { 42595, 42595 },
+ { 42597, 42597 },
+ { 42599, 42599 },
+ { 42601, 42601 },
+ { 42603, 42603 },
+ { 42605, 42605 },
+ { 42625, 42625 },
+ { 42627, 42627 },
+ { 42629, 42629 },
+ { 42631, 42631 },
+ { 42633, 42633 },
+ { 42635, 42635 },
+ { 42637, 42637 },
+ { 42639, 42639 },
+ { 42641, 42641 },
+ { 42643, 42643 },
+ { 42645, 42645 },
+ { 42647, 42647 },
+ { 42649, 42649 },
+ { 42651, 42653 },
+ { 42787, 42787 },
+ { 42789, 42789 },
+ { 42791, 42791 },
+ { 42793, 42793 },
+ { 42795, 42795 },
+ { 42797, 42797 },
+ { 42799, 42801 },
+ { 42803, 42803 },
+ { 42805, 42805 },
+ { 42807, 42807 },
+ { 42809, 42809 },
+ { 42811, 42811 },
+ { 42813, 42813 },
+ { 42815, 42815 },
+ { 42817, 42817 },
+ { 42819, 42819 },
+ { 42821, 42821 },
+ { 42823, 42823 },
+ { 42825, 42825 },
+ { 42827, 42827 },
+ { 42829, 42829 },
+ { 42831, 42831 },
+ { 42833, 42833 },
+ { 42835, 42835 },
+ { 42837, 42837 },
+ { 42839, 42839 },
+ { 42841, 42841 },
+ { 42843, 42843 },
+ { 42845, 42845 },
+ { 42847, 42847 },
+ { 42849, 42849 },
+ { 42851, 42851 },
+ { 42853, 42853 },
+ { 42855, 42855 },
+ { 42857, 42857 },
+ { 42859, 42859 },
+ { 42861, 42861 },
+ { 42863, 42872 },
+ { 42874, 42874 },
+ { 42876, 42876 },
+ { 42879, 42879 },
+ { 42881, 42881 },
+ { 42883, 42883 },
+ { 42885, 42885 },
+ { 42887, 42887 },
+ { 42892, 42892 },
+ { 42894, 42894 },
+ { 42897, 42897 },
+ { 42899, 42901 },
+ { 42903, 42903 },
+ { 42905, 42905 },
+ { 42907, 42907 },
+ { 42909, 42909 },
+ { 42911, 42911 },
+ { 42913, 42913 },
+ { 42915, 42915 },
+ { 42917, 42917 },
+ { 42919, 42919 },
+ { 42921, 42921 },
+ { 42927, 42927 },
+ { 42933, 42933 },
+ { 42935, 42935 },
+ { 42937, 42937 },
+ { 42939, 42939 },
+ { 42941, 42941 },
+ { 42943, 42943 },
+ { 42945, 42945 },
+ { 42947, 42947 },
+ { 42952, 42952 },
+ { 42954, 42954 },
+ { 42961, 42961 },
+ { 42963, 42963 },
+ { 42965, 42965 },
+ { 42967, 42967 },
+ { 42969, 42969 },
+ { 42998, 42998 },
+ { 43000, 43002 },
+ { 43824, 43866 },
+ { 43868, 43880 },
+ { 43888, 43967 },
+ { 64256, 64262 },
+ { 64275, 64279 },
+ { 65345, 65370 },
+ { 66600, 66639 },
+ { 66776, 66811 },
+ { 66967, 66977 },
+ { 66979, 66993 },
+ { 66995, 67001 },
+ { 67003, 67004 },
+ { 67456, 67456 },
+ { 67459, 67461 },
+ { 67463, 67504 },
+ { 67506, 67514 },
+ { 68800, 68850 },
+ { 71872, 71903 },
+ { 93792, 93823 },
+ { 119834, 119859 },
+ { 119886, 119892 },
+ { 119894, 119911 },
+ { 119938, 119963 },
+ { 119990, 119993 },
+ { 119995, 119995 },
+ { 119997, 120003 },
+ { 120005, 120015 },
+ { 120042, 120067 },
+ { 120094, 120119 },
+ { 120146, 120171 },
+ { 120198, 120223 },
+ { 120250, 120275 },
+ { 120302, 120327 },
+ { 120354, 120379 },
+ { 120406, 120431 },
+ { 120458, 120485 },
+ { 120514, 120538 },
+ { 120540, 120545 },
+ { 120572, 120596 },
+ { 120598, 120603 },
+ { 120630, 120654 },
+ { 120656, 120661 },
+ { 120688, 120712 },
+ { 120714, 120719 },
+ { 120746, 120770 },
+ { 120772, 120777 },
+ { 120779, 120779 },
+ { 122624, 122633 },
+ { 122635, 122654 },
+ { 125218, 125251 },
+ { -1, -1 }, // end markers
+};
+static const set_item print_graph_common[] = {
+ { 160, 887 },
+ { 890, 895 },
+ { 900, 906 },
+ { 908, 908 },
+ { 910, 929 },
+ { 931, 1327 },
+ { 1329, 1366 },
+ { 1369, 1418 },
+ { 1421, 1423 },
+ { 1425, 1479 },
+ { 1488, 1514 },
+ { 1519, 1524 },
+ { 1536, 1805 },
+ { 1807, 1866 },
+ { 1869, 1969 },
+ { 1984, 2042 },
+ { 2045, 2093 },
+ { 2096, 2110 },
+ { 2112, 2139 },
+ { 2142, 2142 },
+ { 2144, 2154 },
+ { 2160, 2190 },
+ { 2192, 2193 },
+ { 2200, 2435 },
+ { 2437, 2444 },
+ { 2447, 2448 },
+ { 2451, 2472 },
+ { 2474, 2480 },
+ { 2482, 2482 },
+ { 2486, 2489 },
+ { 2492, 2500 },
+ { 2503, 2504 },
+ { 2507, 2510 },
+ { 2519, 2519 },
+ { 2524, 2525 },
+ { 2527, 2531 },
+ { 2534, 2558 },
+ { 2561, 2563 },
+ { 2565, 2570 },
+ { 2575, 2576 },
+ { 2579, 2600 },
+ { 2602, 2608 },
+ { 2610, 2611 },
+ { 2613, 2614 },
+ { 2616, 2617 },
+ { 2620, 2620 },
+ { 2622, 2626 },
+ { 2631, 2632 },
+ { 2635, 2637 },
+ { 2641, 2641 },
+ { 2649, 2652 },
+ { 2654, 2654 },
+ { 2662, 2678 },
+ { 2689, 2691 },
+ { 2693, 2701 },
+ { 2703, 2705 },
+ { 2707, 2728 },
+ { 2730, 2736 },
+ { 2738, 2739 },
+ { 2741, 2745 },
+ { 2748, 2757 },
+ { 2759, 2761 },
+ { 2763, 2765 },
+ { 2768, 2768 },
+ { 2784, 2787 },
+ { 2790, 2801 },
+ { 2809, 2815 },
+ { 2817, 2819 },
+ { 2821, 2828 },
+ { 2831, 2832 },
+ { 2835, 2856 },
+ { 2858, 2864 },
+ { 2866, 2867 },
+ { 2869, 2873 },
+ { 2876, 2884 },
+ { 2887, 2888 },
+ { 2891, 2893 },
+ { 2901, 2903 },
+ { 2908, 2909 },
+ { 2911, 2915 },
+ { 2918, 2935 },
+ { 2946, 2947 },
+ { 2949, 2954 },
+ { 2958, 2960 },
+ { 2962, 2965 },
+ { 2969, 2970 },
+ { 2972, 2972 },
+ { 2974, 2975 },
+ { 2979, 2980 },
+ { 2984, 2986 },
+ { 2990, 3001 },
+ { 3006, 3010 },
+ { 3014, 3016 },
+ { 3018, 3021 },
+ { 3024, 3024 },
+ { 3031, 3031 },
+ { 3046, 3066 },
+ { 3072, 3084 },
+ { 3086, 3088 },
+ { 3090, 3112 },
+ { 3114, 3129 },
+ { 3132, 3140 },
+ { 3142, 3144 },
+ { 3146, 3149 },
+ { 3157, 3158 },
+ { 3160, 3162 },
+ { 3165, 3165 },
+ { 3168, 3171 },
+ { 3174, 3183 },
+ { 3191, 3212 },
+ { 3214, 3216 },
+ { 3218, 3240 },
+ { 3242, 3251 },
+ { 3253, 3257 },
+ { 3260, 3268 },
+ { 3270, 3272 },
+ { 3274, 3277 },
+ { 3285, 3286 },
+ { 3293, 3294 },
+ { 3296, 3299 },
+ { 3302, 3311 },
+ { 3313, 3314 },
+ { 3328, 3340 },
+ { 3342, 3344 },
+ { 3346, 3396 },
+ { 3398, 3400 },
+ { 3402, 3407 },
+ { 3412, 3427 },
+ { 3430, 3455 },
+ { 3457, 3459 },
+ { 3461, 3478 },
+ { 3482, 3505 },
+ { 3507, 3515 },
+ { 3517, 3517 },
+ { 3520, 3526 },
+ { 3530, 3530 },
+ { 3535, 3540 },
+ { 3542, 3542 },
+ { 3544, 3551 },
+ { 3558, 3567 },
+ { 3570, 3572 },
+ { 3585, 3642 },
+ { 3647, 3675 },
+ { 3713, 3714 },
+ { 3716, 3716 },
+ { 3718, 3722 },
+ { 3724, 3747 },
+ { 3749, 3749 },
+ { 3751, 3773 },
+ { 3776, 3780 },
+ { 3782, 3782 },
+ { 3784, 3789 },
+ { 3792, 3801 },
+ { 3804, 3807 },
+ { 3840, 3911 },
+ { 3913, 3948 },
+ { 3953, 3991 },
+ { 3993, 4028 },
+ { 4030, 4044 },
+ { 4046, 4058 },
+ { 4096, 4293 },
+ { 4295, 4295 },
+ { 4301, 4301 },
+ { 4304, 4680 },
+ { 4682, 4685 },
+ { 4688, 4694 },
+ { 4696, 4696 },
+ { 4698, 4701 },
+ { 4704, 4744 },
+ { 4746, 4749 },
+ { 4752, 4784 },
+ { 4786, 4789 },
+ { 4792, 4798 },
+ { 4800, 4800 },
+ { 4802, 4805 },
+ { 4808, 4822 },
+ { 4824, 4880 },
+ { 4882, 4885 },
+ { 4888, 4954 },
+ { 4957, 4988 },
+ { 4992, 5017 },
+ { 5024, 5109 },
+ { 5112, 5117 },
+ { 5792, 5880 },
+ { 5888, 5909 },
+ { 5919, 5942 },
+ { 5952, 5971 },
+ { 5984, 5996 },
+ { 5998, 6000 },
+ { 6002, 6003 },
+ { 6016, 6109 },
+ { 6112, 6121 },
+ { 6128, 6137 },
+ { 6144, 6169 },
+ { 6176, 6264 },
+ { 6272, 6314 },
+ { 6320, 6389 },
+ { 6400, 6430 },
+ { 6432, 6443 },
+ { 6448, 6459 },
+ { 6464, 6464 },
+ { 6468, 6509 },
+ { 6512, 6516 },
+ { 6528, 6571 },
+ { 6576, 6601 },
+ { 6608, 6618 },
+ { 6622, 6683 },
+ { 6686, 6750 },
+ { 6752, 6780 },
+ { 6783, 6793 },
+ { 6800, 6809 },
+ { 6816, 6829 },
+ { 6832, 6862 },
+ { 6912, 6988 },
+ { 6992, 7038 },
+ { 7040, 7155 },
+ { 7164, 7223 },
+ { 7227, 7241 },
+ { 7245, 7304 },
+ { 7312, 7354 },
+ { 7357, 7367 },
+ { 7376, 7418 },
+ { 7424, 7957 },
+ { 7960, 7965 },
+ { 7968, 8005 },
+ { 8008, 8013 },
+ { 8016, 8023 },
+ { 8025, 8025 },
+ { 8027, 8027 },
+ { 8029, 8029 },
+ { 8031, 8061 },
+ { 8064, 8116 },
+ { 8118, 8132 },
+ { 8134, 8147 },
+ { 8150, 8155 },
+ { 8157, 8175 },
+ { 8178, 8180 },
+ { 8182, 8190 },
+ { 8294, 8305 },
+ { 8308, 8334 },
+ { 8336, 8348 },
+ { 8352, 8384 },
+ { 8400, 8432 },
+ { 8448, 8587 },
+ { 8592, 9254 },
+ { 9280, 9290 },
+ { 9312, 11123 },
+ { 11126, 11157 },
+ { 11159, 11507 },
+ { 11513, 11557 },
+ { 11559, 11559 },
+ { 11565, 11565 },
+ { 11568, 11623 },
+ { 11631, 11632 },
+ { 11647, 11670 },
+ { 11680, 11686 },
+ { 11688, 11694 },
+ { 11696, 11702 },
+ { 11704, 11710 },
+ { 11712, 11718 },
+ { 11720, 11726 },
+ { 11728, 11734 },
+ { 11736, 11742 },
+ { 11744, 11869 },
+ { 11904, 11929 },
+ { 11931, 12019 },
+ { 12032, 12245 },
+ { 12272, 12283 },
+ { 12353, 12438 },
+ { 12441, 12543 },
+ { 12549, 12591 },
+ { 12593, 12686 },
+ { 12688, 12771 },
+ { 12784, 12830 },
+ { 12832, 42124 },
+ { 42128, 42182 },
+ { 42192, 42539 },
+ { 42560, 42743 },
+ { 42752, 42954 },
+ { 42960, 42961 },
+ { 42963, 42963 },
+ { 42965, 42969 },
+ { 42994, 43052 },
+ { 43056, 43065 },
+ { 43072, 43127 },
+ { 43136, 43205 },
+ { 43214, 43225 },
+ { 43232, 43347 },
+ { 43359, 43388 },
+ { 43392, 43469 },
+ { 43471, 43481 },
+ { 43486, 43518 },
+ { 43520, 43574 },
+ { 43584, 43597 },
+ { 43600, 43609 },
+ { 43612, 43714 },
+ { 43739, 43766 },
+ { 43777, 43782 },
+ { 43785, 43790 },
+ { 43793, 43798 },
+ { 43808, 43814 },
+ { 43816, 43822 },
+ { 43824, 43883 },
+ { 43888, 44013 },
+ { 44016, 44025 },
+ { 44032, 55203 },
+ { 55216, 55238 },
+ { 55243, 55291 },
+ { 57344, 64109 },
+ { 64112, 64217 },
+ { 64256, 64262 },
+ { 64275, 64279 },
+ { 64285, 64310 },
+ { 64312, 64316 },
+ { 64318, 64318 },
+ { 64320, 64321 },
+ { 64323, 64324 },
+ { 64326, 64450 },
+ { 64467, 64911 },
+ { 64914, 64967 },
+ { 64975, 64975 },
+ { 65008, 65049 },
+ { 65056, 65106 },
+ { 65108, 65126 },
+ { 65128, 65131 },
+ { 65136, 65140 },
+ { 65142, 65276 },
+ { 65279, 65279 },
+ { 65281, 65470 },
+ { 65474, 65479 },
+ { 65482, 65487 },
+ { 65490, 65495 },
+ { 65498, 65500 },
+ { 65504, 65510 },
+ { 65512, 65518 },
+ { 65529, 65533 },
+ { 65536, 65547 },
+ { 65549, 65574 },
+ { 65576, 65594 },
+ { 65596, 65597 },
+ { 65599, 65613 },
+ { 65616, 65629 },
+ { 65664, 65786 },
+ { 65792, 65794 },
+ { 65799, 65843 },
+ { 65847, 65934 },
+ { 65936, 65948 },
+ { 65952, 65952 },
+ { 66000, 66045 },
+ { 66176, 66204 },
+ { 66208, 66256 },
+ { 66272, 66299 },
+ { 66304, 66339 },
+ { 66349, 66378 },
+ { 66384, 66426 },
+ { 66432, 66461 },
+ { 66463, 66499 },
+ { 66504, 66517 },
+ { 66560, 66717 },
+ { 66720, 66729 },
+ { 66736, 66771 },
+ { 66776, 66811 },
+ { 66816, 66855 },
+ { 66864, 66915 },
+ { 66927, 66938 },
+ { 66940, 66954 },
+ { 66956, 66962 },
+ { 66964, 66965 },
+ { 66967, 66977 },
+ { 66979, 66993 },
+ { 66995, 67001 },
+ { 67003, 67004 },
+ { 67072, 67382 },
+ { 67392, 67413 },
+ { 67424, 67431 },
+ { 67456, 67461 },
+ { 67463, 67504 },
+ { 67506, 67514 },
+ { 67584, 67589 },
+ { 67592, 67592 },
+ { 67594, 67637 },
+ { 67639, 67640 },
+ { 67644, 67644 },
+ { 67647, 67669 },
+ { 67671, 67742 },
+ { 67751, 67759 },
+ { 67808, 67826 },
+ { 67828, 67829 },
+ { 67835, 67867 },
+ { 67871, 67897 },
+ { 67903, 67903 },
+ { 67968, 68023 },
+ { 68028, 68047 },
+ { 68050, 68099 },
+ { 68101, 68102 },
+ { 68108, 68115 },
+ { 68117, 68119 },
+ { 68121, 68149 },
+ { 68152, 68154 },
+ { 68159, 68168 },
+ { 68176, 68184 },
+ { 68192, 68255 },
+ { 68288, 68326 },
+ { 68331, 68342 },
+ { 68352, 68405 },
+ { 68409, 68437 },
+ { 68440, 68466 },
+ { 68472, 68497 },
+ { 68505, 68508 },
+ { 68521, 68527 },
+ { 68608, 68680 },
+ { 68736, 68786 },
+ { 68800, 68850 },
+ { 68858, 68903 },
+ { 68912, 68921 },
+ { 69216, 69246 },
+ { 69248, 69289 },
+ { 69291, 69293 },
+ { 69296, 69297 },
+ { 69376, 69415 },
+ { 69424, 69465 },
+ { 69488, 69513 },
+ { 69552, 69579 },
+ { 69600, 69622 },
+ { 69632, 69709 },
+ { 69714, 69749 },
+ { 69759, 69826 },
+ { 69837, 69837 },
+ { 69840, 69864 },
+ { 69872, 69881 },
+ { 69888, 69940 },
+ { 69942, 69959 },
+ { 69968, 70006 },
+ { 70016, 70111 },
+ { 70113, 70132 },
+ { 70144, 70161 },
+ { 70163, 70206 },
+ { 70272, 70278 },
+ { 70280, 70280 },
+ { 70282, 70285 },
+ { 70287, 70301 },
+ { 70303, 70313 },
+ { 70320, 70378 },
+ { 70384, 70393 },
+ { 70400, 70403 },
+ { 70405, 70412 },
+ { 70415, 70416 },
+ { 70419, 70440 },
+ { 70442, 70448 },
+ { 70450, 70451 },
+ { 70453, 70457 },
+ { 70459, 70468 },
+ { 70471, 70472 },
+ { 70475, 70477 },
+ { 70480, 70480 },
+ { 70487, 70487 },
+ { 70493, 70499 },
+ { 70502, 70508 },
+ { 70512, 70516 },
+ { 70656, 70747 },
+ { 70749, 70753 },
+ { 70784, 70855 },
+ { 70864, 70873 },
+ { 71040, 71093 },
+ { 71096, 71133 },
+ { 71168, 71236 },
+ { 71248, 71257 },
+ { 71264, 71276 },
+ { 71296, 71353 },
+ { 71360, 71369 },
+ { 71424, 71450 },
+ { 71453, 71467 },
+ { 71472, 71494 },
+ { 71680, 71739 },
+ { 71840, 71922 },
+ { 71935, 71942 },
+ { 71945, 71945 },
+ { 71948, 71955 },
+ { 71957, 71958 },
+ { 71960, 71989 },
+ { 71991, 71992 },
+ { 71995, 72006 },
+ { 72016, 72025 },
+ { 72096, 72103 },
+ { 72106, 72151 },
+ { 72154, 72164 },
+ { 72192, 72263 },
+ { 72272, 72354 },
+ { 72368, 72440 },
+ { 72704, 72712 },
+ { 72714, 72758 },
+ { 72760, 72773 },
+ { 72784, 72812 },
+ { 72816, 72847 },
+ { 72850, 72871 },
+ { 72873, 72886 },
+ { 72960, 72966 },
+ { 72968, 72969 },
+ { 72971, 73014 },
+ { 73018, 73018 },
+ { 73020, 73021 },
+ { 73023, 73031 },
+ { 73040, 73049 },
+ { 73056, 73061 },
+ { 73063, 73064 },
+ { 73066, 73102 },
+ { 73104, 73105 },
+ { 73107, 73112 },
+ { 73120, 73129 },
+ { 73440, 73464 },
+ { 73648, 73648 },
+ { 73664, 73713 },
+ { 73727, 74649 },
+ { 74752, 74862 },
+ { 74864, 74868 },
+ { 74880, 75075 },
+ { 77712, 77810 },
+ { 77824, 78894 },
+ { 78896, 78904 },
+ { 82944, 83526 },
+ { 92160, 92728 },
+ { 92736, 92766 },
+ { 92768, 92777 },
+ { 92782, 92862 },
+ { 92864, 92873 },
+ { 92880, 92909 },
+ { 92912, 92917 },
+ { 92928, 92997 },
+ { 93008, 93017 },
+ { 93019, 93025 },
+ { 93027, 93047 },
+ { 93053, 93071 },
+ { 93760, 93850 },
+ { 93952, 94026 },
+ { 94031, 94087 },
+ { 94095, 94111 },
+ { 94176, 94180 },
+ { 94192, 94193 },
+ { 94208, 100343 },
+ { 100352, 101589 },
+ { 101632, 101640 },
+ { 110576, 110579 },
+ { 110581, 110587 },
+ { 110589, 110590 },
+ { 110592, 110882 },
+ { 110928, 110930 },
+ { 110948, 110951 },
+ { 110960, 111355 },
+ { 113664, 113770 },
+ { 113776, 113788 },
+ { 113792, 113800 },
+ { 113808, 113817 },
+ { 113820, 113827 },
+ { 118528, 118573 },
+ { 118576, 118598 },
+ { 118608, 118723 },
+ { 118784, 119029 },
+ { 119040, 119078 },
+ { 119081, 119274 },
+ { 119296, 119365 },
+ { 119520, 119539 },
+ { 119552, 119638 },
+ { 119648, 119672 },
+ { 119808, 119892 },
+ { 119894, 119964 },
+ { 119966, 119967 },
+ { 119970, 119970 },
+ { 119973, 119974 },
+ { 119977, 119980 },
+ { 119982, 119993 },
+ { 119995, 119995 },
+ { 119997, 120003 },
+ { 120005, 120069 },
+ { 120071, 120074 },
+ { 120077, 120084 },
+ { 120086, 120092 },
+ { 120094, 120121 },
+ { 120123, 120126 },
+ { 120128, 120132 },
+ { 120134, 120134 },
+ { 120138, 120144 },
+ { 120146, 120485 },
+ { 120488, 120779 },
+ { 120782, 121483 },
+ { 121499, 121503 },
+ { 121505, 121519 },
+ { 122624, 122654 },
+ { 122880, 122886 },
+ { 122888, 122904 },
+ { 122907, 122913 },
+ { 122915, 122916 },
+ { 122918, 122922 },
+ { 123136, 123180 },
+ { 123184, 123197 },
+ { 123200, 123209 },
+ { 123214, 123215 },
+ { 123536, 123566 },
+ { 123584, 123641 },
+ { 123647, 123647 },
+ { 124896, 124902 },
+ { 124904, 124907 },
+ { 124909, 124910 },
+ { 124912, 124926 },
+ { 124928, 125124 },
+ { 125127, 125142 },
+ { 125184, 125259 },
+ { 125264, 125273 },
+ { 125278, 125279 },
+ { 126065, 126132 },
+ { 126209, 126269 },
+ { 126464, 126467 },
+ { 126469, 126495 },
+ { 126497, 126498 },
+ { 126500, 126500 },
+ { 126503, 126503 },
+ { 126505, 126514 },
+ { 126516, 126519 },
+ { 126521, 126521 },
+ { 126523, 126523 },
+ { 126530, 126530 },
+ { 126535, 126535 },
+ { 126537, 126537 },
+ { 126539, 126539 },
+ { 126541, 126543 },
+ { 126545, 126546 },
+ { 126548, 126548 },
+ { 126551, 126551 },
+ { 126553, 126553 },
+ { 126555, 126555 },
+ { 126557, 126557 },
+ { 126559, 126559 },
+ { 126561, 126562 },
+ { 126564, 126564 },
+ { 126567, 126570 },
+ { 126572, 126578 },
+ { 126580, 126583 },
+ { 126585, 126588 },
+ { 126590, 126590 },
+ { 126592, 126601 },
+ { 126603, 126619 },
+ { 126625, 126627 },
+ { 126629, 126633 },
+ { 126635, 126651 },
+ { 126704, 126705 },
+ { 126976, 127019 },
+ { 127024, 127123 },
+ { 127136, 127150 },
+ { 127153, 127167 },
+ { 127169, 127183 },
+ { 127185, 127221 },
+ { 127232, 127405 },
+ { 127462, 127490 },
+ { 127504, 127547 },
+ { 127552, 127560 },
+ { 127568, 127569 },
+ { 127584, 127589 },
+ { 127744, 128727 },
+ { 128733, 128748 },
+ { 128752, 128764 },
+ { 128768, 128883 },
+ { 128896, 128984 },
+ { 128992, 129003 },
+ { 129008, 129008 },
+ { 129024, 129035 },
+ { 129040, 129095 },
+ { 129104, 129113 },
+ { 129120, 129159 },
+ { 129168, 129197 },
+ { 129200, 129201 },
+ { 129280, 129619 },
+ { 129632, 129645 },
+ { 129648, 129652 },
+ { 129656, 129660 },
+ { 129664, 129670 },
+ { 129680, 129708 },
+ { 129712, 129722 },
+ { 129728, 129733 },
+ { 129744, 129753 },
+ { 129760, 129767 },
+ { 129776, 129782 },
+ { 129792, 129938 },
+ { 129940, 129994 },
+ { 130032, 130041 },
+ { 131072, 173791 },
+ { 173824, 177976 },
+ { 177984, 178205 },
+ { 178208, 183969 },
+ { 183984, 191456 },
+ { 194560, 195101 },
+ { 196608, 201546 },
+ { 917505, 917505 },
+ { 917536, 917631 },
+ { 917760, 917999 },
+ { 983040, 1048573 },
+ { 1048576, 1114109 },
+ { -1, -1 }, // end markers
+};
+static const set_item alpha[] = {
+ { 65, 90 },
+ { 97, 122 },
+ { 170, 170 },
+ { 181, 181 },
+ { 186, 186 },
+ { 192, 214 },
+ { 216, 246 },
+ { 248, 705 },
+ { 710, 721 },
+ { 736, 740 },
+ { 748, 748 },
+ { 750, 750 },
+ { 837, 837 },
+ { 880, 884 },
+ { 886, 887 },
+ { 890, 893 },
+ { 895, 895 },
+ { 902, 902 },
+ { 904, 906 },
+ { 908, 908 },
+ { 910, 929 },
+ { 931, 1013 },
+ { 1015, 1153 },
+ { 1162, 1327 },
+ { 1329, 1366 },
+ { 1369, 1369 },
+ { 1376, 1416 },
+ { 1456, 1469 },
+ { 1471, 1471 },
+ { 1473, 1474 },
+ { 1476, 1477 },
+ { 1479, 1479 },
+ { 1488, 1514 },
+ { 1519, 1522 },
+ { 1552, 1562 },
+ { 1568, 1623 },
+ { 1625, 1641 },
+ { 1646, 1747 },
+ { 1749, 1756 },
+ { 1761, 1768 },
+ { 1773, 1788 },
+ { 1791, 1791 },
+ { 1808, 1855 },
+ { 1869, 1969 },
+ { 1984, 2026 },
+ { 2036, 2037 },
+ { 2042, 2042 },
+ { 2048, 2071 },
+ { 2074, 2092 },
+ { 2112, 2136 },
+ { 2144, 2154 },
+ { 2160, 2183 },
+ { 2185, 2190 },
+ { 2208, 2249 },
+ { 2260, 2271 },
+ { 2275, 2281 },
+ { 2288, 2363 },
+ { 2365, 2380 },
+ { 2382, 2384 },
+ { 2389, 2403 },
+ { 2406, 2415 },
+ { 2417, 2435 },
+ { 2437, 2444 },
+ { 2447, 2448 },
+ { 2451, 2472 },
+ { 2474, 2480 },
+ { 2482, 2482 },
+ { 2486, 2489 },
+ { 2493, 2500 },
+ { 2503, 2504 },
+ { 2507, 2508 },
+ { 2510, 2510 },
+ { 2519, 2519 },
+ { 2524, 2525 },
+ { 2527, 2531 },
+ { 2534, 2545 },
+ { 2556, 2556 },
+ { 2561, 2563 },
+ { 2565, 2570 },
+ { 2575, 2576 },
+ { 2579, 2600 },
+ { 2602, 2608 },
+ { 2610, 2611 },
+ { 2613, 2614 },
+ { 2616, 2617 },
+ { 2622, 2626 },
+ { 2631, 2632 },
+ { 2635, 2636 },
+ { 2641, 2641 },
+ { 2649, 2652 },
+ { 2654, 2654 },
+ { 2662, 2677 },
+ { 2689, 2691 },
+ { 2693, 2701 },
+ { 2703, 2705 },
+ { 2707, 2728 },
+ { 2730, 2736 },
+ { 2738, 2739 },
+ { 2741, 2745 },
+ { 2749, 2757 },
+ { 2759, 2761 },
+ { 2763, 2764 },
+ { 2768, 2768 },
+ { 2784, 2787 },
+ { 2790, 2799 },
+ { 2809, 2812 },
+ { 2817, 2819 },
+ { 2821, 2828 },
+ { 2831, 2832 },
+ { 2835, 2856 },
+ { 2858, 2864 },
+ { 2866, 2867 },
+ { 2869, 2873 },
+ { 2877, 2884 },
+ { 2887, 2888 },
+ { 2891, 2892 },
+ { 2902, 2903 },
+ { 2908, 2909 },
+ { 2911, 2915 },
+ { 2918, 2927 },
+ { 2929, 2929 },
+ { 2946, 2947 },
+ { 2949, 2954 },
+ { 2958, 2960 },
+ { 2962, 2965 },
+ { 2969, 2970 },
+ { 2972, 2972 },
+ { 2974, 2975 },
+ { 2979, 2980 },
+ { 2984, 2986 },
+ { 2990, 3001 },
+ { 3006, 3010 },
+ { 3014, 3016 },
+ { 3018, 3020 },
+ { 3024, 3024 },
+ { 3031, 3031 },
+ { 3046, 3055 },
+ { 3072, 3075 },
+ { 3077, 3084 },
+ { 3086, 3088 },
+ { 3090, 3112 },
+ { 3114, 3129 },
+ { 3133, 3140 },
+ { 3142, 3144 },
+ { 3146, 3148 },
+ { 3157, 3158 },
+ { 3160, 3162 },
+ { 3165, 3165 },
+ { 3168, 3171 },
+ { 3174, 3183 },
+ { 3200, 3203 },
+ { 3205, 3212 },
+ { 3214, 3216 },
+ { 3218, 3240 },
+ { 3242, 3251 },
+ { 3253, 3257 },
+ { 3261, 3268 },
+ { 3270, 3272 },
+ { 3274, 3276 },
+ { 3285, 3286 },
+ { 3293, 3294 },
+ { 3296, 3299 },
+ { 3302, 3311 },
+ { 3313, 3314 },
+ { 3328, 3340 },
+ { 3342, 3344 },
+ { 3346, 3386 },
+ { 3389, 3396 },
+ { 3398, 3400 },
+ { 3402, 3404 },
+ { 3406, 3406 },
+ { 3412, 3415 },
+ { 3423, 3427 },
+ { 3430, 3439 },
+ { 3450, 3455 },
+ { 3457, 3459 },
+ { 3461, 3478 },
+ { 3482, 3505 },
+ { 3507, 3515 },
+ { 3517, 3517 },
+ { 3520, 3526 },
+ { 3535, 3540 },
+ { 3542, 3542 },
+ { 3544, 3551 },
+ { 3558, 3567 },
+ { 3570, 3571 },
+ { 3585, 3642 },
+ { 3648, 3654 },
+ { 3661, 3661 },
+ { 3664, 3673 },
+ { 3713, 3714 },
+ { 3716, 3716 },
+ { 3718, 3722 },
+ { 3724, 3747 },
+ { 3749, 3749 },
+ { 3751, 3769 },
+ { 3771, 3773 },
+ { 3776, 3780 },
+ { 3782, 3782 },
+ { 3789, 3789 },
+ { 3792, 3801 },
+ { 3804, 3807 },
+ { 3840, 3840 },
+ { 3872, 3881 },
+ { 3904, 3911 },
+ { 3913, 3948 },
+ { 3953, 3969 },
+ { 3976, 3991 },
+ { 3993, 4028 },
+ { 4096, 4150 },
+ { 4152, 4152 },
+ { 4155, 4169 },
+ { 4176, 4253 },
+ { 4256, 4293 },
+ { 4295, 4295 },
+ { 4301, 4301 },
+ { 4304, 4346 },
+ { 4348, 4680 },
+ { 4682, 4685 },
+ { 4688, 4694 },
+ { 4696, 4696 },
+ { 4698, 4701 },
+ { 4704, 4744 },
+ { 4746, 4749 },
+ { 4752, 4784 },
+ { 4786, 4789 },
+ { 4792, 4798 },
+ { 4800, 4800 },
+ { 4802, 4805 },
+ { 4808, 4822 },
+ { 4824, 4880 },
+ { 4882, 4885 },
+ { 4888, 4954 },
+ { 4992, 5007 },
+ { 5024, 5109 },
+ { 5112, 5117 },
+ { 5121, 5740 },
+ { 5743, 5759 },
+ { 5761, 5786 },
+ { 5792, 5866 },
+ { 5870, 5880 },
+ { 5888, 5907 },
+ { 5919, 5939 },
+ { 5952, 5971 },
+ { 5984, 5996 },
+ { 5998, 6000 },
+ { 6002, 6003 },
+ { 6016, 6067 },
+ { 6070, 6088 },
+ { 6103, 6103 },
+ { 6108, 6108 },
+ { 6112, 6121 },
+ { 6160, 6169 },
+ { 6176, 6264 },
+ { 6272, 6314 },
+ { 6320, 6389 },
+ { 6400, 6430 },
+ { 6432, 6443 },
+ { 6448, 6456 },
+ { 6470, 6509 },
+ { 6512, 6516 },
+ { 6528, 6571 },
+ { 6576, 6601 },
+ { 6608, 6617 },
+ { 6656, 6683 },
+ { 6688, 6750 },
+ { 6753, 6772 },
+ { 6784, 6793 },
+ { 6800, 6809 },
+ { 6823, 6823 },
+ { 6847, 6848 },
+ { 6860, 6862 },
+ { 6912, 6963 },
+ { 6965, 6979 },
+ { 6981, 6988 },
+ { 6992, 7001 },
+ { 7040, 7081 },
+ { 7084, 7141 },
+ { 7143, 7153 },
+ { 7168, 7222 },
+ { 7232, 7241 },
+ { 7245, 7293 },
+ { 7296, 7304 },
+ { 7312, 7354 },
+ { 7357, 7359 },
+ { 7401, 7404 },
+ { 7406, 7411 },
+ { 7413, 7414 },
+ { 7418, 7418 },
+ { 7424, 7615 },
+ { 7655, 7668 },
+ { 7680, 7957 },
+ { 7960, 7965 },
+ { 7968, 8005 },
+ { 8008, 8013 },
+ { 8016, 8023 },
+ { 8025, 8025 },
+ { 8027, 8027 },
+ { 8029, 8029 },
+ { 8031, 8061 },
+ { 8064, 8116 },
+ { 8118, 8124 },
+ { 8126, 8126 },
+ { 8130, 8132 },
+ { 8134, 8140 },
+ { 8144, 8147 },
+ { 8150, 8155 },
+ { 8160, 8172 },
+ { 8178, 8180 },
+ { 8182, 8188 },
+ { 8305, 8305 },
+ { 8319, 8319 },
+ { 8336, 8348 },
+ { 8450, 8450 },
+ { 8455, 8455 },
+ { 8458, 8467 },
+ { 8469, 8469 },
+ { 8473, 8477 },
+ { 8484, 8484 },
+ { 8486, 8486 },
+ { 8488, 8488 },
+ { 8490, 8493 },
+ { 8495, 8505 },
+ { 8508, 8511 },
+ { 8517, 8521 },
+ { 8526, 8526 },
+ { 8544, 8584 },
+ { 9398, 9449 },
+ { 11264, 11492 },
+ { 11499, 11502 },
+ { 11506, 11507 },
+ { 11520, 11557 },
+ { 11559, 11559 },
+ { 11565, 11565 },
+ { 11568, 11623 },
+ { 11631, 11631 },
+ { 11648, 11670 },
+ { 11680, 11686 },
+ { 11688, 11694 },
+ { 11696, 11702 },
+ { 11704, 11710 },
+ { 11712, 11718 },
+ { 11720, 11726 },
+ { 11728, 11734 },
+ { 11736, 11742 },
+ { 11744, 11775 },
+ { 11823, 11823 },
+ { 12293, 12295 },
+ { 12321, 12329 },
+ { 12337, 12341 },
+ { 12344, 12348 },
+ { 12353, 12438 },
+ { 12445, 12447 },
+ { 12449, 12538 },
+ { 12540, 12543 },
+ { 12549, 12591 },
+ { 12593, 12686 },
+ { 12704, 12735 },
+ { 12784, 12799 },
+ { 13312, 19903 },
+ { 19968, 42124 },
+ { 42192, 42237 },
+ { 42240, 42508 },
+ { 42512, 42539 },
+ { 42560, 42606 },
+ { 42612, 42619 },
+ { 42623, 42735 },
+ { 42775, 42783 },
+ { 42786, 42888 },
+ { 42891, 42954 },
+ { 42960, 42961 },
+ { 42963, 42963 },
+ { 42965, 42969 },
+ { 42994, 43013 },
+ { 43015, 43047 },
+ { 43072, 43123 },
+ { 43136, 43203 },
+ { 43205, 43205 },
+ { 43216, 43225 },
+ { 43250, 43255 },
+ { 43259, 43259 },
+ { 43261, 43306 },
+ { 43312, 43346 },
+ { 43360, 43388 },
+ { 43392, 43442 },
+ { 43444, 43455 },
+ { 43471, 43481 },
+ { 43488, 43518 },
+ { 43520, 43574 },
+ { 43584, 43597 },
+ { 43600, 43609 },
+ { 43616, 43638 },
+ { 43642, 43710 },
+ { 43712, 43712 },
+ { 43714, 43714 },
+ { 43739, 43741 },
+ { 43744, 43759 },
+ { 43762, 43765 },
+ { 43777, 43782 },
+ { 43785, 43790 },
+ { 43793, 43798 },
+ { 43808, 43814 },
+ { 43816, 43822 },
+ { 43824, 43866 },
+ { 43868, 43881 },
+ { 43888, 44010 },
+ { 44016, 44025 },
+ { 44032, 55203 },
+ { 55216, 55238 },
+ { 55243, 55291 },
+ { 63744, 64109 },
+ { 64112, 64217 },
+ { 64256, 64262 },
+ { 64275, 64279 },
+ { 64285, 64296 },
+ { 64298, 64310 },
+ { 64312, 64316 },
+ { 64318, 64318 },
+ { 64320, 64321 },
+ { 64323, 64324 },
+ { 64326, 64433 },
+ { 64467, 64829 },
+ { 64848, 64911 },
+ { 64914, 64967 },
+ { 65008, 65019 },
+ { 65136, 65140 },
+ { 65142, 65276 },
+ { 65296, 65305 },
+ { 65313, 65338 },
+ { 65345, 65370 },
+ { 65382, 65470 },
+ { 65474, 65479 },
+ { 65482, 65487 },
+ { 65490, 65495 },
+ { 65498, 65500 },
+ { 65536, 65547 },
+ { 65549, 65574 },
+ { 65576, 65594 },
+ { 65596, 65597 },
+ { 65599, 65613 },
+ { 65616, 65629 },
+ { 65664, 65786 },
+ { 65856, 65908 },
+ { 66176, 66204 },
+ { 66208, 66256 },
+ { 66304, 66335 },
+ { 66349, 66378 },
+ { 66384, 66426 },
+ { 66432, 66461 },
+ { 66464, 66499 },
+ { 66504, 66511 },
+ { 66513, 66517 },
+ { 66560, 66717 },
+ { 66720, 66729 },
+ { 66736, 66771 },
+ { 66776, 66811 },
+ { 66816, 66855 },
+ { 66864, 66915 },
+ { 66928, 66938 },
+ { 66940, 66954 },
+ { 66956, 66962 },
+ { 66964, 66965 },
+ { 66967, 66977 },
+ { 66979, 66993 },
+ { 66995, 67001 },
+ { 67003, 67004 },
+ { 67072, 67382 },
+ { 67392, 67413 },
+ { 67424, 67431 },
+ { 67456, 67461 },
+ { 67463, 67504 },
+ { 67506, 67514 },
+ { 67584, 67589 },
+ { 67592, 67592 },
+ { 67594, 67637 },
+ { 67639, 67640 },
+ { 67644, 67644 },
+ { 67647, 67669 },
+ { 67680, 67702 },
+ { 67712, 67742 },
+ { 67808, 67826 },
+ { 67828, 67829 },
+ { 67840, 67861 },
+ { 67872, 67897 },
+ { 67968, 68023 },
+ { 68030, 68031 },
+ { 68096, 68099 },
+ { 68101, 68102 },
+ { 68108, 68115 },
+ { 68117, 68119 },
+ { 68121, 68149 },
+ { 68192, 68220 },
+ { 68224, 68252 },
+ { 68288, 68295 },
+ { 68297, 68324 },
+ { 68352, 68405 },
+ { 68416, 68437 },
+ { 68448, 68466 },
+ { 68480, 68497 },
+ { 68608, 68680 },
+ { 68736, 68786 },
+ { 68800, 68850 },
+ { 68864, 68903 },
+ { 68912, 68921 },
+ { 69248, 69289 },
+ { 69291, 69292 },
+ { 69296, 69297 },
+ { 69376, 69404 },
+ { 69415, 69415 },
+ { 69424, 69445 },
+ { 69488, 69505 },
+ { 69552, 69572 },
+ { 69600, 69622 },
+ { 69632, 69701 },
+ { 69734, 69743 },
+ { 69745, 69749 },
+ { 69762, 69816 },
+ { 69826, 69826 },
+ { 69840, 69864 },
+ { 69872, 69881 },
+ { 69888, 69938 },
+ { 69942, 69951 },
+ { 69956, 69959 },
+ { 69968, 70002 },
+ { 70006, 70006 },
+ { 70016, 70079 },
+ { 70081, 70084 },
+ { 70094, 70106 },
+ { 70108, 70108 },
+ { 70144, 70161 },
+ { 70163, 70196 },
+ { 70199, 70199 },
+ { 70206, 70206 },
+ { 70272, 70278 },
+ { 70280, 70280 },
+ { 70282, 70285 },
+ { 70287, 70301 },
+ { 70303, 70312 },
+ { 70320, 70376 },
+ { 70384, 70393 },
+ { 70400, 70403 },
+ { 70405, 70412 },
+ { 70415, 70416 },
+ { 70419, 70440 },
+ { 70442, 70448 },
+ { 70450, 70451 },
+ { 70453, 70457 },
+ { 70461, 70468 },
+ { 70471, 70472 },
+ { 70475, 70476 },
+ { 70480, 70480 },
+ { 70487, 70487 },
+ { 70493, 70499 },
+ { 70656, 70721 },
+ { 70723, 70725 },
+ { 70727, 70730 },
+ { 70736, 70745 },
+ { 70751, 70753 },
+ { 70784, 70849 },
+ { 70852, 70853 },
+ { 70855, 70855 },
+ { 70864, 70873 },
+ { 71040, 71093 },
+ { 71096, 71102 },
+ { 71128, 71133 },
+ { 71168, 71230 },
+ { 71232, 71232 },
+ { 71236, 71236 },
+ { 71248, 71257 },
+ { 71296, 71349 },
+ { 71352, 71352 },
+ { 71360, 71369 },
+ { 71424, 71450 },
+ { 71453, 71466 },
+ { 71472, 71481 },
+ { 71488, 71494 },
+ { 71680, 71736 },
+ { 71840, 71913 },
+ { 71935, 71942 },
+ { 71945, 71945 },
+ { 71948, 71955 },
+ { 71957, 71958 },
+ { 71960, 71989 },
+ { 71991, 71992 },
+ { 71995, 71996 },
+ { 71999, 72002 },
+ { 72016, 72025 },
+ { 72096, 72103 },
+ { 72106, 72151 },
+ { 72154, 72159 },
+ { 72161, 72161 },
+ { 72163, 72164 },
+ { 72192, 72242 },
+ { 72245, 72254 },
+ { 72272, 72343 },
+ { 72349, 72349 },
+ { 72368, 72440 },
+ { 72704, 72712 },
+ { 72714, 72758 },
+ { 72760, 72766 },
+ { 72768, 72768 },
+ { 72784, 72793 },
+ { 72818, 72847 },
+ { 72850, 72871 },
+ { 72873, 72886 },
+ { 72960, 72966 },
+ { 72968, 72969 },
+ { 72971, 73014 },
+ { 73018, 73018 },
+ { 73020, 73021 },
+ { 73023, 73025 },
+ { 73027, 73027 },
+ { 73030, 73031 },
+ { 73040, 73049 },
+ { 73056, 73061 },
+ { 73063, 73064 },
+ { 73066, 73102 },
+ { 73104, 73105 },
+ { 73107, 73110 },
+ { 73112, 73112 },
+ { 73120, 73129 },
+ { 73440, 73462 },
+ { 73648, 73648 },
+ { 73728, 74649 },
+ { 74752, 74862 },
+ { 74880, 75075 },
+ { 77712, 77808 },
+ { 77824, 78894 },
+ { 82944, 83526 },
+ { 92160, 92728 },
+ { 92736, 92766 },
+ { 92768, 92777 },
+ { 92784, 92862 },
+ { 92864, 92873 },
+ { 92880, 92909 },
+ { 92928, 92975 },
+ { 92992, 92995 },
+ { 93008, 93017 },
+ { 93027, 93047 },
+ { 93053, 93071 },
+ { 93760, 93823 },
+ { 93952, 94026 },
+ { 94031, 94087 },
+ { 94095, 94111 },
+ { 94176, 94177 },
+ { 94179, 94179 },
+ { 94192, 94193 },
+ { 94208, 100343 },
+ { 100352, 101589 },
+ { 101632, 101640 },
+ { 110576, 110579 },
+ { 110581, 110587 },
+ { 110589, 110590 },
+ { 110592, 110882 },
+ { 110928, 110930 },
+ { 110948, 110951 },
+ { 110960, 111355 },
+ { 113664, 113770 },
+ { 113776, 113788 },
+ { 113792, 113800 },
+ { 113808, 113817 },
+ { 113822, 113822 },
+ { 119808, 119892 },
+ { 119894, 119964 },
+ { 119966, 119967 },
+ { 119970, 119970 },
+ { 119973, 119974 },
+ { 119977, 119980 },
+ { 119982, 119993 },
+ { 119995, 119995 },
+ { 119997, 120003 },
+ { 120005, 120069 },
+ { 120071, 120074 },
+ { 120077, 120084 },
+ { 120086, 120092 },
+ { 120094, 120121 },
+ { 120123, 120126 },
+ { 120128, 120132 },
+ { 120134, 120134 },
+ { 120138, 120144 },
+ { 120146, 120485 },
+ { 120488, 120512 },
+ { 120514, 120538 },
+ { 120540, 120570 },
+ { 120572, 120596 },
+ { 120598, 120628 },
+ { 120630, 120654 },
+ { 120656, 120686 },
+ { 120688, 120712 },
+ { 120714, 120744 },
+ { 120746, 120770 },
+ { 120772, 120779 },
+ { 120782, 120831 },
+ { 122624, 122654 },
+ { 122880, 122886 },
+ { 122888, 122904 },
+ { 122907, 122913 },
+ { 122915, 122916 },
+ { 122918, 122922 },
+ { 123136, 123180 },
+ { 123191, 123197 },
+ { 123200, 123209 },
+ { 123214, 123214 },
+ { 123536, 123565 },
+ { 123584, 123627 },
+ { 123632, 123641 },
+ { 124896, 124902 },
+ { 124904, 124907 },
+ { 124909, 124910 },
+ { 124912, 124926 },
+ { 124928, 125124 },
+ { 125184, 125251 },
+ { 125255, 125255 },
+ { 125259, 125259 },
+ { 125264, 125273 },
+ { 126464, 126467 },
+ { 126469, 126495 },
+ { 126497, 126498 },
+ { 126500, 126500 },
+ { 126503, 126503 },
+ { 126505, 126514 },
+ { 126516, 126519 },
+ { 126521, 126521 },
+ { 126523, 126523 },
+ { 126530, 126530 },
+ { 126535, 126535 },
+ { 126537, 126537 },
+ { 126539, 126539 },
+ { 126541, 126543 },
+ { 126545, 126546 },
+ { 126548, 126548 },
+ { 126551, 126551 },
+ { 126553, 126553 },
+ { 126555, 126555 },
+ { 126557, 126557 },
+ { 126559, 126559 },
+ { 126561, 126562 },
+ { 126564, 126564 },
+ { 126567, 126570 },
+ { 126572, 126578 },
+ { 126580, 126583 },
+ { 126585, 126588 },
+ { 126590, 126590 },
+ { 126592, 126601 },
+ { 126603, 126619 },
+ { 126625, 126627 },
+ { 126629, 126633 },
+ { 126635, 126651 },
+ { 127280, 127305 },
+ { 127312, 127337 },
+ { 127344, 127369 },
+ { 130032, 130041 },
+ { 131072, 173791 },
+ { 173824, 177976 },
+ { 177984, 178205 },
+ { 178208, 183969 },
+ { 183984, 191456 },
+ { 194560, 195101 },
+ { 196608, 201546 },
+ { -1, -1 }, // end markers
+ };
+static struct _class_data {
+ const char *name; // e.g., "alpha"
+ int (*charcheckfunc)(int c); // function pointer, e.g., isalpha
+ wctype_t wctype; // from wctype("alpha")
+ const set_item *data[2]; // alpha, NULL
+} class_data[] = {
+ { "alnum", isalnum, 0, { digit, alpha } },
+ { "alpha", isalpha, 0, { alpha, NULL } },
+ { "blank", isblank, 0, { blank, NULL } },
+ { "cntrl", iscntrl, 0, { cntrl, NULL } },
+ { "digit", isdigit, 0, { digit, NULL } },
+ { "graph", isgraph, 0, { print_graph_common, graph_additional } },
+ { "lower", islower, 0, { lower, NULL } },
+ { "print", isprint, 0, { print_graph_common, print_additional } },
+ { "punct", ispunct, 0, { punct, NULL } },
+ { "space", isspace, 0, { space, NULL } },
+ { "upper", isupper, 0, { upper, NULL } },
+ { "xdigit", isxdigit, 0, { xdigit, NULL } },
+ { NULL, NULL, 0, { NULL, NULL } },
+};
+static struct _class_cache {
+ const char *name;
+ charset_t *set;
+ struct _class_cache *next; // linked list
+} *class_cache[53];
+/* find_cclass --- search class data for a known character class */
+
+static int
+find_class(const char *cclass)
+{
+ int i;
+
+ for (i = 0; class_data[i].name != NULL; i++)
+ if (strcmp(class_data[i].name, cclass) == 0)
+ return i;
+
+ return -1;
+}
+/* find_class_in_cache --- lookup and/or create locale + class in the table */
+
+static charset_t *
+find_class_in_cache(charset_t *set, const char *cclass, int *errcode, bool *is_new)
+{
+ assert(set != NULL && cclass != NULL && errcode != NULL && is_new != NULL);
+ *is_new = true;
+
+ const char *locale = setlocale(LC_CTYPE, NULL); // query locale
+ char *buf = (char *) malloc(strlen(locale) + 1 + strlen(cclass) + 1);
+ if (buf == NULL) {
+ *errcode = CSET_ESPACE;
+ return NULL;
+ }
+
+ sprintf(buf, "%s+%s", locale, cclass);
+
+ int index;
+ static char letters[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+ char *cp = strchr(letters, buf[0]);
+
+ if (cp == NULL)
+ index = 52;
+ else
+ index = cp - letters;
+
+ struct _class_cache *pcache = class_cache[index];
+ if (pcache == NULL) {
+ pcache = (struct _class_cache *) malloc(sizeof(struct _class_cache));
+ if (pcache == NULL) {
+ *errcode = CSET_ESPACE;
+ return NULL;
+ }
+ pcache->name = buf;
+ pcache->next = NULL;
+ charset_t *newset = charset_create(errcode, set->mb_cur_max, set->is_utf8);
+ if (newset == NULL) {
+ *errcode = CSET_ESPACE;
+ free((void *) pcache->name);
+ return NULL;
+ }
+ pcache->set = newset;
+ class_cache[index] = pcache;
+ // fall through to return
+ } else {
+ for (; pcache != NULL; pcache = pcache->next) {
+ if (strcmp(pcache->name, buf) == 0) { // found it
+ free((void *) buf);
+ *is_new = false;
+ goto done;
+ }
+ }
+ pcache = (struct _class_cache *) malloc(sizeof(struct _class_cache));
+ if (pcache == NULL) {
+ *errcode = CSET_ESPACE;
+ return NULL;
+ }
+ pcache->name = buf;
+ pcache->next = NULL;
+ charset_t *newset = charset_create(errcode, set->mb_cur_max, set->is_utf8);
+ if (newset == NULL) {
+ *errcode = CSET_ESPACE;
+ free((void *) pcache->name);
+ return NULL;
+ }
+ pcache->set = newset;
+ pcache->next = class_cache[index]; // push onto front of the chain
+ class_cache[index] = pcache;
+ // fall through to return
+ }
+
+done:
+ *errcode = CSET_SUCCESS;
+ return pcache->set;
+}
+/* wide_char_range_loop --- loop over all possible characters to match a ctype, and cache it */
+
+static int
+wide_char_range_loop(charset_t *set, const char *cclass, wctype_t ctype)
+{
+ // FIXME: Need to wrap this function in a mutex
+ int errcode = 0;
+ bool is_new = false;
+ charset_t *newset = find_class_in_cache(set, cclass, & errcode, & is_new);
+
+ if (newset == NULL)
+ return errcode;
+
+ if (is_new) {
+ for (int32_t c = 0; c < MAX_CODE_POINT; c++) {
+ if (iswctype(c, ctype)) {
+ int ret;
+ if ((ret = charset_add_char(newset, c)) != CSET_SUCCESS)
+ return ret;
+ }
+ }
+ charset_finalize(newset);
+ }
+
+ return charset_merge(set, newset);
+}
/* item_compare_for_searching --- compare two set_items */
static int
@@ -78,8 +3509,6 @@ item_compare_for_searching(const void *k, const void *e)
set_item *thekey = (set_item *) k;
set_item *elem = (set_item *) e;
- assert(thekey->item_type == RANGE_ITEM && elem->item_type == RANGE_ITEM);
-
if (elem->start <= thekey->start && thekey->start <= elem->end)
return 0; // found it
else if (thekey->end < elem->start)
@@ -89,13 +3518,13 @@ item_compare_for_searching(const void *k, const void *e)
return 1;
}
}
-/* wint_compare --- compare two wint values for qsort */
+/* int32_t_compare --- compare two int32_t values for qsort */
static int
-wint_compare(const void *l, const void *r)
+int32_t_compare(const void *l, const void *r)
{
- wint_t *left = (wint_t *) l;
- wint_t *right = (wint_t *) r;
+ int32_t *left = (int32_t *) l;
+ int32_t *right = (int32_t *) r;
return *left - *right;
}
@@ -107,16 +3536,7 @@ item_compare_for_sorting(const void *l, const void *r)
set_item *left = (set_item *) l;
set_item *right = (set_item *) r;
- if (left->item_type == CTYPE_ITEM && right->item_type == CTYPE_ITEM) {
- return left->wtype - right->wtype;
- } else if (left->item_type == CTYPE_ITEM && right->item_type == RANGE_ITEM) {
- return -1;
- } else if (left->item_type == RANGE_ITEM && right->item_type == CTYPE_ITEM) {
- return +1;
- } else {
- assert(left->item_type == RANGE_ITEM && right->item_type == RANGE_ITEM);
- return left->start - right->start;
- }
+ return left->start - right->start;
}
/* is_found --- return true if the character is found */
@@ -124,48 +3544,48 @@ item_compare_for_sorting(const void *l, const void *r)
static bool
is_found(const charset_t *set, int32_t the_char)
{
- set_item *items = set->items;
- int i;
-
if (set->items == NULL) // empty set, can't match
return false;
- if (set->nelems == 1 && set->items[0].item_type == RANGE_ITEM) {
+ if (set->nelems == 1)
return (set->items[0].start <= the_char && the_char <= set->items[0].end);
- }
- for (i = 0; i < set->nelems; i++) {
- // linear search of ctype items
- if (items[i].item_type == RANGE_ITEM)
- break;
-
- assert(items[i].item_type == CTYPE_ITEM);
- if (iswctype(the_char, items[i].wtype))
- return true;
- }
-
- if (i >= set->nelems)
- return false;
- assert(items[i].item_type == RANGE_ITEM);
-
// binary search to see if we have it
set_item *found;
set_item key;
- key.item_type = RANGE_ITEM;
key.start = key.end = the_char;
- found = bsearch(& key, set->items + i, set->nelems - i,
+ size_t nelems = set->nelems;
+ if (the_char < 128 && set->nelems8bit < set->nelems)
+ nelems = set->nelems8bit;
+
+ found = bsearch(& key, set->items, nelems,
sizeof(set_item),
item_compare_for_searching);
return found != NULL;
}
-/* finalize --- condense all the info into the final data structure */
+/* utfprefix --- return the correct UTF-8 first byte for the wide character */
-static void
-finalize(charset_t *set)
+static unsigned int
+utfprefix(int32_t wc)
+{
+ if (wc < 0x80)
+ return wc;
+ if (wc < 0x800)
+ return 0xC0 + (wc >> 6);
+ if (wc < 0x10000)
+ return 0xE0 + (wc >> 12);
+ if (wc < 0x100000)
+ return 0xF0 + (wc >> 18);
+ return 0xF4;
+}
+/* charset_finalize --- condense all the info into the final data structure */
+
+void
+charset_finalize(charset_t *set)
{
assert(set != NULL);
int result = 0;
- qsort(set->chars, set->nchars_inuse, sizeof(wint_t), wint_compare);
+ qsort(set->chars, set->nchars_inuse, sizeof(int32_t), int32_t_compare);
size_t i, j;
for (i = 0, j = 1; j < set->nchars_inuse; i++, j++) {
if (set->chars[i] == set->chars[j]) {
@@ -212,42 +3632,35 @@ finalize(charset_t *set)
// condense it
set_item *items = set->items;
for (i = 0, j = 1; j < set->nelems; i++, j++) {
- if ( items[i].item_type == CTYPE_ITEM
- && items[j].item_type == CTYPE_ITEM
- && items[i].wtype == items[j].wtype) {
- free((void *) items[j].type_name);
+ bool need_shift = false;
+ if (items[i].start == items[j].start && items[i].end == items[j].end) {
+ need_shift = true;
+ } else if (items[i].end + 1 == items[j].start) {
+ items[i].end = items[j].end;
+ need_shift = true;
+ } else if (items[i].start < items[j].start && items[i].end > items[j].end) {
+ need_shift = true;
+ } else if ( items[i].start <= items[j].start
+ && items[i].end > items[j].start
+ && items[j].end >= items[i].end) {
+ items[i].end = items[j].end;
+ need_shift = true;
+ }
+ if (need_shift) {
for (int k = j + 1; k < set->nelems; j++, k++)
items[j] = items[k];
set->nelems--;
i--; // compensate for loop, continue checking at current position
j = i + 1;
- } else if (items[i].item_type != items[j].item_type) {
- continue;
- } else if (items[i].item_type == RANGE_ITEM) {
- bool need_shift = false;
- if (items[i].start == items[j].start && items[i].end == items[j].end) {
- need_shift = true;
- } else if (items[i].end + 1 == items[j].start) {
- items[i].end = items[j].end;
- need_shift = true;
- } else if (items[i].start < items[j].start && items[i].end > items[j].end) {
- need_shift = true;
- } else if ( items[i].start <= items[j].start
- && items[i].end > items[j].start
- && items[j].end >= items[i].end) {
- items[i].end = items[j].end;
- need_shift = true;
- }
- if (need_shift) {
- for (int k = j + 1; k < set->nelems; j++, k++)
- items[j] = items[k];
-
- set->nelems--;
- i--; // compensate for loop, continue checking at current position
- j = i + 1;
- }
- // otherwise, just continue around the loop
+ }
+ // otherwise, just continue around the loop
+ }
+ set->nelems8bit = set->nelems;
+ for (int i = 0; i < set->nelems; i++) {
+ if (set->items[i].start >= 128 && i > 0) {
+ set->nelems8bit = i; // this is a count, not an index
+ break;
}
}
set->finalized = true;
@@ -255,7 +3668,7 @@ finalize(charset_t *set)
/* charset_create --- make a new charset_t and initialize it */
charset_t *
-charset_create(int *errcode)
+charset_create(int *errcode, int mb_cur_max, bool is_utf8)
{
if (errcode == NULL)
return NULL;
@@ -267,6 +3680,8 @@ charset_create(int *errcode)
}
memset(set, 0, sizeof(charset_t));
+ set->mb_cur_max = mb_cur_max;
+ set->is_utf8 = is_utf8;
*errcode = CSET_SUCCESS;
return set;
@@ -341,25 +3756,71 @@ charset_add_range(charset_t *set, int32_t first, int32_t last)
}
set_item new_item;
- new_item.item_type = RANGE_ITEM;
new_item.start = first;
new_item.end = last;
set->items[set->nelems++] = new_item;
return CSET_SUCCESS;
}
-/* charset_invert --- mark charset to return success if requested character not found */
+/* charset_invert --- invert the ranges in set and return a new set */
-int
-charset_invert(charset_t *set)
+charset_t *
+charset_invert(charset_t *set, int *errcode)
{
- if (set == NULL)
- return CSET_EBADPTR;
- if (set->finalized)
- return CSET_EFROZEN;
+ int ret = CSET_SUCCESS;
- set->complemented = true;
- return CSET_SUCCESS;
+ if (errcode == NULL)
+ return NULL;
+
+ if (set == NULL) {
+ *errcode = CSET_EBADPTR;
+ return NULL;
+ }
+
+ if (! set->finalized) {
+ charset_finalize(set);
+
+ if (! set->finalized) {
+ *errcode = CSET_ESPACE; // make a guess
+ return NULL;
+ }
+ }
+
+ charset_t *newset = charset_create(errcode, set->mb_cur_max, set->is_utf8);
+ if (newset == NULL)
+ return NULL;
+
+ newset->no_newlines = set->no_newlines;
+
+ if (set->nelems == 0) { // was empty
+ ret = charset_add_range(newset, 0, MAX_CODE_POINT);
+ if (ret == CSET_SUCCESS)
+ goto done;
+ else
+ goto fail;
+ }
+
+ int low = 0;
+
+ for (int i = 0; i < set->nelems; i++) {
+ if (low < set->items[i].start) {
+ if ((ret = charset_add_range(newset, low, set->items[i].start - 1)) != CSET_SUCCESS)
+ goto fail;
+ }
+ low = set->items[i].end + 1;
+ }
+ if (low <= MAX_CODE_POINT) {
+ if ((ret = charset_add_range(newset, low, MAX_CODE_POINT)) != CSET_SUCCESS)
+ goto fail;
+ }
+
+done:
+ *errcode = CSET_SUCCESS;
+ return newset;
+fail:
+ *errcode = ret;
+ charset_free(newset);
+ return NULL;
}
/* charset_set_no_newline --- set the value of the "no newlines" flag */
@@ -373,52 +3834,6 @@ int charset_set_no_newlines(charset_t *set, bool no_newlines)
set->no_newlines = no_newlines;
return CSET_SUCCESS;
}
-/* charset_add_cclass --- add a character class, like "alnum" */
-
-int
-charset_add_cclass(charset_t *set, const char *cclass)
-{
- if (set == NULL)
- return CSET_EBADPTR;
- if (set->finalized)
- return CSET_EFROZEN;
-
- if (set->items == NULL) {
- set->items = (set_item *) malloc(sizeof(set_item) * INITIAL_ALLOCATION);
- if (set->items == NULL)
- return CSET_ESPACE;
-
- set->allocated = INITIAL_ALLOCATION;
- set->nelems = 0;
- } else if (set->nelems + 1 >= set->allocated) {
- int new_amount = set->allocated * 2;
- set_item *new_data = (set_item *) realloc(set->items, new_amount * sizeof(set_item));
-
- if (new_data == NULL)
- return CSET_ESPACE;
-
- memset(new_data + set->allocated, 0, set->allocated * sizeof(set_item));
- set->allocated = new_amount;
- set->items = new_data;
- }
-
- wctype_t the_type = wctype(cclass);
- if (the_type == 0) // not a known class name
- return CSET_ECTYPE;
-
- const char *class_name = strdup(cclass);
- if (class_name == NULL)
- return CSET_ESPACE;
-
- set_item new_item;
- new_item.item_type = CTYPE_ITEM;
-
- new_item.wtype = the_type;
- new_item.type_name = class_name;
- set->items[set->nelems++] = new_item;
-
- return CSET_SUCCESS;
-}
/* charset_add_equiv --- add an equivalence class */
int
@@ -468,6 +3883,175 @@ charset_add_collate(charset_t *set, const int32_t *collate)
return charset_add_char(set, collate[0]);
}
+/* charset_add_cclass --- add a character class, like "alnum" */
+
+int
+charset_add_cclass(charset_t *set, const char *cclass)
+{
+ if (set == NULL)
+ return CSET_EBADPTR;
+ if (set->finalized)
+ return CSET_EFROZEN;
+
+ int index = find_class(cclass);
+
+ if (index == -1) {
+ if (set->mb_cur_max == 1)
+ return CSET_ECTYPE;
+ else {
+ // maybe it's locale-specific
+ wctype_t ctype = wctype(cclass); // look it up
+ if (ctype == 0) // it's invalid
+ return CSET_ECTYPE;
+
+ // this saves the locale + cclass info for possible reuse
+ return wide_char_range_loop(set, cclass, ctype);
+ }
+ }
+
+ // we have a standard cclass
+ if (set->mb_cur_max == 1) {
+ int (*charcheckfunc)(int c) = class_data[index].charcheckfunc;
+ for (int32_t i = 0; i < 256; i++) {
+ if (charcheckfunc(i)) {
+ int ret = charset_add_char(set, i);
+ if (ret != CSET_SUCCESS)
+ return ret;
+ }
+ }
+ } else if (set->is_utf8) {
+ const set_item *data[2];
+
+ data[0] = class_data[index].data[0];
+ data[1] = class_data[index].data[1];
+
+ for (int i = 0; i < 2; i++) {
+ if (data[i] == NULL)
+ break;
+ for (int j = 0; data[i][j].start != -1; j++) {
+ int ret = charset_add_range(set, data[i][j].start, data[i][j].end);
+ if (ret != CSET_SUCCESS)
+ return ret;
+ }
+ }
+ } else {
+ wctype_t ctype = class_data[index].wctype;
+ if (ctype == 0) { // haven't checked it yet
+ ctype = wctype(cclass);
+
+ if (ctype == 0) // bad class, should not happen for standard classes
+ return CSET_ECTYPE;
+ }
+
+ // all ok..
+ class_data[index].wctype = ctype; // save for next time
+ // this saves the locale + cclass info for possible reuse
+ return wide_char_range_loop(set, cclass, ctype);
+ }
+
+ return CSET_SUCCESS;
+}
+/* charset_copy --- create a new charset that is copy of the original */
+
+charset_t *
+charset_copy(charset_t *set, int *errcode)
+{
+ if (errcode == NULL)
+ return NULL;
+
+ if (set == NULL) {
+ *errcode = CSET_EBADPTR;
+ return NULL;
+ }
+
+ charset_t *newset = charset_create(errcode, set->mb_cur_max, set->is_utf8);
+ if (newset == NULL)
+ return NULL;
+
+ *newset = *set;
+ if (newset->nchars_allocated > 0) {
+ newset->chars = (int32_t *) malloc(newset->nchars_allocated * sizeof(int32_t));
+ if (newset->chars == NULL) {
+ *errcode = CSET_ESPACE;
+ free((void *) newset);
+ return NULL;
+ } else
+ memcpy(newset->chars, set->chars, newset->nchars_allocated * sizeof(int32_t));
+ }
+
+ if (newset->allocated > 0) {
+ newset->items = (set_item *) malloc(newset->allocated * sizeof(set_item));
+ if (newset->items == NULL) {
+ *errcode = CSET_ESPACE;
+ free((void *) newset->chars);
+ free((void *) newset);
+ return NULL;
+ } else
+ memcpy(newset->items, set->items, newset->allocated * sizeof(set_item));
+ }
+
+ *errcode = CSET_SUCCESS;
+ return newset;
+}
+int
+charset_merge(charset_t *dest, charset_t *src)
+{
+ charset_t *set = dest;
+ if (set == NULL)
+ return CSET_EBADPTR;
+ if (src == NULL)
+ return CSET_EBADPTR;
+ if (set->finalized)
+ return CSET_EFROZEN;
+
+ if (src->nelems == 0 && src->nchars_inuse == 0)
+ return CSET_SUCCESS; // nothing to do
+
+ int new_char_count = set->nchars_inuse + src->nchars_inuse + 1;
+
+ int32_t *new_chars = NULL;
+ if (new_char_count > 0) {
+ new_chars = (int32_t *) malloc(new_char_count * sizeof(int32_t));
+ if (new_chars == NULL)
+ return CSET_ESPACE;
+
+ // allocated the space ok, now copy all the stuff in
+ if (set->nchars_inuse > 0)
+ memcpy(new_chars, set->chars, set->nchars_inuse * sizeof(int32_t));
+
+ memcpy(new_chars + set->nchars_inuse, src->chars, src->nchars_inuse * sizeof(int32_t));
+ new_chars[new_char_count-1] = L'\0';
+
+ // now update dest
+ if (set->chars != NULL)
+ free((void *) set->chars);
+ set->chars = new_chars;
+ set->nchars_inuse = new_char_count - 1;
+ set->nchars_allocated = new_char_count;
+ }
+ int new_item_count = set->nelems + src->nelems;
+ // could only be copying characters when merging,
+ // so make sure there are items to copy.
+ if (new_item_count > 0) {
+ set_item *new_items = (set_item *) malloc(new_item_count * sizeof(set_item));
+ if (new_items == NULL)
+ return CSET_ESPACE;
+
+ // allocated the space ok, now copy all the stuff in
+ if (set->nelems > 0)
+ memcpy(new_items, set->items, set->nelems * sizeof(set_item));
+
+ memcpy(new_items + set->nelems, src->items, src->nelems * sizeof(set_item));
+
+ // now update dest
+ if (set->items != NULL)
+ free((void *) set->items);
+ set->items = new_items;
+ set->nelems = set->allocated = new_item_count;
+ }
+
+ return CSET_SUCCESS;
+}
/* charset_in_set --- see if a character is in the set */
bool
@@ -477,18 +4061,16 @@ charset_in_set(const charset_t *set, int32_t the_char)
return false;
if (! set->finalized) {
- finalize((charset_t *) set);
+ charset_finalize((charset_t *) set);
if (! set->finalized) // finalize() failed
return false;
}
- if (the_char == L'\n' && set->no_newlines && set->complemented)
+ if (the_char == L'\n' && set->no_newlines) // FIXME: is this still right?
return false;
bool found = is_found(set, the_char);
- if (set->complemented)
- found = ! found; // reverse sense of the match
return found;
}
@@ -501,15 +4083,8 @@ charset_free(const charset_t *set)
return CSET_EBADPTR;
// no need to check for finalized
- if (set->items != NULL) {
- for (int i = 0; i < set->nelems; i++) {
- if (set->items[i].item_type == CTYPE_ITEM)
- free((void *) set->items[i].type_name);
- else
- break;
- }
+ if (set->items != NULL)
free((void *) set->items);
- }
if (set->chars != NULL)
free((void *) set->chars);
@@ -518,10 +4093,61 @@ charset_free(const charset_t *set)
return CSET_SUCCESS;
}
+/* charset_firstbytes --- return the set of prefix bytes for the range */
+
+charset_firstbytes_t
+charset_firstbytes(charset_t *set, int *errcode)
+{
+ charset_firstbytes_t result;
+ memset(& result, 0, sizeof(result));
+
+ if (errcode == NULL)
+ goto done;
+
+ if (set == NULL) {
+ *errcode = CSET_EBADPTR;
+ goto done;
+ }
+
+ if (! set->finalized) {
+ charset_finalize(set);
+
+ if (! set->finalized) {
+ *errcode = CSET_ESPACE; // guess...
+ goto done;
+ }
+ }
+#define min(x, y) ((x) < (y) ? (x) : (y))
+
+ if (set->mb_cur_max == 1) {
+ for (int i = 0; i < set->nelems; i++) {
+ if (set->items[i].start > 255)
+ break;
+ uint32_t low = set->items[i].start;
+ uint32_t high = min(set->items[i].end, 255);
+
+ for (int b = low; b <= high; b++)
+ result.bytes[b] = true;
+ }
+ } else if (set->is_utf8) {
+ for (int i = 0; i < set->nelems; i++) {
+ uint32_t low = utfprefix(set->items[i].start);
+ uint32_t high = utfprefix(set->items[i].end);
+
+ for (int b = low; b <= high; b++)
+ result.bytes[b] = true;
+ }
+ }
+#undef min
+ *errcode = CSET_SUCCESS;
+
+done:
+ return result;
+}
/* charset_dump --- dump out the data structures */
void
-charset_dump(const charset_t *set, FILE *fp)
+charset_dump(const charset_t *set, FILE *fp, bool use_c_format)
{
static const char *boolval[] = {
"false",
@@ -531,19 +4157,24 @@ charset_dump(const charset_t *set, FILE *fp)
if (set == NULL || fp == NULL)
return;
- fprintf(fp, "complemented = %s\n", boolval[!! set->complemented]);
- fprintf(fp, "no_newlines = %s\n", boolval[!! set->no_newlines]);
- fprintf(fp, "finalized = %s\n", boolval[!! set->finalized]);
-
set_item *items = set->items;
- for (int i = 0; i < set->nelems; i++) {
- if (items[i].item_type == CTYPE_ITEM) {
- fprintf(fp, "%3d. CTYPE: [:%s:]\n", i, items[i].type_name);
- continue;
+ if (use_c_format) {
+ for (int i = 0; i < set->nelems; i++) {
+ fprintf(fp, "\t{ %d, %d },\n", items[i].start, items[i].end);
+ }
+ } else {
+ fprintf(fp, "no_newlines = %s\n", boolval[!! set->no_newlines]);
+ fprintf(fp, "finalized = %s\n", boolval[!! set->finalized]);
+ fprintf(fp, "is_utf8 = %s\n", boolval[!! set->is_utf8]);
+ fprintf(fp, "mb_cur_max = %d\n", set->mb_cur_max);
+ fprintf(fp, "nchars_inuse = %zd\n", set->nchars_inuse);
+ fprintf(fp, "nelems = %zd\n", set->nelems);
+ fprintf(fp, "nelems8bit = %zd\n", set->nelems8bit);
+
+ for (int i = 0; i < set->nelems; i++) {
+ fprintf(fp, "%3d. RANGE: start = L'%lc' (%d), end = L'%lc' (%d)\n",
+ i, items[i].start, items[i].start, items[i].end, items[i].end);
}
- assert(items[i].item_type == RANGE_ITEM);
- fprintf(fp, "%3d. RANGE: start = L'%lc', end = L'%lc'\n",
- i, items[i].start, items[i].end);
}
fflush(fp);
}
diff --git a/support/charset.h b/support/charset.h
index 34d78d47..fa3533c1 100644
--- a/support/charset.h
+++ b/support/charset.h
@@ -2,7 +2,7 @@
#define CHARSET_H 1
/*
- * Copyright (C) 2023, 2024, Arnold David Robbins.
+ * Copyright (C) 2023, 2024, 2025, Arnold David Robbins.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -48,17 +48,26 @@ enum {
CSET_ESPACE, // Corresponds to REG_ESPACE
CSET_ERANGE, // Corresponds to REG_ERANGE
};
-charset_t *charset_create(int *errcode);
+charset_t *charset_create(int *errcode, int mb_cur_max, bool is_utf8);
int charset_add_char(charset_t *set, int32_t wc);
int charset_add_range(charset_t *set, int32_t first, int32_t last);
-int charset_invert(charset_t *set);
+charset_t *charset_invert(charset_t *set, int *errcode);
int charset_set_no_newlines(charset_t *set, bool no_newlines);
-int charset_add_cclass(charset_t *set, const char *cclass);
int charset_add_equiv(charset_t *set, int32_t equiv);
int charset_add_collate(charset_t *set, const int32_t *collate);
+int charset_add_cclass(charset_t *set, const char *cclass);
+charset_t *charset_copy(charset_t *set, int *errcode);
+int charset_merge(charset_t *dest, charset_t *src);
bool charset_in_set(const charset_t *set, int32_t the_char);
int charset_free(const charset_t *set);
-void charset_dump(const charset_t *set, FILE *fp);
+#define MAX_FIRSTBYTES 256
+typedef struct {
+ bool bytes[MAX_FIRSTBYTES];
+} charset_firstbytes_t;
+
+charset_firstbytes_t charset_firstbytes(charset_t *set, int *errcode);
+void charset_dump(const charset_t *set, FILE *fp, bool use_c_format);
+void charset_finalize(charset_t *set);
#ifdef __cplusplus
}
diff --git a/support/minrx.cpp b/support/minrx.cpp
index af54e9ea..0d8b3826 100644
--- a/support/minrx.cpp
+++ b/support/minrx.cpp
@@ -46,6 +46,7 @@
#include <string>
#include <tuple>
#include <vector>
+#define CHARSET 1
#ifdef CHARSET
#include <memory>
#include "charset.h"
@@ -390,9 +391,9 @@ WConv &(WConv::*const WConv::nextfns[3])() = { &WConv::nextbyte, &WConv::nextmbt
struct CSet {
#ifdef CHARSET
charset_t *charset = nullptr;
- CSet() {
+ CSet(WConv::Encoding enc) {
int errcode = 0;
- charset = charset_create(& errcode);
+ charset = charset_create(& errcode, MB_CUR_MAX, enc == WConv::Encoding::UTF8);
// FIXME: Throw error if charset == nullptr
}
CSet(const CSet &) = delete;
@@ -400,6 +401,10 @@ struct CSet {
CSet(CSet &&cs): charset(cs.charset) { cs.charset = nullptr; }
CSet &operator=(CSet &&cs) { charset = cs.charset; cs.charset = nullptr; return *this; }
~CSet() { if (charset) { charset_free(charset); charset = nullptr; } }
+ CSet &operator|=(const CSet &cs) {
+ charset_merge(charset, cs.charset);
+ return *this;
+ }
#else
static std::map<std::string, CSet> cclmemo;
static std::mutex cclmutex;
@@ -416,10 +421,14 @@ struct CSet {
set(e.min, e.max);
return *this;
}
+ CSet(WConv::Encoding) { }
#endif
CSet &invert() {
#ifdef CHARSET
- charset_invert(charset); // FIXME: no error checking
+ int errcode = 0;
+ charset_t *newset = charset_invert(charset, &errcode); // FIXME: no error checking
+ charset_free(charset);
+ charset = newset;
#else
std::set<Range> nranges;
WChar lo = 0;
@@ -692,6 +701,14 @@ struct CSet {
};
switch (e) {
case WConv::Encoding::Byte:
+#ifdef CHARSET
+ {
+ int errcode = 0;
+ charset_firstbytes_t bytes = charset_firstbytes(charset, &errcode);
+ for (int i = 0; i < MAX_FIRSTBYTES; i++)
+ fb[i] = bytes.bytes[i];
+ }
+#else
for (const auto &r : ranges) {
if (r.min > 255)
break;
@@ -699,13 +716,23 @@ struct CSet {
for (auto b = lo; b <= hi; b++)
fb[b] = true;
}
+#endif
return {fb, firstunique(fb)};
case WConv::Encoding::UTF8:
+#ifdef CHARSET
+ {
+ int errcode = 0;
+ charset_firstbytes_t bytes = charset_firstbytes(charset, &errcode);
+ for (int i = 0; i < MAX_FIRSTBYTES; i++)
+ fb[i] = bytes.bytes[i];
+ }
+#else
for (const auto &r : ranges) {
auto lo = utfprefix(r.min), hi = utfprefix(r.max);
for (auto b = lo; b <= hi; b++)
fb[b] = true;
}
+#endif
return {fb, firstunique(fb)};
default:
return {{}, {}};
@@ -990,7 +1017,7 @@ struct Compile {
auto key = std::min(wc, std::min(wcl, wcu));
if (icmap.find(key) == icmap.end()) {
icmap.emplace(key, csets.size());
- csets.emplace_back();
+ csets.emplace_back(enc);
csets.back().set(wc);
csets.back().set(wcl);
csets.back().set(wcu);
@@ -1015,13 +1042,13 @@ struct Compile {
case L'[':
lhmaxstk = nstk;
lhs.push_back({Node::CSet, {csets.size(), 0}, nstk});
- if (auto err = csets.emplace_back().parse(flags, enc, wconv))
+ if (auto err = csets.emplace_back(enc).parse(flags, enc, wconv))
return {{}, 0, err};
break;
case L'.':
if (!dot.has_value()) {
dot = csets.size();
- csets.emplace_back();
+ csets.emplace_back(enc);
if ((flags & MINRX_REG_NEWLINE) != 0)
csets.back().set(L'\n');
csets.back().invert();
@@ -1080,7 +1107,7 @@ struct Compile {
if (!esc_s.has_value()) {
esc_s = csets.size();
WConv wc(enc, "[[:space:]]");
- csets.emplace_back().parse(flags, enc, wc.nextchr());
+ csets.emplace_back(enc).parse(flags, enc, wc.nextchr());
}
lhs.push_back({Node::CSet, {*esc_s, 0}, nstk});
break;
@@ -1090,7 +1117,7 @@ struct Compile {
if (!esc_S.has_value()) {
esc_S = csets.size();
WConv wc(enc, "[^[:space:]]");
- csets.emplace_back().parse(flags, enc, wc.nextchr());
+ csets.emplace_back(enc).parse(flags, enc, wc.nextchr());
}
lhs.push_back({Node::CSet, {*esc_S, 0}, nstk});
break;
@@ -1100,7 +1127,7 @@ struct Compile {
if (!esc_w.has_value()) {
esc_w = csets.size();
WConv wc(enc, "[[:alnum:]_]");
- csets.emplace_back().parse(flags, enc, wc.nextchr());
+ csets.emplace_back(enc).parse(flags, enc, wc.nextchr());
}
lhs.push_back({Node::CSet, {*esc_w, 0}, nstk});
break;
@@ -1110,7 +1137,7 @@ struct Compile {
if (!esc_W.has_value()) {
esc_W = csets.size();
WConv wc(enc, "[^[:alnum:]_]");
- csets.emplace_back().parse(flags, enc, wc.nextchr());
+ csets.emplace_back(enc).parse(flags, enc, wc.nextchr());
}
lhs.push_back({Node::CSet, {*esc_W, 0}, nstk});
break;
@@ -1185,7 +1212,7 @@ struct Compile {
break;
}
} while (!epsq.empty());
- CSet cs;
+ CSet cs(enc);
while (!firsts.empty()) {
auto k = firsts.remove();
auto t = nodes[k].type;
-----------------------------------------------------------------------
Summary of changes:
support/ChangeLog | 6 +
support/charset.c | 3973 ++++++++++++++++++++++++++++++++++++++++++++++++++---
support/charset.h | 19 +-
support/minrx.cpp | 49 +-
4 files changed, 3860 insertions(+), 187 deletions(-)
hooks/post-receive
--
gawk
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [SCM] gawk branch, feature/minrx, updated. gawk-4.1.0-5956-g1847a420,
Arnold Robbins <=