diff --git a/libguile/strorder.c b/libguile/strorder.c index a51ce17..6df8343 100644 --- a/libguile/strorder.c +++ b/libguile/strorder.c @@ -21,6 +21,8 @@ # include #endif +#include + #include "libguile/_scm.h" #include "libguile/chars.h" #include "libguile/strings.h" @@ -42,6 +44,164 @@ srfi13_cmp (SCM s1, SCM s2, SCM (*cmp) (SCM, SCM, SCM, SCM, SCM, SCM)) return SCM_BOOL_F; } +#define SHARP_S (0xdf) +#define MICRO_SIGN (0xb5) +#define MU (0x3bc) +/* This function compares does a comparison of the case-folded + versions of S1 and S2. It returns -1 if S1 < S2, 0 if they are equal + or 1 if S1 > S2. */ +static int +compare_folded_strings (SCM s1, SCM s2) +{ + if (!scm_is_string (s1)) + scm_wrong_type_arg (__func__, 0, s1); + if (!scm_is_string (s2)) + scm_wrong_type_arg (__func__, 1, s2); + if (scm_i_is_narrow_string (s1) && scm_i_is_narrow_string (s2)) + { + size_t cindex1 = 0, cindex2 = 0; + const size_t cend1 = scm_i_string_length (s1); + const size_t cend2 = scm_i_string_length (s2); + ucs4_t a, b; + int ss1 = 0, ss2 = 0; + + /* For narrow strings, folding equals downcasing except for sharp + s which becomes 'ss' and the micro sign which becomes Greek + mu. */ + while (cindex1 < cend1 && cindex2 < cend2) + { + if (ss1) + a = (ucs4_t) 's'; + else + { + a = uc_tolower ((unsigned char) (scm_i_string_chars (s1))[cindex1]); + if (a == SHARP_S) + { + a = (ucs4_t) 's'; + ss1 = 2; + } + if (a == MICRO_SIGN) + a = MU; + } + if (ss2) + b = (ucs4_t) 's'; + else + { + b = uc_tolower ((unsigned char) (scm_i_string_chars (s2))[cindex2]); + if (b == SHARP_S) + { + b = 's'; + ss2 = 2; + } + if (b == MICRO_SIGN) + b = MU; + } + if (a < b) + return -1; + else if (a > b) + return 1; + if (ss1) + ss1 --; + if (!ss1) + cindex1 ++; + if (ss2) + ss2 --; + if (!ss2) + cindex2 ++; + } + if (cindex1 < cend1) + return 1; + else if (cindex2 < cend2) + return -1; + + return 0; + } + else if (!scm_i_is_narrow_string (s1) && !scm_i_is_narrow_string (s2)) + { + int ret, result; + + ret = u32_casecmp ((const uint32_t *) scm_i_string_wide_chars (s1), + scm_i_string_length (s1), + (const uint32_t *) scm_i_string_wide_chars (s2), + scm_i_string_length (s2), + NULL, NULL, &result); + if (ret != 0) + scm_encoding_error (__func__, errno, + "cannot do case-folded comparison", + SCM_BOOL_F, + /* FIXME: Faulty character unknown. */ + SCM_BOOL_F); + return result; + } + else + { + int swap = 1, ss1 = 0; + uint32_t *str2 = NULL; + size_t cindex1 = 0, cindex2 = 0; + const size_t cend1 = scm_i_string_length (s1); + size_t cend2; + ucs4_t a, b; + + /* Swap so that s1 is narrow and s2 is wide. */ + if (scm_i_is_narrow_string (s2)) + { + SCM s3; + s3 = s1; + s1 = s2; + s2 = s3; + swap = -1; + } + str2 = u32_casefold ((const uint32_t *) scm_i_string_wide_chars (s2), + scm_i_string_length (s2), + NULL, NULL, NULL, &cend2); + if (str2 == NULL) + scm_memory_error (__func__); + + while (cindex1 < cend1 && cindex2 < cend2) + { + if (ss1) + a = (ucs4_t) 's'; + else + { + a = uc_tolower ((unsigned char) scm_i_string_chars (s1)[cindex1]); + if (a == SHARP_S) + { + a = (ucs4_t) 's'; + ss1 = 2; + } + if (a == MICRO_SIGN) + a = MU; + } + b = str2[cindex2]; + if (a < b) + { + free (str2); + return -1 * swap; + } + else if (a > b) + { + free (str2); + return 1 * swap; + } + if (ss1) + ss1 --; + if (!ss1) + cindex1 ++; + cindex2 ++; + } + free (str2); + if (cindex1 < cend1) + return -1 * swap; + else if (cindex2 > cend2) + return 1 * swap; + + return 0; + } + + return 0; +} + + static SCM scm_i_string_equal_p (SCM s1, SCM s2, SCM rest); SCM_DEFINE (scm_i_string_equal_p, "string=?", 0, 2, 1, (SCM s1, SCM s2, SCM rest), @@ -80,8 +240,8 @@ static SCM scm_i_string_ci_equal_p (SCM s1, SCM s2, SCM rest); SCM_DEFINE (scm_i_string_ci_equal_p, "string-ci=?", 0, 2, 1, (SCM s1, SCM s2, SCM rest), "Case-insensitive string equality predicate; return @code{#t} if\n" - "the two strings are the same length and their component\n" - "characters match (ignoring case) at each position; otherwise\n" + "case-folded versions of the the two strings are the same length\n" + "and their component characters match at each position; otherwise\n" "return @code{#f}.") #define FUNC_NAME s_scm_i_string_ci_equal_p { @@ -89,13 +249,13 @@ SCM_DEFINE (scm_i_string_ci_equal_p, "string-ci=?", 0, 2, 1, return SCM_BOOL_T; while (!scm_is_null (rest)) { - if (scm_is_false (srfi13_cmp (s1, s2, scm_string_ci_eq))) + if (0 != compare_folded_strings (s1, s2)) return SCM_BOOL_F; s1 = s2; s2 = scm_car (rest); rest = scm_cdr (rest); } - return srfi13_cmp (s1, s2, scm_string_ci_eq); + return scm_from_bool (0 == compare_folded_strings (s1, s2)); } #undef FUNC_NAME @@ -218,6 +378,7 @@ SCM scm_string_geq_p (SCM s1, SCM s2) } #undef FUNC_NAME + static SCM scm_i_string_ci_less_p (SCM s1, SCM s2, SCM rest); SCM_DEFINE (scm_i_string_ci_less_p, "string-ci?", 0, 2, 1, return SCM_BOOL_T; while (!scm_is_null (rest)) { - if (scm_is_false (srfi13_cmp (s1, s2, scm_string_ci_gt))) + if (1 != compare_folded_strings (s1, s2)) return SCM_BOOL_F; s1 = s2; s2 = scm_car (rest); rest = scm_cdr (rest); } - return srfi13_cmp (s1, s2, scm_string_ci_gt); + return scm_from_bool (1 == compare_folded_strings (s1, s2)); } #undef FUNC_NAME @@ -317,20 +478,20 @@ SCM_DEFINE (scm_i_string_ci_geq_p, "string-ci>=?", 0, 2, 1, return SCM_BOOL_T; while (!scm_is_null (rest)) { - if (scm_is_false (srfi13_cmp (s1, s2, scm_string_ci_ge))) + if (-1 == compare_folded_strings (s1, s2)) return SCM_BOOL_F; s1 = s2; s2 = scm_car (rest); rest = scm_cdr (rest); } - return srfi13_cmp (s1, s2, scm_string_ci_ge); + return scm_from_bool (-1 != compare_folded_strings (s1, s2)); } #undef FUNC_NAME SCM scm_string_ci_geq_p (SCM s1, SCM s2) #define FUNC_NAME s_scm_i_string_ci_geq_p { - return srfi13_cmp (s1, s2, scm_string_ci_ge); + return scm_from_bool (-1 != compare_folded_strings (s1, s2)); } #undef FUNC_NAME