From 93e98eb64e33d1a9d5e562fe61f9eb86a2a4de2e Mon Sep 17 00:00:00 2001
From: Bruno Haible
Date: Wed, 29 Mar 2023 00:22:17 +0200
Subject: [PATCH 1/7] string-desc: New module.

* lib/string-desc.h: New file.
* lib/string-desc.c: New file.
* lib/string-desc-contains.c: New file.
* modules/string-desc: New file.
---
 ChangeLog                  |   8 +
 lib/string-desc-contains.c |  44 +++++
 lib/string-desc.c          | 358 +++++++++++++++++++++++++++++++++++++
 lib/string-desc.h          | 229 ++++++++++++++++++++++++
 modules/string-desc        |  30 ++++
 5 files changed, 669 insertions(+)
 create mode 100644 lib/string-desc-contains.c
 create mode 100644 lib/string-desc.c
 create mode 100644 lib/string-desc.h
 create mode 100644 modules/string-desc

diff --git a/ChangeLog b/ChangeLog
index 0688e05bd8..2865fdea7e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2023-03-28  Bruno Haible
+
+	string-desc: New module.
+	* lib/string-desc.h: New file.
+	* lib/string-desc.c: New file.
+	* lib/string-desc-contains.c: New file.
+	* modules/string-desc: New file.
+
 2023-03-28  Bruno Haible
 
 	doc: Fix placement of memset_explicit node.
diff --git a/lib/string-desc-contains.c b/lib/string-desc-contains.c
new file mode 100644
index 0000000000..c02617629e
--- /dev/null
+++ b/lib/string-desc-contains.c
@@ -0,0 +1,44 @@
+/* String descriptors.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+
+   This file is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation, either version 3 of the
+   License, or (at your option) any later version.
+
+   This file is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.
*/
+
+/* Written by Bruno Haible, 2023.  */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+/* Specification.  */
+#include "string-desc.h"
+
+#include <string.h>
+
+
+/* Return the index of the first occurrence of NEEDLE in HAYSTACK, or -1.
+   This function is in a separate compilation unit, because not all users
+   of the 'string-desc' module need this function and it depends on 'memmem'
+   which — depending on platforms — costs up to 2 KB of binary code.  */
+ptrdiff_t
+string_desc_contains (string_desc_t haystack, string_desc_t needle)
+{
+  if (needle._nbytes == 0)
+    return 0;
+  /* An empty haystack may have a NULL data pointer; keep it out of memmem.  */
+  if (haystack._nbytes == 0)
+    return -1;
+  void *found =
+    memmem (haystack._data, haystack._nbytes, needle._data, needle._nbytes);
+  return (found != NULL ? (char *) found - haystack._data : -1);
+}
diff --git a/lib/string-desc.c b/lib/string-desc.c
new file mode 100644
index 0000000000..2747612bbc
--- /dev/null
+++ b/lib/string-desc.c
@@ -0,0 +1,412 @@
+/* String descriptors.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+
+   This file is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation, either version 3 of the
+   License, or (at your option) any later version.
+
+   This file is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible, 2023.  */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#define GL_STRING_DESC_INLINE _GL_EXTERN_INLINE
+
+/* Specification and inline definitions.  */
+#include "string-desc.h"
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "ialloc.h"
+#include "full-write.h"
+
+
+/* ==== Side-effect-free operations on string descriptors ==== */
+
+/* Return true if A and B have the same length and the same bytes.
+   memcmp is skipped for empty strings, whose data pointer may be NULL.  */
+bool
+string_desc_equals (string_desc_t a, string_desc_t b)
+{
+  return (a._nbytes == b._nbytes
+          && (a._nbytes == 0 || memcmp (a._data, b._data, a._nbytes) == 0));
+}
+
+/* Return true if S starts with PREFIX.
+   An empty PREFIX matches every S.  */
+bool
+string_desc_startswith (string_desc_t s, string_desc_t prefix)
+{
+  return (s._nbytes >= prefix._nbytes
+          && (prefix._nbytes == 0
+              || memcmp (s._data, prefix._data, prefix._nbytes) == 0));
+}
+
+/* Return true if S ends with SUFFIX.
+   An empty SUFFIX matches every S.  */
+bool
+string_desc_endswith (string_desc_t s, string_desc_t suffix)
+{
+  return (s._nbytes >= suffix._nbytes
+          && (suffix._nbytes == 0
+              || memcmp (s._data + (s._nbytes - suffix._nbytes), suffix._data,
+                         suffix._nbytes) == 0));
+}
+
+/* Return > 0, == 0, or < 0 if A > B, A == B, A < B.
+   The common-length prefix is compared with memcmp; if it is equal, the
+   shorter string sorts first.  */
+int
+string_desc_cmp (string_desc_t a, string_desc_t b)
+{
+  if (a._nbytes > b._nbytes)
+    {
+      if (b._nbytes == 0)
+        return 1;
+      return (memcmp (a._data, b._data, b._nbytes) < 0 ? -1 : 1);
+    }
+  else if (a._nbytes < b._nbytes)
+    {
+      if (a._nbytes == 0)
+        return -1;
+      return (memcmp (a._data, b._data, a._nbytes) > 0 ? 1 : -1);
+    }
+  else /* a._nbytes == b._nbytes */
+    {
+      if (a._nbytes == 0)
+        return 0;
+      return memcmp (a._data, b._data, a._nbytes);
+    }
+}
+
+/* Return the index of the first occurrence of C in S,
+   or -1 if there is none.  */
+ptrdiff_t
+string_desc_index (string_desc_t s, char c)
+{
+  if (s._nbytes > 0)
+    {
+      void *found = memchr (s._data, (unsigned char) c, s._nbytes);
+      if (found != NULL)
+        return (char *) found - s._data;
+    }
+  return -1;
+}
+
+/* Return the index of the last occurrence of C in S,
+   or -1 if there is none.  */
+ptrdiff_t
+string_desc_last_index (string_desc_t s, char c)
+{
+  if (s._nbytes > 0)
+    {
+      void *found = memrchr (s._data, (unsigned char) c, s._nbytes);
+      if (found != NULL)
+        return (char *) found - s._data;
+    }
+  return -1;
+}
+
+/* Return an empty string: length 0, NULL data pointer.  */
+string_desc_t
+string_desc_new_empty (void)
+{
+  string_desc_t result;
+
+  result._nbytes = 0;
+  result._data = NULL;
+
+  return result;
+}
+
+/* Return a string that represents the C string S, of length strlen (S).
+   The result points at the bytes of S; nothing is copied.  */
+string_desc_t
+string_desc_from_c (const char *s)
+{
+  string_desc_t result;
+
+  result._nbytes = strlen (s);
+  result._data = (char *) s;
+
+  return result;
+}
+
+/* Return the substring of S that starts at offset START and ends at
+   offset END.  Abort unless 0 <= START <= END <= length (S).
+   The result points into the bytes of S; nothing is copied.  */
+string_desc_t
+string_desc_substring (string_desc_t s, idx_t start, idx_t end)
+{
+  string_desc_t result;
+
+  if (!(start >= 0 && start <= end && end <= s._nbytes))
+    /* Invalid arguments.  */
+    abort ();
+
+  result._nbytes = end - start;
+  result._data = s._data + start;
+
+  return result;
+}
+
+/* Output S to the file descriptor FD.
+   Return 0 if successful, or -1 with errno set.  */
+int
+string_desc_write (int fd, string_desc_t s)
+{
+  if (s._nbytes > 0)
+    if (full_write (fd, s._data, s._nbytes) != s._nbytes)
+      /* errno is set here.  */
+      return -1;
+  return 0;
+}
+
+/* Output S to the FILE stream FP.
+   Return 0 if successful, or -1 if fwrite wrote fewer bytes.  */
+int
+string_desc_fwrite (FILE *fp, string_desc_t s)
+{
+  if (s._nbytes > 0)
+    if (fwrite (s._data, 1, s._nbytes, fp) != s._nbytes)
+      return -1;
+  return 0;
+}
+
+
+/* ==== Memory-allocating operations on string descriptors ==== */
+
+/* Store in *RESULTP a string of length N with uninitialized contents.
+   Abort if N < 0.
+   Return 0 if successful, or -1 with errno set if the allocation fails;
+   *RESULTP is then left unchanged.  */
+int
+string_desc_new (string_desc_t *resultp, idx_t n)
+{
+  string_desc_t result;
+
+  if (!(n >= 0))
+    /* Invalid argument.  */
+    abort ();
+
+  result._nbytes = n;
+  if (n == 0)
+    result._data = NULL;
+  else
+    {
+      result._data = (char *) imalloc (n);
+      if (result._data == NULL)
+        /* errno is set here.  */
+        return -1;
+    }
+
+  *resultp = result;
+  return 0;
+}
+
+/* Return a string of length N whose bytes are at ADDR.
+   When N is 0, ADDR is ignored and the data pointer is NULL.  */
+string_desc_t
+string_desc_new_addr (idx_t n, char *addr)
+{
+  string_desc_t result;
+
+  result._nbytes = n;
+  if (n == 0)
+    result._data = NULL;
+  else
+    result._data = addr;
+
+  return result;
+}
+
+/* Store in *RESULTP a newly allocated string of length N filled with C.
+   Return 0 if successful, or -1 with errno set if the allocation fails;
+   *RESULTP is then left unchanged.  */
+int
+string_desc_new_filled (string_desc_t *resultp, idx_t n, char c)
+{
+  string_desc_t result;
+
+  result._nbytes = n;
+  if (n == 0)
+    result._data = NULL;
+  else
+    {
+      result._data = (char *) imalloc (n);
+      if (result._data == NULL)
+        /* errno is set here.  */
+        return -1;
+      memset (result._data, (unsigned char) c, n);
+    }
+
+  *resultp = result;
+  return 0;
+}
+
+/* Store in *RESULTP a newly allocated copy of S.
+   Return 0 if successful, or -1 with errno set if the allocation fails;
+   *RESULTP is then left unchanged.  */
+int
+string_desc_copy (string_desc_t *resultp, string_desc_t s)
+{
+  string_desc_t result;
+  idx_t n = s._nbytes;
+
+  result._nbytes = n;
+  if (n == 0)
+    result._data = NULL;
+  else
+    {
+      result._data = (char *) imalloc (n);
+      if (result._data == NULL)
+        /* errno is set here.  */
+        return -1;
+      memcpy (result._data, s._data, n);
+    }
+
+  *resultp = result;
+  return 0;
+}
+
+/* Store in *RESULTP the concatenation of N strings: STRING1 followed by
+   N - 1 further arguments of type string_desc_t.  Abort if N <= 0.
+   The argument list is traversed twice: first to add up the lengths,
+   then to copy the bytes.
+   Return 0 if successful, or -1 with errno set if the allocation fails;
+   *RESULTP is then left unchanged.  */
+int
+string_desc_concat (string_desc_t *resultp, idx_t n, string_desc_t string1, ...)
+{
+  if (n <= 0)
+    /* Invalid argument.  */
+    abort ();
+
+  idx_t total = 0;
+  total += string1._nbytes;
+  if (n > 1)
+    {
+      va_list other_strings;
+      idx_t i;
+
+      va_start (other_strings, string1);
+      for (i = n - 1; i > 0; i--)
+        {
+          string_desc_t arg = va_arg (other_strings, string_desc_t);
+          total += arg._nbytes;
+        }
+      va_end (other_strings);
+    }
+
+  char *combined = (char *) imalloc (total);
+  if (combined == NULL)
+    /* errno is set here.  */
+    return -1;
+  idx_t pos = 0;
+  /* An empty string may have a NULL data pointer, which must not be
+     passed to memcpy.  */
+  if (string1._nbytes > 0)
+    memcpy (combined, string1._data, string1._nbytes);
+  pos += string1._nbytes;
+  if (n > 1)
+    {
+      va_list other_strings;
+      idx_t i;
+
+      va_start (other_strings, string1);
+      for (i = n - 1; i > 0; i--)
+        {
+          string_desc_t arg = va_arg (other_strings, string_desc_t);
+          if (arg._nbytes > 0)
+            memcpy (combined + pos, arg._data, arg._nbytes);
+          pos += arg._nbytes;
+        }
+      va_end (other_strings);
+    }
+
+  string_desc_t result;
+  result._nbytes = total;
+  result._data = combined;
+
+  *resultp = result;
+  return 0;
+}
+
+/* Return a newly allocated NUL-terminated copy of S.
+   Return NULL with errno set if the allocation fails.  */
+char *
+string_desc_c (string_desc_t s)
+{
+  idx_t n = s._nbytes;
+  char *result = (char *) imalloc (n + 1);
+  if (result == NULL)
+    /* errno is set here.  */
+    return NULL;
+  if (n > 0)
+    memcpy (result, s._data, n);
+  result[n] = '\0';
+
+  return result;
+}
+
+
+/* ==== Operations with side effects on string descriptors ==== */
+
+/* Overwrite the byte at index I of string S with C.
+   Abort unless 0 <= I < length (S).  */
+void
+string_desc_set_char_at (string_desc_t s, idx_t i, char c)
+{
+  if (!(i >= 0 && i < s._nbytes))
+    /* Invalid argument.  */
+    abort ();
+  s._data[i] = c;
+}
+
+/* Fill the bytes of S from offset START up to, but not including,
+   offset END with copies of C.
+   Abort unless 0 <= START <= END <= length (S).  */
+void
+string_desc_fill (string_desc_t s, idx_t start, idx_t end, char c)
+{
+  if (!(start >= 0 && start <= end && end <= s._nbytes))
+    /* Invalid arguments.  */
+    abort ();
+
+  if (start < end)
+    memset (s._data + start, (unsigned char) c, end - start);
+}
+
+/* Overwrite part of S with T, starting at offset START.
+   Abort unless START >= 0 and START + length (T) <= length (S).  */
+void
+string_desc_overwrite (string_desc_t s, idx_t start, string_desc_t t)
+{
+  /* Compare against the difference, so that START + length (T) cannot
+     overflow.  */
+  if (!(start >= 0 && t._nbytes <= s._nbytes - start))
+    /* Invalid arguments.  */
+    abort ();
+
+  if (t._nbytes > 0)
+    memcpy (s._data + start, t._data, t._nbytes);
+}
+
+/* Free the memory that the data pointer of S refers to.  */
+void
+string_desc_free (string_desc_t s)
+{
+  free (s._data);
+}
diff --git a/lib/string-desc.h b/lib/string-desc.h
new file mode 100644
index 0000000000..9bd086f689
--- /dev/null
+++ b/lib/string-desc.h
@@ -0,0 +1,229 @@
+/* String descriptors.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+
+   This file is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation, either version 3 of the
+   License, or (at your option) any later version.
+
+   This file is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible, 2023.  */
+
+#ifndef _STRING_DESC_H
+#define _STRING_DESC_H 1
+
+/* Get ptrdiff_t.  */
+#include <stddef.h>
+
+/* Get FILE.  */
+#include <stdio.h>
+
+/* Get abort(), free().  */
+#include <stdlib.h>
+
+/* Get idx_t.  */
+#include "idx.h"
+
+
+#ifndef _GL_INLINE_HEADER_BEGIN
+ #error "Please include config.h first."
+#endif
+_GL_INLINE_HEADER_BEGIN
+#ifndef GL_STRING_DESC_INLINE
+# define GL_STRING_DESC_INLINE _GL_INLINE
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Type describing a string that may contain NUL bytes.
+   It's merely a descriptor of an array of bytes.  */
+typedef struct string_desc_t string_desc_t;
+struct string_desc_t
+{
+  /* The fields of this struct should be considered private.  */
+  idx_t _nbytes;  /* Number of bytes.  */
+  char *_data;    /* The bytes; may be NULL when _nbytes is 0.  */
+};
+
+/* String descriptors can be passed and returned by value.
+
+   String descriptors and NUL-terminated 'const char *'/'char *' C strings
+   cannot be used interchangeably.
You will get compilation errors if you
+   attempt to assign a string descriptor to a C string or vice versa.  */
+
+
+/* ==== Side-effect-free operations on string descriptors ==== */
+
+/* Return the length of the string S, in bytes.  */
+#if 0 /* Defined inline below.  */
+extern idx_t string_desc_length (string_desc_t s);
+#endif
+
+/* Return the byte at index I of string S.
+   I must be >= 0 and < length(S); otherwise the program aborts.  */
+#if 0 /* Defined inline below.  */
+extern char string_desc_char_at (string_desc_t s, idx_t i);
+#endif
+
+/* Return a read-only view of the bytes of S.  */
+#if 0 /* Defined inline below.  */
+extern const char * string_desc_data (string_desc_t s);
+#endif
+
+/* Return true if S is the empty string.  */
+#if 0 /* Defined inline below.  */
+extern bool string_desc_is_empty (string_desc_t s);
+#endif
+
+/* Return true if A and B have the same length and the same bytes.  */
+extern bool string_desc_equals (string_desc_t a, string_desc_t b);
+
+/* Return true if S starts with PREFIX.  */
+extern bool string_desc_startswith (string_desc_t s, string_desc_t prefix);
+
+/* Return true if S ends with SUFFIX.  */
+extern bool string_desc_endswith (string_desc_t s, string_desc_t suffix);
+
+/* Return > 0, == 0, or < 0 if A > B, A == B, A < B.
+   This uses a lexicographic ordering, where the bytes are compared as
+   'unsigned char'.  */
+extern int string_desc_cmp (string_desc_t a, string_desc_t b);
+
+/* Return the index of the first occurrence of C in S,
+   or -1 if there is none.  */
+extern ptrdiff_t string_desc_index (string_desc_t s, char c);
+
+/* Return the index of the last occurrence of C in S,
+   or -1 if there is none.  */
+extern ptrdiff_t string_desc_last_index (string_desc_t s, char c);
+
+/* Return the index of the first occurrence of NEEDLE in HAYSTACK,
+   or -1 if there is none.  An empty NEEDLE is found at index 0.  */
+extern ptrdiff_t string_desc_contains (string_desc_t haystack, string_desc_t needle);
+
+/* Return an empty string.
*/
+extern string_desc_t string_desc_new_empty (void);
+
+/* Return a string that represents the C string S, of length strlen (S).  */
+extern string_desc_t string_desc_from_c (const char *s);
+
+/* Return the substring of S, starting at offset START and ending at offset END.
+   START must be >= 0 and <= END.
+   The result is of length END - START.
+   The result must not be freed (since its storage is part of the storage
+   of S).  */
+extern string_desc_t string_desc_substring (string_desc_t s, idx_t start, idx_t end);
+
+/* Output S to the file descriptor FD.
+   Return 0 if successful.
+   Upon error, return -1 with errno set.  */
+extern int string_desc_write (int fd, string_desc_t s);
+
+/* Output S to the FILE stream FP.
+   Return 0 if successful.
+   Upon error, return -1.  */
+extern int string_desc_fwrite (FILE *fp, string_desc_t s);
+
+
+/* ==== Memory-allocating operations on string descriptors ==== */
+
+/* Construct a string of length N >= 0, with uninitialized contents.
+   Return 0 if successful, with the new string stored in *RESULTP.
+   Upon error, return -1 with errno set.  */
+_GL_ATTRIBUTE_NODISCARD
+extern int string_desc_new (string_desc_t *resultp, idx_t n);
+
+/* Construct and return a string of length N, at the given memory address.  */
+extern string_desc_t string_desc_new_addr (idx_t n, char *addr);
+
+/* Construct a string of length N, filled with C.
+   Return 0 if successful, with the new string stored in *RESULTP.
+   Upon error, return -1 with errno set.  */
+_GL_ATTRIBUTE_NODISCARD
+extern int string_desc_new_filled (string_desc_t *resultp, idx_t n, char c);
+
+/* Construct a copy of string S.
+   Return 0 if successful, with the new string stored in *RESULTP.
+   Upon error, return -1 with errno set.  */
+_GL_ATTRIBUTE_NODISCARD
+extern int string_desc_copy (string_desc_t *resultp, string_desc_t s);
+
+/* Construct the concatenation of N strings (STRING1, ...).  N must be > 0.
+   Return 0 if successful, with the new string stored in *RESULTP.
+   Upon error, return -1 with errno set.
*/
+_GL_ATTRIBUTE_NODISCARD
+extern int string_desc_concat (string_desc_t *resultp, idx_t n, string_desc_t string1, ...);
+
+/* Construct a copy of string S, as a NUL-terminated C string.
+   Return it if successful.
+   Upon error, return NULL with errno set.  */
+extern char * string_desc_c (string_desc_t s) _GL_ATTRIBUTE_DEALLOC_FREE;
+
+
+/* ==== Operations with side effects on string descriptors ==== */
+
+/* Overwrite the byte at index I of string S with C.
+   I must be >= 0 and < length(S); otherwise the program aborts.  */
+extern void string_desc_set_char_at (string_desc_t s, idx_t i, char c);
+
+/* Fill part of S, starting at offset START and ending at offset END,
+   with copies of C.
+   START must be >= 0 and <= END.  */
+extern void string_desc_fill (string_desc_t s, idx_t start, idx_t end, char c);
+
+/* Overwrite part of S with T, starting at offset START.
+   START must be >= 0, and START + length(T) must be <= length (S).  */
+extern void string_desc_overwrite (string_desc_t s, idx_t start, string_desc_t t);
+
+/* Free the memory that holds the bytes of S.  */
+extern void string_desc_free (string_desc_t s);
+
+
+/* ==== Inline function definitions ==== */
+
+GL_STRING_DESC_INLINE idx_t
+string_desc_length (string_desc_t s)
+{
+  return s._nbytes;
+}
+
+GL_STRING_DESC_INLINE char
+string_desc_char_at (string_desc_t s, idx_t i)
+{
+  if (!(i >= 0 && i < s._nbytes))
+    /* Invalid argument: I is not a valid index into S.  */
+    abort ();
+  return s._data[i];
+}
+
+GL_STRING_DESC_INLINE const char *
+string_desc_data (string_desc_t s)
+{
+  return s._data;
+}
+
+GL_STRING_DESC_INLINE bool
+string_desc_is_empty (string_desc_t s)
+{
+  return s._nbytes == 0;
+}
+
+
+#ifdef __cplusplus
+}
+#endif
+
+_GL_INLINE_HEADER_END
+
+
+#endif /* _STRING_DESC_H */
diff --git a/modules/string-desc b/modules/string-desc
new file mode 100644
index 0000000000..044ee266e4
--- /dev/null
+++ b/modules/string-desc
@@ -0,0 +1,30 @@
+Description:
+String descriptors.
+
+Files:
+lib/string-desc.h
+lib/string-desc.c
+lib/string-desc-contains.c
+
+Depends-on:
+stdbool
+idx
+ialloc
+memchr
+memrchr
+memmem
+full-write
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += string-desc.c string-desc-contains.c
+
+Include:
+"string-desc.h"
+
+License:
+LGPL
+
+Maintainer:
+all
-- 
2.34.1