[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemacs-commit] qemacs buffer.c charset.c dired.c extras.c orgm...
From: |
Charlie Gordon |
Subject: |
[Qemacs-commit] qemacs buffer.c charset.c dired.c extras.c orgm... |
Date: |
Wed, 05 Feb 2014 00:56:50 +0000 |
CVSROOT: /sources/qemacs
Module name: qemacs
Changes by: Charlie Gordon <chqrlie> 14/02/05 00:56:50
Modified files:
. : buffer.c charset.c dired.c extras.c orgmode.c
qe.c qe.h shell.c unihex.c
libqhtml : xmlparse.c
Log message:
add support for end of line types
* support 3 types of end of line: Unix, Dos and old style Mac
* detect eol type automatically in detect_charset
* set buffer eol_type together with charset
* transparently convert eol sequence to \n upon reading buffer
characters in eb_nextc().
* transparently convert \n to eol sequence in eb_encode_uchar()
* handle eol types in convert-buffer-file-coding-system: either preserve
current value or force it via charset suffix -unix, -dos and -mac
* change read_charset() to accept eol_type suffix
* add eol_type and eol_char to CharsetDecodeState
* change charset methods to take CharsetDecodeState instead of charset
pointers
* display eol_type in mode line if not unix
CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/qemacs/buffer.c?cvsroot=qemacs&r1=1.70&r2=1.71
http://cvs.savannah.gnu.org/viewcvs/qemacs/charset.c?cvsroot=qemacs&r1=1.29&r2=1.30
http://cvs.savannah.gnu.org/viewcvs/qemacs/dired.c?cvsroot=qemacs&r1=1.35&r2=1.36
http://cvs.savannah.gnu.org/viewcvs/qemacs/extras.c?cvsroot=qemacs&r1=1.18&r2=1.19
http://cvs.savannah.gnu.org/viewcvs/qemacs/orgmode.c?cvsroot=qemacs&r1=1.11&r2=1.12
http://cvs.savannah.gnu.org/viewcvs/qemacs/qe.c?cvsroot=qemacs&r1=1.143&r2=1.144
http://cvs.savannah.gnu.org/viewcvs/qemacs/qe.h?cvsroot=qemacs&r1=1.135&r2=1.136
http://cvs.savannah.gnu.org/viewcvs/qemacs/shell.c?cvsroot=qemacs&r1=1.83&r2=1.84
http://cvs.savannah.gnu.org/viewcvs/qemacs/unihex.c?cvsroot=qemacs&r1=1.20&r2=1.21
http://cvs.savannah.gnu.org/viewcvs/qemacs/libqhtml/xmlparse.c?cvsroot=qemacs&r1=1.25&r2=1.26
Patches:
Index: buffer.c
===================================================================
RCS file: /sources/qemacs/qemacs/buffer.c,v
retrieving revision 1.70
retrieving revision 1.71
diff -u -b -r1.70 -r1.71
--- buffer.c 4 Feb 2014 22:47:31 -0000 1.70
+++ buffer.c 5 Feb 2014 00:56:48 -0000 1.71
@@ -485,6 +485,7 @@
/* initialize default mode stuff */
b->tab_width = qs->default_tab_width;
b->fill_column = qs->default_fill_column;
+ b->eol_type = qs->default_eol_type;
/* add buffer in global buffer list (at end for system buffers) */
pb = &qs->first_buffer;
@@ -496,10 +497,10 @@
*pb = b;
if (flags & BF_UTF8) {
- eb_set_charset(b, &charset_utf8);
+ eb_set_charset(b, &charset_utf8, b->eol_type);
} else {
/* CG: default charset should be selectable */
- eb_set_charset(b, &charset_8859_1);
+ eb_set_charset(b, &charset_8859_1, b->eol_type);
}
/* add mark move callback */
@@ -1104,20 +1105,21 @@
/************************************************************/
/* line related functions */
-void eb_set_charset(EditBuffer *b, QECharset *charset)
+void eb_set_charset(EditBuffer *b, QECharset *charset, EOLType eol_type)
{
int n;
if (b->charset) {
charset_decode_close(&b->charset_state);
}
+ b->eol_type = eol_type;
b->charset = charset;
b->flags &= ~BF_UTF8;
if (charset == &charset_utf8)
b->flags |= BF_UTF8;
if (charset)
- charset_decode_init(&b->charset_state, charset);
+ charset_decode_init(&b->charset_state, charset, eol_type);
b->char_bytes = 1;
b->char_shift = 0;
@@ -1168,11 +1170,21 @@
/* we use the charset conversion table directly to go faster */
ch = b->charset_state.table[buf[0]];
offset++;
- if (ch == ESCAPE_CHAR) {
+ if (ch == ESCAPE_CHAR || ch == '\r') {
eb_read(b, offset, buf + 1, MAX_CHAR_BYTES - 1);
b->charset_state.p = buf;
ch = b->charset_state.decode_func(&b->charset_state);
offset += (b->charset_state.p - buf) - 1;
+ if (ch == '\r') {
+ if (b->eol_type == EOL_DOS
+ && b->charset_state.decode_func(&b->charset_state) == '\n') {
+ ch = '\n';
+ offset += b->charset_state.char_size;
+ } else
+ if (b->eol_type == EOL_MAC) {
+ ch = '\n';
+ }
+ }
}
}
*next_ptr = offset;
@@ -1259,6 +1271,12 @@
b->charset_state.p = q;
ch = b->charset_state.decode_func(&b->charset_state);
}
+ if (ch == '\n' && b->eol_type == EOL_DOS && offset >= char_size) {
+ eb_read(b, offset - char_size, buf, char_size);
+ b->charset_state.p = buf;
+ if (b->charset_state.decode_func(&b->charset_state) == '\r')
+ offset -= char_size;
+ }
}
the_end:
*prev_ptr = offset;
@@ -1290,7 +1308,7 @@
/* compute offset */
if (line < line1) {
/* seek to the correct line */
- offset += b->charset->goto_line_func(b->charset,
+ offset += b->charset->goto_line_func(&b->charset_state,
p->data, p->size, line1 - line);
line = line1;
col = 0;
@@ -1359,7 +1377,7 @@
int offset;
Page *p, *p_end;
- if (!b->charset->variable_size) {
+ if (!b->charset->variable_size && b->eol_type != EOL_DOS) {
offset = min(pos * b->charset->char_size, b->total_size);
} else {
offset = 0;
@@ -1368,10 +1386,10 @@
while (p < p_end) {
if (!(p->flags & PG_VALID_CHAR)) {
p->flags |= PG_VALID_CHAR;
- p->nb_chars = b->charset->get_chars_func(b->charset, p->data,
p->size);
+ p->nb_chars = b->charset->get_chars_func(&b->charset_state,
p->data, p->size);
}
if (pos < p->nb_chars) {
- offset += b->charset->goto_char_func(b->charset, p->data,
p->size, pos);
+ offset += b->charset->goto_char_func(&b->charset_state,
p->data, p->size, pos);
break;
} else {
pos -= p->nb_chars;
@@ -1392,10 +1410,12 @@
if (offset < 0)
offset = 0;
- if (!b->charset->variable_size) {
+ if (!b->charset->variable_size && b->eol_type != EOL_DOS) {
/* offset is round down to character boundary */
pos = min(offset, b->total_size) / b->charset->char_size;
} else {
+ /* XXX: should handle rounding if EOL_DOS */
+ /* XXX: should fix buffer offset via charset specific method */
if (b->charset == &charset_utf8) {
/* Round offset down to character boundary */
u8 buf[1];
@@ -1413,10 +1433,10 @@
while (p < p_end) {
if (!(p->flags & PG_VALID_CHAR)) {
p->flags |= PG_VALID_CHAR;
- p->nb_chars = b->charset->get_chars_func(b->charset, p->data,
p->size);
+ p->nb_chars = b->charset->get_chars_func(&b->charset_state,
p->data, p->size);
}
if (offset < p->size) {
- pos += b->charset->get_chars_func(b->charset, p->data, offset);
+ pos += b->charset->get_chars_func(&b->charset_state, p->data,
offset);
break;
} else {
pos += p->nb_chars;
@@ -1716,7 +1736,7 @@
eb_set_buffer_name(b, get_basename(filename));
}
-/* Encode unicode character according to buffer charset */
+/* Encode unicode character according to buffer charset and eol_type */
/* Return number of bytes of conversion */
/* the function uses '?' to indicate that no match could be found in
buffer charset */
@@ -1725,6 +1745,14 @@
QECharset *charset = b->charset;
u8 *q = (u8 *)buf;
+ if (c == '\n') {
+ if (b->eol_type == EOL_MAC)
+ c = '\r';
+ else
+ if (b->eol_type == EOL_DOS) {
+ q = charset->encode_func(charset, q, '\r');
+ }
+ }
q = charset->encode_func(charset, q, c);
if (!q) {
q = (u8 *)buf;
@@ -1749,7 +1777,7 @@
/* Return number of bytes inserted */
int eb_insert_utf8_buf(EditBuffer *b, int offset, const char *buf, int len)
{
- if (b->charset == &charset_utf8) {
+ if (b->charset == &charset_utf8 && b->eol_type == EOL_UNIX) {
return eb_insert(b, offset, buf, len);
} else {
char buf1[1024];
@@ -1835,7 +1863,7 @@
vsnprintf(buf, size, fmt, ap);
va_end(ap);
}
- /* CG: insert buffer translating according b->charset.
+ /* CG: insert buf encoding according to b->charset and b->eol_type.
* buf may contain \0 characters via the %c modifer.
* XXX: %c does not encode non ASCII characters as utf8.
*/
@@ -1867,11 +1895,12 @@
}
#endif
-/* Read the comtents of a buffer encoded in a utf8 string */
+/* Read the contents of a buffer encoded in a utf8 string */
int eb_get_contents(EditBuffer *b, char *buf, int buf_size)
{
/* do not use eb_read if overflow to avoid partial characters */
- if (b->charset == &charset_utf8 && b->total_size < buf_size) {
+ if (b->charset == &charset_utf8 && b->eol_type == EOL_UNIX
+ && b->total_size < buf_size) {
int len = b->total_size;
eb_read(b, 0, buf, len);
buf[len] = '\0';
@@ -1902,7 +1931,9 @@
{
int styles_flags = min((dest->flags & BF_STYLES), (src->flags &
BF_STYLES));
- if (dest->charset == src->charset && !styles_flags) {
+ if (dest->charset == src->charset
+ && dest->eol_type == src->eol_type
+ && !styles_flags) {
return eb_insert_buffer(dest, dest_offset, src, src_offset, size);
} else {
EditBuffer *b;
@@ -1912,7 +1943,7 @@
if (!styles_flags
&& ((b->flags & BF_SAVELOG) || dest_offset != b->total_size)) {
b = eb_new("*tmp*", BF_SYSTEM);
- eb_set_charset(b, dest->charset);
+ eb_set_charset(b, dest->charset, dest->eol_type);
offset1 = 0;
}
Index: charset.c
===================================================================
RCS file: /sources/qemacs/qemacs/charset.c,v
retrieving revision 1.29
retrieving revision 1.30
diff -u -b -r1.29 -r1.30
--- charset.c 4 Feb 2014 22:47:31 -0000 1.29
+++ charset.c 5 Feb 2014 00:56:49 -0000 1.30
@@ -30,6 +30,9 @@
* spacing and enclosing combining characters and control chars.
*/
+/* XXX: This table is incomplete, should compute from UnicodeData.txt
+ * via a specialized utility
+ */
static unsigned int const unicode_glyph_ranges[] = {
0x10FF, 1, 0x115f, 2, /* 0: Hangul Jamo */
0x2328, 1, 0x232a, 2, /* 2: wide Angle brackets */
@@ -309,7 +312,7 @@
line = 0;
lp = p = buf;
p1 = p + size;
- nl = s->charset->eol_char;
+ nl = s->eol_char;
for (;;) {
p = memchr(p, nl, p1 - p);
@@ -330,16 +333,29 @@
*col_ptr = col;
}
-static int charset_get_chars_utf8(QECharset *charset, const u8 *buf, int size)
+static int charset_get_chars_utf8(CharsetDecodeState *s,
+ const u8 *buf, int size)
{
int nb_chars, c;
const u8 *buf_end, *buf_ptr;
nb_chars = 0;
buf_ptr = buf;
- buf_end = buf + size;
+ buf_end = buf_ptr + size;
while (buf_ptr < buf_end) {
c = *buf_ptr++;
+ if (c == '\n' && s->eol_type == EOL_DOS) {
+ /* ignore \n in EOL_DOS scan, but count \r.
+ * XXX: potentially incorrect if buffer contains
+ * \n not preceded by \r and requires special state
+ * data to handle \r\n sequence at page boundary.
+ */
+ continue;
+ }
+ /* ignoring trailing bytes: will produce incorrect
+ * count on isolated and trailing bytes and overlong
+ * sequences.
+ */
if (c < 0x80 || c >= 0xc0)
nb_chars++;
}
@@ -347,21 +363,30 @@
* utf-8 sequence at start of buffer is ignored in count while
* incomplete utf-8 sequence at end of buffer is counted. This may
* cause problems when counting characters with eb_get_pos with an
- * offset falling indside an utf-8 sequence.
+ * offset falling inside a utf-8 sequence, and will produce
+ * incorrect counts on broken utf-8 sequences spanning page
+ * boundaries.
*/
return nb_chars;
}
-static int charset_goto_char_utf8(QECharset *charset, const u8 *buf, int size,
int pos)
+static int charset_goto_char_utf8(CharsetDecodeState *s,
+ const u8 *buf, int size, int pos)
{
int nb_chars, c;
const u8 *buf_ptr, *buf_end;
nb_chars = 0;
buf_ptr = buf;
- buf_end = buf + size;
+ buf_end = buf_ptr + size;
while (buf_ptr < buf_end) {
c = *buf_ptr;
+ if (c == '\n' && s->eol_type == EOL_DOS) {
+ /* ignore \n in EOL_DOS scan, but count \r.
+ * see comment above.
+ */
+ continue;
+ }
if (c < 0x80 || c >= 0xc0) {
/* Test done here to skip initial trailing bytes if any */
if (nb_chars >= pos)
@@ -396,6 +421,7 @@
static int decode_ucs2le(CharsetDecodeState *s)
{
+ /* XXX: should handle surrogates */
const u8 *p;
p = s->p;
@@ -405,6 +431,7 @@
static u8 *encode_ucs2le(__unused__ QECharset *charset, u8 *p, int c)
{
+ /* XXX: should handle surrogates */
p[0] = c;
p[1] = c >> 8;
return p + 2;
@@ -423,9 +450,10 @@
lp = p = (const uint16_t *)buf;
p1 = p + (size >> 1);
u.n = 0;
- u.c[s->charset == &charset_ucs2be] = s->charset->eol_char;
+ u.c[s->charset == &charset_ucs2be] = s->eol_char;
nl = u.n;
+ /* XXX: should handle surrogates */
while (p < p1) {
if (*p++ == nl) {
lp = p;
@@ -437,8 +465,8 @@
*col_ptr = col;
}
-static int charset_goto_line_ucs2(QECharset *charset, const u8 *buf, int size,
- int nlines)
+static int charset_goto_line_ucs2(CharsetDecodeState *s,
+ const u8 *buf, int size, int nlines)
{
const uint16_t *p, *p1, *lp;
uint16_t nl;
@@ -447,7 +475,7 @@
lp = p = (const uint16_t *)buf;
p1 = p + (size >> 1);
u.n = 0;
- u.c[charset == &charset_ucs2be] = charset->eol_char;
+ u.c[s->charset == &charset_ucs2be] = s->eol_char;
nl = u.n;
while (nlines > 0 && p < p1) {
@@ -464,6 +492,7 @@
static int decode_ucs2be(CharsetDecodeState *s)
{
+ /* XXX: should handle surrogates */
const u8 *p;
p = s->p;
@@ -473,19 +502,70 @@
static u8 *encode_ucs2be(__unused__ QECharset *charset, u8 *p, int c)
{
+ /* XXX: should handle surrogates */
p[0] = c >> 8;
p[1] = c;
return p + 2;
}
-static int charset_get_chars_ucs2(__unused__ QECharset *charset, const u8
*buf, int size)
+static int charset_get_chars_ucs2(CharsetDecodeState *s,
+ const u8 *buf, int size)
{
+ /* XXX: should handle surrogates */
+ int nb_skip;
+ const uint16_t *buf_end, *buf_ptr;
+ uint16_t nl;
+ union { uint16_t n; char c[2]; } u;
+
+ if (s->eol_type != EOL_DOS)
return size >> 1;
+
+ nb_skip = 0;
+ buf_ptr = (const uint16_t *)buf;
+ buf_end = buf_ptr + (size >> 1);
+ u.n = 0;
+ u.c[s->charset == &charset_ucs2be] = '\n';
+ nl = u.n;
+
+ while (buf_ptr < buf_end) {
+ if (*buf_ptr++ == nl) {
+ /* ignore \n in EOL_DOS scan, but count \r. (see above) */
+ nb_skip++;
+ }
+ }
+ return (size >> 1) - nb_skip;
}
-static int charset_goto_char_ucs2(__unused__ QECharset *charset, const u8
*buf, int size, int pos)
+static int charset_goto_char_ucs2(CharsetDecodeState *s,
+ const u8 *buf, int size, int pos)
{
+ /* XXX: should handle surrogates */
+ int nb_chars;
+ const uint16_t *buf_ptr, *buf_end;
+ uint16_t nl;
+ union { uint16_t n; char c[2]; } u;
+
+ if (s->eol_type != EOL_DOS)
return min(pos << 1, size);
+
+ nb_chars = 0;
+ buf_ptr = (const uint16_t *)buf;
+ buf_end = buf_ptr + (size >> 1);
+ u.n = 0;
+ u.c[s->charset == &charset_ucs2be] = '\n';
+ nl = u.n;
+
+ while (buf_ptr < buf_end) {
+ if (*buf_ptr == nl) {
+ /* ignore \n in EOL_DOS scan, but count \r. (see above) */
+ continue;
+ }
+ if (nb_chars >= pos)
+ break;
+ nb_chars++;
+ buf_ptr++;
+ }
+ return (const u8*)buf_ptr - buf;
}
QECharset charset_ucs2le = {
@@ -545,7 +625,7 @@
lp = p = (const uint32_t *)buf;
p1 = p + (size >> 2);
u.n = 0;
- u.c[(s->charset == &charset_ucs4be) * 3] = s->charset->eol_char;
+ u.c[(s->charset == &charset_ucs4be) * 3] = s->eol_char;
nl = u.n;
while (p < p1) {
@@ -559,8 +639,8 @@
*col_ptr = col;
}
-static int charset_goto_line_ucs4(QECharset *charset, const u8 *buf, int size,
- int nlines)
+static int charset_goto_line_ucs4(CharsetDecodeState *s,
+ const u8 *buf, int size, int nlines)
{
const uint32_t *p, *p1, *lp;
uint32_t nl;
@@ -569,7 +649,7 @@
lp = p = (const uint32_t *)buf;
p1 = p + (size >> 2);
u.n = 0;
- u.c[(charset == &charset_ucs4be) * 3] = charset->eol_char;
+ u.c[(s->charset == &charset_ucs4be) * 3] = s->eol_char;
nl = u.n;
while (nlines > 0 && p < p1) {
@@ -602,14 +682,62 @@
return p + 4;
}
-static int charset_get_chars_ucs4(__unused__ QECharset *charset, const u8
*buf, int size)
+static int charset_get_chars_ucs4(CharsetDecodeState *s,
+ const u8 *buf, int size)
{
+ int nb_skip;
+ const uint32_t *buf_end, *buf_ptr;
+ uint32_t nl;
+ union { uint32_t n; char c[4]; } u;
+
+ if (s->eol_type != EOL_DOS)
return size >> 2;
+
+ nb_skip = 0;
+ buf_ptr = (const uint32_t *)buf;
+ buf_end = buf_ptr + (size >> 2);
+ u.n = 0;
+ u.c[(s->charset == &charset_ucs4be) * 3] = '\n';
+ nl = u.n;
+
+ while (buf_ptr < buf_end) {
+ if (*buf_ptr++ == nl) {
+ /* ignore \n in EOL_DOS scan, but count \r. (see above) */
+ nb_skip++;
+ }
+ }
+ return (size >> 2) - nb_skip;
}
-static int charset_goto_char_ucs4(__unused__ QECharset *charset, const u8
*buf, int size, int pos)
+static int charset_goto_char_ucs4(CharsetDecodeState *s,
+ const u8 *buf, int size, int pos)
{
+ int nb_chars;
+ const uint32_t *buf_ptr, *buf_end;
+ uint32_t nl;
+ union { uint32_t n; char c[4]; } u;
+
+ if (s->eol_type != EOL_DOS)
return min(pos << 2, size);
+
+ nb_chars = 0;
+ buf_ptr = (const uint32_t *)buf;
+ buf_end = buf_ptr + (size >> 2);
+ u.n = 0;
+ u.c[(s->charset == &charset_ucs4be) * 3] = '\n';
+ nl = u.n;
+
+ while (buf_ptr < buf_end) {
+ if (*buf_ptr == nl) {
+ /* ignore \n in EOL_DOS scan, but count \r. (see above) */
+ continue;
+ }
+ if (nb_chars >= pos)
+ break;
+ nb_chars++;
+ buf_ptr++;
+ }
+ return (const u8*)buf_ptr - buf;
}
QECharset charset_ucs4le = {
@@ -699,7 +827,8 @@
return NULL;
}
-void charset_decode_init(CharsetDecodeState *s, QECharset *charset)
+void charset_decode_init(CharsetDecodeState *s, QECharset *charset,
+ EOLType eol_type)
{
s->table = NULL; /* fail safe */
if (charset->table_alloc) {
@@ -710,6 +839,10 @@
}
s->charset = charset;
s->char_size = charset->char_size;
+ s->eol_type = eol_type;
+ s->eol_char = charset->eol_char;
+ if (s->eol_char == '\n' && (s->eol_type == EOL_MAC || s->eol_type ==
EOL_DOS))
+ s->eol_char = '\r';
s->decode_func = charset->decode_func;
s->get_pos_func = charset->get_pos_func;
if (charset->decode_init)
@@ -725,10 +858,48 @@
}
/* detect the charset. Actually only UTF8 is detected */
-QECharset *detect_charset(const u8 *buf, int size)
+QECharset *detect_charset(const u8 *buf, int size, EOLType *eol_typep)
{
int i, l, c, has_utf8;
+ if (eol_typep) {
+ /* XXX: delay test after charset match */
+ /* XXX: only works for 8 bit charsets */
+ int eol_bits = 0;
+ for (i = 0; i < size - 1; i++) {
+ c = buf[i++];
+ if (c == '\r') {
+ if (buf[i] == '\n') {
+ eol_bits |= 1 << EOL_DOS;
+ i++;
+ } else {
+ eol_bits |= 1 << EOL_MAC;
+ }
+ } else
+ if (buf[i] == '\n') {
+ eol_bits |= 1 << EOL_UNIX;
+ }
+ }
+ switch (eol_bits) {
+ case 0:
+ /* no change, keep default value */
+ break;
+ case 1 << EOL_UNIX:
+ *eol_typep = EOL_UNIX;
+ break;
+ case 1 << EOL_DOS:
+ *eol_typep = EOL_DOS;
+ break;
+ case 1 << EOL_MAC:
+ *eol_typep = EOL_MAC;
+ break;
+ default:
+ /* A mixture of different styles, binary / unix */
+ *eol_typep = EOL_UNIX;
+ break;
+ }
+ }
+
has_utf8 = 0;
for (i = 0; i < size;) {
c = buf[i++];
@@ -855,7 +1026,7 @@
line = 0;
lp = p = buf;
p1 = p + size;
- nl = s->charset->eol_char;
+ nl = s->eol_char;
for (;;) {
p = memchr(p, nl, p1 - p);
@@ -870,14 +1041,15 @@
*col_ptr = col;
}
-int charset_goto_line_8bit(QECharset *charset, const u8 *buf, int size, int
nlines)
+int charset_goto_line_8bit(CharsetDecodeState *s,
+ const u8 *buf, int size, int nlines)
{
const u8 *p, *p1, *lp;
int nl;
lp = p = buf;
p1 = p + size;
- nl = charset->eol_char;
+ nl = s->eol_char;
while (nlines > 0) {
p = memchr(p, nl, p1 - p);
@@ -890,14 +1062,50 @@
return lp - buf;
}
-int charset_get_chars_8bit(QECharset *charset, const u8 *buf, int size)
+int charset_get_chars_8bit(CharsetDecodeState *s,
+ const u8 *buf, int size)
{
+ int nb_skip;
+ const u8 *buf_end, *buf_ptr;
+
+ if (s->eol_type != EOL_DOS)
return size;
+
+ nb_skip = 0;
+ buf_ptr = buf;
+ buf_end = buf_ptr + size;
+ while (buf_ptr < buf_end) {
+ if (*buf_ptr++ == '\n') {
+ /* ignore \n in EOL_DOS scan, but count \r. (see above) */
+ nb_skip++;
+ }
+ }
+ return size - nb_skip;
}
-int charset_goto_char_8bit(QECharset *charset, const u8 *buf, int size, int
pos)
+int charset_goto_char_8bit(CharsetDecodeState *s,
+ const u8 *buf, int size, int pos)
{
+ int nb_chars;
+ const u8 *buf_ptr, *buf_end;
+
+ if (s->eol_type != EOL_DOS)
return min(pos, size);
+
+ nb_chars = 0;
+ buf_ptr = buf;
+ buf_end = buf_ptr + size;
+ while (buf_ptr < buf_end) {
+ if (*buf_ptr == '\n') {
+ /* ignore \n in EOL_DOS scan, but count \r. */
+ continue;
+ }
+ if (nb_chars >= pos)
+ break;
+ nb_chars++;
+ buf_ptr++;
+ }
+ return buf_ptr - buf;
}
/********************************************************/
Index: dired.c
===================================================================
RCS file: /sources/qemacs/qemacs/dired.c,v
retrieving revision 1.35
retrieving revision 1.36
diff -u -b -r1.35 -r1.36
--- dired.c 23 Jan 2014 12:56:22 -0000 1.35
+++ dired.c 5 Feb 2014 00:56:49 -0000 1.36
@@ -493,7 +493,7 @@
list_mode.mode_init(s, saved_data);
/* XXX: File system charset should be detected automatically */
- eb_set_charset(s->b, &charset_utf8);
+ eb_set_charset(s->b, &charset_utf8, s->b->eol_type);
hs = s->mode_data;
hs->sort_mode = DIRED_SORT_GROUP | DIRED_SORT_NAME;
Index: extras.c
===================================================================
RCS file: /sources/qemacs/qemacs/extras.c,v
retrieving revision 1.18
retrieving revision 1.19
diff -u -b -r1.18 -r1.19
--- extras.c 31 Jan 2014 14:50:13 -0000 1.18
+++ extras.c 5 Feb 2014 00:56:49 -0000 1.19
@@ -405,7 +405,7 @@
} else {
EditBuffer *b1 = eb_new("*tmp*", BF_SYSTEM | (b->flags & BF_STYLES));
- eb_set_charset(b1, b->charset);
+ eb_set_charset(b1, b->charset, b->eol_type);
/* Use eb_insert_buffer_convert to copy styles.
* This conversion should not change sizes */
eb_insert_buffer_convert(b1, 0, b, offset2, size2);
@@ -712,6 +712,11 @@
s->b->flags &= ~BF_STYLES;
}
+static void do_set_eol_type(EditState *s, int eol_type)
+{
+ eb_set_charset(s->b, s->b->charset, eol_type);
+}
+
static CmdDef extra_commands[] = {
CMD2( KEY_META('='), KEY_NONE,
"compare-windows", do_compare_windows, ESi, "ui" )
@@ -770,6 +775,10 @@
CMD0( KEY_NONE, KEY_NONE,
"drop-styles", do_drop_styles)
+ CMD2( KEY_NONE, KEY_NONE,
+ "set-eol-type", do_set_eol_type, ESi,
+ "ui{EOL Type [0=Unix, 1=Dos, 2=Mac]: }")
+
CMD_DEF_END,
};
Index: orgmode.c
===================================================================
RCS file: /sources/qemacs/qemacs/orgmode.c,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -b -r1.11 -r1.12
--- orgmode.c 29 Jan 2014 23:24:00 -0000 1.11
+++ orgmode.c 5 Feb 2014 00:56:49 -0000 1.12
@@ -617,7 +617,7 @@
offset2 = org_next_heading(s, offset1, level, &level2);
}
b1 = eb_new("*tmp*", BF_SYSTEM | (s->b->flags & BF_STYLES));
- eb_set_charset(b1, s->b->charset);
+ eb_set_charset(b1, s->b->charset, s->b->eol_type);
eb_insert_buffer_convert(b1, 0, s->b, offset, size);
eb_delete(s->b, offset, size);
if (offset2 > offset)
Index: qe.c
===================================================================
RCS file: /sources/qemacs/qemacs/qe.c,v
retrieving revision 1.143
retrieving revision 1.144
diff -u -b -r1.143 -r1.144
--- qe.c 4 Feb 2014 22:47:31 -0000 1.143
+++ qe.c 5 Feb 2014 00:56:49 -0000 1.144
@@ -1485,7 +1485,7 @@
}
snprintf(bufname, sizeof(bufname), "*kill-%d*", qs->yank_current + 1);
b = eb_new(bufname, base->flags & BF_STYLES);
- eb_set_charset(b, base->charset);
+ eb_set_charset(b, base->charset, base->eol_type);
qs->yank_buffers[qs->yank_current] = b;
return b;
}
@@ -1823,26 +1823,49 @@
}
#endif
-QECharset *read_charset(EditState *s, const char *charset_str)
+QECharset *read_charset(EditState *s, const char *charset_str,
+ EOLType *eol_typep)
{
+ char buf[64];
+ const char *p;
QECharset *charset;
+ EOLType eol_type = *eol_typep;
+
+ p = NULL;
+
+ if (strend(charset_str, "-mac", &p))
+ eol_type = EOL_MAC;
+ else
+ if (strend(charset_str, "-dos", &p))
+ eol_type = EOL_DOS;
+ else
+ if (strend(charset_str, "-unix", &p))
+ eol_type = EOL_UNIX;
+
+ if (p) {
+ pstrncpy(buf, sizeof(buf), charset_str, p - charset_str);
+ charset_str = buf;
+ }
charset = find_charset(charset_str);
if (!charset) {
put_status(s, "Unknown charset '%s'", charset_str);
return NULL;
}
+ *eol_typep = eol_type;
return charset;
}
void do_set_buffer_file_coding_system(EditState *s, const char *charset_str)
{
QECharset *charset;
+ EOLType eol_type;
- charset = read_charset(s, charset_str);
+ eol_type = s->b->eol_type;
+ charset = read_charset(s, charset_str, &eol_type);
if (!charset)
return;
- eb_set_charset(s->b, charset);
+ eb_set_charset(s->b, charset, eol_type);
put_status(s, "Charset is now %s for this buffer", s->b->charset->name);
}
@@ -1851,20 +1874,22 @@
const char *charset_str)
{
QECharset *charset;
+ EOLType eol_type;
EditBuffer *b1, *b;
int offset, c, len, i;
EditBufferCallbackList *cb;
int pos[32];
char buf[MAX_CHAR_BYTES];
- charset = read_charset(s, charset_str);
+ eol_type = s->b->eol_type;
+ charset = read_charset(s, charset_str, &eol_type);
if (!charset)
return;
b = s->b;
b1 = eb_new("*tmp*", b->flags & BF_STYLES);
- eb_set_charset(b1, charset);
+ eb_set_charset(b1, charset, eol_type);
/* preserve positions */
cb = b->first_callback;
@@ -1887,7 +1912,7 @@
/* quick hack to transfer styles from tmp buffer to b */
eb_free(&b->b_styles);
eb_delete(b, 0, b->total_size);
- eb_set_charset(b, charset);
+ eb_set_charset(b, charset, eol_type);
eb_insert_buffer(b, 0, b1, 0, b1->total_size);
b->b_styles = b1->b_styles;
b1->b_styles = NULL;
@@ -2145,6 +2170,10 @@
eb_get_pos(s->b, &line_num, &col_num, s->offset);
buf_printf(out, "L%d--C%d--%s",
line_num + 1, col_num, s->b->charset->name);
+ if (s->b->eol_type == EOL_DOS)
+ buf_printf(out, "-dos");
+ if (s->b->eol_type == EOL_MAC)
+ buf_printf(out, "-mac");
if (s->bidir)
buf_printf(out, "--%s", s->cur_rtl ? "RTL" : "LTR");
@@ -5646,7 +5675,7 @@
/* First we try to read the first block to determine the data type */
if (stat(filename, &st) < 0) {
/* XXX: default charset should be selectable. Use utf8 for now */
- eb_set_charset(b, &charset_utf8);
+ eb_set_charset(b, &charset_utf8, b->eol_type);
/* CG: should check for wildcards and do dired */
//if (strchr(filename, '*') || strchr(filename, '?'))
// goto dired;
@@ -5686,8 +5715,13 @@
bdt = selected_mode->data_type;
/* autodetect buffer charset (could move it to raw buffer loader) */
- if (bdt == &raw_data_type)
- eb_set_charset(b, detect_charset(buf, buf_size));
+ if (bdt == &raw_data_type) {
+ QECharset *charset;
+ EOLType eol_type;
+
+ charset = detect_charset(buf, buf_size, &eol_type);
+ eb_set_charset(b, charset, eol_type);
+ }
/* now we can set the mode */
edit_set_mode_full(s, selected_mode, NULL, f);
Index: qe.h
===================================================================
RCS file: /sources/qemacs/qemacs/qe.h,v
retrieving revision 1.135
retrieving revision 1.136
diff -u -b -r1.135 -r1.136
--- qe.h 4 Feb 2014 22:47:31 -0000 1.135
+++ qe.h 5 Feb 2014 00:56:49 -0000 1.136
@@ -491,9 +491,9 @@
u8 *(*encode_func)(QECharset *charset, u8 *buf, int size);
void (*get_pos_func)(CharsetDecodeState *s, const u8 *buf, int size,
int *line_ptr, int *col_ptr);
- int (*get_chars_func)(QECharset *charset, const u8 *buf, int size);
- int (*goto_char_func)(QECharset *charset, const u8 *buf, int size, int
pos);
- int (*goto_line_func)(QECharset *charset, const u8 *buf, int size, int
lines);
+ int (*get_chars_func)(CharsetDecodeState *s, const u8 *buf, int size);
+ int (*goto_char_func)(CharsetDecodeState *s, const u8 *buf, int size, int
pos);
+ int (*goto_line_func)(CharsetDecodeState *s, const u8 *buf, int size, int
lines);
unsigned int char_size : 3;
unsigned int variable_size : 1;
unsigned int table_alloc : 1; /* true if CharsetDecodeState.table must be
malloced */
@@ -510,10 +510,18 @@
extern QECharset charset_ucs2le, charset_ucs2be;
extern QECharset charset_ucs4le, charset_ucs4be;
+typedef enum EOLType {
+ EOL_UNIX = 0,
+ EOL_DOS,
+ EOL_MAC,
+} EOLType;
+
struct CharsetDecodeState {
/* 256 ushort table for hyper fast decoding */
unsigned short *table;
int char_size;
+ EOLType eol_type;
+ int eol_char;
const u8 *p;
/* slower decode function for complicated cases */
int (*decode_func)(CharsetDecodeState *s);
@@ -538,15 +546,16 @@
void charset_completion(CompleteState *cp);
QECharset *find_charset(const char *str);
-void charset_decode_init(CharsetDecodeState *s, QECharset *charset);
+void charset_decode_init(CharsetDecodeState *s, QECharset *charset,
+ EOLType eol_type);
void charset_decode_close(CharsetDecodeState *s);
void charset_get_pos_8bit(CharsetDecodeState *s, const u8 *buf, int size,
int *line_ptr, int *col_ptr);
-int charset_get_chars_8bit(QECharset *charset, const u8 *buf, int size);
-int charset_goto_char_8bit(QECharset *charset, const u8 *buf, int size, int
pos);
-int charset_goto_line_8bit(QECharset *charset, const u8 *buf, int size, int
nlines);
+int charset_get_chars_8bit(CharsetDecodeState *s, const u8 *buf, int size);
+int charset_goto_char_8bit(CharsetDecodeState *s, const u8 *buf, int size, int
pos);
+int charset_goto_line_8bit(CharsetDecodeState *s, const u8 *buf, int size, int
nlines);
-QECharset *detect_charset(const u8 *buf, int size);
+QECharset *detect_charset(const u8 *buf, int size, EOLType *eol_typep);
void decode_8bit_init(CharsetDecodeState *s);
int decode_8bit(CharsetDecodeState *s);
@@ -815,6 +824,7 @@
int tab_width;
int fill_column;
+ EOLType eol_type;
EditBuffer *next; /* next editbuffer in qe_state buffer list */
@@ -867,7 +877,7 @@
EditBuffer *eb_find_file(const char *filename);
EditState *eb_find_window(EditBuffer *b, EditState *e);
-void eb_set_charset(EditBuffer *b, QECharset *charset);
+void eb_set_charset(EditBuffer *b, QECharset *charset, EOLType eol_type);
__attr_nonnull((3))
int eb_nextc(EditBuffer *b, int offset, int *next_ptr);
__attr_nonnull((3))
@@ -1314,6 +1324,7 @@
int max_load_size; /* maximum file size for loading in memory */
int default_tab_width; /* 8 */
int default_fill_column; /* 70 */
+ EOLType default_eol_type; /* EOL_UNIX */
};
extern QEmacsState qe_state;
@@ -1744,7 +1755,8 @@
void do_yank(EditState *s);
void do_yank_pop(EditState *s);
void do_exchange_point_and_mark(EditState *s);
-QECharset *read_charset(EditState *s, const char *charset_str);
+QECharset *read_charset(EditState *s, const char *charset_str,
+ EOLType *eol_typep);
void do_set_buffer_file_coding_system(EditState *s, const char *charset_str);
void do_convert_buffer_file_coding_system(EditState *s,
const char *charset_str);
Index: shell.c
===================================================================
RCS file: /sources/qemacs/qemacs/shell.c,v
retrieving revision 1.83
retrieving revision 1.84
diff -u -b -r1.83 -r1.84
--- shell.c 4 Feb 2014 22:47:31 -0000 1.83
+++ shell.c 5 Feb 2014 00:56:49 -0000 1.84
@@ -1362,9 +1362,9 @@
/* Select shell output buffer encoding from LANG setting */
if (((lang = getenv("LANG")) != NULL && strstr(lang, "UTF-8")) ||
qs->screen->charset == &charset_utf8) {
- eb_set_charset(b, &charset_utf8);
+ eb_set_charset(b, &charset_utf8, b->eol_type);
} else {
- eb_set_charset(b, &charset_vt100);
+ eb_set_charset(b, &charset_vt100, b->eol_type);
}
s = qe_mallocz(ShellState);
Index: unihex.c
===================================================================
RCS file: /sources/qemacs/qemacs/unihex.c,v
retrieving revision 1.20
retrieving revision 1.21
diff -u -b -r1.20 -r1.21
--- unihex.c 16 Jan 2014 14:00:28 -0000 1.20
+++ unihex.c 5 Feb 2014 00:56:50 -0000 1.21
@@ -27,6 +27,9 @@
text_mode_init(s, saved_data);
+ /* unihex mode is incompatible with EOL_DOS eol type */
+ eb_set_charset(s->b, s->b->charset, EOL_UNIX);
+
/* Compute max width of character in hex dump (limit to first 64K) */
maxc = 0xFF;
max_offset = min(65536, s->b->total_size);
Index: libqhtml/xmlparse.c
===================================================================
RCS file: /sources/qemacs/qemacs/libqhtml/xmlparse.c,v
retrieving revision 1.25
retrieving revision 1.26
diff -u -b -r1.25 -r1.26
--- libqhtml/xmlparse.c 23 Jan 2014 12:56:24 -0000 1.25
+++ libqhtml/xmlparse.c 5 Feb 2014 00:56:50 -0000 1.26
@@ -301,7 +301,7 @@
pstrcpy(s->filename, sizeof(s->filename), filename);
s->charset = charset;
if (charset) {
- charset_decode_init(&s->charset_state, charset);
+ charset_decode_init(&s->charset_state, charset, EOL_UNIX);
}
return s;
}
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Qemacs-commit] qemacs buffer.c charset.c dired.c extras.c orgm...,
Charlie Gordon <=