mblaze/safe_u8putstr.c

62 lines
1.3 KiB
C
Raw Normal View History

2017-03-13 14:50:41 +00:00
#include <stdint.h>
2017-08-31 15:30:17 +00:00
#include <stdio.h>
2017-03-13 14:50:41 +00:00
2017-11-22 22:48:00 +00:00
#include "u8decode.h"
2017-03-13 14:50:41 +00:00
/*
 * Number of bytes a UTF-8 sequence starting with lead byte b announces.
 * ASCII, stray continuation bytes and bytes that can never start a
 * sequence all count as 1, so they are always handed to the decoder.
 */
static size_t
u8_announced_len(unsigned char b)
{
	if (b < 0xc0)
		return 1;
	if (b < 0xe0)
		return 2;
	if (b < 0xf0)
		return 3;
	if (b < 0xf8)
		return 4;
	return 1;
}

/*
 * Write the l bytes starting at s0 to stream in a tty-safe way, using
 * relaxed UTF-8 semantics:
 * - C0 controls and DEL are shown as Unicode Control Pictures (U+24xx);
 *   space and tab always pass through, and CR/LF pass through unless
 *   oneline is non-zero
 * - bytes 0x80..0x9f that do not decode (C1 controls) are shown as the
 *   ESC picture followed by a second control picture
 * - valid UTF-8 is printed as is
 * - any other undecodable byte is assumed to be Latin-1 and is
 *   re-encoded as UTF-8
 * - CRLF is translated to LF
 *
 * A NULL s0 is treated as empty input.  Write errors on stream are not
 * reported.
 */
void
safe_u8putstr(char *s0, size_t l, int oneline, FILE *stream)
{
	if (s0 == NULL)
		return;

	unsigned char *s = (unsigned char *)s0;
	unsigned char *e = s + l;
	uint32_t c;

	while (s < e) {
		size_t avail = (size_t)(e - s);
		int n;

		// u8decode is not told where the input ends; presumably it
		// inspects as many continuation bytes as the lead byte
		// announces.  Reject sequences that would cross the end of
		// the buffer up front, so nothing at or past e is read or
		// written; they are handled by the Latin-1 fallback below.
		if (u8_announced_len(*s) > avail)
			n = -1;
		else
			n = u8decode((char *)s, &c);

		if (n == -1) {
			n = 1;
			if (*s <= 0x9fu) {
				// C1: U+241B (symbol for escape) ...
				fputc(0xe2, stream);
				fputc(0x90, stream);
				fputc(0x80+0x1b, stream);

				// ... followed by U+2400 + (*s - 0x80)
				fputc(0xe2, stream);
				fputc(0x90, stream);
				fputc(*s, stream);
			} else {
				/* invalid utf-8, assume it was latin-1 */
				fputc(0xc0 | (*s >> 6), stream);
				fputc(0x80 | (*s & 0x3f), stream);
			}
		} else if (c < 32 &&
		    *s != ' ' && *s != '\t' &&
		    (oneline || (*s != '\n' && *s != '\r'))) {
			// A length of 0 is how the decoder reports NUL; it
			// still occupies one input byte.
			if (n == 0)
				n = 1;
			// C0: U+2400 + control code
			fputc(0xe2, stream);
			fputc(0x90, stream);
			fputc(0x80+*s, stream);
		} else if (c == 127) {
			// DEL: U+2421
			fputc(0xe2, stream);
			fputc(0x90, stream);
			fputc(0xa1, stream);
		} else if (c == '\r') {
			// Only reached when !oneline.  For CRLF skip the CR
			// and emit just the LF; a lone CR is emitted as is.
			if (avail > 1 && s[1] == '\n')
				s++;
			fputc(*s, stream);
		} else {
			fwrite(s, 1, (size_t)n, stream);
		}

		s += n;
	}
}