2016-07-11 21:40:00 +00:00
|
|
|
#include <errno.h>
|
|
|
|
#include <iconv.h>
|
2016-07-18 15:06:41 +00:00
|
|
|
#include <stdint.h>
|
2016-07-11 21:40:00 +00:00
|
|
|
#include <stdio.h>
|
2016-07-18 15:06:41 +00:00
|
|
|
#include <stdlib.h>
|
2016-07-11 21:40:00 +00:00
|
|
|
#include <string.h>
|
|
|
|
|
2016-07-13 13:56:50 +00:00
|
|
|
#include "blaze822.h"
|
|
|
|
#include "blaze822_priv.h"
|
2016-07-12 13:16:56 +00:00
|
|
|
|
2016-07-11 21:40:00 +00:00
|
|
|
// XXX keep trying bytewise on invalid iconv
|
|
|
|
|
|
|
|
/*
 * Decode quoted-printable data from the byte range [start, stop).
 *
 * - "=\n" and "=\r\n" (soft line breaks) are dropped.
 * - "=XY" with two hex digits becomes the byte 0xXY; if X or Y is not a
 *   hex digit the three input bytes are replaced by "???".
 * - When `underscore` is non-zero, '_' is decoded as a space.
 * - Every other byte is copied unchanged.
 *
 * On success returns 1, stores a malloc'ed, NUL-terminated buffer in *deco
 * (the caller must free it) and the decoded length, not counting the NUL,
 * in *decleno.  Returns 0 if the allocation fails; *deco and *decleno are
 * left untouched in that case.
 *
 * No byte at or beyond `stop` is ever read.
 */
int
blaze822_decode_qp(char *start, char *stop, char **deco, size_t *decleno, int underscore)
{
	// Maps an ASCII code to its hex digit value, -1 for non-hex characters.
	static const signed char hex[128] = {
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		 0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
		-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
	};

	// Every branch below emits at most as many bytes as it consumes,
	// so the input length plus one byte for the NUL always suffices.
	char *buf = malloc(stop - start + 1);
	if (!buf)
		return 0;

	*deco = buf;

	char *s = start;
	while (s < stop) {
		size_t left = stop - s;  // bytes still available, >= 1

		if (*s == '=' && left >= 2 && s[1] == '\n') {
			s += 2;
		} else if (*s == '=' && left >= 3 && s[1] == '\r' && s[2] == '\n') {
			s += 3;
		} else if (*s == '=' && left >= 3) {
			unsigned char c1 = s[1];
			unsigned char c2 = s[2];
			s += 3;
			if (c1 > 127 || c2 > 127 || hex[c1] < 0 || hex[c2] < 0) {
				*buf++ = '?';
				*buf++ = '?';
				*buf++ = '?';
				continue;
			}
			*buf++ = (hex[c1] << 4) | hex[c2];
		} else if (underscore && *s == '_') {
			*buf++ = ' ';
			s++;
		} else {
			// Includes a '=' too close to `stop` to be an escape.
			*buf++ = *s++;
		}
	}

	*buf = 0;

	*decleno = buf - *deco;
	return 1;
}
|
2016-07-13 13:52:39 +00:00
|
|
|
|
2016-07-11 21:40:00 +00:00
|
|
|
/*
 * Decode base64 data from the byte range [s, e).
 *
 * Input is consumed in groups of four characters; characters for which
 * isfws() is true are skipped before each group.  A group containing a
 * character outside the base64 alphabet is replaced by "???".  '=' is
 * accepted as padding: an output byte is suppressed when the corresponding
 * input position (2nd, 3rd or 4th of the group) is '='.  Fewer than four
 * trailing characters are ignored.
 *
 * On success returns 1, stores a malloc'ed, NUL-terminated buffer in *deco
 * (the caller must free it) and the decoded length, not counting the NUL,
 * in *decleno.  Returns 0 if the allocation fails; *deco and *decleno are
 * left untouched in that case.
 *
 * No byte at or beyond `e` is ever read.
 */
int
blaze822_decode_b64(char *s, char *e, char **deco, size_t *decleno)
{
	// Maps an ASCII code to its 6-bit value, -1 for characters outside the
	// alphabet.  '=' maps to 0 so that padded groups still decode.
	static const signed char b64[128] = {
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
		52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1,
		-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
		15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
		-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
		41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
	};

	// Each group of four input bytes yields at most three output bytes.
	// The extra byte holds the terminating NUL and also guarantees that
	// we never call malloc(0), which may legitimately return NULL.
	char *buf = malloc((e - s) / 4 * 3 + 1);
	if (!buf)
		return 0;

	*deco = buf;

	while (s < e) {
		while (s < e && isfws((unsigned char)*s))
			s++;
		// Re-check after skipping whitespace: a full group must remain.
		if (e - s < 4)
			break;

		unsigned char c0 = s[0], c1 = s[1], c2 = s[2], c3 = s[3];
		s += 4;

		if ((c0 | c1 | c2 | c3) > 127 ||
		    b64[c0] < 0 || b64[c1] < 0 || b64[c2] < 0 || b64[c3] < 0) {
			*buf++ = '?';
			*buf++ = '?';
			*buf++ = '?';
			continue;
		}

		uint32_t v = ((uint32_t)b64[c0] << 18) |
		    ((uint32_t)b64[c1] << 12) |
		    ((uint32_t)b64[c2] << 6) |
		    (uint32_t)b64[c3];

		if (c1 != '=') *buf++ = (v >> 16) & 0xff;
		if (c2 != '=') *buf++ = (v >> 8) & 0xff;
		if (c3 != '=') *buf++ = v & 0xff;
	}

	*buf = 0;

	*decleno = buf - *deco;
	return 1;
}
|
|
|
|
|
|
|
|
/*
 * Decode RFC 2047 encoded-words ("=?charset?Q?text?=" / "=?charset?B?text?=")
 * in the NUL-terminated string `src` and write the result, converted with
 * iconv from the word's charset to `tgtenc`, into `dst`.
 *
 * dst   - output buffer, always NUL-terminated on return (when dlen > 0)
 * src   - input string; it is only read, never modified
 * dlen  - total size of `dst` in bytes, including room for the NUL
 * tgtenc - target charset name handed to iconv_open()
 *
 * Text between two encoded-words is dropped when it consists only of
 * characters for which isfws() is true, and copied verbatim otherwise.
 * A trailing "*lang" suffix on the charset is ignored.  A multibyte
 * sequence that is cut off at the end of one encoded-word is carried over
 * and completed with the bytes of the next one, provided both words use
 * the same charset and are separated by whitespace only.
 *
 * If anything cannot be decoded (unknown charset or encoding, malformed
 * word, invalid byte sequence, out of memory), a message is printed to
 * stderr and `dst` receives a plain, possibly truncated, copy of `src`.
 * Output that does not fit is silently truncated.
 *
 * Returns 1, or 0 if dlen is 0 (nothing can be stored, `dst` is untouched).
 */
int
blaze822_decode_rfc2047(char *dst, char *src, size_t dlen, char *tgtenc)
{
	iconv_t ic = (iconv_t)-1;
	char *srcenc = 0;      // charset the open iconv descriptor converts from
	char *decchunk = 0;    // start of the current decoded chunk (owned)
	char *partial = 0;     // unfinished multibyte sequence of the previous word
	size_t partiallen = 0;

	if (dlen == 0)
		return 0;
	dlen--;  // reserve the byte for the terminating NUL

	// Remembered so the fallback can overwrite partially decoded output.
	char *startdst = dst;
	size_t startdlen = dlen;

	char *b = src;

	char *s = strstr(src, "=?");
	if (!s)
		goto nocodeok;

	do {
		// Deal with the text between the previous word and this one.
		char *t = b;
		while (t < s && isfws((unsigned char)*t))
			t++;
		if (t < s) {
			if (partial)  // mixed up encodings
				goto nocode;
			while (b < s && dlen) {
				*dst++ = *b++;
				dlen--;
			}
		}

		if (!dlen)
			break;

		s += 2;

		char *e = strchr(s, '?');
		if (!e)
			goto nocode;

		// Charset token without an RFC 2231 language tag.
		size_t cslen = e - s;
		char *lang = memchr(s, '*', cslen);
		if (lang)
			cslen = lang - s;

		if (!srcenc || strlen(srcenc) != cslen ||
		    memcmp(srcenc, s, cslen) != 0) {
			if (partial)  // mixed up encodings
				goto nocode;

			char *newenc = malloc(cslen + 1);
			if (!newenc)
				goto nocode;
			memcpy(newenc, s, cslen);
			newenc[cslen] = 0;
			free(srcenc);
			srcenc = newenc;

			if (ic != (iconv_t)-1)
				iconv_close(ic);
			ic = iconv_open(tgtenc, srcenc);
		}
		e++;

		if (ic == (iconv_t)-1)
			goto nocode;

		// Expect exactly one encoding letter followed by '?'.  Checking
		// e[0] first keeps us from walking past the end of the string.
		if (!e[0] || e[1] != '?')
			goto nocode;
		char enc = lc(e[0]);
		e += 2;

		char *start = e;
		char *stop = strstr(e, "?=");
		if (!stop)
			goto nocode;

		char *dec = 0;
		size_t declen = 0;
		int ok;
		if (enc == 'q')
			ok = blaze822_decode_qp(start, stop, &dec, &declen, 1);
		else if (enc == 'b')
			ok = blaze822_decode_b64(start, stop, &dec, &declen);
		else
			goto nocode;
		if (!ok)
			goto nocode;
		decchunk = dec;

		if (partial) {
			// Prepend the bytes left over from the previous word.
			char *joined = realloc(dec, declen + partiallen);
			if (!joined)
				goto nocode;  // decchunk still owns the old block
			memmove(joined + partiallen, joined, declen);
			memcpy(joined, partial, partiallen);
			declen += partiallen;
			dec = decchunk = joined;
			free(partial);
			partial = 0;
			partiallen = 0;
		}

		// iconv advances dec/dst and decrements declen/dlen.
		size_t r = iconv(ic, &dec, &declen, &dst, &dlen);
		if (r == (size_t)-1) {
			if (errno == E2BIG) {
				// Output is full: stop here, truncated.
				goto finish;
			} else if (errno == EILSEQ) {
				goto nocode;
			} else if (errno == EINVAL) {
				// Incomplete multibyte sequence at the end of this
				// word; keep it for the next one.
				partial = malloc(declen);
				if (!partial)
					goto nocode;
				memcpy(partial, dec, declen);
				partiallen = declen;
			} else {
				perror("iconv");
				goto nocode;
			}
		}

		// Defensive: pass through anything iconv left unconverted.
		while (!partial && declen && dlen) {
			*dst++ = *dec++;
			declen--;
			dlen--;
		}

		free(decchunk);
		decchunk = 0;

		b = stop + 2;
	} while (dlen && (s = strstr(b, "=?")));

	// Copy whatever follows the last encoded-word.
	while (*b && dlen) {
		*dst++ = *b++;
		dlen--;
	}
	goto finish;

nocode:
	fprintf(stderr, "error decoding rfc2047\n");
	// Discard what was decoded so far; the result is the raw input.
	dst = startdst;
	dlen = startdlen;
nocodeok:
	while (*src && dlen) {
		*dst++ = *src++;
		dlen--;
	}

finish:
	*dst = 0;

	if (ic != (iconv_t)-1)
		iconv_close(ic);
	free(srcenc);
	free(partial);
	free(decchunk);

	return 1;
}
|
|
|
|
|
|
|
|
#ifdef TEST
/*
 * Manual smoke test, built only with -DTEST: runs each decoder on a fixed
 * sample and prints the result (and, for the raw decoders, its length).
 */
int
main(void)
{
	char *r;
	size_t l;
	char test[] = "Keld_J=F8rn_Simonsen";
	// sizeof - 1: do not feed the string's NUL terminator to the decoder.
	if (blaze822_decode_qp(test, test + sizeof test - 1, &r, &l, 1)) {
		printf("%.*s %zu\n", (int)l, r, l);
		free(r);
	}

	char *r2;
	size_t l2;
	char test2[] = "SWYgeW91IGNhbiByZWFkIHRoaXMgeW8="; // dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==";
	if (blaze822_decode_b64(test2, test2 + sizeof test2 - 1, &r2, &l2)) {
		// Print by length; do not rely on the buffer being terminated.
		printf("%.*s %zu\n", (int)l2, r2, l2);
		free(r2);
	}

	char test3[] = "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>";
	char test3dec[255];
	blaze822_decode_rfc2047(test3dec, test3, sizeof test3dec, "UTF-8");
	printf("%s\n", test3dec);

	char test4[] = "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= "
	    "=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?= z "
	    "=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=";
	char test4dec[255];
	blaze822_decode_rfc2047(test4dec, test4, sizeof test4dec, "UTF-8");
	printf("%s\n", test4dec);

	// A multibyte character split across two encoded-words.
	char test5[] = "=?UTF-8?Q?z=E2=80?= =?UTF-8?Q?=99z?=";
	char test5dec[255];
	blaze822_decode_rfc2047(test5dec, test5, sizeof test5dec, "UTF-8");
	printf("%s\n", test5dec);

	return 0;
}
#endif
|