OpenTTD-patches/string.c

/* $Id$ */

#include "stdafx.h"
#include "openttd.h"
#include "functions.h"
#include "string.h"
#include "macros.h"
#include "table/control_codes.h"

#include <stdarg.h>
#include <wctype.h> // required for towlower()
#include <locale.h> // required for setlocale()

/**
 * Append src to the NUL-terminated string in dst without exceeding the
 * buffer. The result is always NUL-terminated; src is truncated if needed.
 * @param dst  destination buffer holding a terminated string
 * @param src  string to append
 * @param size total size of the destination buffer in bytes (must be > 0)
 */
void ttd_strlcat(char *dst, const char *src, size_t size)
{
	assert(size > 0);

	/* Walk to the terminator of dst, shrinking the remaining room as we go. */
	while (size > 0 && *dst != '\0') {
		++dst;
		--size;
	}
	/* dst has to be terminated somewhere within its first 'size' bytes. */
	assert(size > 0);

	/* One byte stays reserved for the terminator written below. */
	for (--size; size > 0 && *src != '\0'; --size) {
		*dst++ = *src++;
	}
	*dst = '\0';
}


/**
 * Copy src into dst, writing at most size bytes including the terminator.
 * The result is always NUL-terminated; src is truncated if it does not fit.
 * @param dst  destination buffer
 * @param src  string to copy
 * @param size total size of the destination buffer in bytes (must be > 0)
 */
void ttd_strlcpy(char *dst, const char *src, size_t size)
{
	size_t room;

	assert(size > 0);

	/* 'room' counts the characters that still fit in front of the terminator. */
	for (room = size - 1; room > 0 && *src != '\0'; --room) {
		*dst++ = *src++;
	}
	*dst = '\0';
}


/**
 * Append src to the string in dst, never writing beyond 'last'.
 * @param dst  destination buffer holding the string to extend
 * @param src  string to append
 * @param last pointer to the last byte of the destination buffer
 * @return pointer to the terminating NUL of the result, or 'last' when the
 *         buffer is already full without a terminator (nothing is written)
 */
char* strecat(char* dst, const char* src, const char* last)
{
	assert(dst <= last);

	/* Find the end of the existing string; bail out untouched when the
	 * buffer is completely filled and holds no terminator. */
	while (*dst != '\0') {
		if (dst == last) return dst;
		++dst;
	}

	/* The bounded copy, the termination and the overflow check are exactly
	 * what strecpy does when started at the end of the existing string. */
	return strecpy(dst, src, last);
}


/**
 * Copy src into dst, never writing beyond 'last'; the result is always
 * NUL-terminated. When src does not fit completely, error() is called.
 * @param dst  destination buffer
 * @param src  string to copy
 * @param last pointer to the last byte of the destination buffer
 * @return pointer to the terminating NUL written into dst
 */
char* strecpy(char* dst, const char* src, const char* last)
{
	assert(dst <= last);

	/* Copy until the source ends or only the terminator slot is left. */
	while (dst != last && *src != '\0') {
		*dst++ = *src++;
	}
	*dst = '\0';

#if 1
	/* Source characters left over while the buffer is full: it was truncated. */
	if (dst == last && *src != '\0') {
		error("String too long for destination buffer");
	}
#endif
	return dst;
}


/**
 * Format a string printf-style into a freshly allocated buffer.
 * Output longer than the internal 4096 byte scratch buffer is truncated.
 * @param str printf-style format string
 * @param ... arguments for the format string
 * @return malloc'ed, NUL-terminated string that the caller must free(),
 *         or NULL when the allocation fails
 */
char* CDECL str_fmt(const char* str, ...)
{
	char buf[4096];
	va_list va;
	int len;
	char* p;

	va_start(va, str);
	len = vsnprintf(buf, lengthof(buf), str, va);
	va_end(va);

	/* When the output did not fit, vsnprintf either reports the length the
	 * full output would have had (C99) or a negative value (the MSVC wrapper
	 * in this file, or an error). Neither is the number of bytes actually
	 * stored in buf, so using it for the copy below would read past buf or
	 * produce an unterminated result. Fall back to what is really there. */
	if (len < 0 || (size_t)len >= lengthof(buf)) {
		buf[lengthof(buf) - 1] = '\0';
		len = (int)strlen(buf);
	}

	p = malloc(len + 1);
	if (p != NULL) memcpy(p, buf, len + 1);
	return p;
}


/**
 * Sanitise a UTF-8 string in place: every character that is not acceptable
 * is replaced by a single question mark. The result is never longer than
 * the input.
 * @param str NUL-terminated string to validate; modified in place
 */
void str_validate(char *str)
{
	char *dst = str;
	WChar c;
	size_t len;

	for (len = Utf8Decode(&c, str); c != '\0'; len = Utf8Decode(&c, str)) {
		/* Utf8Decode reports an undecodable sequence as '?' with a length of
		 * 1. In that case the byte in the buffer is not a real question mark
		 * and must not be copied through, otherwise invalid UTF-8 survives
		 * the validation. Only a genuine '?' byte passes the first test. */
		if ((c != '?' || *str == '?') && IsPrintable(c) &&
				(c < SCC_SPRITE_START || c > SCC_SPRITE_END ||
				IsValidChar(c - SCC_SPRITE_START, CS_ALPHANUMERAL))) {
			/* Copy the character back. Even if dst is currently the same as
			 * str (i.e. no characters have been changed) this is quicker
			 * than moving the pointers ahead by len */
			do {
				*dst++ = *str++;
			} while (--len != 0);
		} else {
			/* Replace the undesirable character with a question mark */
			str += len;
			*dst++ = '?';
		}
	}

	*dst = '\0';
}


/**
 * Remove all colour control codes (SCC_BLUE up to and including SCC_BLACK)
 * from a UTF-8 string, in place.
 * @param str NUL-terminated string to strip; modified in place
 */
void str_strip_colours(char *str)
{
	char *out = str;
	WChar c;
	size_t len;

	/* NOTE(review): the whole string is lowercased before stripping. That is
	 * surprising for a function with this name; confirm callers want it. */
	strtolower(str);

	for (;;) {
		len = Utf8Decode(&c, str);
		if (c == '\0') break;

		if (c >= SCC_BLUE && c <= SCC_BLACK) {
			/* A colour code: step over it without emitting anything. */
			str += len;
			continue;
		}

		/* Anything else is kept; move its bytes down to the write position.
		 * This is done even when nothing has been removed yet. */
		while (len-- > 0) *out++ = *str++;
	}
	*out = '\0';
}


/**
 * Convert a UTF-8 string to lowercase in place, using the case rules of the
 * native locale. A character whose lowercase form needs a different number
 * of bytes than the original is left unchanged, so the string never grows
 * or shrinks and no neighbouring bytes are touched.
 * @param str NUL-terminated string to convert; modified in place
 */
void strtolower(char *str)
{
	WChar c;
	char lower[4]; // scratch space; Utf8Encode writes at most 4 bytes
	size_t len;
	size_t lower_len;
	/* Convert according to native locale, needed for unicode characters.
	 * We back up the current locale, set it to native "", then set it back.
	 * If the current locale cannot be queried or saved, it is not switched
	 * at all, as it could not be restored afterwards. */
	const char *current = setlocale(LC_CTYPE, NULL);
	char *locale = (current != NULL) ? strdup(current) : NULL;

	if (locale != NULL) setlocale(LC_CTYPE, "");

	for (len = Utf8Decode(&c, str); c != '\0'; len = Utf8Decode(&c, str)) {
		/* Encode into scratch space first: writing a longer sequence straight
		 * into str would clobber the following character (or the terminator),
		 * a shorter one would leave stale continuation bytes behind. */
		lower_len = Utf8Encode(lower, towlower(c));
		if (lower_len == len) memcpy(str, lower, len);
		str += len;
	}

	if (locale != NULL) {
		setlocale(LC_CTYPE, locale);
		free(locale);
	}
}


/**
 * Check a character against a character-set filter, e.g. to keep unwanted
 * keys such as BELL out of an edit box.
 *  - CS_ALPHANUMERAL accepts every printable character
 *  - CS_NUMERAL      accepts the digits '0' to '9' only
 *  - CS_ALPHA        accepts every printable character except the digits
 * @param key character to be checked
 * @param afilter the filter to use
 * @return true if the filter accepts the character; false otherwise, also
 *         for a filter value that is not listed above
 */
bool IsValidChar(WChar key, CharSetFilter afilter)
{
	bool is_digit = (key >= '0' && key <= '9');

	if (afilter == CS_ALPHANUMERAL) return IsPrintable(key);
	if (afilter == CS_NUMERAL) return is_digit;
	if (afilter == CS_ALPHA) return IsPrintable(key) && !is_digit;

	return false;
}

#ifdef WIN32
/**
 * snprintf for Windows builds; bundles the variable arguments and forwards
 * them to vsnprintf.
 * @param str    destination buffer
 * @param size   size of the destination buffer in bytes
 * @param format printf-style format string
 * @param ...    arguments for the format string
 * @return whatever vsnprintf returns for the same arguments
 */
int CDECL snprintf(char *str, size_t size, const char *format, ...)
{
	va_list args;
	int written;

	va_start(args, format);
	written = vsnprintf(str, size, format, args);
	va_end(args);

	return written;
}

#ifdef _MSC_VER
/**
 * vsnprintf for MSVC builds on top of _vsnprintf, making sure the result is
 * NUL-terminated whenever the buffer has room for at least one byte.
 * @param str    destination buffer
 * @param size   size of the destination buffer in bytes
 * @param format printf-style format string
 * @param ap     arguments for the format string
 * @return the value returned by _vsnprintf; a negative value or a value
 *         that is >= size means the output was truncated
 */
int CDECL vsnprintf(char *str, size_t size, const char *format, va_list ap)
{
	int ret;
	ret = _vsnprintf(str, size, format, ap);
	/* _vsnprintf leaves the buffer unterminated both when the output is too
	 * long (negative return) and when it fills the buffer exactly (return
	 * equals size). With size == 0 there is no byte to put a terminator in. */
	if (size > 0 && (ret < 0 || (size_t)ret >= size)) str[size - 1] = '\0';
	return ret;
}
#endif /* _MSC_VER */

#endif /* WIN32 */


/* UTF-8 handling routines */


/**
 * Decode the UTF-8 sequence at the start of a string.
 * Sequences of one to four bytes are recognised. A malformed sequence, an
 * overlong encoding or a value above 0x10FFFF is reported as '?' with a
 * length of 1.
 * @param c Buffer to place the decoded character in; must not be NULL.
 * @param s Character stream to read the sequence from.
 * @return Number of bytes that make up the sequence.
 */
size_t Utf8Decode(WChar *c, const char *s)
{
	WChar value;

	assert(c != NULL);

	if (!HASBIT(s[0], 7)) {
		/* 0xxxxxxx: a plain single byte character */
		*c = s[0];
		return 1;
	}

	if (GB(s[0], 5, 3) == 6) {
		/* 110xxxxx 10xxxxxx: two byte sequence */
		if (IsUtf8Part(s[1])) {
			value = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
			/* Smaller values belong in a single byte (overlong) */
			if (value >= 0x80) {
				*c = value;
				return 2;
			}
		}
	} else if (GB(s[0], 4, 4) == 14) {
		/* 1110xxxx 10xxxxxx 10xxxxxx: three byte sequence */
		if (IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
			value = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
			if (value >= 0x800) {
				*c = value;
				return 3;
			}
		}
	} else if (GB(s[0], 3, 5) == 30) {
		/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: four byte sequence */
		if (IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
			value = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
			if (value >= 0x10000 && value <= 0x10FFFF) {
				*c = value;
				return 4;
			}
		}
	}

	/* Invalid UTF-8 sequence: hand back a placeholder and consume one byte */
	*c = '?';
	return 1;
}


/**
 * Encode a unicode character as UTF-8 and store it in a buffer. No
 * terminator is written. A value of 0x110000 or above cannot be encoded
 * and is stored as a single '?'.
 * @param buf Buffer to place the sequence in; up to 4 bytes are written.
 * @param c   Unicode character to encode.
 * @return Number of bytes written to the buffer.
 */
size_t Utf8Encode(char *buf, WChar c)
{
	if (c < 0x80) {
		/* 0xxxxxxx */
		buf[0] = c;
		return 1;
	}

	if (c < 0x800) {
		/* 110xxxxx 10xxxxxx */
		buf[0] = 0xC0 + GB(c,  6, 5);
		buf[1] = 0x80 + GB(c,  0, 6);
		return 2;
	}

	if (c < 0x10000) {
		/* 1110xxxx 10xxxxxx 10xxxxxx */
		buf[0] = 0xE0 + GB(c, 12, 4);
		buf[1] = 0x80 + GB(c,  6, 6);
		buf[2] = 0x80 + GB(c,  0, 6);
		return 3;
	}

	if (c < 0x110000) {
		/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		buf[0] = 0xF0 + GB(c, 18, 3);
		buf[1] = 0x80 + GB(c, 12, 6);
		buf[2] = 0x80 + GB(c,  6, 6);
		buf[3] = 0x80 + GB(c,  0, 6);
		return 4;
	}

	/* Value lies outside of the range UTF-8 can represent */
	buf[0] = '?';
	return 1;
}
(svn r2701) Insert Id tags into all source files 2005-07-24 14:12:37 +00:00			`/* $Id$ */`

(svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics 2005-02-06 13:41:02 +00:00			`#include "stdafx.h"`
(svn r6884) -Codechange: Add strict bounds checking in string formatting system. The last parameter should point to the end of the buffer (eg lastof(buf)) Courtesy of Tron. 2006-10-21 23:31:34 +00:00			`#include "openttd.h"`
			`#include "functions.h"`
(svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics 2005-02-06 13:41:02 +00:00			`#include "string.h"`
(svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come. 2006-11-16 22:05:33 +00:00			`#include "macros.h"`
			`#include "table/control_codes.h"`
(svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics 2005-02-06 13:41:02 +00:00
(svn r2754) Move str_fmt into string.[ch] 2005-07-29 21:52:20 +00:00			`#include <stdarg.h>`
(svn r7199) -Codechange: [utf8] Make strtolower and str_strip_colours UTF8 aware. 2006-11-17 23:29:22 +00:00			`#include <wctype.h> // required for towlower()`
			`#include <locale.h> // required for setlocale()`
(svn r2754) Move str_fmt into string.[ch] 2005-07-29 21:52:20 +00:00
(svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics 2005-02-06 13:41:02 +00:00			`void ttd_strlcat(char dst, const char src, size_t size)`
			`{`
			`assert(size > 0);`
			`for (; size > 0 && *dst != '\0'; --size, ++dst) {}`
			`assert(size > 0);`
			`while (--size > 0 && src != '\0') dst++ = *src++;`
			`*dst = '\0';`
			`}`


			`void ttd_strlcpy(char dst, const char src, size_t size)`
			`{`
			`assert(size > 0);`
			`while (--size > 0 && src != '\0') dst++ = *src++;`
			`*dst = '\0';`
			`}`


			`char* strecat(char* dst, const char* src, const char* last)`
			`{`
(svn r6884) -Codechange: Add strict bounds checking in string formatting system. The last parameter should point to the end of the buffer (eg lastof(buf)) Courtesy of Tron. 2006-10-21 23:31:34 +00:00			`assert(dst <= last);`
(svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics 2005-02-06 13:41:02 +00:00			`for (; *dst != '\0'; ++dst)`
			`if (dst == last) return dst;`
			`for (; src != '\0' && dst != last; ++dst, ++src) dst = *src;`
			`*dst = '\0';`
			`return strecpy(dst, src, last);`
			`}`


			`char* strecpy(char* dst, const char* src, const char* last)`
			`{`
(svn r6884) -Codechange: Add strict bounds checking in string formatting system. The last parameter should point to the end of the buffer (eg lastof(buf)) Courtesy of Tron. 2006-10-21 23:31:34 +00:00			`assert(dst <= last);`
(svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics 2005-02-06 13:41:02 +00:00			`for (; src != '\0' && dst != last; ++dst, ++src) dst = *src;`
			`*dst = '\0';`
(svn r6886) -Be anal for the time being about string-wannabe-buffer-overflows 2006-10-21 23:36:43 +00:00			`#if 1`
(svn r6884) -Codechange: Add strict bounds checking in string formatting system. The last parameter should point to the end of the buffer (eg lastof(buf)) Courtesy of Tron. 2006-10-21 23:31:34 +00:00			`if (dst == last && *src != '\0') {`
			`error("String too long for destination buffer");`
			`}`
			`#endif`
(svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics 2005-02-06 13:41:02 +00:00			`return dst;`
			`}`
(svn r2754) Move str_fmt into string.[ch] 2005-07-29 21:52:20 +00:00

			`char* CDECL str_fmt(const char* str, ...)`
			`{`
			`char buf[4096];`
			`va_list va;`
			`int len;`
			`char* p;`

			`va_start(va, str);`
(svn r6089) -Backport r6088: added -s (source) and -d (destination) to strgen (Darkvater) 2006-08-24 12:08:25 +00:00			`len = vsnprintf(buf, lengthof(buf), str, va);`
(svn r2754) Move str_fmt into string.[ch] 2005-07-29 21:52:20 +00:00			`va_end(va);`
			`p = malloc(len + 1);`
			`if (p != NULL) memcpy(p, buf, len + 1);`
			`return p;`
			`}`
(svn r3322) - Fix: Network window crash when it receives invalid information for example from the integrated nightly, so validate the network-input when it is received - CodeChange: added str_validate(char *str) function that checks if a string contains only printable characters and if not, replaces those characters by question marks. Also move IsValidAsciiChar() to string.h 2005-12-20 20:52:05 +00:00
(svn r7199) -Codechange: [utf8] Make strtolower and str_strip_colours UTF8 aware. 2006-11-17 23:29:22 +00:00
(svn r3322) - Fix: Network window crash when it receives invalid information for example from the integrated nightly, so validate the network-input when it is received - CodeChange: added str_validate(char *str) function that checks if a string contains only printable characters and if not, replaces those characters by question marks. Also move IsValidAsciiChar() to string.h 2005-12-20 20:52:05 +00:00			`void str_validate(char *str)`
			`{`
(svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come. 2006-11-16 22:05:33 +00:00			`char *dst = str;`
			`WChar c;`
(svn r7199) -Codechange: [utf8] Make strtolower and str_strip_colours UTF8 aware. 2006-11-17 23:29:22 +00:00			`size_t len;`
(svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come. 2006-11-16 22:05:33 +00:00
(svn r7199) -Codechange: [utf8] Make strtolower and str_strip_colours UTF8 aware. 2006-11-17 23:29:22 +00:00			`for (len = Utf8Decode(&c, str); c != '\0'; len = Utf8Decode(&c, str)) {`
(svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come. 2006-11-16 22:05:33 +00:00			`if (IsPrintable(c) && (c < SCC_SPRITE_START \|\| c > SCC_SPRITE_END \|\|`
			`IsValidChar(c - SCC_SPRITE_START, CS_ALPHANUMERAL))) {`
			`/* Copy the character back. Even if dst is current the same as str`
			`* (i.e. no characters have been changed) this is quicker than`
			`* moving the pointers ahead by len */`
			`do {`
			`dst++ = str++;`
(svn r7199) -Codechange: [utf8] Make strtolower and str_strip_colours UTF8 aware. 2006-11-17 23:29:22 +00:00			`} while (--len != 0);`
(svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come. 2006-11-16 22:05:33 +00:00			`} else {`
			`/* Replace the undesirable character with a question mark */`
			`str += len;`
			`*dst++ = '?';`
			`}`
			`}`

			`*dst = '\0';`
(svn r3322) - Fix: Network window crash when it receives invalid information for example from the integrated nightly, so validate the network-input when it is received - CodeChange: added str_validate(char *str) function that checks if a string contains only printable characters and if not, replaces those characters by question marks. Also move IsValidAsciiChar() to string.h 2005-12-20 20:52:05 +00:00			`}`
(svn r5684) - Codechange: create an strtolower() function that uses tolower() on a whole string and apply it in the places this was used. 2006-07-31 22:11:34 +00:00
(svn r7199) -Codechange: [utf8] Make strtolower and str_strip_colours UTF8 aware. 2006-11-17 23:29:22 +00:00
(svn r7172) -Fix [r6931]: The console showed '?' characters instead of colours. Now strip all colours for the console. It's a bit magicky (magic numbers) but UTF8 fixes that soon. 2006-11-16 17:41:24 +00:00			`void str_strip_colours(char *str)`
			`{`
			`char *dst = str;`
(svn r7199) -Codechange: [utf8] Make strtolower and str_strip_colours UTF8 aware. 2006-11-17 23:29:22 +00:00			`WChar c;`
			`size_t len;`

			`strtolower(str);`
			`for (len = Utf8Decode(&c, str); c != '\0'; len = Utf8Decode(&c, str)) {`
			`if (c < SCC_BLUE \|\| c > SCC_BLACK) {`
			`/* Copy the character back. Even if dst is current the same as str`
			`* (i.e. no characters have been changed) this is quicker than`
			`* moving the pointers ahead by len */`
			`do {`
			`dst++ = str++;`
			`} while (--len != 0);`
(svn r7172) -Fix [r6931]: The console showed '?' characters instead of colours. Now strip all colours for the console. It's a bit magicky (magic numbers) but UTF8 fixes that soon. 2006-11-16 17:41:24 +00:00			`} else {`
(svn r7199) -Codechange: [utf8] Make strtolower and str_strip_colours UTF8 aware. 2006-11-17 23:29:22 +00:00			`/* Just skip (strip) the colour codes */`
			`str += len;`
(svn r7172) -Fix [r6931]: The console showed '?' characters instead of colours. Now strip all colours for the console. It's a bit magicky (magic numbers) but UTF8 fixes that soon. 2006-11-16 17:41:24 +00:00			`}`
			`}`
			`*dst = '\0';`
			`}`

(svn r7199) -Codechange: [utf8] Make strtolower and str_strip_colours UTF8 aware. 2006-11-17 23:29:22 +00:00
			`void strtolower(char *str)`
			`{`
			`WChar c;`
			`/* Convert according to native locale, needed for unicode characters`
			`* We backup the current locale, then set it to native "", the set back */`
			`char *locale = strdup(setlocale(LC_CTYPE, NULL));`

			`setlocale(LC_CTYPE, "");`
			`for (Utf8Decode(&c, str); c != '\0'; Utf8Decode(&c, str)) {`
			`/* XXX - assume lowercase version does not use more bytes */`
			`c = towlower(c);`
			`str += Utf8Encode(str, c);`
			`}`
			`setlocale(LC_CTYPE, locale);`
			`free(locale);`
			`}`


(svn r5946) -Add: merged the TGP branch to mainline. TGP adds: - New optional landscape generator (TerraGenesis Perlin) - Load heightmaps (either BMP or PNG) - Progress dialog while generating worlds (no longer a 'hanging' screen) - New dialogs for NewGame, Create Scenario and Play Heightmap - Easier to configure your landscape - More things to configure (tree-placer, ..) - Speedup of world generation - New console command 'restart': restart the map EXACTLY as it was when you first started it (needs a game made after or with this commit) - New console command 'getseed': get the seed of your map and share it with others (of course only works with generated maps) - Many new, world generation related, things - Many internal cleanups and rewrites Many tnx to those people who helped making this: Belugas, DaleStan, glx, KUDr, RichK67, Rubidium, and TrueLight (alfabetic) Many tnx to those who helped testing: Arnau, Bjarni, and tokai (alfabetic) And to all other people who helped testing and sending comments / bugs Stats: 673 lines changed, 3534 new lines, 79 new strings 2006-08-19 10:00:30 +00:00			`/**`
			`* Only allow certain keys. You can define the filter to be used. This makes`
			`* sure no invalid keys can get into an editbox, like BELL.`
(svn r5944) -Merge TGP (r5578, r5579, r5724, r5726): -Feature: filter for textboxes to only allow certain patterns (like numbers only) 2006-08-19 09:31:22 +00:00			`* @param key character to be checked`
(svn r5946) -Add: merged the TGP branch to mainline. TGP adds: - New optional landscape generator (TerraGenesis Perlin) - Load heightmaps (either BMP or PNG) - Progress dialog while generating worlds (no longer a 'hanging' screen) - New dialogs for NewGame, Create Scenario and Play Heightmap - Easier to configure your landscape - More things to configure (tree-placer, ..) - Speedup of world generation - New console command 'restart': restart the map EXACTLY as it was when you first started it (needs a game made after or with this commit) - New console command 'getseed': get the seed of your map and share it with others (of course only works with generated maps) - Many new, world generation related, things - Many internal cleanups and rewrites Many tnx to those people who helped making this: Belugas, DaleStan, glx, KUDr, RichK67, Rubidium, and TrueLight (alfabetic) Many tnx to those who helped testing: Arnau, Bjarni, and tokai (alfabetic) And to all other people who helped testing and sending comments / bugs Stats: 673 lines changed, 3534 new lines, 79 new strings 2006-08-19 10:00:30 +00:00			`* @param afilter the filter to use`
			`* @return true or false depending if the character is printable/valid or not`
			`*/`
(svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come. 2006-11-16 22:05:33 +00:00			`bool IsValidChar(WChar key, CharSetFilter afilter)`
(svn r5944) -Merge TGP (r5578, r5579, r5724, r5726): -Feature: filter for textboxes to only allow certain patterns (like numbers only) 2006-08-19 09:31:22 +00:00			`{`
			`switch (afilter) {`
(svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come. 2006-11-16 22:05:33 +00:00			`case CS_ALPHANUMERAL: return IsPrintable(key);`
			`case CS_NUMERAL: return (key >= '0' && key <= '9');`
			`case CS_ALPHA: return IsPrintable(key) && !(key >= '0' && key <= '9');`
(svn r5944) -Merge TGP (r5578, r5579, r5724, r5726): -Feature: filter for textboxes to only allow certain patterns (like numbers only) 2006-08-19 09:31:22 +00:00			`}`

(svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come. 2006-11-16 22:05:33 +00:00			`return false;`
(svn r5944) -Merge TGP (r5578, r5579, r5724, r5726): -Feature: filter for textboxes to only allow certain patterns (like numbers only) 2006-08-19 09:31:22 +00:00			`}`
(svn r5946) -Add: merged the TGP branch to mainline. TGP adds: - New optional landscape generator (TerraGenesis Perlin) - Load heightmaps (either BMP or PNG) - Progress dialog while generating worlds (no longer a 'hanging' screen) - New dialogs for NewGame, Create Scenario and Play Heightmap - Easier to configure your landscape - More things to configure (tree-placer, ..) - Speedup of world generation - New console command 'restart': restart the map EXACTLY as it was when you first started it (needs a game made after or with this commit) - New console command 'getseed': get the seed of your map and share it with others (of course only works with generated maps) - Many new, world generation related, things - Many internal cleanups and rewrites Many tnx to those people who helped making this: Belugas, DaleStan, glx, KUDr, RichK67, Rubidium, and TrueLight (alfabetic) Many tnx to those who helped testing: Arnau, Bjarni, and tokai (alfabetic) And to all other people who helped testing and sending comments / bugs Stats: 673 lines changed, 3534 new lines, 79 new strings 2006-08-19 10:00:30 +00:00
(svn r6089) -Backport r6088: added -s (source) and -d (destination) to strgen (Darkvater) 2006-08-24 12:08:25 +00:00			`#ifdef WIN32`
			`int CDECL snprintf(char str, size_t size, const char format, ...)`
			`{`
			`va_list ap;`
			`int ret;`

			`va_start(ap, format);`
			`ret = vsnprintf(str, size, format, ap);`
			`va_end(ap);`
			`return ret;`
			`}`

			`#ifdef _MSC_VER`
			`int CDECL vsnprintf(char str, size_t size, const char format, va_list ap)`
			`{`
			`int ret;`
			`ret = _vsnprintf(str, size, format, ap);`
			`if (ret < 0) str[size - 1] = '\0';`
			`return ret;`
			`}`
			`#endif /* _MSC_VER */`

			`#endif /* WIN32 */`
(svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come. 2006-11-16 22:05:33 +00:00

			`/* UTF-8 handling routines */`


			`/* Decode and consume the next UTF-8 encoded character`
			`* @param c Buffer to place decoded character.`
			`* @param s Character stream to retrieve character from.`
			`* @return Number of characters in the sequence.`
			`*/`
			`size_t Utf8Decode(WChar c, const char s)`
			`{`
			`assert(c != NULL);`

			`if (!HASBIT(s[0], 7)) {`
			`/* Single byte character: 0xxxxxxx */`
			`*c = s[0];`
			`return 1;`
			`} else if (GB(s[0], 5, 3) == 6) {`
			`if (IsUtf8Part(s[1])) {`
			`/* Double byte character: 110xxxxx 10xxxxxx */`
			`*c = GB(s[0], 0, 5) << 6 \| GB(s[1], 0, 6);`
			`if (*c >= 0x80) return 2;`
			`}`
			`} else if (GB(s[0], 4, 4) == 14) {`
			`if (IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {`
			`/* Triple byte character: 1110xxxx 10xxxxxx 10xxxxxx */`
			`*c = GB(s[0], 0, 4) << 12 \| GB(s[1], 0, 6) << 6 \| GB(s[2], 0, 6);`
			`if (*c >= 0x800) return 3;`
			`}`
			`} else if (GB(s[0], 3, 5) == 30) {`
			`if (IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {`
			`/* 4 byte character: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */`
			`*c = GB(s[0], 0, 3) << 18 \| GB(s[1], 0, 6) << 12 \| GB(s[2], 0, 6) << 6 \| GB(s[3], 0, 6);`
			`if (c >= 0x10000 && c <= 0x10FFFF) return 4;`
			`}`
			`}`

			`//DEBUG(misc, 1) ("Invalid UTF-8 sequence");`
			`*c = '?';`
			`return 1;`
			`}`


			`/* Encode a unicode character and place it in the buffer`
			`* @param buf Buffer to place character.`
			`* @param c Unicode character to encode.`
			`* @return Number of characters in the encoded sequence.`
			`*/`
			`size_t Utf8Encode(char *buf, WChar c)`
			`{`
			`if (c < 0x80) {`
			`*buf = c;`
			`return 1;`
			`} else if (c < 0x800) {`
			`*buf++ = 0xC0 + GB(c, 6, 5);`
			`*buf = 0x80 + GB(c, 0, 6);`
			`return 2;`
			`} else if (c < 0x10000) {`
			`*buf++ = 0xE0 + GB(c, 12, 4);`
			`*buf++ = 0x80 + GB(c, 6, 6);`
			`*buf = 0x80 + GB(c, 0, 6);`
			`return 3;`
			`} else if (c < 0x110000) {`
			`*buf++ = 0xF0 + GB(c, 18, 3);`
			`*buf++ = 0x80 + GB(c, 12, 6);`
			`*buf++ = 0x80 + GB(c, 6, 6);`
			`*buf = 0x80 + GB(c, 0, 6);`
			`return 4;`
			`}`

			`//DEBUG(misc, 1) ("Can't UTF-8 encode value 0x%X", c);`
			`*buf = '?';`
			`return 1;`
			`}`