OpenTTD-patches/src/string.cpp

/* $Id$ */

/** @file string.cpp Handling of C-type strings (char*). */

#include "stdafx.h"
#include "openttd.h"
#include "debug.h"
#include "core/alloc_func.hpp"
#include "core/math_func.hpp"
#include "string_func.h"

#include "table/control_codes.h"

#include <stdarg.h>
#include <ctype.h> // required for tolower()

/**
 * Safer implementation of vsnprintf; same as vsnprintf except:
 * - last instead of size, i.e. replace sizeof with lastof.
 * - return gives the amount of characters added, not what it would add.
 *
 * As \a last is the last byte that may be written, the buffer that is
 * available for formatting is (last - str + 1) bytes long, terminator
 * included. On truncation the terminator ends up in \a last and the
 * returned count is (last - str).
 * @param str    buffer to write to up to last
 * @param last   last character we may write to
 * @param format the formatting (see snprintf)
 * @param ap     the list of arguments for the format
 * @return the number of added characters (excluding the terminator); never
 *         negative, 0 when str lies beyond last or vsnprintf reports failure
 */
static int CDECL vseprintf(char *str, const char *last, const char *format, va_list ap)
{
	ptrdiff_t room = last - str;
	if (room < 0) return 0;

	/* +1 because 'last' itself is writable and will hold the terminator
	 * when the output has to be cut short. */
	int written = vsnprintf(str, (size_t)room + 1, format, ap);

	/* Do not hand a negative "number of added characters" to callers that
	 * advance a pointer or size an allocation with it. */
	if (written < 0) return 0;

	/* vsnprintf reports what it wanted to write; clamp to what fits. */
	return (written > room) ? (int)room : written;
}

/**
 * Append a string to another, never using more than \a size bytes of the
 * destination buffer (terminator included).
 * @param dst  destination buffer holding the string to append to
 * @param src  string to append
 * @param size total size of the destination buffer in bytes; must be > 0
 */
void ttd_strlcat(char *dst, const char *src, size_t size)
{
	assert(size > 0);

	/* Skip over the current contents, counting down the remaining room. */
	while (size > 0 && *dst != '\0') {
		size--;
		dst++;
	}

	/* No terminator within 'size' bytes: there is no room to append to.
	 * Calling ttd_strlcpy with a size of 0 would trip its assertion, or
	 * without assertions wrap its counter and overrun the buffer. */
	if (size == 0) return;

	ttd_strlcpy(dst, src, size);
}


/**
 * Copy a string into a buffer of \a size bytes, truncating when needed.
 * The destination is always terminated.
 * @param dst  destination buffer
 * @param src  string to copy
 * @param size size of the destination buffer in bytes; must be > 0
 */
void ttd_strlcpy(char *dst, const char *src, size_t size)
{
	assert(size > 0);

	/* One byte is reserved for the terminator. */
	size_t room = size - 1;
	for (; room > 0 && *src != '\0'; room--) {
		*dst++ = *src++;
	}
	*dst = '\0';
}


/**
 * Append a string to another without writing beyond \a last.
 * @param dst  string to append to
 * @param src  string to append
 * @param last last byte of the destination buffer that may be written
 * @return pointer to the terminator of the result, or \a last (with the
 *         buffer left untouched) when no terminator is found before it
 */
char* strecat(char* dst, const char* src, const char* last)
{
	assert(dst <= last);

	/* Walk to the end of the existing text; give up once the buffer is used up. */
	for (; *dst != '\0'; dst++) {
		if (dst == last) return dst;
	}

	return strecpy(dst, src, last);
}


/**
 * Copy a string into a buffer without writing beyond \a last.
 * The result is always terminated. When the source does not fit, this is
 * a fatal error for STRGEN builds and a debug message otherwise.
 * @param dst  where to copy to
 * @param src  string to copy
 * @param last last byte of the destination buffer that may be written
 * @return pointer to the terminator written into the destination
 */
char* strecpy(char* dst, const char* src, const char* last)
{
	assert(dst <= last);

	for (; dst != last && *src != '\0'; dst++, src++) {
		*dst = *src;
	}
	*dst = '\0';

	/* Stopping at 'last' with source left over means the copy was cut short. */
	const bool truncated = (dst == last) && (*src != '\0');
	if (truncated) {
#ifdef STRGEN
		error("String too long for destination buffer");
#else /* STRGEN */
		DEBUG(misc, 0, "String too long for destination buffer");
#endif /* STRGEN */
	}
	return dst;
}


/**
 * Format a string (printf style) into a newly allocated buffer.
 * The formatted text is limited by the 4096 byte scratch buffer.
 * @param str the format string (see printf)
 * @return the formatted string, allocated with MallocT; presumably the
 *         caller is responsible for freeing it
 */
char *CDECL str_fmt(const char *str, ...)
{
	char buf[4096];
	va_list va;

	va_start(va, str);
	int len = vseprintf(buf, lastof(buf), str, va);
	va_end(va);

	/* Guard against a negative count so that neither the allocation size
	 * nor the copy length can go wrong; the result is then an empty string. */
	if (len < 0) len = 0;

	char *p = MallocT<char>(len + 1);
	memcpy(p, buf, len);
	/* Terminate explicitly instead of trusting whatever sits at buf[len]. */
	p[len] = '\0';
	return p;
}


void str_validate(char *str)
{
	char *dst = str;
	WChar c;
	size_t len;

	for (len = Utf8Decode(&c, str); c != '\0'; len = Utf8Decode(&c, str)) {
		if (IsPrintable(c) && (c < SCC_SPRITE_START || c > SCC_SPRITE_END ||
			IsValidChar(c - SCC_SPRITE_START, CS_ALPHANUMERAL))) {
			/* Copy the character back. Even if dst is current the same as str
			 * (i.e. no characters have been changed) this is quicker than
			 * moving the pointers ahead by len */
			do {
				*dst++ = *str++;
			} while (--len != 0);
		} else {
			/* Replace the undesirable character with a question mark */
			str += len;
			*dst++ = '?';
		}
	}

	*dst = '\0';
}


void str_strip_colours(char *str)
{
	char *dst = str;
	WChar c;
	size_t len;

	for (len = Utf8Decode(&c, str); c != '\0'; len = Utf8Decode(&c, str)) {
		if (c < SCC_BLUE || c > SCC_BLACK) {
			/* Copy the character back. Even if dst is current the same as str
			 * (i.e. no characters have been changed) this is quicker than
			 * moving the pointers ahead by len */
			do {
				*dst++ = *str++;
			} while (--len != 0);
		} else {
			/* Just skip (strip) the colour codes */
			str += len;
		}
	}
	*dst = '\0';
}

/** Convert a given ASCII string to lowercase.
 * NOTE: only support ASCII characters, no UTF8 fancy. As currently
 * the function is only used to lowercase data-filenames if they are
 * not found, this is sufficient. If more, or general functionality is
 * needed, look to r7271 where it was removed because it was broken when
 * using certain locales: eg in Turkish the uppercase 'I' was converted to
 * '?', so just revert to the old functionality
 * @param str string to convert; modified in place */
void strtolower(char *str)
{
	for (; *str != '\0'; str++) {
		/* tolower() is only defined for EOF and values representable as
		 * unsigned char; a plain (signed) char holding a byte >= 0x80
		 * would be passed as a negative value. */
		*str = (char)tolower((unsigned char)*str);
	}
}

/**
 * Check a character against a filter, e.g. to keep unwanted keys such as
 * BELL out of an editbox.
 * - CS_ALPHANUMERAL accepts every printable character.
 * - CS_NUMERAL accepts the digits '0' to '9' only.
 * - CS_ALPHA accepts every printable character that is not a digit.
 * @param key character to be checked
 * @param afilter the filter to use
 * @return true when the character passes the filter; false otherwise,
 *         which includes any filter that is not listed above
 */
bool IsValidChar(WChar key, CharSetFilter afilter)
{
	const bool is_digit = (key >= '0' && key <= '9');

	if (afilter == CS_NUMERAL) return is_digit;
	if (afilter == CS_ALPHANUMERAL) return IsPrintable(key);
	if (afilter == CS_ALPHA) return IsPrintable(key) && !is_digit;

	return false;
}

#ifdef WIN32
/* Since version 3.14, MinGW Runtime has snprintf() and vsnprintf() conform to C99 but it's not the case for older versions */
#if (__MINGW32_MAJOR_VERSION < 3) || ((__MINGW32_MAJOR_VERSION == 3) && (__MINGW32_MINOR_VERSION < 14))
/**
 * snprintf for runtimes that lack a conforming one; simply forwards the
 * variable arguments to vsnprintf.
 * @param str    buffer to write to
 * @param size   size of the buffer
 * @param format the formatting (see printf)
 * @return whatever vsnprintf returns
 */
int CDECL snprintf(char *str, size_t size, const char *format, ...)
{
	va_list args;

	va_start(args, format);
	const int written = vsnprintf(str, size, format, args);
	va_end(args);

	return written;
}
#endif /* MinGW Runtime < 3.14 */

#ifdef _MSC_VER
/* *nprintf broken, not POSIX compliant, MSDN description
 * - If len < count, then len characters are stored in buffer, a null-terminator is appended, and len is returned.
 * - If len = count, then len characters are stored in buffer, no null-terminator is appended, and len is returned.
 * - If len > count, then count characters are stored in buffer, no null-terminator is appended, and a negative value is returned
 *
 * This wrapper makes sure the buffer is terminated whenever size > 0 and
 * reports a cut-off result as 'size' instead of a negative number, so the
 * return value can always be clamped against the buffer size.
 * NOTE(review): a negative result caused by a real formatting error cannot
 * be told apart from truncation here and is reported as 'size' as well.
 */
int CDECL vsnprintf(char *str, size_t size, const char *format, va_list ap)
{
	/* Without any room even the terminator cannot be stored;
	 * str[size - 1] would be outside of the buffer. */
	if (size == 0) return 0;

	int ret = _vsnprintf(str, size, format, ap);

	/* Everything, including the terminator, fitted. */
	if (ret >= 0 && (size_t)ret < size) return ret;

	/* _vsnprintf left the buffer unterminated: cut it off ourselves. */
	str[size - 1] = '\0';
	return (int)size;
}
#endif /* _MSC_VER */

#endif /* WIN32 */

/**
 * Safer implementation of snprintf; same as snprintf except:
 * - last instead of size, i.e. replace sizeof with lastof.
 * - return gives the amount of characters added, not what it would add.
 *
 * This is the variadic front-end of vseprintf.
 * @param str    buffer to write to up to last
 * @param last   last character we may write to
 * @param format the formatting (see snprintf)
 * @return the number of added characters
 */
int CDECL seprintf(char *str, const char *last, const char *format, ...)
{
	va_list args;

	va_start(args, format);
	const int added = vseprintf(str, last, format, args);
	va_end(args);

	return added;
}


/** Convert the md5sum to a hexadecimal string representation
 * @param buf buffer to put the md5sum into
 * @param last last character of buffer (usually lastof(buf))
 * @param md5sum the md5sum itself
 * @return a pointer to the next character after the md5sum */
char *md5sumToString(char *buf, const char *last, const uint8 md5sum[16])
{
	char *p = buf;

	for (uint i = 0; i < 16; i++) {
		p += seprintf(p, last, "%02X", md5sum[i]);
	}

	return p;
}


/* UTF-8 handling routines */


/**
 * Decode the UTF-8 encoded character at the start of a string.
 * Sequences of one to four bytes are understood. A sequence with a missing
 * continuation byte, one that encodes a value that would fit a shorter
 * sequence, or a four byte sequence above 0x10FFFF is treated as invalid:
 * '?' is stored and a single byte is reported as consumed.
 * @param c Buffer to place decoded character; must not be NULL.
 * @param s Character stream to retrieve character from.
 * @return Number of bytes that make up the decoded sequence (1 to 4).
 */
size_t Utf8Decode(WChar *c, const char *s)
{
	assert(c != NULL);

	if (!HasBit(s[0], 7)) {
		/* Single byte character: 0xxxxxxx */
		*c = s[0];
		return 1;
	}

	WChar decoded;
	if (GB(s[0], 5, 3) == 6) {
		/* Double byte character: 110xxxxx 10xxxxxx */
		if (IsUtf8Part(s[1])) {
			decoded = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
			if (decoded >= 0x80) {
				*c = decoded;
				return 2;
			}
		}
	} else if (GB(s[0], 4, 4) == 14) {
		/* Triple byte character: 1110xxxx 10xxxxxx 10xxxxxx */
		if (IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
			decoded = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
			if (decoded >= 0x800) {
				*c = decoded;
				return 3;
			}
		}
	} else if (GB(s[0], 3, 5) == 30) {
		/* 4 byte character: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		if (IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
			decoded = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
			if (decoded >= 0x10000 && decoded <= 0x10FFFF) {
				*c = decoded;
				return 4;
			}
		}
	}

	/* Not a valid sequence: substitute it and step over one byte only. */
	//DEBUG(misc, 1, "[utf8] invalid UTF-8 sequence");
	*c = '?';
	return 1;
}


/**
 * Encode a unicode character as UTF-8 and place it in the buffer.
 * Between one and four bytes are written; no terminator is added.
 * Values of 0x110000 and above cannot be encoded and yield a single '?'.
 * @param buf Buffer to place character.
 * @param c   Unicode character to encode.
 * @return Number of bytes written to the buffer.
 */
size_t Utf8Encode(char *buf, WChar c)
{
	if (c < 0x80) {
		/* 0xxxxxxx */
		buf[0] = c;
		return 1;
	}

	if (c < 0x800) {
		/* 110xxxxx 10xxxxxx */
		buf[0] = 0xC0 + GB(c,  6, 5);
		buf[1] = 0x80 + GB(c,  0, 6);
		return 2;
	}

	if (c < 0x10000) {
		/* 1110xxxx 10xxxxxx 10xxxxxx */
		buf[0] = 0xE0 + GB(c, 12, 4);
		buf[1] = 0x80 + GB(c,  6, 6);
		buf[2] = 0x80 + GB(c,  0, 6);
		return 3;
	}

	if (c < 0x110000) {
		/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		buf[0] = 0xF0 + GB(c, 18, 3);
		buf[1] = 0x80 + GB(c, 12, 6);
		buf[2] = 0x80 + GB(c,  6, 6);
		buf[3] = 0x80 + GB(c,  0, 6);
		return 4;
	}

	//DEBUG(misc, 1, "[utf8] can't UTF-8 encode value 0x%X", c);
	buf[0] = '?';
	return 1;
}

/**
 * Properly terminate an UTF8 string to some maximum length, cutting only
 * between encoded characters and never in the middle of one.
 * @param s string to check if it needs additional trimming
 * @param maxlen the maximum length the buffer can have.
 * @return the new length in bytes of the string (eg. strlen(new_string))
 * @NOTE maxlen is the string length _INCLUDING_ the terminating '\0'
 */
size_t Utf8TrimString(char *s, size_t maxlen)
{
	const char *end = s + strlen(s);
	size_t kept = 0;

	while (*s != '\0') {
		size_t seq = Utf8EncodedCharLen(*s);
		/* Invalid lead bytes are not our concern here; count them as one byte. */
		if (seq == 0) seq = 1;

		/* Stop when the next character would not leave room for the terminator. */
		if (kept + seq >= maxlen) break;
		/* Stop when the string was cut off in the middle of its last
		 * sequence, i.e. the sequence claims bytes beyond the terminator. */
		if (s + seq > end) break;

		s += seq;
		kept += seq;
	}

	*s = '\0';
	return kept;
}

#ifndef _GNU_SOURCE
#include "core/math_func.hpp"
/**
 * Replacement for the strndup extension: duplicate at most \a len
 * characters of a string.
 * No more than \a len bytes of the source are inspected, so the source
 * does not need to be terminated when it is at least \a len bytes long.
 * @param s   the string to duplicate
 * @param len the maximum number of characters to copy
 * @return a terminated copy, allocated (zero-filled) with CallocT
 */
char *strndup(const char *s, size_t len)
{
	/* Length of the source, but never looking beyond the first len bytes. */
	size_t n = 0;
	while (n < len && s[n] != '\0') n++;

	/* CallocT zero-fills, which provides the terminator after the copy. */
	char *tmp = CallocT<char>(n + 1);
	memcpy(tmp, s, n);
	return tmp;
}
#endif /* !_GNU_SOURCE */
(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00			`/* $Id$ */`

(svn r12971) -Documentation: add @file in files that missed them and add something more than whitespace as description of files that don't have a description. 2008-05-06 15:11:33 +00:00			`/** @file string.cpp Handling of C-type strings (char). /`
(svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near 2007-04-04 01:35:16 +00:00
(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00			`#include "stdafx.h"`
			`#include "openttd.h"`
(svn r10566) -Fix [FS#1025]: a NewGRF could have a information message that is too long for the internal buffers to handle. We should not crash on such a case even though the message is too big for the window anyway. 2007-07-14 22:37:40 +00:00			`#include "debug.h"`
(svn r11691) -Codechange: move+rename helpers.hpp and only include it when it is really needed. 2007-12-25 09:48:53 +00:00			`#include "core/alloc_func.hpp"`
(svn r14540) -Codechange: introduce [v]seprintf which are like [v]snprintf but do return the number of characters written instead of the number of characters that would be written; as size_t is unsigned substraction can cause integer underflows quite quickly. 2008-10-28 14:42:31 +00:00			`#include "core/math_func.hpp"`
(svn r11777) -Codechange: split the string header and make do not include it when it's not necessary. 2008-01-07 14:23:25 +00:00			`#include "string_func.h"`
(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00
(svn r11828) -Codechange: include table/* as the last includes and remove an unneeded include from openttd.h. 2008-01-13 01:21:35 +00:00			`#include "table/control_codes.h"`

(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00			`#include <stdarg.h>`
			`#include <ctype.h> // required for tolower()`

(svn r14541) -Fix (r14540): mingw didn't like it :( 2008-10-28 15:47:42 +00:00			`/**`
			`* Safer implementation of vsnprintf; same as vsnprintf except:`
			`* - last instead of size, i.e. replace sizeof with lastof.`
			`* - return gives the amount of characters added, not what it would add.`
			`* @param str buffer to write to up to last`
			`* @param last last character we may write to`
			`* @param format the formatting (see snprintf)`
			`* @param ap the list of arguments for the format`
			`* @return the number of added characters`
			`*/`
			`static int CDECL vseprintf(char str, const char last, const char *format, va_list ap)`
			`{`
			`if (str >= last) return 0;`
			`size_t size = last - str;`
			`return min((int)size, vsnprintf(str, size, format, ap));`
			`}`

(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00			`void ttd_strlcat(char dst, const char src, size_t size)`
			`{`
			`assert(size > 0);`
(svn r14546) -Codechange: Unify string(cpy\|cat) functions -Doc: string(cpy\|cat) functions 2008-10-29 16:30:41 +00:00			`while (size > 0 && *dst != '\0') {`
			`size--;`
			`dst++;`
			`}`

			`ttd_strlcpy(dst, src, size);`
(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00			`}`


			`void ttd_strlcpy(char dst, const char src, size_t size)`
			`{`
			`assert(size > 0);`
(svn r14546) -Codechange: Unify string(cpy\|cat) functions -Doc: string(cpy\|cat) functions 2008-10-29 16:30:41 +00:00			`while (--size > 0 && *src != '\0') {`
			`dst++ = src++;`
			`}`
(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00			`*dst = '\0';`
			`}`


			`char* strecat(char* dst, const char* src, const char* last)`
			`{`
			`assert(dst <= last);`
(svn r14546) -Codechange: Unify string(cpy\|cat) functions -Doc: string(cpy\|cat) functions 2008-10-29 16:30:41 +00:00			`while (*dst != '\0') {`
(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00			`if (dst == last) return dst;`
(svn r14546) -Codechange: Unify string(cpy\|cat) functions -Doc: string(cpy\|cat) functions 2008-10-29 16:30:41 +00:00			`dst++;`
			`}`

(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00			`return strecpy(dst, src, last);`
			`}`


			`char* strecpy(char* dst, const char* src, const char* last)`
			`{`
			`assert(dst <= last);`
(svn r14546) -Codechange: Unify string(cpy\|cat) functions -Doc: string(cpy\|cat) functions 2008-10-29 16:30:41 +00:00			`while (dst != last && *src != '\0') {`
			`dst++ = src++;`
			`}`
(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00			`*dst = '\0';`
(svn r14546) -Codechange: Unify string(cpy\|cat) functions -Doc: string(cpy\|cat) functions 2008-10-29 16:30:41 +00:00
(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00			`if (dst == last && *src != '\0') {`
(svn r10566) -Fix [FS#1025]: a NewGRF could have a information message that is too long for the internal buffers to handle. We should not crash on such a case even though the message is too big for the window anyway. 2007-07-14 22:37:40 +00:00			`#ifdef STRGEN`
(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00			`error("String too long for destination buffer");`
(svn r10566) -Fix [FS#1025]: a NewGRF could have a information message that is too long for the internal buffers to handle. We should not crash on such a case even though the message is too big for the window anyway. 2007-07-14 22:37:40 +00:00			`#else /* STRGEN */`
			`DEBUG(misc, 0, "String too long for destination buffer");`
			`#endif /* STRGEN */`
(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00			`}`
			`return dst;`
			`}`


(svn r14540) -Codechange: introduce [v]seprintf which are like [v]snprintf but do return the number of characters written instead of the number of characters that would be written; as size_t is unsigned substraction can cause integer underflows quite quickly. 2008-10-28 14:42:31 +00:00			`char CDECL str_fmt(const char str, ...)`
(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00			`{`
			`char buf[4096];`
			`va_list va;`

			`va_start(va, str);`
(svn r14540) -Codechange: introduce [v]seprintf which are like [v]snprintf but do return the number of characters written instead of the number of characters that would be written; as size_t is unsigned substraction can cause integer underflows quite quickly. 2008-10-28 14:42:31 +00:00			`int len = vseprintf(buf, lastof(buf), str, va);`
(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00			`va_end(va);`
(svn r14540) -Codechange: introduce [v]seprintf which are like [v]snprintf but do return the number of characters written instead of the number of characters that would be written; as size_t is unsigned substraction can cause integer underflows quite quickly. 2008-10-28 14:42:31 +00:00			`char *p = MallocT<char>(len + 1);`
			`memcpy(p, buf, len + 1);`
(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00			`return p;`
			`}`


			`void str_validate(char *str)`
			`{`
			`char *dst = str;`
			`WChar c;`
			`size_t len;`

			`for (len = Utf8Decode(&c, str); c != '\0'; len = Utf8Decode(&c, str)) {`
			`if (IsPrintable(c) && (c < SCC_SPRITE_START \|\| c > SCC_SPRITE_END \|\|`
			`IsValidChar(c - SCC_SPRITE_START, CS_ALPHANUMERAL))) {`
			`/* Copy the character back. Even if dst is current the same as str`
			`* (i.e. no characters have been changed) this is quicker than`
			`* moving the pointers ahead by len */`
			`do {`
			`dst++ = str++;`
			`} while (--len != 0);`
			`} else {`
			`/* Replace the undesirable character with a question mark */`
			`str += len;`
			`*dst++ = '?';`
			`}`
			`}`

			`*dst = '\0';`
			`}`


			`void str_strip_colours(char *str)`
			`{`
			`char *dst = str;`
			`WChar c;`
			`size_t len;`

			`for (len = Utf8Decode(&c, str); c != '\0'; len = Utf8Decode(&c, str)) {`
			`if (c < SCC_BLUE \|\| c > SCC_BLACK) {`
			`/* Copy the character back. Even if dst is current the same as str`
			`* (i.e. no characters have been changed) this is quicker than`
			`* moving the pointers ahead by len */`
			`do {`
			`dst++ = str++;`
			`} while (--len != 0);`
			`} else {`
			`/* Just skip (strip) the colour codes */`
			`str += len;`
			`}`
			`}`
			`*dst = '\0';`
			`}`

			`/** Convert a given ASCII string to lowercase.`
			`* NOTE: only support ASCII characters, no UTF8 fancy. As currently`
			`* the function is only used to lowercase data-filenames if they are`
			`* not found, this is sufficient. If more, or general functionality is`
			`* needed, look to r7271 where it was removed because it was broken when`
			`* using certain locales: eg in Turkish the uppercase 'I' was converted to`
(svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near 2007-04-04 01:35:16 +00:00			`* '?', so just revert to the old functionality`
			`* @param str string to convert */`
(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00			`void strtolower(char *str)`
			`{`
			`for (; str != '\0'; str++) str = tolower(*str);`
			`}`

			`/**`
			`* Only allow certain keys. You can define the filter to be used. This makes`
			`* sure no invalid keys can get into an editbox, like BELL.`
			`* @param key character to be checked`
			`* @param afilter the filter to use`
			`* @return true or false depending if the character is printable/valid or not`
			`*/`
			`bool IsValidChar(WChar key, CharSetFilter afilter)`
			`{`
			`switch (afilter) {`
			`case CS_ALPHANUMERAL: return IsPrintable(key);`
			`case CS_NUMERAL: return (key >= '0' && key <= '9');`
			`case CS_ALPHA: return IsPrintable(key) && !(key >= '0' && key <= '9');`
			`}`

			`return false;`
			`}`

			`#ifdef WIN32`
(svn r11744) -Codechange: don't redefine snprintf when using MinGW Runtime 3.14 or superior, as it now have snprintf() and vsnprintf conform to C99 2008-01-02 18:42:51 +00:00			`/* Since version 3.14, MinGW Runtime has snprintf() and vsnprintf() conform to C99 but it's not the case for older versions */`
			`#if (__MINGW32_MAJOR_VERSION < 3) \|\| ((__MINGW32_MAJOR_VERSION == 3) && (__MINGW32_MINOR_VERSION < 14))`
(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00			`int CDECL snprintf(char str, size_t size, const char format, ...)`
			`{`
			`va_list ap;`
			`int ret;`

			`va_start(ap, format);`
			`ret = vsnprintf(str, size, format, ap);`
			`va_end(ap);`
			`return ret;`
			`}`
(svn r11744) -Codechange: don't redefine snprintf when using MinGW Runtime 3.14 or superior, as it now have snprintf() and vsnprintf conform to C99 2008-01-02 18:42:51 +00:00			`#endif /* MinGW Runtime < 3.14 */`
(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00
			`#ifdef _MSC_VER`
(svn r8089) -[win32] MS-borkedness: All *nprintf functions are broken, but we didn't test to fix it ourselves when 'len = count'. 2007-01-13 13:06:18 +00:00			`/* *nprintf broken, not POSIX compliant, MSDN description`
			`* - If len < count, then len characters are stored in buffer, a null-terminator is appended, and len is returned.`
			`* - If len = count, then len characters are stored in buffer, no null-terminator is appended, and len is returned.`
			`* - If len > count, then count characters are stored in buffer, no null-terminator is appended, and a negative value is returned`
			`*/`
(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00			`int CDECL vsnprintf(char str, size_t size, const char format, va_list ap)`
			`{`
			`int ret;`
			`ret = _vsnprintf(str, size, format, ap);`
(svn r8090) -[win32] Fix (r8089): for *nprintf 'ret = count' NOT 'ret = 0'... 2007-01-13 13:13:32 +00:00			`if (ret < 0 \|\| ret == size) str[size - 1] = '\0';`
(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00			`return ret;`
			`}`
			`#endif /* _MSC_VER */`

			`#endif /* WIN32 */`

(svn r14540) -Codechange: introduce [v]seprintf which are like [v]snprintf but do return the number of characters written instead of the number of characters that would be written; as size_t is unsigned substraction can cause integer underflows quite quickly. 2008-10-28 14:42:31 +00:00			`/**`
			`* Safer implementation of snprintf; same as snprintf except:`
			`* - last instead of size, i.e. replace sizeof with lastof.`
			`* - return gives the amount of characters added, not what it would add.`
			`* @param str buffer to write to up to last`
			`* @param last last character we may write to`
			`* @param format the formatting (see snprintf)`
			`* @return the number of added characters`
			`*/`
			`int CDECL seprintf(char str, const char last, const char *format, ...)`
			`{`
			`va_list ap;`

			`va_start(ap, format);`
			`int ret = vseprintf(str, last, format, ap);`
			`va_end(ap);`
			`return ret;`
			`}`

(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00
(svn r8093) -Codechange: Add a function to get a string representation of an MD5SUM and use it. 2007-01-13 13:47:57 +00:00			`/** Convert the md5sum to a hexadecimal string representation`
			`* @param buf buffer to put the md5sum into`
			`* @param last last character of buffer (usually lastof(buf))`
			`* @param md5sum the md5sum itself`
			`* @return a pointer to the next character after the md5sum */`
			`char md5sumToString(char buf, const char *last, const uint8 md5sum[16])`
			`{`
			`char *p = buf;`

			`for (uint i = 0; i < 16; i++) {`
(svn r14540) -Codechange: introduce [v]seprintf which are like [v]snprintf but do return the number of characters written instead of the number of characters that would be written; as size_t is unsigned substraction can cause integer underflows quite quickly. 2008-10-28 14:42:31 +00:00			`p += seprintf(p, last, "%02X", md5sum[i]);`
(svn r8093) -Codechange: Add a function to get a string representation of an MD5SUM and use it. 2007-01-13 13:47:57 +00:00			`}`

			`return p;`
			`}`


(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00			`/* UTF-8 handling routines */`


			`/* Decode and consume the next UTF-8 encoded character`
			`* @param c Buffer to place decoded character.`
			`* @param s Character stream to retrieve character from.`
			`* @return Number of characters in the sequence.`
			`*/`
			`size_t Utf8Decode(WChar c, const char s)`
			`{`
			`assert(c != NULL);`

(svn r11481) -Codechange: Rename the HASBIT function to fit with the naming style 2007-11-19 21:02:30 +00:00			`if (!HasBit(s[0], 7)) {`
(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00			`/* Single byte character: 0xxxxxxx */`
			`*c = s[0];`
			`return 1;`
			`} else if (GB(s[0], 5, 3) == 6) {`
			`if (IsUtf8Part(s[1])) {`
			`/* Double byte character: 110xxxxx 10xxxxxx */`
			`*c = GB(s[0], 0, 5) << 6 \| GB(s[1], 0, 6);`
			`if (*c >= 0x80) return 2;`
			`}`
			`} else if (GB(s[0], 4, 4) == 14) {`
			`if (IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {`
			`/* Triple byte character: 1110xxxx 10xxxxxx 10xxxxxx */`
			`*c = GB(s[0], 0, 4) << 12 \| GB(s[1], 0, 6) << 6 \| GB(s[2], 0, 6);`
			`if (*c >= 0x800) return 3;`
			`}`
			`} else if (GB(s[0], 3, 5) == 30) {`
			`if (IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {`
			`/* 4 byte character: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */`
			`*c = GB(s[0], 0, 3) << 18 \| GB(s[1], 0, 6) << 12 \| GB(s[2], 0, 6) << 6 \| GB(s[3], 0, 6);`
			`if (c >= 0x10000 && c <= 0x10FFFF) return 4;`
			`}`
			`}`

(svn r7565) -Codechange: Rework DEBUG functionality. Look for appropiate debugging levels to use in debug.h. grfmsg() is now used as a specific debug-function for grf. 2006-12-26 17:36:18 +00:00			`//DEBUG(misc, 1, "[utf8] invalid UTF-8 sequence");`
(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00			`*c = '?';`
			`return 1;`
			`}`


			`/* Encode a unicode character and place it in the buffer`
			`* @param buf Buffer to place character.`
			`* @param c Unicode character to encode.`
			`* @return Number of characters in the encoded sequence.`
			`*/`
			`size_t Utf8Encode(char *buf, WChar c)`
			`{`
			`if (c < 0x80) {`
			`*buf = c;`
			`return 1;`
			`} else if (c < 0x800) {`
			`*buf++ = 0xC0 + GB(c, 6, 5);`
			`*buf = 0x80 + GB(c, 0, 6);`
			`return 2;`
			`} else if (c < 0x10000) {`
			`*buf++ = 0xE0 + GB(c, 12, 4);`
			`*buf++ = 0x80 + GB(c, 6, 6);`
			`*buf = 0x80 + GB(c, 0, 6);`
			`return 3;`
			`} else if (c < 0x110000) {`
			`*buf++ = 0xF0 + GB(c, 18, 3);`
			`*buf++ = 0x80 + GB(c, 12, 6);`
			`*buf++ = 0x80 + GB(c, 6, 6);`
			`*buf = 0x80 + GB(c, 0, 6);`
			`return 4;`
			`}`

(svn r7565) -Codechange: Rework DEBUG functionality. Look for appropiate debugging levels to use in debug.h. grfmsg() is now used as a specific debug-function for grf. 2006-12-26 17:36:18 +00:00			`//DEBUG(misc, 1, "[utf8] can't UTF-8 encode value 0x%X", c);`
(svn r7272) -Ok, let's hope this one's correct...stupid msvc 2006-11-28 14:19:18 +00:00			`*buf = '?';`
			`return 1;`
			`}`
(svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'. -Codechange: Add a function Utf8TrimString() that properly trims a string to an UTF8 encoding seperation instead of somewhere in the wild (and use it in the chat area) 2007-03-05 00:45:56 +00:00
			`/**`
			`* Properly terminate an UTF8 string to some maximum length`
			`* @param s string to check if it needs additional trimming`
			`* @param maxlen the maximum length the buffer can have.`
			`* @return the new length in bytes of the string (eg. strlen(new_string))`
			`* @NOTE maxlen is the string length _INCLUDING_ the terminating '\0'`
			`*/`
			`size_t Utf8TrimString(char *s, size_t maxlen)`
			`{`
			`size_t length = 0;`

			`for (const char ptr = strchr(s, '\0'); s != '\0';) {`
			`size_t len = Utf8EncodedCharLen(*s);`
(svn r9083) -Codechange: Be more lenient when trimming UTF-8 strings and don't terminate the string when an invalid encoding is encountered, but only focus on maximum length. 2007-03-10 00:26:19 +00:00			`/* Silently ignore invalid UTF8 sequences, our only concern trimming */`
			`if (len == 0) len = 1;`
(svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'. -Codechange: Add a function Utf8TrimString() that properly trims a string to an UTF8 encoding seperation instead of somewhere in the wild (and use it in the chat area) 2007-03-05 00:45:56 +00:00
			`/* Take care when a hard cutoff was made for the string and`
			`* the last UTF8 sequence is invalid */`
			`if (length + len >= maxlen \|\| (s + len > ptr)) break;`
			`s += len;`
			`length += len;`
			`}`

			`*s = '\0';`
			`return length;`
(svn r9015) -Fix NL at EOF 2007-03-05 05:03:28 +00:00			`}`
(svn r14154) -Fix (r14153): strndup is a GNU extension, so it doesn't exist on all platforms 2008-08-24 17:02:21 +00:00
			`#ifndef _GNU_SOURCE`
			`#include "core/math_func.hpp"`
			`char strndup(const char s, size_t len)`
			`{`
			`len = min(strlen(s), len);`
			`char *tmp = CallocT<char>(len + 1);`
			`memcpy(tmp, s, len);`
			`return tmp;`
			`}`
			`#endif /* !_GNU_SOURCE */`