(svn r9011) -Codechange (r9003): Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset

17 years ago · ca4c856247
parent f8a5fa0921
commit ca4c856247
2 changed files with 20 additions and 24 deletions
--- a/src/misc_gui.cpp
+++ b/src/misc_gui.cpp
@ -804,17 +804,21 @@ static void DelChar(Textbuf *tb, bool backspace)
 	WChar c;
 	uint width;
 	size_t len;
+	char *s = tb->buf + tb->caretpos;

-	if (backspace) tb->caretpos -= Utf8PrevCharLen(tb->buf + tb->caretpos);
+	if (backspace) s = Utf8PrevChar(s);

-	len = Utf8Decode(&c, tb->buf + tb->caretpos);
+	len = Utf8Decode(&c, s);
 	width = GetCharacterWidth(FS_NORMAL, c);

 	tb->width  -= width;
-	if (backspace) tb->caretxoffs -= width;
+	if (backspace) {
+		tb->caretpos   -= len;
+		tb->caretxoffs -= width;
+	}

 	/* Move the remaining characters over the marker */
-	memmove(tb->buf + tb->caretpos, tb->buf + tb->caretpos + len, tb->length - tb->caretpos - len + 1);
+	memmove(s, s + len, tb->length - (s - tb->buf) - len + 1);
 	tb->length -= len;
 }

@ -887,9 +891,9 @@ bool MoveTextBufferPos(Textbuf *tb, int navmode)
 	case WKC_LEFT:
 		if (tb->caretpos != 0) {
 			WChar c;
-
-			tb->caretpos -= Utf8PrevCharLen(tb->buf + tb->caretpos);
-			Utf8Decode(&c, tb->buf + tb->caretpos);
+			const char *s = Utf8PrevChar(tb->buf + tb->caretpos);
+			Utf8Decode(&c, s);
+			tb->caretpos    = s - tb->buf; // -= (tb->buf + tb->caretpos - s)
 			tb->caretxoffs -= GetCharacterWidth(FS_NORMAL, c);

 			return true;
--- a/src/string.h
+++ b/src/string.h
@ -107,25 +107,17 @@ static inline bool IsUtf8Part(char c)
 }

 /**
- * Retrieve the (partial) length of the previous UNICODE character
- * in an UTF-8 encoded string.
- * @param s char pointer pointing to the first char of the next character
- * @returns the decoded length in bytes (size) of the UNICODE character
- * that was just before the one where 's' is pointing to
- * @note If 's' is not pointing to the first byte of the next UNICODE character
- * only a partial length of the sequence will be returned.
- * For example given this sequence: 0xE3 0x85 0x80, 0xE3 0x81 0x9E
- * 1. 's' is pointing to the second 0xE3, return value is 3
- * 2. 's' is pointing to 0x80, return value is 2.
- * So take care with the return values of this function. To get the real length
- * for an (invalid) sequence, pass the string offset of this function's return
- * value to Utf8EncodedCharLen() or Utf8Decode()
+ * Retrieve the previous Unicode character in a UTF-8 encoded string.
+ * @param s char pointer pointing to (the first byte of) the next character
+ * @returns a pointer into 's' to the first byte of the previous Unicode character
+ * @note Do not use this function to determine the length of the previous
+ * encoded character, because it might begin with an invalid/corrupt start sequence
 */
-static inline size_t Utf8PrevCharLen(const char *s)
+static inline char *Utf8PrevChar(const char *s)
 {
-	size_t len = 1;
-	while (IsUtf8Part(*--s)) len++;
-	return len;
+	const char *ret = s;
+	while (IsUtf8Part(*--ret));
+	return (char*)ret;
 }