Merge #4481

4481: Don't create cut off UTF-8 sequences on string manipulation r=def- a=heinrich5991

CC #4463
CC #4465

## Checklist

- [ ] Tested the change ingame
- [ ] Provided screenshots if it is a visual change
- [ ] Tested in combination with possibly related configuration options
- [x] Written a unit test if it works standalone, system.c especially
- [ ] Considered possible null pointers and out of bounds array indexing
- [ ] Changed no physics that affect existing maps
- [ ] Tested the change with [ASan+UBSan or valgrind's memcheck](https://github.com/ddnet/ddnet/#using-addresssanitizer--undefinedbehavioursanitizer-or-valgrinds-memcheck) (optional)

Co-authored-by: heinrich5991 <heinrich5991@gmail.com>
2024-11-10 10:08:18 +00:00 · 2021-12-20 08:58:55 +00:00 · 2021-12-20 08:58:55 +00:00 · ec0d1172eb
parent 3013466b86 58533cddef
commit ec0d1172eb
10 changed files with 131 additions and 99 deletions
--- a/src/base/system.cpp
+++ b/src/base/system.cpp
@@ -2545,23 +2545,28 @@ void str_append(char *dst, const char *src, int dst_size)
 	}

 	dst[dst_size - 1] = 0; /* assure null termination */
+	str_utf8_fix_truncation(dst);
 }

 void str_copy(char *dst, const char *src, int dst_size)
 {
 	strncpy(dst, src, dst_size - 1);
 	dst[dst_size - 1] = 0; /* assure null termination */
+	str_utf8_fix_truncation(dst);
 }

 void str_utf8_truncate(char *dst, int dst_size, const char *src, int truncation_len)
 {
 	int size = -1;
-	int cursor = 0;
+	const char *cursor = src;
 	int pos = 0;
-	while(pos <= truncation_len && cursor < dst_size && size != cursor)
+	while(pos <= truncation_len && cursor - src < dst_size && size != cursor - src)
 	{
-		size = cursor;
-		cursor = str_utf8_forward(src, cursor);
+		size = cursor - src;
+		if(str_utf8_decode(&cursor) == 0)
+		{
+			break;
+		}
 		pos++;
 	}
 	str_copy(dst, src, size + 1);
@@ -2584,33 +2589,22 @@ int str_length(const char *str)

 int str_format(char *buffer, int buffer_size, const char *format, ...)
 {
-	int ret;
 #if defined(CONF_FAMILY_WINDOWS)
 	va_list ap;
 	va_start(ap, format);
-	ret = _vsnprintf(buffer, buffer_size, format, ap);
+	_vsnprintf(buffer, buffer_size, format, ap);
 	va_end(ap);

 	buffer[buffer_size - 1] = 0; /* assure null termination */
-
-	/* _vsnprintf is documented to return negative values on truncation, but
-	 * in practice we didn't see that. let's handle it anyway just in case. */
-	if(ret < 0)
-		ret = buffer_size - 1;
 #else
 	va_list ap;
 	va_start(ap, format);
-	ret = vsnprintf(buffer, buffer_size, format, ap);
+	vsnprintf(buffer, buffer_size, format, ap);
 	va_end(ap);

 	/* null termination is assured by definition of vsnprintf */
 #endif
-
-	/* a return value of buffer_size or more indicates truncated output */
-	if(ret >= buffer_size)
-		ret = buffer_size - 1;
-
-	return ret;
+	return str_utf8_fix_truncation(buffer);
 }

 char *str_trim_words(char *str, int words)
@@ -3276,41 +3270,31 @@ int str_utf8_rewind(const char *str, int cursor)
 	return cursor;
 }

+int str_utf8_fix_truncation(char *str)
+{
+	int len = str_length(str);
+	if(len > 0)
+	{
+		int last_char_index = str_utf8_rewind(str, len);
+		const char *last_char = str + last_char_index;
+		// Fix truncated UTF-8.
+		if(str_utf8_decode(&last_char) == -1)
+		{
+			str[last_char_index] = 0;
+			return last_char_index;
+		}
+	}
+	return len;
+}
+
 int str_utf8_forward(const char *str, int cursor)
 {
-	const char *buf = str + cursor;
-	if(!buf[0])
+	const char *ptr = str + cursor;
+	if(str_utf8_decode(&ptr) == 0)
+	{
 		return cursor;
-
-	if((*buf & 0x80) == 0x0) /* 0xxxxxxx */
-		return cursor + 1;
-	else if((*buf & 0xE0) == 0xC0) /* 110xxxxx */
-	{
-		if(!buf[1])
-			return cursor + 1;
-		return cursor + 2;
 	}
-	else if((*buf & 0xF0) == 0xE0) /* 1110xxxx */
-	{
-		if(!buf[1])
-			return cursor + 1;
-		if(!buf[2])
-			return cursor + 2;
-		return cursor + 3;
-	}
-	else if((*buf & 0xF8) == 0xF0) /* 11110xxx */
-	{
-		if(!buf[1])
-			return cursor + 1;
-		if(!buf[2])
-			return cursor + 2;
-		if(!buf[3])
-			return cursor + 3;
-		return cursor + 4;
-	}
-
-	/* invalid */
-	return cursor + 1;
+	return ptr - str;
 }

 int str_utf8_encode(char *ptr, int chr)
@@ -3464,21 +3448,22 @@ int str_utf8_check(const char *str)
 	return 1;
 }

-void str_utf8_copy(char *dst, const char *src, int dst_size)
-{
-	str_utf8_truncate(dst, dst_size, src, dst_size);
-}
-
 void str_utf8_stats(const char *str, int max_size, int max_count, int *size, int *count)
 {
+	const char *cursor = str;
 	*size = 0;
 	*count = 0;
 	while(*size < max_size && *count < max_count)
 	{
-		int new_size = str_utf8_forward(str, *size);
-		if(new_size == *size || new_size >= max_size)
+		if(str_utf8_decode(&cursor) == 0)
+		{
 			break;
-		*size = new_size;
+		}
+		if(cursor - str >= max_size)
+		{
+			break;
+		}
+		*size = cursor - str;
 		++(*count);
 	}
 }
--- a/src/base/system.h
+++ b/src/base/system.h
@@ -2002,6 +2002,19 @@ void str_utf8_trim_right(char *str);
 */
 int str_utf8_rewind(const char *str, int cursor);

+/*
+	Function: str_utf8_fix_truncation
+		Fixes truncation of a Unicode character at the end of a UTF-8
+		string.
+
+	Returns:
+		The new string length.
+
+	Parameters:
+		str - utf8 string
+*/
+int str_utf8_fix_truncation(char *str);
+
 /*
 	Function: str_utf8_forward
 		Moves a cursor forwards in an utf8 string
@@ -2080,22 +2093,6 @@ int str_utf16le_encode(char *ptr, int chr);
 */
 int str_utf8_check(const char *str);

-/*
-	Function: str_utf8_copy
-		Copies a utf8 string to a buffer.
-
-	Parameters:
-		dst - Pointer to a buffer that shall receive the string.
-		src - utf8 string to be copied.
-		dst_size - Size of the buffer dst.
-
-	Remarks:
-		- The strings are treated as zero-terminated strings.
-		- Guarantees that dst string will contain zero-termination.
-		- Guarantees that dst always contains a valid utf8 string.
-*/
-void str_utf8_copy(char *dst, const char *src, int dst_size);
-
 /*
 	Function: str_utf8_stats
 		Determines the byte size and utf8 character count of a utf8 string.
--- a/src/engine/client/input.cpp
+++ b/src/engine/client/input.cpp
@@ -26,7 +26,7 @@ void CInput::AddEvent(char *pText, int Key, int Flags)
 		if(!pText)
 			m_aInputEvents[m_NumEvents].m_aText[0] = 0;
 		else
-			str_utf8_copy(m_aInputEvents[m_NumEvents].m_aText, pText, sizeof(m_aInputEvents[m_NumEvents].m_aText));
+			str_copy(m_aInputEvents[m_NumEvents].m_aText, pText, sizeof(m_aInputEvents[m_NumEvents].m_aText));
 		m_aInputEvents[m_NumEvents].m_InputCount = m_InputCounter;
 		m_NumEvents++;
 	}
--- a/src/engine/client/steam.cpp
+++ b/src/engine/client/steam.cpp
@@ -22,7 +22,7 @@ public:
 		m_pSteamFriends = SteamAPI_SteamFriends_v017();

 		ReadLaunchCommandLine();
-		str_utf8_copy(m_aPlayerName, SteamAPI_ISteamFriends_GetPersonaName(m_pSteamFriends), sizeof(m_aPlayerName));
+		str_copy(m_aPlayerName, SteamAPI_ISteamFriends_GetPersonaName(m_pSteamFriends), sizeof(m_aPlayerName));
 	}
 	~CSteam()
 	{
--- a/src/engine/client/text.cpp
+++ b/src/engine/client/text.cpp
@@ -263,15 +263,14 @@ class CTextRender : public IEngineTextRender

 	int WordLength(const char *pText)
 	{
-		int Length = 0;
+		const char *pCursor = pText;
 		while(1)
 		{
-			const char *pCursor = (pText + Length);
 			if(*pCursor == 0)
-				return Length;
+				return pCursor - pText;
 			if(*pCursor == '\n' || *pCursor == '\t' || *pCursor == ' ')
-				return Length + 1;
-			Length = str_utf8_forward(pText, Length);
+				return pCursor - pText + 1;
+			str_utf8_decode(&pCursor);
 		}
 	}

--- a/src/engine/server/server.cpp
+++ b/src/engine/server/server.cpp
@@ -400,9 +400,7 @@ bool CServer::SetClientNameImpl(int ClientID, const char *pNameRequest, bool Set
 		// auto rename
 		for(int i = 1;; i++)
 		{
-			char aNameTryFull[MAX_NAME_LENGTH + 4];
-			str_format(aNameTryFull, sizeof(aNameTryFull), "(%d)%s", i, aTrimmedName);
-			str_utf8_copy(aNameTry, aNameTryFull, sizeof(aNameTry));
+			str_format(aNameTry, sizeof(aNameTry), "(%d)%s", i, aTrimmedName);
 			if(IsClientNameAvailable(ClientID, aNameTry))
 				break;
 		}
--- a/src/engine/shared/network_conn.cpp
+++ b/src/engine/shared/network_conn.cpp
@@ -294,7 +294,7 @@ int CNetConnection::Feed(CNetPacketConstruct *pPacket, NETADDR *pAddr, SECURITY_
 				if(pPacket->m_DataSize > 1)
 				{
 					// make sure to sanitize the error string form the other party
-					str_utf8_copy(aStr, (char *)&pPacket->m_aChunkData[1], minimum(pPacket->m_DataSize, (int)sizeof(aStr)));
+					str_copy(aStr, (char *)&pPacket->m_aChunkData[1], minimum(pPacket->m_DataSize, (int)sizeof(aStr)));
 					str_sanitize_cc(aStr);
 				}

--- a/src/game/client/components/chat.cpp
+++ b/src/game/client/components/chat.cpp
@@ -192,7 +192,7 @@ bool CChat::OnInput(IInput::CEvent Event)
 				if(Text[i] == '\n')
 				{
 					int max = minimum(i - Begin + 1, (int)sizeof(aLine));
-					str_utf8_copy(aLine, Text + Begin, max);
+					str_copy(aLine, Text + Begin, max);
 					Begin = i + 1;
 					SayChat(aLine);
 					while(Text[i] == '\n')
@@ -200,7 +200,7 @@ bool CChat::OnInput(IInput::CEvent Event)
 				}
 			}
 			int max = minimum(i - Begin + 1, (int)sizeof(aLine));
-			str_utf8_copy(aLine, Text + Begin, max);
+			str_copy(aLine, Text + Begin, max);
 			m_Input.Append(aLine);
 		}
 	}
--- a/src/game/client/components/scoreboard.cpp
+++ b/src/game/client/components/scoreboard.cpp
@@ -206,7 +206,7 @@ void CScoreboard::RenderScoreboard(float x, float y, float w, int Team, const ch
 			pTitle = Localize("Game over");
 		else
 		{
-			str_utf8_copy(aBuf, Client()->GetCurrentMap(), sizeof(aBuf));
+			str_copy(aBuf, Client()->GetCurrentMap(), sizeof(aBuf));
 			while(TextRender()->TextWidth(0, TitleFontsize, aBuf, -1, -1.0f) > TitleWidth)
 				aBuf[str_length(aBuf) - 1] = '\0';
 			if(str_comp(aBuf, Client()->GetCurrentMap()))
--- a/src/test/str.cpp
+++ b/src/test/str.cpp
@@ -95,6 +95,33 @@ TEST(Str, Utf8ToLower)
 	EXPECT_TRUE(str_utf8_find_nocase(str, "z") == NULL);
 }

+TEST(Str, Utf8FixTruncation)
+{
+	char aaBuf[][32] = {
+		"",
+		"\xff",
+		"abc",
+		"abc\xff",
+		"blub\xffxyz",
+		"привет Наташа\xff",
+		"до свидания\xffОлег",
+	};
+	const char *apExpected[] = {
+		"",
+		"",
+		"abc",
+		"abc",
+		"blub\xffxyz",
+		"привет Наташа",
+		"до свидания\xffОлег",
+	};
+	for(unsigned i = 0; i < sizeof(aaBuf) / sizeof(aaBuf[0]); i++)
+	{
+		EXPECT_EQ(str_utf8_fix_truncation(aaBuf[i]), str_length(apExpected[i]));
+		EXPECT_STREQ(aaBuf[i], apExpected[i]);
+	}
+}
+
 TEST(Str, Startswith)
 {
 	EXPECT_TRUE(str_startswith("abcdef", "abc"));
@@ -204,6 +231,32 @@ TEST(Str, StrFormat)
 	EXPECT_STREQ(aBuf, "99:");
 }

+TEST(Str, StrFormatTruncate)
+{
+	const char *pStr = "DDNet最好了";
+	char aBuf[64];
+	str_format(aBuf, 7, "%s", pStr);
+	EXPECT_STREQ(aBuf, "DDNet");
+	str_format(aBuf, 8, "%s", pStr);
+	EXPECT_STREQ(aBuf, "DDNet");
+	str_format(aBuf, 9, "%s", pStr);
+	EXPECT_STREQ(aBuf, "DDNet最");
+	str_format(aBuf, 10, "%s", pStr);
+	EXPECT_STREQ(aBuf, "DDNet最");
+	str_format(aBuf, 11, "%s", pStr);
+	EXPECT_STREQ(aBuf, "DDNet最");
+	str_format(aBuf, 12, "%s", pStr);
+	EXPECT_STREQ(aBuf, "DDNet最好");
+	str_format(aBuf, 13, "%s", pStr);
+	EXPECT_STREQ(aBuf, "DDNet最好");
+	str_format(aBuf, 14, "%s", pStr);
+	EXPECT_STREQ(aBuf, "DDNet最好");
+	str_format(aBuf, 15, "%s", pStr);
+	EXPECT_STREQ(aBuf, "DDNet最好了");
+	str_format(aBuf, 16, "%s", pStr);
+	EXPECT_STREQ(aBuf, "DDNet最好了");
+}
+
 TEST(Str, StrCopyNum)
 {
 	const char *foo = "Foobaré";
@@ -229,29 +282,29 @@ TEST(Str, StrCopyNum)
 	EXPECT_STREQ(aBuf3, "Foobaré");
 }

-TEST(Str, StrCopyUtf8)
+TEST(Str, StrCopy)
 {
-	const char *foo = "DDNet最好了";
+	const char *pStr = "DDNet最好了";
 	char aBuf[64];
-	str_utf8_copy(aBuf, foo, 7);
+	str_copy(aBuf, pStr, 7);
 	EXPECT_STREQ(aBuf, "DDNet");
-	str_utf8_copy(aBuf, foo, 8);
+	str_copy(aBuf, pStr, 8);
 	EXPECT_STREQ(aBuf, "DDNet");
-	str_utf8_copy(aBuf, foo, 9);
+	str_copy(aBuf, pStr, 9);
 	EXPECT_STREQ(aBuf, "DDNet最");
-	str_utf8_copy(aBuf, foo, 10);
+	str_copy(aBuf, pStr, 10);
 	EXPECT_STREQ(aBuf, "DDNet最");
-	str_utf8_copy(aBuf, foo, 11);
+	str_copy(aBuf, pStr, 11);
 	EXPECT_STREQ(aBuf, "DDNet最");
-	str_utf8_copy(aBuf, foo, 12);
+	str_copy(aBuf, pStr, 12);
 	EXPECT_STREQ(aBuf, "DDNet最好");
-	str_utf8_copy(aBuf, foo, 13);
+	str_copy(aBuf, pStr, 13);
 	EXPECT_STREQ(aBuf, "DDNet最好");
-	str_utf8_copy(aBuf, foo, 14);
+	str_copy(aBuf, pStr, 14);
 	EXPECT_STREQ(aBuf, "DDNet最好");
-	str_utf8_copy(aBuf, foo, 15);
+	str_copy(aBuf, pStr, 15);
 	EXPECT_STREQ(aBuf, "DDNet最好了");
-	str_utf8_copy(aBuf, foo, 16);
+	str_copy(aBuf, pStr, 16);
 	EXPECT_STREQ(aBuf, "DDNet最好了");
 }