4481: Don't create cut off UTF-8 sequences on string manipulation r=def- a=heinrich5991

CC #4463
CC #4465 

## Checklist

- [ ] Tested the change ingame
- [ ] Provided screenshots if it is a visual change
- [ ] Tested in combination with possibly related configuration options
- [x] Written a unit test if it works standalone, system.c especially
- [ ] Considered possible null pointers and out of bounds array indexing
- [ ] Changed no physics that affect existing maps
- [ ] Tested the change with [ASan+UBSan or valgrind's memcheck](https://github.com/ddnet/ddnet/#using-addresssanitizer--undefinedbehavioursanitizer-or-valgrinds-memcheck) (optional)


Co-authored-by: heinrich5991 <heinrich5991@gmail.com>
This commit is contained in:
bors[bot] 2021-12-20 08:58:55 +00:00 committed by GitHub
commit ec0d1172eb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 131 additions and 99 deletions

View file

@ -2545,23 +2545,28 @@ void str_append(char *dst, const char *src, int dst_size)
} }
dst[dst_size - 1] = 0; /* assure null termination */ dst[dst_size - 1] = 0; /* assure null termination */
str_utf8_fix_truncation(dst);
} }
void str_copy(char *dst, const char *src, int dst_size) void str_copy(char *dst, const char *src, int dst_size)
{ {
strncpy(dst, src, dst_size - 1); strncpy(dst, src, dst_size - 1);
dst[dst_size - 1] = 0; /* assure null termination */ dst[dst_size - 1] = 0; /* assure null termination */
str_utf8_fix_truncation(dst);
} }
void str_utf8_truncate(char *dst, int dst_size, const char *src, int truncation_len) void str_utf8_truncate(char *dst, int dst_size, const char *src, int truncation_len)
{ {
int size = -1; int size = -1;
int cursor = 0; const char *cursor = src;
int pos = 0; int pos = 0;
while(pos <= truncation_len && cursor < dst_size && size != cursor) while(pos <= truncation_len && cursor - src < dst_size && size != cursor - src)
{ {
size = cursor; size = cursor - src;
cursor = str_utf8_forward(src, cursor); if(str_utf8_decode(&cursor) == 0)
{
break;
}
pos++; pos++;
} }
str_copy(dst, src, size + 1); str_copy(dst, src, size + 1);
@ -2584,33 +2589,22 @@ int str_length(const char *str)
int str_format(char *buffer, int buffer_size, const char *format, ...) int str_format(char *buffer, int buffer_size, const char *format, ...)
{ {
int ret;
#if defined(CONF_FAMILY_WINDOWS) #if defined(CONF_FAMILY_WINDOWS)
va_list ap; va_list ap;
va_start(ap, format); va_start(ap, format);
ret = _vsnprintf(buffer, buffer_size, format, ap); _vsnprintf(buffer, buffer_size, format, ap);
va_end(ap); va_end(ap);
buffer[buffer_size - 1] = 0; /* assure null termination */ buffer[buffer_size - 1] = 0; /* assure null termination */
/* _vsnprintf is documented to return negative values on truncation, but
* in practice we didn't see that. let's handle it anyway just in case. */
if(ret < 0)
ret = buffer_size - 1;
#else #else
va_list ap; va_list ap;
va_start(ap, format); va_start(ap, format);
ret = vsnprintf(buffer, buffer_size, format, ap); vsnprintf(buffer, buffer_size, format, ap);
va_end(ap); va_end(ap);
/* null termination is assured by definition of vsnprintf */ /* null termination is assured by definition of vsnprintf */
#endif #endif
return str_utf8_fix_truncation(buffer);
/* a return value of buffer_size or more indicates truncated output */
if(ret >= buffer_size)
ret = buffer_size - 1;
return ret;
} }
char *str_trim_words(char *str, int words) char *str_trim_words(char *str, int words)
@ -3276,41 +3270,31 @@ int str_utf8_rewind(const char *str, int cursor)
return cursor; return cursor;
} }
/*
	Cuts off the last character of a UTF-8 string in place when that
	character fails to decode, and reports the resulting length.

	Parameters:
		str - zero-terminated string; may be shortened in place.

	Returns:
		The index at which the string was terminated if the last
		character was removed, otherwise the unchanged string length.
*/
int str_utf8_fix_truncation(char *str)
{
	const int length = str_length(str);
	if(length <= 0)
	{
		/* nothing to inspect */
		return length;
	}

	/* presumably the start index of the last character; see str_utf8_rewind */
	const int tail_index = str_utf8_rewind(str, length);
	const char *tail = str + tail_index;
	if(str_utf8_decode(&tail) != -1)
	{
		/* the last character decoded without error, keep the string as it is */
		return length;
	}

	/* Fix truncated UTF-8: terminate the string in front of the broken character. */
	str[tail_index] = 0;
	return tail_index;
}
int str_utf8_forward(const char *str, int cursor) int str_utf8_forward(const char *str, int cursor)
{ {
const char *buf = str + cursor; const char *ptr = str + cursor;
if(!buf[0]) if(str_utf8_decode(&ptr) == 0)
{
return cursor; return cursor;
if((*buf & 0x80) == 0x0) /* 0xxxxxxx */
return cursor + 1;
else if((*buf & 0xE0) == 0xC0) /* 110xxxxx */
{
if(!buf[1])
return cursor + 1;
return cursor + 2;
} }
else if((*buf & 0xF0) == 0xE0) /* 1110xxxx */ return ptr - str;
{
if(!buf[1])
return cursor + 1;
if(!buf[2])
return cursor + 2;
return cursor + 3;
}
else if((*buf & 0xF8) == 0xF0) /* 11110xxx */
{
if(!buf[1])
return cursor + 1;
if(!buf[2])
return cursor + 2;
if(!buf[3])
return cursor + 3;
return cursor + 4;
}
/* invalid */
return cursor + 1;
} }
int str_utf8_encode(char *ptr, int chr) int str_utf8_encode(char *ptr, int chr)
@ -3464,21 +3448,22 @@ int str_utf8_check(const char *str)
return 1; return 1;
} }
/*
	Copies src into dst by delegating to str_utf8_truncate.

	Parameters:
		dst - buffer that receives the string.
		src - string to copy.
		dst_size - size of the buffer dst.
*/
void str_utf8_copy(char *dst, const char *src, int dst_size)
{
	/* the buffer size doubles as the truncation length passed on */
	const int truncation_len = dst_size;
	str_utf8_truncate(dst, dst_size, src, truncation_len);
}
void str_utf8_stats(const char *str, int max_size, int max_count, int *size, int *count) void str_utf8_stats(const char *str, int max_size, int max_count, int *size, int *count)
{ {
const char *cursor = str;
*size = 0; *size = 0;
*count = 0; *count = 0;
while(*size < max_size && *count < max_count) while(*size < max_size && *count < max_count)
{ {
int new_size = str_utf8_forward(str, *size); if(str_utf8_decode(&cursor) == 0)
if(new_size == *size || new_size >= max_size) {
break; break;
*size = new_size; }
if(cursor - str >= max_size)
{
break;
}
*size = cursor - str;
++(*count); ++(*count);
} }
} }

View file

@ -2002,6 +2002,19 @@ void str_utf8_trim_right(char *str);
*/ */
int str_utf8_rewind(const char *str, int cursor); int str_utf8_rewind(const char *str, int cursor);
/*
Function: str_utf8_fix_truncation
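Fixes truncation of a Unicode character at the end of a UTF-8
string. If the last character of the string cannot be decoded, the
string is terminated in place right before it; otherwise the string
is left unchanged.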
Returns:
The new string length.
Parameters:
str - utf8 string
*/
int str_utf8_fix_truncation(char *str);
/* /*
Function: str_utf8_forward Function: str_utf8_forward
Moves a cursor forwards in a utf8 string Moves a cursor forwards in a utf8 string
@ -2080,22 +2093,6 @@ int str_utf16le_encode(char *ptr, int chr);
*/ */
int str_utf8_check(const char *str); int str_utf8_check(const char *str);
/*
Function: str_utf8_copy
Copies a utf8 string to a buffer.
Parameters:
dst - Pointer to a buffer that shall receive the string.
src - utf8 string to be copied.
dst_size - Size of the buffer dst.
Remarks:
- The strings are treated as zero-terminated strings.
- Guarantees that dst string will contain zero-termination.
- Guarantees that dst always contains a valid utf8 string.
*/
void str_utf8_copy(char *dst, const char *src, int dst_size);
/* /*
Function: str_utf8_stats Function: str_utf8_stats
Determines the byte size and utf8 character count of a utf8 string. Determines the byte size and utf8 character count of a utf8 string.

View file

@ -26,7 +26,7 @@ void CInput::AddEvent(char *pText, int Key, int Flags)
if(!pText) if(!pText)
m_aInputEvents[m_NumEvents].m_aText[0] = 0; m_aInputEvents[m_NumEvents].m_aText[0] = 0;
else else
str_utf8_copy(m_aInputEvents[m_NumEvents].m_aText, pText, sizeof(m_aInputEvents[m_NumEvents].m_aText)); str_copy(m_aInputEvents[m_NumEvents].m_aText, pText, sizeof(m_aInputEvents[m_NumEvents].m_aText));
m_aInputEvents[m_NumEvents].m_InputCount = m_InputCounter; m_aInputEvents[m_NumEvents].m_InputCount = m_InputCounter;
m_NumEvents++; m_NumEvents++;
} }

View file

@ -22,7 +22,7 @@ public:
m_pSteamFriends = SteamAPI_SteamFriends_v017(); m_pSteamFriends = SteamAPI_SteamFriends_v017();
ReadLaunchCommandLine(); ReadLaunchCommandLine();
str_utf8_copy(m_aPlayerName, SteamAPI_ISteamFriends_GetPersonaName(m_pSteamFriends), sizeof(m_aPlayerName)); str_copy(m_aPlayerName, SteamAPI_ISteamFriends_GetPersonaName(m_pSteamFriends), sizeof(m_aPlayerName));
} }
~CSteam() ~CSteam()
{ {

View file

@ -263,15 +263,14 @@ class CTextRender : public IEngineTextRender
int WordLength(const char *pText) int WordLength(const char *pText)
{ {
int Length = 0; const char *pCursor = pText;
while(1) while(1)
{ {
const char *pCursor = (pText + Length);
if(*pCursor == 0) if(*pCursor == 0)
return Length; return pCursor - pText;
if(*pCursor == '\n' || *pCursor == '\t' || *pCursor == ' ') if(*pCursor == '\n' || *pCursor == '\t' || *pCursor == ' ')
return Length + 1; return pCursor - pText + 1;
Length = str_utf8_forward(pText, Length); str_utf8_decode(&pCursor);
} }
} }

View file

@ -400,9 +400,7 @@ bool CServer::SetClientNameImpl(int ClientID, const char *pNameRequest, bool Set
// auto rename // auto rename
for(int i = 1;; i++) for(int i = 1;; i++)
{ {
char aNameTryFull[MAX_NAME_LENGTH + 4]; str_format(aNameTry, sizeof(aNameTry), "(%d)%s", i, aTrimmedName);
str_format(aNameTryFull, sizeof(aNameTryFull), "(%d)%s", i, aTrimmedName);
str_utf8_copy(aNameTry, aNameTryFull, sizeof(aNameTry));
if(IsClientNameAvailable(ClientID, aNameTry)) if(IsClientNameAvailable(ClientID, aNameTry))
break; break;
} }

View file

@ -294,7 +294,7 @@ int CNetConnection::Feed(CNetPacketConstruct *pPacket, NETADDR *pAddr, SECURITY_
if(pPacket->m_DataSize > 1) if(pPacket->m_DataSize > 1)
{ {
// make sure to sanitize the error string from the other party // make sure to sanitize the error string from the other party
str_utf8_copy(aStr, (char *)&pPacket->m_aChunkData[1], minimum(pPacket->m_DataSize, (int)sizeof(aStr))); str_copy(aStr, (char *)&pPacket->m_aChunkData[1], minimum(pPacket->m_DataSize, (int)sizeof(aStr)));
str_sanitize_cc(aStr); str_sanitize_cc(aStr);
} }

View file

@ -192,7 +192,7 @@ bool CChat::OnInput(IInput::CEvent Event)
if(Text[i] == '\n') if(Text[i] == '\n')
{ {
int max = minimum(i - Begin + 1, (int)sizeof(aLine)); int max = minimum(i - Begin + 1, (int)sizeof(aLine));
str_utf8_copy(aLine, Text + Begin, max); str_copy(aLine, Text + Begin, max);
Begin = i + 1; Begin = i + 1;
SayChat(aLine); SayChat(aLine);
while(Text[i] == '\n') while(Text[i] == '\n')
@ -200,7 +200,7 @@ bool CChat::OnInput(IInput::CEvent Event)
} }
} }
int max = minimum(i - Begin + 1, (int)sizeof(aLine)); int max = minimum(i - Begin + 1, (int)sizeof(aLine));
str_utf8_copy(aLine, Text + Begin, max); str_copy(aLine, Text + Begin, max);
m_Input.Append(aLine); m_Input.Append(aLine);
} }
} }

View file

@ -206,7 +206,7 @@ void CScoreboard::RenderScoreboard(float x, float y, float w, int Team, const ch
pTitle = Localize("Game over"); pTitle = Localize("Game over");
else else
{ {
str_utf8_copy(aBuf, Client()->GetCurrentMap(), sizeof(aBuf)); str_copy(aBuf, Client()->GetCurrentMap(), sizeof(aBuf));
while(TextRender()->TextWidth(0, TitleFontsize, aBuf, -1, -1.0f) > TitleWidth) while(TextRender()->TextWidth(0, TitleFontsize, aBuf, -1, -1.0f) > TitleWidth)
aBuf[str_length(aBuf) - 1] = '\0'; aBuf[str_length(aBuf) - 1] = '\0';
if(str_comp(aBuf, Client()->GetCurrentMap())) if(str_comp(aBuf, Client()->GetCurrentMap()))

View file

@ -95,6 +95,33 @@ TEST(Str, Utf8ToLower)
EXPECT_TRUE(str_utf8_find_nocase(str, "z") == NULL); EXPECT_TRUE(str_utf8_find_nocase(str, "z") == NULL);
} }
// Runs str_utf8_fix_truncation over a fixed set of strings and checks both the
// returned length and the resulting buffer contents. In these cases a "\xff"
// at the very end is expected to be dropped, while a "\xff" in the middle of
// the string is expected to stay.
TEST(Str, Utf8FixTruncation)
{
	// An input buffer (modified in place by the call) together with the
	// string it is expected to hold afterwards.
	struct STestCase
	{
		char m_aInput[32];
		const char *m_pExpected;
	};
	STestCase aTestCases[] = {
		{"", ""},
		{"\xff", ""},
		{"abc", "abc"},
		{"abc\xff", "abc"},
		{"blub\xffxyz", "blub\xffxyz"},
		{"привет Наташа\xff", "привет Наташа"},
		{"до свидания\xffОлег", "до свидания\xffОлег"},
	};
	const unsigned NumTestCases = sizeof(aTestCases) / sizeof(aTestCases[0]);
	for(unsigned Index = 0; Index < NumTestCases; ++Index)
	{
		STestCase &TestCase = aTestCases[Index];
		EXPECT_EQ(str_utf8_fix_truncation(TestCase.m_aInput), str_length(TestCase.m_pExpected));
		EXPECT_STREQ(TestCase.m_aInput, TestCase.m_pExpected);
	}
}
TEST(Str, Startswith) TEST(Str, Startswith)
{ {
EXPECT_TRUE(str_startswith("abcdef", "abc")); EXPECT_TRUE(str_startswith("abcdef", "abc"));
@ -204,6 +231,32 @@ TEST(Str, StrFormat)
EXPECT_STREQ(aBuf, "99:"); EXPECT_STREQ(aBuf, "99:");
} }
// Formats the same string into buffers of size 7 through 16 and checks that
// the output never ends in a partial multi-byte character.
TEST(Str, StrFormatTruncate)
{
	const char *pStr = "DDNet最好了";
	// Expected output per buffer size; entry 0 belongs to FirstBufferSize.
	const char *apExpected[] = {
		"DDNet", // 7
		"DDNet", // 8
		"DDNet最", // 9
		"DDNet最", // 10
		"DDNet最", // 11
		"DDNet最好", // 12
		"DDNet最好", // 13
		"DDNet最好", // 14
		"DDNet最好了", // 15
		"DDNet最好了", // 16
	};
	const int FirstBufferSize = 7;
	const int NumBufferSizes = sizeof(apExpected) / sizeof(apExpected[0]);
	char aBuf[64];
	for(int Offset = 0; Offset < NumBufferSizes; ++Offset)
	{
		str_format(aBuf, FirstBufferSize + Offset, "%s", pStr);
		EXPECT_STREQ(aBuf, apExpected[Offset]);
	}
}
TEST(Str, StrCopyNum) TEST(Str, StrCopyNum)
{ {
const char *foo = "Foobaré"; const char *foo = "Foobaré";
@ -229,29 +282,29 @@ TEST(Str, StrCopyNum)
EXPECT_STREQ(aBuf3, "Foobaré"); EXPECT_STREQ(aBuf3, "Foobaré");
} }
TEST(Str, StrCopyUtf8) TEST(Str, StrCopy)
{ {
const char *foo = "DDNet最好了"; const char *pStr = "DDNet最好了";
char aBuf[64]; char aBuf[64];
str_utf8_copy(aBuf, foo, 7); str_copy(aBuf, pStr, 7);
EXPECT_STREQ(aBuf, "DDNet"); EXPECT_STREQ(aBuf, "DDNet");
str_utf8_copy(aBuf, foo, 8); str_copy(aBuf, pStr, 8);
EXPECT_STREQ(aBuf, "DDNet"); EXPECT_STREQ(aBuf, "DDNet");
str_utf8_copy(aBuf, foo, 9); str_copy(aBuf, pStr, 9);
EXPECT_STREQ(aBuf, "DDNet最"); EXPECT_STREQ(aBuf, "DDNet最");
str_utf8_copy(aBuf, foo, 10); str_copy(aBuf, pStr, 10);
EXPECT_STREQ(aBuf, "DDNet最"); EXPECT_STREQ(aBuf, "DDNet最");
str_utf8_copy(aBuf, foo, 11); str_copy(aBuf, pStr, 11);
EXPECT_STREQ(aBuf, "DDNet最"); EXPECT_STREQ(aBuf, "DDNet最");
str_utf8_copy(aBuf, foo, 12); str_copy(aBuf, pStr, 12);
EXPECT_STREQ(aBuf, "DDNet最好"); EXPECT_STREQ(aBuf, "DDNet最好");
str_utf8_copy(aBuf, foo, 13); str_copy(aBuf, pStr, 13);
EXPECT_STREQ(aBuf, "DDNet最好"); EXPECT_STREQ(aBuf, "DDNet最好");
str_utf8_copy(aBuf, foo, 14); str_copy(aBuf, pStr, 14);
EXPECT_STREQ(aBuf, "DDNet最好"); EXPECT_STREQ(aBuf, "DDNet最好");
str_utf8_copy(aBuf, foo, 15); str_copy(aBuf, pStr, 15);
EXPECT_STREQ(aBuf, "DDNet最好了"); EXPECT_STREQ(aBuf, "DDNet最好了");
str_utf8_copy(aBuf, foo, 16); str_copy(aBuf, pStr, 16);
EXPECT_STREQ(aBuf, "DDNet最好了"); EXPECT_STREQ(aBuf, "DDNet最好了");
} }