4481: Don't create cut off UTF-8 sequences on string manipulation r=def- a=heinrich5991

CC #4463
CC #4465 

## Checklist

- [ ] Tested the change ingame
- [ ] Provided screenshots if it is a visual change
- [ ] Tested in combination with possibly related configuration options
- [x] Written a unit test if it works standalone, system.c especially
- [ ] Considered possible null pointers and out of bounds array indexing
- [ ] Changed no physics that affect existing maps
- [ ] Tested the change with [ASan+UBSan or valgrind's memcheck](https://github.com/ddnet/ddnet/#using-addresssanitizer--undefinedbehavioursanitizer-or-valgrinds-memcheck) (optional)


Co-authored-by: heinrich5991 <heinrich5991@gmail.com>
This commit is contained in:
bors[bot] 2021-12-20 08:58:55 +00:00 committed by GitHub
commit ec0d1172eb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 131 additions and 99 deletions

View file

@ -2545,23 +2545,28 @@ void str_append(char *dst, const char *src, int dst_size)
}
dst[dst_size - 1] = 0; /* assure null termination */
str_utf8_fix_truncation(dst);
}
/*
	Copies src into dst, writing at most dst_size bytes including the
	terminating zero. dst is always zero-terminated afterwards, and the
	final str_utf8_fix_truncation call trims the end of dst when the copy
	stopped on a character that does not decode as UTF-8.
*/
void str_copy(char *dst, const char *src, int dst_size)
{
	const int last = dst_size - 1;
	strncpy(dst, src, last);
	dst[last] = 0; /* strncpy does not terminate when src fills the buffer */
	str_utf8_fix_truncation(dst);
}
void str_utf8_truncate(char *dst, int dst_size, const char *src, int truncation_len)
{
int size = -1;
int cursor = 0;
const char *cursor = src;
int pos = 0;
while(pos <= truncation_len && cursor < dst_size && size != cursor)
while(pos <= truncation_len && cursor - src < dst_size && size != cursor - src)
{
size = cursor;
cursor = str_utf8_forward(src, cursor);
size = cursor - src;
if(str_utf8_decode(&cursor) == 0)
{
break;
}
pos++;
}
str_copy(dst, src, size + 1);
@ -2584,33 +2589,22 @@ int str_length(const char *str)
/*
	Formats into buffer like snprintf, always zero-terminates, and then
	trims a trailing character that got cut off by the size limit so the
	result does not end in a partial UTF-8 sequence.

	Parameters:
		buffer - Destination buffer.
		buffer_size - Size of buffer in bytes, including the terminating zero.
		format - printf-style format string, followed by its arguments.

	Returns:
		The length of the string left in buffer, as reported by
		str_utf8_fix_truncation.
*/
int str_format(char *buffer, int buffer_size, const char *format, ...)
{
	/* The argument list is consumed by exactly one formatting call per
	 * branch; a va_list must not be reused without a fresh va_start. */
#if defined(CONF_FAMILY_WINDOWS)
	va_list ap;
	va_start(ap, format);
	_vsnprintf(buffer, buffer_size, format, ap);
	va_end(ap);

	buffer[buffer_size - 1] = 0; /* assure null termination */
#else
	va_list ap;
	va_start(ap, format);
	vsnprintf(buffer, buffer_size, format, ap);
	va_end(ap);

	/* null termination is assured by definition of vsnprintf */
#endif
	/* The formatter's own return value is not used: the length is measured
	 * after the truncation fix, which also covers the truncated case. */
	return str_utf8_fix_truncation(buffer);
}
char *str_trim_words(char *str, int words)
@ -3276,41 +3270,31 @@ int str_utf8_rewind(const char *str, int cursor)
return cursor;
}
int str_utf8_fix_truncation(char *str)
{
int len = str_length(str);
if(len > 0)
{
int last_char_index = str_utf8_rewind(str, len);
const char *last_char = str + last_char_index;
// Fix truncated UTF-8.
if(str_utf8_decode(&last_char) == -1)
{
str[last_char_index] = 0;
return last_char_index;
}
}
return len;
}
/*
	Moves a byte cursor forward by one character in a utf8 string.

	Parameters:
		str - utf8 string
		cursor - byte offset of the current character within str

	Returns:
		The byte offset just past the character that was decoded at cursor,
		or cursor unchanged when the decoder returns 0 (treated here as the
		end of the string).
*/
int str_utf8_forward(const char *str, int cursor)
{
	/* Stepping is delegated to the shared decoder rather than duplicating
	 * the lead-byte length rules here. */
	const char *ptr = str + cursor;
	if(str_utf8_decode(&ptr) == 0)
	{
		return cursor;
	}
	return ptr - str;
}
int str_utf8_encode(char *ptr, int chr)
@ -3464,21 +3448,22 @@ int str_utf8_check(const char *str)
return 1;
}
/* Copies the utf8 string src into dst (a buffer of dst_size bytes) by
 * delegating to str_utf8_truncate; dst_size is passed both as the buffer
 * size and as the truncation length. */
void str_utf8_copy(char *dst, const char *src, int dst_size)
{
str_utf8_truncate(dst, dst_size, src, dst_size);
}
/*
	Determines the byte size and the character count of the longest prefix
	of a utf8 string that stays within both limits.

	Parameters:
		str - utf8 string
		max_size - Byte limit; the reported size is always below it.
		max_count - Upper bound for the reported character count.
		size - Receives the byte size of the accepted prefix.
		count - Receives the number of characters in the accepted prefix.
*/
void str_utf8_stats(const char *str, int max_size, int max_count, int *size, int *count)
{
	const char *cursor = str;
	*size = 0;
	*count = 0;
	while(*size < max_size && *count < max_count)
	{
		/* A decoder result of 0 ends the scan. */
		if(str_utf8_decode(&cursor) == 0)
		{
			break;
		}
		/* The character just decoded would reach the byte limit, so it is
		 * not counted and the outputs keep their previous values. */
		if(cursor - str >= max_size)
		{
			break;
		}
		*size = cursor - str;
		++(*count);
	}
}

View file

@ -2002,6 +2002,19 @@ void str_utf8_trim_right(char *str);
*/
int str_utf8_rewind(const char *str, int cursor);
/*
Function: str_utf8_fix_truncation
Fixes truncation of a Unicode character at the end of a UTF-8
string.
Returns:
The new string length.
Parameters:
str - utf8 string
*/
int str_utf8_fix_truncation(char *str);
/*
Function: str_utf8_forward
Moves a cursor forwards in a utf8 string
@ -2080,22 +2093,6 @@ int str_utf16le_encode(char *ptr, int chr);
*/
int str_utf8_check(const char *str);
/*
Function: str_utf8_copy
Copies a utf8 string to a buffer.
Parameters:
dst - Pointer to a buffer that shall receive the string.
src - utf8 string to be copied.
dst_size - Size of the buffer dst.
Remarks:
- The strings are treated as zero-terminated strings.
- Guarantees that dst string will contain zero-termination.
- Guarantees that dst always contains a valid utf8 string.
*/
void str_utf8_copy(char *dst, const char *src, int dst_size);
/*
Function: str_utf8_stats
Determines the byte size and utf8 character count of a utf8 string.

View file

@ -26,7 +26,7 @@ void CInput::AddEvent(char *pText, int Key, int Flags)
if(!pText)
m_aInputEvents[m_NumEvents].m_aText[0] = 0;
else
str_utf8_copy(m_aInputEvents[m_NumEvents].m_aText, pText, sizeof(m_aInputEvents[m_NumEvents].m_aText));
str_copy(m_aInputEvents[m_NumEvents].m_aText, pText, sizeof(m_aInputEvents[m_NumEvents].m_aText));
m_aInputEvents[m_NumEvents].m_InputCount = m_InputCounter;
m_NumEvents++;
}

View file

@ -22,7 +22,7 @@ public:
m_pSteamFriends = SteamAPI_SteamFriends_v017();
ReadLaunchCommandLine();
str_utf8_copy(m_aPlayerName, SteamAPI_ISteamFriends_GetPersonaName(m_pSteamFriends), sizeof(m_aPlayerName));
str_copy(m_aPlayerName, SteamAPI_ISteamFriends_GetPersonaName(m_pSteamFriends), sizeof(m_aPlayerName));
}
~CSteam()
{

View file

@ -263,15 +263,14 @@ class CTextRender : public IEngineTextRender
// Returns the length in bytes of the first word of pText. The count runs
// up to and including the first '\n', '\t' or ' ', or up to (excluding)
// the terminating zero when no such separator is found.
int WordLength(const char *pText)
{
	const char *pCursor = pText;
	while(1)
	{
		if(*pCursor == 0)
			return pCursor - pText;
		if(*pCursor == '\n' || *pCursor == '\t' || *pCursor == ' ')
			return pCursor - pText + 1;
		// Advance by one whole character; the decoder moves the cursor.
		str_utf8_decode(&pCursor);
	}
}

View file

@ -400,9 +400,7 @@ bool CServer::SetClientNameImpl(int ClientID, const char *pNameRequest, bool Set
// auto rename
for(int i = 1;; i++)
{
char aNameTryFull[MAX_NAME_LENGTH + 4];
str_format(aNameTryFull, sizeof(aNameTryFull), "(%d)%s", i, aTrimmedName);
str_utf8_copy(aNameTry, aNameTryFull, sizeof(aNameTry));
str_format(aNameTry, sizeof(aNameTry), "(%d)%s", i, aTrimmedName);
if(IsClientNameAvailable(ClientID, aNameTry))
break;
}

View file

@ -294,7 +294,7 @@ int CNetConnection::Feed(CNetPacketConstruct *pPacket, NETADDR *pAddr, SECURITY_
if(pPacket->m_DataSize > 1)
{
// make sure to sanitize the error string from the other party
str_utf8_copy(aStr, (char *)&pPacket->m_aChunkData[1], minimum(pPacket->m_DataSize, (int)sizeof(aStr)));
str_copy(aStr, (char *)&pPacket->m_aChunkData[1], minimum(pPacket->m_DataSize, (int)sizeof(aStr)));
str_sanitize_cc(aStr);
}

View file

@ -192,7 +192,7 @@ bool CChat::OnInput(IInput::CEvent Event)
if(Text[i] == '\n')
{
int max = minimum(i - Begin + 1, (int)sizeof(aLine));
str_utf8_copy(aLine, Text + Begin, max);
str_copy(aLine, Text + Begin, max);
Begin = i + 1;
SayChat(aLine);
while(Text[i] == '\n')
@ -200,7 +200,7 @@ bool CChat::OnInput(IInput::CEvent Event)
}
}
int max = minimum(i - Begin + 1, (int)sizeof(aLine));
str_utf8_copy(aLine, Text + Begin, max);
str_copy(aLine, Text + Begin, max);
m_Input.Append(aLine);
}
}

View file

@ -206,7 +206,7 @@ void CScoreboard::RenderScoreboard(float x, float y, float w, int Team, const ch
pTitle = Localize("Game over");
else
{
str_utf8_copy(aBuf, Client()->GetCurrentMap(), sizeof(aBuf));
str_copy(aBuf, Client()->GetCurrentMap(), sizeof(aBuf));
while(TextRender()->TextWidth(0, TitleFontsize, aBuf, -1, -1.0f) > TitleWidth)
aBuf[str_length(aBuf) - 1] = '\0';
if(str_comp(aBuf, Client()->GetCurrentMap()))

View file

@ -95,6 +95,33 @@ TEST(Str, Utf8ToLower)
EXPECT_TRUE(str_utf8_find_nocase(str, "z") == NULL);
}
// Checks str_utf8_fix_truncation on a table of inputs: a stray \xff byte at
// the very end must be removed, while one in the middle of the string must
// be left untouched. The returned length has to match the expected string.
TEST(Str, Utf8FixTruncation)
{
	// Mutable inputs; the function edits them in place.
	char aaBuf[][32] = {
		"",
		"\xff",
		"abc",
		"abc\xff",
		"blub\xffxyz",
		"привет Наташа\xff",
		"до свидания\xffОлег",
	};
	// Expected contents after the fix, index-aligned with aaBuf.
	const char *apExpected[] = {
		"",
		"",
		"abc",
		"abc",
		"blub\xffxyz",
		"привет Наташа",
		"до свидания\xffОлег",
	};
	const unsigned NumCases = sizeof(aaBuf) / sizeof(aaBuf[0]);
	for(unsigned Case = 0; Case != NumCases; ++Case)
	{
		char *pBuf = aaBuf[Case];
		const char *pExpected = apExpected[Case];
		const int NewLength = str_utf8_fix_truncation(pBuf);
		EXPECT_EQ(NewLength, str_length(pExpected));
		EXPECT_STREQ(pBuf, pExpected);
	}
}
TEST(Str, Startswith)
{
EXPECT_TRUE(str_startswith("abcdef", "abc"));
@ -204,6 +231,32 @@ TEST(Str, StrFormat)
EXPECT_STREQ(aBuf, "99:");
}
// Formats the same string into buffers of growing size and checks that the
// output only ever ends on a complete character: "DDNet" is 5 bytes and
// each of the three characters after it takes 3 bytes in UTF-8.
TEST(Str, StrFormatTruncate)
{
	const char *pStr = "DDNet最好了";
	char aBuf[64];

	struct SCase
	{
		int m_Size; // buffer size handed to str_format
		const char *m_pExpected; // expected buffer contents
	};
	const SCase aCases[] = {
		{7, "DDNet"},
		{8, "DDNet"},
		{9, "DDNet最"},
		{10, "DDNet最"},
		{11, "DDNet最"},
		{12, "DDNet最好"},
		{13, "DDNet最好"},
		{14, "DDNet最好"},
		{15, "DDNet最好了"},
		{16, "DDNet最好了"},
	};
	for(unsigned i = 0; i < sizeof(aCases) / sizeof(aCases[0]); i++)
	{
		str_format(aBuf, aCases[i].m_Size, "%s", pStr);
		EXPECT_STREQ(aBuf, aCases[i].m_pExpected);
	}
}
TEST(Str, StrCopyNum)
{
const char *foo = "Foobaré";
@ -229,29 +282,29 @@ TEST(Str, StrCopyNum)
EXPECT_STREQ(aBuf3, "Foobaré");
}
// Copies the same string into buffers of growing size and checks that
// str_copy never leaves a partial character at the end: "DDNet" is 5 bytes
// and each of the three characters after it takes 3 bytes in UTF-8.
TEST(Str, StrCopy)
{
	const char *pStr = "DDNet最好了";
	char aBuf[64];
	str_copy(aBuf, pStr, 7);
	EXPECT_STREQ(aBuf, "DDNet");
	str_copy(aBuf, pStr, 8);
	EXPECT_STREQ(aBuf, "DDNet");
	str_copy(aBuf, pStr, 9);
	EXPECT_STREQ(aBuf, "DDNet最");
	str_copy(aBuf, pStr, 10);
	EXPECT_STREQ(aBuf, "DDNet最");
	str_copy(aBuf, pStr, 11);
	EXPECT_STREQ(aBuf, "DDNet最");
	str_copy(aBuf, pStr, 12);
	EXPECT_STREQ(aBuf, "DDNet最好");
	str_copy(aBuf, pStr, 13);
	EXPECT_STREQ(aBuf, "DDNet最好");
	str_copy(aBuf, pStr, 14);
	EXPECT_STREQ(aBuf, "DDNet最好");
	str_copy(aBuf, pStr, 15);
	EXPECT_STREQ(aBuf, "DDNet最好了");
	str_copy(aBuf, pStr, 16);
	EXPECT_STREQ(aBuf, "DDNet最好了");
}