mirror of
https://github.com/ddnet/ddnet.git
synced 2024-11-10 10:08:18 +00:00
Merge #4481
4481: Don't create cut off UTF-8 sequences on string manipulation r=def- a=heinrich5991

CC #4463
CC #4465

## Checklist

- [ ] Tested the change ingame
- [ ] Provided screenshots if it is a visual change
- [ ] Tested in combination with possibly related configuration options
- [x] Written a unit test if it works standalone, system.c especially
- [ ] Considered possible null pointers and out of bounds array indexing
- [ ] Changed no physics that affect existing maps
- [ ] Tested the change with [ASan+UBSan or valgrind's memcheck](https://github.com/ddnet/ddnet/#using-addresssanitizer--undefinedbehavioursanitizer-or-valgrinds-memcheck) (optional)

Co-authored-by: heinrich5991 <heinrich5991@gmail.com>
This commit is contained in:
commit
ec0d1172eb
|
@ -2545,23 +2545,28 @@ void str_append(char *dst, const char *src, int dst_size)
|
|||
}
|
||||
|
||||
dst[dst_size - 1] = 0; /* assure null termination */
|
||||
str_utf8_fix_truncation(dst);
|
||||
}
|
||||
|
||||
/*
	Function: str_copy
		Copies src into dst, using at most dst_size bytes of dst
		including the terminating zero.

	Parameters:
		dst - Buffer that receives the string.
		src - Zero-terminated string to copy.
		dst_size - Size of the buffer dst in bytes.

	Remarks:
		- Does nothing when dst_size is not positive.
		- Otherwise dst is always zero-terminated.
		- After the copy the result is handed to str_utf8_fix_truncation,
		  so a UTF-8 character cut in half by the size limit is dropped.
*/
void str_copy(char *dst, const char *src, int dst_size)
{
	/* With a non-positive size, dst_size - 1 would convert to a huge
	 * unsigned count for strncpy and the terminator below would be
	 * written in front of the buffer. */
	if(dst_size <= 0)
		return;

	strncpy(dst, src, dst_size - 1);
	dst[dst_size - 1] = 0; /* assure null termination */
	str_utf8_fix_truncation(dst);
}
|
||||
|
||||
void str_utf8_truncate(char *dst, int dst_size, const char *src, int truncation_len)
|
||||
{
|
||||
int size = -1;
|
||||
int cursor = 0;
|
||||
const char *cursor = src;
|
||||
int pos = 0;
|
||||
while(pos <= truncation_len && cursor < dst_size && size != cursor)
|
||||
while(pos <= truncation_len && cursor - src < dst_size && size != cursor - src)
|
||||
{
|
||||
size = cursor;
|
||||
cursor = str_utf8_forward(src, cursor);
|
||||
size = cursor - src;
|
||||
if(str_utf8_decode(&cursor) == 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
pos++;
|
||||
}
|
||||
str_copy(dst, src, size + 1);
|
||||
|
@ -2584,33 +2589,22 @@ int str_length(const char *str)
|
|||
|
||||
int str_format(char *buffer, int buffer_size, const char *format, ...)
|
||||
{
|
||||
int ret;
|
||||
#if defined(CONF_FAMILY_WINDOWS)
|
||||
va_list ap;
|
||||
va_start(ap, format);
|
||||
ret = _vsnprintf(buffer, buffer_size, format, ap);
|
||||
_vsnprintf(buffer, buffer_size, format, ap);
|
||||
va_end(ap);
|
||||
|
||||
buffer[buffer_size - 1] = 0; /* assure null termination */
|
||||
|
||||
/* _vsnprintf is documented to return negative values on truncation, but
|
||||
* in practice we didn't see that. let's handle it anyway just in case. */
|
||||
if(ret < 0)
|
||||
ret = buffer_size - 1;
|
||||
#else
|
||||
va_list ap;
|
||||
va_start(ap, format);
|
||||
ret = vsnprintf(buffer, buffer_size, format, ap);
|
||||
vsnprintf(buffer, buffer_size, format, ap);
|
||||
va_end(ap);
|
||||
|
||||
/* null termination is assured by definition of vsnprintf */
|
||||
#endif
|
||||
|
||||
/* a return value of buffer_size or more indicates truncated output */
|
||||
if(ret >= buffer_size)
|
||||
ret = buffer_size - 1;
|
||||
|
||||
return ret;
|
||||
return str_utf8_fix_truncation(buffer);
|
||||
}
|
||||
|
||||
char *str_trim_words(char *str, int words)
|
||||
|
@ -3276,41 +3270,31 @@ int str_utf8_rewind(const char *str, int cursor)
|
|||
return cursor;
|
||||
}
|
||||
|
||||
/*
	Removes a trailing character from str when that character does not
	decode, and returns the resulting string length in bytes.
	An empty string is returned unchanged with length 0.
*/
int str_utf8_fix_truncation(char *str)
{
	const int length = str_length(str);
	if(length <= 0)
		return length;

	/* Step back from the end to the index str_utf8_rewind reports and
	 * try to decode from there. */
	const int tail_index = str_utf8_rewind(str, length);
	const char *tail = str + tail_index;
	if(str_utf8_decode(&tail) != -1)
		return length;

	/* The decoder reported -1 for the tail: cut the string in front of it. */
	str[tail_index] = 0;
	return tail_index;
}
|
||||
|
||||
int str_utf8_forward(const char *str, int cursor)
|
||||
{
|
||||
const char *buf = str + cursor;
|
||||
if(!buf[0])
|
||||
const char *ptr = str + cursor;
|
||||
if(str_utf8_decode(&ptr) == 0)
|
||||
{
|
||||
return cursor;
|
||||
|
||||
if((*buf & 0x80) == 0x0) /* 0xxxxxxx */
|
||||
return cursor + 1;
|
||||
else if((*buf & 0xE0) == 0xC0) /* 110xxxxx */
|
||||
{
|
||||
if(!buf[1])
|
||||
return cursor + 1;
|
||||
return cursor + 2;
|
||||
}
|
||||
else if((*buf & 0xF0) == 0xE0) /* 1110xxxx */
|
||||
{
|
||||
if(!buf[1])
|
||||
return cursor + 1;
|
||||
if(!buf[2])
|
||||
return cursor + 2;
|
||||
return cursor + 3;
|
||||
}
|
||||
else if((*buf & 0xF8) == 0xF0) /* 11110xxx */
|
||||
{
|
||||
if(!buf[1])
|
||||
return cursor + 1;
|
||||
if(!buf[2])
|
||||
return cursor + 2;
|
||||
if(!buf[3])
|
||||
return cursor + 3;
|
||||
return cursor + 4;
|
||||
}
|
||||
|
||||
/* invalid */
|
||||
return cursor + 1;
|
||||
return ptr - str;
|
||||
}
|
||||
|
||||
int str_utf8_encode(char *ptr, int chr)
|
||||
|
@ -3464,21 +3448,22 @@ int str_utf8_check(const char *str)
|
|||
return 1;
|
||||
}
|
||||
|
||||
/*
	Copies src into dst by delegating to str_utf8_truncate; dst_size is
	passed both as the buffer size and as the truncation length.
*/
void str_utf8_copy(char *dst, const char *src, int dst_size)
{
	const int truncation_len = dst_size;
	str_utf8_truncate(dst, dst_size, src, truncation_len);
}
|
||||
|
||||
void str_utf8_stats(const char *str, int max_size, int max_count, int *size, int *count)
|
||||
{
|
||||
const char *cursor = str;
|
||||
*size = 0;
|
||||
*count = 0;
|
||||
while(*size < max_size && *count < max_count)
|
||||
{
|
||||
int new_size = str_utf8_forward(str, *size);
|
||||
if(new_size == *size || new_size >= max_size)
|
||||
if(str_utf8_decode(&cursor) == 0)
|
||||
{
|
||||
break;
|
||||
*size = new_size;
|
||||
}
|
||||
if(cursor - str >= max_size)
|
||||
{
|
||||
break;
|
||||
}
|
||||
*size = cursor - str;
|
||||
++(*count);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2002,6 +2002,19 @@ void str_utf8_trim_right(char *str);
|
|||
*/
|
||||
int str_utf8_rewind(const char *str, int cursor);
|
||||
|
||||
/*
|
||||
Function: str_utf8_fix_truncation
|
||||
Fixes truncation of a Unicode character at the end of a UTF-8
|
||||
string.
|
||||
|
||||
Returns:
|
||||
The new string length.
|
||||
|
||||
Parameters:
|
||||
str - utf8 string
|
||||
*/
|
||||
int str_utf8_fix_truncation(char *str);
|
||||
|
||||
/*
|
||||
Function: str_utf8_forward
|
||||
Moves a cursor forwards in an utf8 string
|
||||
|
@ -2080,22 +2093,6 @@ int str_utf16le_encode(char *ptr, int chr);
|
|||
*/
|
||||
int str_utf8_check(const char *str);
|
||||
|
||||
/*
|
||||
Function: str_utf8_copy
|
||||
Copies a utf8 string to a buffer.
|
||||
|
||||
Parameters:
|
||||
dst - Pointer to a buffer that shall receive the string.
|
||||
src - utf8 string to be copied.
|
||||
dst_size - Size of the buffer dst.
|
||||
|
||||
Remarks:
|
||||
- The strings are treated as zero-terminated strings.
|
||||
- Guarantees that dst string will contain zero-termination.
|
||||
- Guarantees that dst always contains a valid utf8 string.
|
||||
*/
|
||||
void str_utf8_copy(char *dst, const char *src, int dst_size);
|
||||
|
||||
/*
|
||||
Function: str_utf8_stats
|
||||
Determines the byte size and utf8 character count of a utf8 string.
|
||||
|
|
|
@ -26,7 +26,7 @@ void CInput::AddEvent(char *pText, int Key, int Flags)
|
|||
if(!pText)
|
||||
m_aInputEvents[m_NumEvents].m_aText[0] = 0;
|
||||
else
|
||||
str_utf8_copy(m_aInputEvents[m_NumEvents].m_aText, pText, sizeof(m_aInputEvents[m_NumEvents].m_aText));
|
||||
str_copy(m_aInputEvents[m_NumEvents].m_aText, pText, sizeof(m_aInputEvents[m_NumEvents].m_aText));
|
||||
m_aInputEvents[m_NumEvents].m_InputCount = m_InputCounter;
|
||||
m_NumEvents++;
|
||||
}
|
||||
|
|
|
@ -22,7 +22,7 @@ public:
|
|||
m_pSteamFriends = SteamAPI_SteamFriends_v017();
|
||||
|
||||
ReadLaunchCommandLine();
|
||||
str_utf8_copy(m_aPlayerName, SteamAPI_ISteamFriends_GetPersonaName(m_pSteamFriends), sizeof(m_aPlayerName));
|
||||
str_copy(m_aPlayerName, SteamAPI_ISteamFriends_GetPersonaName(m_pSteamFriends), sizeof(m_aPlayerName));
|
||||
}
|
||||
~CSteam()
|
||||
{
|
||||
|
|
|
@ -263,15 +263,14 @@ class CTextRender : public IEngineTextRender
|
|||
|
||||
int WordLength(const char *pText)
|
||||
{
|
||||
int Length = 0;
|
||||
const char *pCursor = pText;
|
||||
while(1)
|
||||
{
|
||||
const char *pCursor = (pText + Length);
|
||||
if(*pCursor == 0)
|
||||
return Length;
|
||||
return pCursor - pText;
|
||||
if(*pCursor == '\n' || *pCursor == '\t' || *pCursor == ' ')
|
||||
return Length + 1;
|
||||
Length = str_utf8_forward(pText, Length);
|
||||
return pCursor - pText + 1;
|
||||
str_utf8_decode(&pCursor);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -400,9 +400,7 @@ bool CServer::SetClientNameImpl(int ClientID, const char *pNameRequest, bool Set
|
|||
// auto rename
|
||||
for(int i = 1;; i++)
|
||||
{
|
||||
char aNameTryFull[MAX_NAME_LENGTH + 4];
|
||||
str_format(aNameTryFull, sizeof(aNameTryFull), "(%d)%s", i, aTrimmedName);
|
||||
str_utf8_copy(aNameTry, aNameTryFull, sizeof(aNameTry));
|
||||
str_format(aNameTry, sizeof(aNameTry), "(%d)%s", i, aTrimmedName);
|
||||
if(IsClientNameAvailable(ClientID, aNameTry))
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -294,7 +294,7 @@ int CNetConnection::Feed(CNetPacketConstruct *pPacket, NETADDR *pAddr, SECURITY_
|
|||
if(pPacket->m_DataSize > 1)
|
||||
{
|
||||
// make sure to sanitize the error string from the other party
|
||||
str_utf8_copy(aStr, (char *)&pPacket->m_aChunkData[1], minimum(pPacket->m_DataSize, (int)sizeof(aStr)));
|
||||
str_copy(aStr, (char *)&pPacket->m_aChunkData[1], minimum(pPacket->m_DataSize, (int)sizeof(aStr)));
|
||||
str_sanitize_cc(aStr);
|
||||
}
|
||||
|
||||
|
|
|
@ -192,7 +192,7 @@ bool CChat::OnInput(IInput::CEvent Event)
|
|||
if(Text[i] == '\n')
|
||||
{
|
||||
int max = minimum(i - Begin + 1, (int)sizeof(aLine));
|
||||
str_utf8_copy(aLine, Text + Begin, max);
|
||||
str_copy(aLine, Text + Begin, max);
|
||||
Begin = i + 1;
|
||||
SayChat(aLine);
|
||||
while(Text[i] == '\n')
|
||||
|
@ -200,7 +200,7 @@ bool CChat::OnInput(IInput::CEvent Event)
|
|||
}
|
||||
}
|
||||
int max = minimum(i - Begin + 1, (int)sizeof(aLine));
|
||||
str_utf8_copy(aLine, Text + Begin, max);
|
||||
str_copy(aLine, Text + Begin, max);
|
||||
m_Input.Append(aLine);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -206,7 +206,7 @@ void CScoreboard::RenderScoreboard(float x, float y, float w, int Team, const ch
|
|||
pTitle = Localize("Game over");
|
||||
else
|
||||
{
|
||||
str_utf8_copy(aBuf, Client()->GetCurrentMap(), sizeof(aBuf));
|
||||
str_copy(aBuf, Client()->GetCurrentMap(), sizeof(aBuf));
|
||||
while(TextRender()->TextWidth(0, TitleFontsize, aBuf, -1, -1.0f) > TitleWidth)
|
||||
aBuf[str_length(aBuf) - 1] = '\0';
|
||||
if(str_comp(aBuf, Client()->GetCurrentMap()))
|
||||
|
|
|
@ -95,6 +95,33 @@ TEST(Str, Utf8ToLower)
|
|||
EXPECT_TRUE(str_utf8_find_nocase(str, "z") == NULL);
|
||||
}
|
||||
|
||||
TEST(Str, Utf8FixTruncation)
{
	// Each case pairs a writable input buffer with the contents expected
	// after str_utf8_fix_truncation has run on it.
	struct
	{
		char m_aInput[32];
		const char *m_pExpected;
	} aCases[] = {
		{"", ""},
		{"\xff", ""},
		{"abc", "abc"},
		{"abc\xff", "abc"},
		{"blub\xffxyz", "blub\xffxyz"},
		{"привет Наташа\xff", "привет Наташа"},
		{"до свидания\xffОлег", "до свидания\xffОлег"},
	};
	const unsigned NumCases = sizeof(aCases) / sizeof(aCases[0]);
	for(unsigned Case = 0; Case < NumCases; Case++)
	{
		char *pInput = aCases[Case].m_aInput;
		const char *pExpected = aCases[Case].m_pExpected;
		// The return value must equal the length of the expected string.
		EXPECT_EQ(str_utf8_fix_truncation(pInput), str_length(pExpected));
		EXPECT_STREQ(pInput, pExpected);
	}
}
|
||||
|
||||
TEST(Str, Startswith)
|
||||
{
|
||||
EXPECT_TRUE(str_startswith("abcdef", "abc"));
|
||||
|
@ -204,6 +231,32 @@ TEST(Str, StrFormat)
|
|||
EXPECT_STREQ(aBuf, "99:");
|
||||
}
|
||||
|
||||
TEST(Str, StrFormatTruncate)
{
	// Formats the same string into buffers of growing size and checks
	// the output against the expected string for each size.
	const char *pInput = "DDNet最好了";
	const struct
	{
		int m_BufferSize;
		const char *m_pExpected;
	} aCases[] = {
		{7, "DDNet"},
		{8, "DDNet"},
		{9, "DDNet最"},
		{10, "DDNet最"},
		{11, "DDNet最"},
		{12, "DDNet最好"},
		{13, "DDNet最好"},
		{14, "DDNet最好"},
		{15, "DDNet最好了"},
		{16, "DDNet最好了"},
	};
	char aBuf[64];
	const unsigned NumCases = sizeof(aCases) / sizeof(aCases[0]);
	for(unsigned Case = 0; Case < NumCases; Case++)
	{
		str_format(aBuf, aCases[Case].m_BufferSize, "%s", pInput);
		EXPECT_STREQ(aBuf, aCases[Case].m_pExpected);
	}
}
|
||||
|
||||
TEST(Str, StrCopyNum)
|
||||
{
|
||||
const char *foo = "Foobaré";
|
||||
|
@ -229,29 +282,29 @@ TEST(Str, StrCopyNum)
|
|||
EXPECT_STREQ(aBuf3, "Foobaré");
|
||||
}
|
||||
|
||||
TEST(Str, StrCopyUtf8)
|
||||
TEST(Str, StrCopy)
|
||||
{
|
||||
const char *foo = "DDNet最好了";
|
||||
const char *pStr = "DDNet最好了";
|
||||
char aBuf[64];
|
||||
str_utf8_copy(aBuf, foo, 7);
|
||||
str_copy(aBuf, pStr, 7);
|
||||
EXPECT_STREQ(aBuf, "DDNet");
|
||||
str_utf8_copy(aBuf, foo, 8);
|
||||
str_copy(aBuf, pStr, 8);
|
||||
EXPECT_STREQ(aBuf, "DDNet");
|
||||
str_utf8_copy(aBuf, foo, 9);
|
||||
str_copy(aBuf, pStr, 9);
|
||||
EXPECT_STREQ(aBuf, "DDNet最");
|
||||
str_utf8_copy(aBuf, foo, 10);
|
||||
str_copy(aBuf, pStr, 10);
|
||||
EXPECT_STREQ(aBuf, "DDNet最");
|
||||
str_utf8_copy(aBuf, foo, 11);
|
||||
str_copy(aBuf, pStr, 11);
|
||||
EXPECT_STREQ(aBuf, "DDNet最");
|
||||
str_utf8_copy(aBuf, foo, 12);
|
||||
str_copy(aBuf, pStr, 12);
|
||||
EXPECT_STREQ(aBuf, "DDNet最好");
|
||||
str_utf8_copy(aBuf, foo, 13);
|
||||
str_copy(aBuf, pStr, 13);
|
||||
EXPECT_STREQ(aBuf, "DDNet最好");
|
||||
str_utf8_copy(aBuf, foo, 14);
|
||||
str_copy(aBuf, pStr, 14);
|
||||
EXPECT_STREQ(aBuf, "DDNet最好");
|
||||
str_utf8_copy(aBuf, foo, 15);
|
||||
str_copy(aBuf, pStr, 15);
|
||||
EXPECT_STREQ(aBuf, "DDNet最好了");
|
||||
str_utf8_copy(aBuf, foo, 16);
|
||||
str_copy(aBuf, pStr, 16);
|
||||
EXPECT_STREQ(aBuf, "DDNet最好了");
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue