Merge pull request #8126 from Robyt3/Engine-LineReader-UTF8-Check

Ensure line reader only returns lines which are valid UTF-8, refactoring
This commit is contained in:
archimede67 2024-03-18 21:19:37 +00:00 committed by GitHub
commit 95402b9559
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 35 additions and 27 deletions

View file

@ -210,24 +210,6 @@ void SStringConfigVariable::CommandCallback(IConsole::IResult *pResult, void *pU
return;
const char *pString = pResult->GetString(0);
if(!str_utf8_check(pString))
{
char aTemp[4];
size_t Length = 0;
while(*pString)
{
size_t Size = str_utf8_encode(aTemp, static_cast<unsigned char>(*pString++));
if(Length + Size < pData->m_MaxSize)
{
mem_copy(pData->m_pStr + Length, aTemp, Size);
Length += Size;
}
else
break;
}
pData->m_pStr[Length] = '\0';
}
else
str_copy(pData->m_pStr, pString, pData->m_MaxSize);
if(pResult->m_ClientId != IConsole::CLIENT_ID_GAME)

View file

@ -24,7 +24,6 @@ char *CLineReader::Get()
// fetch more
// move the remaining part to the front
unsigned Read;
unsigned Left = m_BufferSize - LineStart;
if(LineStart > m_BufferSize)
@ -34,7 +33,7 @@ char *CLineReader::Get()
m_BufferPos = Left;
// fill the buffer
Read = io_read(m_File, &m_aBuffer[m_BufferPos], m_BufferMaxSize - m_BufferPos);
unsigned Read = io_read(m_File, &m_aBuffer[m_BufferPos], m_BufferMaxSize - m_BufferPos);
m_BufferSize = Left + Read;
LineStart = 0;
@ -42,13 +41,18 @@ char *CLineReader::Get()
{
if(Left)
{
m_aBuffer[Left] = 0; // return the last line
m_aBuffer[Left] = '\0'; // return the last line
m_BufferPos = Left;
m_BufferSize = Left;
if(!str_utf8_check(m_aBuffer))
{
LineStart = m_BufferPos;
CRLFBreak = false;
continue; // skip lines containing invalid UTF-8
}
return m_aBuffer;
}
else
return 0x0; // we are done!
return nullptr; // we are done!
}
}
else
@ -66,15 +70,27 @@ char *CLineReader::Get()
continue;
}
else if(m_aBuffer[m_BufferPos + 1] == '\n')
m_aBuffer[m_BufferPos++] = 0;
m_aBuffer[m_BufferPos++] = '\0';
}
m_aBuffer[m_BufferPos++] = '\0';
if(!str_utf8_check(&m_aBuffer[LineStart]))
{
LineStart = m_BufferPos;
CRLFBreak = false;
continue; // skip lines containing invalid UTF-8
}
m_aBuffer[m_BufferPos++] = 0;
return &m_aBuffer[LineStart];
}
else if(CRLFBreak)
{
if(m_aBuffer[m_BufferPos] == '\n')
m_aBuffer[m_BufferPos++] = 0;
m_aBuffer[m_BufferPos++] = '\0';
if(!str_utf8_check(&m_aBuffer[LineStart]))
{
LineStart = m_BufferPos;
CRLFBreak = false;
continue; // skip lines containing invalid UTF-8
}
return &m_aBuffer[LineStart];
}
else

View file

@ -36,3 +36,13 @@ TEST(LineReader, CRLFNewline)
{
TestFileLineReader("foo\r\nbar\r\nbaz", true, {"foo", "bar", "baz"});
}
// Checks the line reader's handling of lines that are not valid UTF-8.
// Each case hands TestFileLineReader an input string, a flag (false in every
// case here; its meaning is defined by the helper, which is not visible in
// this hunk) and a braced list of strings -- presumably the file contents
// and the exact sequence of lines the reader is expected to return.
TEST(LineReader, Invalid)
{
// Lines containing invalid UTF-8 are skipped. The byte 0xFF can never occur
// in well-formed UTF-8, so appending "\xff" makes a line invalid.
// Every line is invalid: nothing is expected.
TestFileLineReader("foo\xff\nbar\xff\nbaz\xff\n", false, {});
// Only the first line is invalid.
TestFileLineReader("foo\xff\nbar\nbaz\n", false, {"bar", "baz"});
// Only the middle line is invalid.
TestFileLineReader("foo\nbar\xff\nbaz\n", false, {"foo", "baz"});
// Only the last line is invalid.
TestFileLineReader("foo\nbar\nbaz\xff\n", false, {"foo", "bar"});
// Invalid lines (including two consecutive ones) interleaved with valid ones:
// only the valid lines are expected, in their original order.
TestFileLineReader("foo\nbar1\xff\nbar2\xff\nfoobar\nbar3\xff\nbaz\n", false, {"foo", "foobar", "baz"});
}