Merge pull request #8126 from Robyt3/Engine-LineReader-UTF8-Check

Ensure line reader only returns lines which are valid UTF-8, refactoring
This commit is contained in:
archimede67 2024-03-18 21:19:37 +00:00 committed by GitHub
commit 95402b9559
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 35 additions and 27 deletions

View file

@ -210,24 +210,6 @@ void SStringConfigVariable::CommandCallback(IConsole::IResult *pResult, void *pU
return; return;
const char *pString = pResult->GetString(0); const char *pString = pResult->GetString(0);
if(!str_utf8_check(pString))
{
char aTemp[4];
size_t Length = 0;
while(*pString)
{
size_t Size = str_utf8_encode(aTemp, static_cast<unsigned char>(*pString++));
if(Length + Size < pData->m_MaxSize)
{
mem_copy(pData->m_pStr + Length, aTemp, Size);
Length += Size;
}
else
break;
}
pData->m_pStr[Length] = '\0';
}
else
str_copy(pData->m_pStr, pString, pData->m_MaxSize); str_copy(pData->m_pStr, pString, pData->m_MaxSize);
if(pResult->m_ClientId != IConsole::CLIENT_ID_GAME) if(pResult->m_ClientId != IConsole::CLIENT_ID_GAME)

View file

@ -24,7 +24,6 @@ char *CLineReader::Get()
// fetch more // fetch more
// move the remaining part to the front // move the remaining part to the front
unsigned Read;
unsigned Left = m_BufferSize - LineStart; unsigned Left = m_BufferSize - LineStart;
if(LineStart > m_BufferSize) if(LineStart > m_BufferSize)
@ -34,7 +33,7 @@ char *CLineReader::Get()
m_BufferPos = Left; m_BufferPos = Left;
// fill the buffer // fill the buffer
Read = io_read(m_File, &m_aBuffer[m_BufferPos], m_BufferMaxSize - m_BufferPos); unsigned Read = io_read(m_File, &m_aBuffer[m_BufferPos], m_BufferMaxSize - m_BufferPos);
m_BufferSize = Left + Read; m_BufferSize = Left + Read;
LineStart = 0; LineStart = 0;
@ -42,13 +41,18 @@ char *CLineReader::Get()
{ {
if(Left) if(Left)
{ {
m_aBuffer[Left] = 0; // return the last line m_aBuffer[Left] = '\0'; // return the last line
m_BufferPos = Left; m_BufferPos = Left;
m_BufferSize = Left; m_BufferSize = Left;
if(!str_utf8_check(m_aBuffer))
{
LineStart = m_BufferPos;
CRLFBreak = false;
continue; // skip lines containing invalid UTF-8
}
return m_aBuffer; return m_aBuffer;
} }
else return nullptr; // we are done!
return 0x0; // we are done!
} }
} }
else else
@ -66,15 +70,27 @@ char *CLineReader::Get()
continue; continue;
} }
else if(m_aBuffer[m_BufferPos + 1] == '\n') else if(m_aBuffer[m_BufferPos + 1] == '\n')
m_aBuffer[m_BufferPos++] = 0; m_aBuffer[m_BufferPos++] = '\0';
}
m_aBuffer[m_BufferPos++] = '\0';
if(!str_utf8_check(&m_aBuffer[LineStart]))
{
LineStart = m_BufferPos;
CRLFBreak = false;
continue; // skip lines containing invalid UTF-8
} }
m_aBuffer[m_BufferPos++] = 0;
return &m_aBuffer[LineStart]; return &m_aBuffer[LineStart];
} }
else if(CRLFBreak) else if(CRLFBreak)
{ {
if(m_aBuffer[m_BufferPos] == '\n') if(m_aBuffer[m_BufferPos] == '\n')
m_aBuffer[m_BufferPos++] = 0; m_aBuffer[m_BufferPos++] = '\0';
if(!str_utf8_check(&m_aBuffer[LineStart]))
{
LineStart = m_BufferPos;
CRLFBreak = false;
continue; // skip lines containing invalid UTF-8
}
return &m_aBuffer[LineStart]; return &m_aBuffer[LineStart];
} }
else else

View file

@ -36,3 +36,13 @@ TEST(LineReader, CRLFNewline)
{ {
TestFileLineReader("foo\r\nbar\r\nbaz", true, {"foo", "bar", "baz"}); TestFileLineReader("foo\r\nbar\r\nbaz", true, {"foo", "bar", "baz"});
} }
// Verifies that the line reader drops every line containing invalid UTF-8.
// The byte 0xFF can never occur in well-formed UTF-8, so any line carrying
// "\xff" must be absent from the expected list (the last argument).
TEST(LineReader, Invalid)
{
	// Every line is invalid: nothing is expected to be returned.
	TestFileLineReader(
		"foo\xff\nbar\xff\nbaz\xff\n",
		false,
		{});

	// Only the first line is invalid.
	TestFileLineReader(
		"foo\xff\nbar\nbaz\n",
		false,
		{"bar", "baz"});

	// Only the middle line is invalid.
	TestFileLineReader(
		"foo\nbar\xff\nbaz\n",
		false,
		{"foo", "baz"});

	// Only the last line is invalid.
	TestFileLineReader(
		"foo\nbar\nbaz\xff\n",
		false,
		{"foo", "bar"});

	// Invalid lines, including two consecutive ones, interleaved with valid lines.
	TestFileLineReader(
		"foo\nbar1\xff\nbar2\xff\nfoobar\nbar3\xff\nbaz\n",
		false,
		{"foo", "foobar", "baz"});
}