mirror of
https://github.com/ddnet/ddnet.git
synced 2024-11-10 01:58:19 +00:00
Add utility functions for converting UTF-8 bytes ↔ chars offsets
Add `str_utf8_offset_bytes_to_chars` and `str_utf8_offset_chars_to_bytes` functions to base system to convert between byte and UTF-8 character offsets in UTF-8 strings. Previously, this was separately implemented in the textrender and in the lineinput helper. These textrender functions are entirely replaced by the new functions: - `ITextRender::SelectionToUTF8OffSets` (by `str_utf8_offset_chars_to_bytes`) - `ITextRender::UTF8OffToDecodedOff` (by `str_utf8_offset_bytes_to_chars`) - `ITextRender::DecodedOffToUTF8Off` (by `str_utf8_offset_chars_to_bytes`) These lineinput helper functions are reimplemented using the new functions: - `CLineInput::OffsetFromActualToDisplay` (uses `str_utf8_offset_bytes_to_chars`) - `CLineInput::OffsetFromDisplayToActual` (uses `str_utf8_offset_chars_to_bytes`)
This commit is contained in:
parent
107ac6705d
commit
af3870a64d
|
@ -3953,6 +3953,34 @@ void str_utf8_stats(const char *str, size_t max_size, size_t max_count, size_t *
|
|||
}
|
||||
}
|
||||
|
||||
size_t str_utf8_offset_bytes_to_chars(const char *str, size_t byte_offset)
|
||||
{
|
||||
size_t char_offset = 0;
|
||||
size_t current_offset = 0;
|
||||
while(current_offset < byte_offset)
|
||||
{
|
||||
const size_t prev_byte_offset = current_offset;
|
||||
current_offset = str_utf8_forward(str, current_offset);
|
||||
if(current_offset == prev_byte_offset)
|
||||
break;
|
||||
char_offset++;
|
||||
}
|
||||
return char_offset;
|
||||
}
|
||||
|
||||
size_t str_utf8_offset_chars_to_bytes(const char *str, size_t char_offset)
|
||||
{
|
||||
size_t byte_offset = 0;
|
||||
for(size_t i = 0; i < char_offset; i++)
|
||||
{
|
||||
const size_t prev_byte_offset = byte_offset;
|
||||
byte_offset = str_utf8_forward(str, byte_offset);
|
||||
if(byte_offset == prev_byte_offset)
|
||||
break;
|
||||
}
|
||||
return byte_offset;
|
||||
}
|
||||
|
||||
unsigned str_quickhash(const char *str)
|
||||
{
|
||||
unsigned hash = 5381;
|
||||
|
|
|
@ -2491,6 +2491,32 @@ int str_utf8_check(const char *str);
|
|||
*/
|
||||
void str_utf8_stats(const char *str, size_t max_size, size_t max_count, size_t *size, size_t *count);
|
||||
|
||||
/**
|
||||
* Converts a byte offset of a utf8 string to the utf8 character offset.
|
||||
*
|
||||
* @param str Pointer to the string.
|
||||
* @param byte_offset Offset in bytes.
|
||||
*
|
||||
* @return Offset in utf8 characters. Clamped to the maximum length of the string in utf8 characters.
|
||||
*
|
||||
* @remark The string is treated as a zero-terminated utf8 string.
|
||||
* @remark It's the user's responsibility to make sure the bounds are aligned.
|
||||
*/
|
||||
size_t str_utf8_offset_bytes_to_chars(const char *str, size_t byte_offset);
|
||||
|
||||
/**
|
||||
* Converts a utf8 character offset of a utf8 string to the byte offset.
|
||||
*
|
||||
* @param str Pointer to the string.
|
||||
* @param char_offset Offset in utf8 characters.
|
||||
*
|
||||
* @return Offset in bytes. Clamped to the maximum length of the string in bytes.
|
||||
*
|
||||
* @remark The string is treated as a zero-terminated utf8 string.
|
||||
* @remark It's the user's responsibility to make sure the bounds are aligned.
|
||||
*/
|
||||
size_t str_utf8_offset_chars_to_bytes(const char *str, size_t char_offset);
|
||||
|
||||
/*
|
||||
Function: str_next_token
|
||||
Writes the next token after str into buf, returns the rest of the string.
|
||||
|
|
|
@ -2218,103 +2218,6 @@ public:
|
|||
return WidthOfText;
|
||||
}
|
||||
|
||||
bool SelectionToUTF8OffSets(const char *pText, int SelStart, int SelEnd, int &OffUTF8Start, int &OffUTF8End) const override
|
||||
{
|
||||
const char *pIt = pText;
|
||||
|
||||
OffUTF8Start = -1;
|
||||
OffUTF8End = -1;
|
||||
|
||||
int CharCount = 0;
|
||||
while(*pIt)
|
||||
{
|
||||
const char *pTmp = pIt;
|
||||
int Character = str_utf8_decode(&pTmp);
|
||||
if(Character == -1)
|
||||
return false;
|
||||
|
||||
if(CharCount == SelStart)
|
||||
OffUTF8Start = (int)((std::intptr_t)(pIt - pText));
|
||||
|
||||
if(CharCount == SelEnd)
|
||||
OffUTF8End = (int)((std::intptr_t)(pIt - pText));
|
||||
|
||||
pIt = pTmp;
|
||||
++CharCount;
|
||||
}
|
||||
|
||||
if(CharCount == SelStart)
|
||||
OffUTF8Start = (int)((std::intptr_t)(pIt - pText));
|
||||
|
||||
if(CharCount == SelEnd)
|
||||
OffUTF8End = (int)((std::intptr_t)(pIt - pText));
|
||||
|
||||
return OffUTF8Start != -1 && OffUTF8End != -1;
|
||||
}
|
||||
|
||||
bool UTF8OffToDecodedOff(const char *pText, int UTF8Off, int &DecodedOff) const override
|
||||
{
|
||||
const char *pIt = pText;
|
||||
|
||||
DecodedOff = -1;
|
||||
|
||||
int CharCount = 0;
|
||||
while(*pIt)
|
||||
{
|
||||
if((int)(intptr_t)(pIt - pText) == UTF8Off)
|
||||
{
|
||||
DecodedOff = CharCount;
|
||||
return true;
|
||||
}
|
||||
|
||||
const char *pTmp = pIt;
|
||||
int Character = str_utf8_decode(&pTmp);
|
||||
if(Character == -1)
|
||||
return false;
|
||||
|
||||
pIt = pTmp;
|
||||
++CharCount;
|
||||
}
|
||||
|
||||
if((int)(std::intptr_t)(pIt - pText) == UTF8Off)
|
||||
{
|
||||
DecodedOff = CharCount;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool DecodedOffToUTF8Off(const char *pText, int DecodedOff, int &UTF8Off) const override
|
||||
{
|
||||
const char *pIt = pText;
|
||||
|
||||
UTF8Off = -1;
|
||||
|
||||
int CharCount = 0;
|
||||
while(*pIt)
|
||||
{
|
||||
const char *pTmp = pIt;
|
||||
int Character = str_utf8_decode(&pTmp);
|
||||
if(Character == -1)
|
||||
return false;
|
||||
|
||||
if(CharCount == DecodedOff)
|
||||
{
|
||||
UTF8Off = (int)((std::intptr_t)(pIt - pText));
|
||||
return true;
|
||||
}
|
||||
|
||||
pIt = pTmp;
|
||||
++CharCount;
|
||||
}
|
||||
|
||||
if(CharCount == DecodedOff)
|
||||
UTF8Off = (int)((std::intptr_t)(pIt - pText));
|
||||
|
||||
return UTF8Off != -1;
|
||||
}
|
||||
|
||||
void OnPreWindowResize() override
|
||||
{
|
||||
for(auto *pTextContainer : m_vpTextContainers)
|
||||
|
|
|
@ -286,10 +286,6 @@ public:
|
|||
virtual float GetGlyphOffsetX(int FontSize, char TextCharacter) const = 0;
|
||||
virtual int CalculateTextWidth(const char *pText, int TextLength, int FontWidth, int FontSize) const = 0;
|
||||
|
||||
virtual bool SelectionToUTF8OffSets(const char *pText, int SelStart, int SelEnd, int &OffUTF8Start, int &OffUTF8End) const = 0;
|
||||
virtual bool UTF8OffToDecodedOff(const char *pText, int UTF8Off, int &DecodedOff) const = 0;
|
||||
virtual bool DecodedOffToUTF8Off(const char *pText, int DecodedOff, int &UTF8Off) const = 0;
|
||||
|
||||
// old foolish interface
|
||||
virtual void TextColor(float r, float g, float b, float a) = 0;
|
||||
virtual void TextColor(ColorRGBA rgb) = 0;
|
||||
|
|
|
@ -765,12 +765,9 @@ void CGameConsole::OnRender()
|
|||
if(m_WantsSelectionCopy)
|
||||
{
|
||||
const bool HasNewLine = !SelectionString.empty();
|
||||
int OffUTF8Start = 0;
|
||||
int OffUTF8End = 0;
|
||||
if(TextRender()->SelectionToUTF8OffSets(pEntry->m_aText, pConsole->m_CurSelStart, pConsole->m_CurSelEnd, OffUTF8Start, OffUTF8End))
|
||||
{
|
||||
SelectionString.insert(0, (std::string(&pEntry->m_aText[OffUTF8Start], OffUTF8End - OffUTF8Start) + (HasNewLine ? "\n" : "")));
|
||||
}
|
||||
const size_t OffUTF8Start = str_utf8_offset_chars_to_bytes(pEntry->m_aText, pConsole->m_CurSelStart);
|
||||
const size_t OffUTF8End = str_utf8_offset_chars_to_bytes(pEntry->m_aText, pConsole->m_CurSelEnd);
|
||||
SelectionString.insert(0, (std::string(&pEntry->m_aText[OffUTF8Start], OffUTF8End - OffUTF8Start) + (HasNewLine ? "\n" : "")));
|
||||
}
|
||||
pConsole->m_HasSelection = true;
|
||||
}
|
||||
|
|
|
@ -170,32 +170,14 @@ size_t CLineInput::OffsetFromActualToDisplay(size_t ActualOffset) const
|
|||
{
|
||||
if(!IsHidden())
|
||||
return ActualOffset;
|
||||
size_t DisplayOffset = 0;
|
||||
size_t CurrentOffset = 0;
|
||||
while(CurrentOffset < ActualOffset)
|
||||
{
|
||||
const size_t PrevOffset = CurrentOffset;
|
||||
CurrentOffset = str_utf8_forward(m_pStr, CurrentOffset);
|
||||
if(CurrentOffset == PrevOffset)
|
||||
break;
|
||||
DisplayOffset++;
|
||||
}
|
||||
return DisplayOffset;
|
||||
return str_utf8_offset_bytes_to_chars(m_pStr, ActualOffset);
|
||||
}
|
||||
|
||||
size_t CLineInput::OffsetFromDisplayToActual(size_t DisplayOffset) const
|
||||
{
|
||||
if(!IsHidden())
|
||||
return DisplayOffset;
|
||||
size_t ActualOffset = 0;
|
||||
for(size_t i = 0; i < DisplayOffset; i++)
|
||||
{
|
||||
const size_t PrevOffset = ActualOffset;
|
||||
ActualOffset = str_utf8_forward(m_pStr, ActualOffset);
|
||||
if(ActualOffset == PrevOffset)
|
||||
break;
|
||||
}
|
||||
return ActualOffset;
|
||||
return str_utf8_offset_chars_to_bytes(m_pStr, DisplayOffset);
|
||||
}
|
||||
|
||||
bool CLineInput::ProcessInput(const IInput::CEvent &Event)
|
||||
|
@ -462,11 +444,11 @@ STextBoundingBox CLineInput::Render(const CUIRect *pRect, float FontSize, int Al
|
|||
m_LastCompositionCursorPos = CaretOffset;
|
||||
const size_t DisplayCompositionEnd = DisplayCursorOffset + Input()->GetCompositionLength();
|
||||
Cursor.m_CursorMode = TEXT_CURSOR_CURSOR_MODE_SET;
|
||||
TextRender()->UTF8OffToDecodedOff(pDisplayStr, CaretOffset, Cursor.m_CursorCharacter);
|
||||
Cursor.m_CursorCharacter = str_utf8_offset_bytes_to_chars(pDisplayStr, CaretOffset);
|
||||
Cursor.m_CalculateSelectionMode = TEXT_CURSOR_SELECTION_MODE_SET;
|
||||
Cursor.m_SelectionHeightFactor = 0.1f;
|
||||
TextRender()->UTF8OffToDecodedOff(pDisplayStr, DisplayCursorOffset, Cursor.m_SelectionStart);
|
||||
TextRender()->UTF8OffToDecodedOff(pDisplayStr, DisplayCompositionEnd, Cursor.m_SelectionEnd);
|
||||
Cursor.m_SelectionStart = str_utf8_offset_bytes_to_chars(pDisplayStr, DisplayCursorOffset);
|
||||
Cursor.m_SelectionEnd = str_utf8_offset_bytes_to_chars(pDisplayStr, DisplayCompositionEnd);
|
||||
TextRender()->TextSelectionColor(1.0f, 1.0f, 1.0f, 0.8f);
|
||||
TextRender()->TextEx(&Cursor, pDisplayStr);
|
||||
TextRender()->TextSelectionColor(TextRender()->DefaultTextSelectionColor());
|
||||
|
@ -476,38 +458,30 @@ STextBoundingBox CLineInput::Render(const CUIRect *pRect, float FontSize, int Al
|
|||
const size_t Start = OffsetFromActualToDisplay(GetSelectionStart());
|
||||
const size_t End = OffsetFromActualToDisplay(GetSelectionEnd());
|
||||
Cursor.m_CursorMode = m_MouseSelection.m_Selecting ? TEXT_CURSOR_CURSOR_MODE_CALCULATE : TEXT_CURSOR_CURSOR_MODE_SET;
|
||||
TextRender()->UTF8OffToDecodedOff(pDisplayStr, CaretOffset, Cursor.m_CursorCharacter);
|
||||
Cursor.m_CursorCharacter = str_utf8_offset_bytes_to_chars(pDisplayStr, CaretOffset);
|
||||
Cursor.m_CalculateSelectionMode = m_MouseSelection.m_Selecting ? TEXT_CURSOR_SELECTION_MODE_CALCULATE : TEXT_CURSOR_SELECTION_MODE_SET;
|
||||
TextRender()->UTF8OffToDecodedOff(pDisplayStr, Start, Cursor.m_SelectionStart);
|
||||
TextRender()->UTF8OffToDecodedOff(pDisplayStr, End, Cursor.m_SelectionEnd);
|
||||
Cursor.m_SelectionStart = str_utf8_offset_bytes_to_chars(pDisplayStr, Start);
|
||||
Cursor.m_SelectionEnd = str_utf8_offset_bytes_to_chars(pDisplayStr, End);
|
||||
TextRender()->TextEx(&Cursor, pDisplayStr);
|
||||
}
|
||||
else
|
||||
{
|
||||
Cursor.m_CursorMode = m_MouseSelection.m_Selecting ? TEXT_CURSOR_CURSOR_MODE_CALCULATE : TEXT_CURSOR_CURSOR_MODE_SET;
|
||||
TextRender()->UTF8OffToDecodedOff(pDisplayStr, CaretOffset, Cursor.m_CursorCharacter);
|
||||
Cursor.m_CursorCharacter = str_utf8_offset_bytes_to_chars(pDisplayStr, CaretOffset);
|
||||
Cursor.m_CalculateSelectionMode = m_MouseSelection.m_Selecting ? TEXT_CURSOR_SELECTION_MODE_CALCULATE : TEXT_CURSOR_SELECTION_MODE_NONE;
|
||||
TextRender()->TextEx(&Cursor, pDisplayStr);
|
||||
}
|
||||
|
||||
if(Cursor.m_CursorMode == TEXT_CURSOR_CURSOR_MODE_CALCULATE)
|
||||
{
|
||||
int NewCursorOffset;
|
||||
TextRender()->DecodedOffToUTF8Off(pDisplayStr, Cursor.m_CursorCharacter, NewCursorOffset);
|
||||
if(NewCursorOffset >= 0)
|
||||
{
|
||||
SetCursorOffset(OffsetFromDisplayToActual(NewCursorOffset));
|
||||
}
|
||||
const size_t NewCursorOffset = str_utf8_offset_chars_to_bytes(pDisplayStr, Cursor.m_CursorCharacter);
|
||||
SetCursorOffset(OffsetFromDisplayToActual(NewCursorOffset));
|
||||
}
|
||||
if(Cursor.m_CalculateSelectionMode == TEXT_CURSOR_SELECTION_MODE_CALCULATE)
|
||||
{
|
||||
int NewSelectionStart, NewSelectionEnd;
|
||||
TextRender()->DecodedOffToUTF8Off(pDisplayStr, Cursor.m_SelectionStart, NewSelectionStart);
|
||||
TextRender()->DecodedOffToUTF8Off(pDisplayStr, Cursor.m_SelectionEnd, NewSelectionEnd);
|
||||
if(NewSelectionStart >= 0 && NewSelectionEnd >= 0)
|
||||
{
|
||||
SetSelection(OffsetFromDisplayToActual(NewSelectionStart), OffsetFromDisplayToActual(NewSelectionEnd));
|
||||
}
|
||||
const size_t NewSelectionStart = str_utf8_offset_chars_to_bytes(pDisplayStr, Cursor.m_SelectionStart);
|
||||
const size_t NewSelectionEnd = str_utf8_offset_chars_to_bytes(pDisplayStr, Cursor.m_SelectionEnd);
|
||||
SetSelection(OffsetFromDisplayToActual(NewSelectionStart), OffsetFromDisplayToActual(NewSelectionEnd));
|
||||
}
|
||||
|
||||
m_CaretPosition = Cursor.m_CursorRenderedPosition;
|
||||
|
|
|
@ -696,6 +696,60 @@ TEST(Str, Utf8Stats)
|
|||
EXPECT_EQ(Count, 3);
|
||||
}
|
||||
|
||||
TEST(Str, Utf8OffsetBytesToChars)
{
	// Empty string: every byte offset maps to character offset 0.
	const char *pEmpty = "";
	EXPECT_EQ(str_utf8_offset_bytes_to_chars(pEmpty, 0), 0);
	EXPECT_EQ(str_utf8_offset_bytes_to_chars(pEmpty, 100), 0);

	// One byte per character; offsets past the end are clamped.
	const char *pAscii = "abc";
	EXPECT_EQ(str_utf8_offset_bytes_to_chars(pAscii, 0), 0);
	EXPECT_EQ(str_utf8_offset_bytes_to_chars(pAscii, 1), 1);
	EXPECT_EQ(str_utf8_offset_bytes_to_chars(pAscii, 2), 2);
	EXPECT_EQ(str_utf8_offset_bytes_to_chars(pAscii, 3), 3);
	EXPECT_EQ(str_utf8_offset_bytes_to_chars(pAscii, 100), 3);

	// Two bytes per character; offsets past the end are clamped.
	const char *pCyrillic = "любовь";
	EXPECT_EQ(str_utf8_offset_bytes_to_chars(pCyrillic, 0), 0);
	EXPECT_EQ(str_utf8_offset_bytes_to_chars(pCyrillic, 2), 1);
	EXPECT_EQ(str_utf8_offset_bytes_to_chars(pCyrillic, 4), 2);
	EXPECT_EQ(str_utf8_offset_bytes_to_chars(pCyrillic, 6), 3);
	EXPECT_EQ(str_utf8_offset_bytes_to_chars(pCyrillic, 8), 4);
	EXPECT_EQ(str_utf8_offset_bytes_to_chars(pCyrillic, 10), 5);
	EXPECT_EQ(str_utf8_offset_bytes_to_chars(pCyrillic, 12), 6);
	EXPECT_EQ(str_utf8_offset_bytes_to_chars(pCyrillic, 100), 6);

	// One-byte characters followed by three-byte characters.
	const char *pMixed = "DDNet最好了";
	EXPECT_EQ(str_utf8_offset_bytes_to_chars(pMixed, 5), 5);
	EXPECT_EQ(str_utf8_offset_bytes_to_chars(pMixed, 8), 6);
	EXPECT_EQ(str_utf8_offset_bytes_to_chars(pMixed, 11), 7);
	EXPECT_EQ(str_utf8_offset_bytes_to_chars(pMixed, 14), 8);
	EXPECT_EQ(str_utf8_offset_bytes_to_chars(pMixed, 100), 8);
}
|
||||
|
||||
TEST(Str, Utf8OffsetCharsToBytes)
{
	// Empty string: every character offset maps to byte offset 0.
	const char *pEmpty = "";
	EXPECT_EQ(str_utf8_offset_chars_to_bytes(pEmpty, 0), 0);
	EXPECT_EQ(str_utf8_offset_chars_to_bytes(pEmpty, 100), 0);

	// One byte per character; offsets past the end are clamped.
	const char *pAscii = "abc";
	EXPECT_EQ(str_utf8_offset_chars_to_bytes(pAscii, 0), 0);
	EXPECT_EQ(str_utf8_offset_chars_to_bytes(pAscii, 1), 1);
	EXPECT_EQ(str_utf8_offset_chars_to_bytes(pAscii, 2), 2);
	EXPECT_EQ(str_utf8_offset_chars_to_bytes(pAscii, 3), 3);
	EXPECT_EQ(str_utf8_offset_chars_to_bytes(pAscii, 100), 3);

	// Two bytes per character; offsets past the end are clamped.
	const char *pCyrillic = "любовь";
	EXPECT_EQ(str_utf8_offset_chars_to_bytes(pCyrillic, 0), 0);
	EXPECT_EQ(str_utf8_offset_chars_to_bytes(pCyrillic, 1), 2);
	EXPECT_EQ(str_utf8_offset_chars_to_bytes(pCyrillic, 2), 4);
	EXPECT_EQ(str_utf8_offset_chars_to_bytes(pCyrillic, 3), 6);
	EXPECT_EQ(str_utf8_offset_chars_to_bytes(pCyrillic, 4), 8);
	EXPECT_EQ(str_utf8_offset_chars_to_bytes(pCyrillic, 5), 10);
	EXPECT_EQ(str_utf8_offset_chars_to_bytes(pCyrillic, 6), 12);
	EXPECT_EQ(str_utf8_offset_chars_to_bytes(pCyrillic, 100), 12);

	// One-byte characters followed by three-byte characters.
	const char *pMixed = "DDNet最好了";
	EXPECT_EQ(str_utf8_offset_chars_to_bytes(pMixed, 5), 5);
	EXPECT_EQ(str_utf8_offset_chars_to_bytes(pMixed, 6), 8);
	EXPECT_EQ(str_utf8_offset_chars_to_bytes(pMixed, 7), 11);
	EXPECT_EQ(str_utf8_offset_chars_to_bytes(pMixed, 8), 14);
	EXPECT_EQ(str_utf8_offset_chars_to_bytes(pMixed, 100), 14);
}
|
||||
|
||||
TEST(Str, Time)
|
||||
{
|
||||
char aBuf[32] = "foobar";
|
||||
|
|
Loading…
Reference in a new issue