2016-10-30 11:25:49 +00:00
|
|
|
#include "confusables_data.h"
|
|
|
|
|
2019-01-07 22:49:20 +00:00
|
|
|
#include "../system.h"
|
2016-10-30 11:25:49 +00:00
|
|
|
|
|
|
|
#include <stddef.h>
|
|
|
|
|
|
|
|
static int str_utf8_skeleton(int ch, const int **skeleton, int *skeleton_len)
|
2014-11-24 16:09:01 +00:00
|
|
|
{
|
2016-10-30 11:25:49 +00:00
|
|
|
int i;
|
|
|
|
for(i = 0; i < NUM_DECOMPS; i++)
|
2014-11-24 16:09:01 +00:00
|
|
|
{
|
2016-10-30 11:25:49 +00:00
|
|
|
if(ch == decomp_chars[i])
|
|
|
|
{
|
|
|
|
int offset = decomp_slices[i].offset;
|
|
|
|
int length = decomp_lengths[decomp_slices[i].length];
|
|
|
|
|
|
|
|
*skeleton = &decomp_data[offset];
|
|
|
|
*skeleton_len = length;
|
|
|
|
return 1;
|
|
|
|
}
|
2016-11-07 20:42:19 +00:00
|
|
|
else if(ch < decomp_chars[i])
|
|
|
|
{
|
|
|
|
break;
|
|
|
|
}
|
2016-10-30 11:25:49 +00:00
|
|
|
}
|
|
|
|
*skeleton = NULL;
|
|
|
|
*skeleton_len = 1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Iteration state used to walk a string one skeleton code point at a time.
 */
struct SKELETON
{
	/* Next pending replacement code point, or NULL when none is pending. */
	const int *skeleton;
	/* Number of code points still to be emitted before decoding more input. */
	int skeleton_len;
	/* Current read position in the input string. */
	const char *str;
};
|
|
|
|
|
|
|
|
void str_utf8_skeleton_begin(struct SKELETON *skel, const char *str)
|
|
|
|
{
|
|
|
|
skel->skeleton = NULL;
|
|
|
|
skel->skeleton_len = 0;
|
|
|
|
skel->str = str;
|
|
|
|
}
|
|
|
|
|
|
|
|
int str_utf8_skeleton_next(struct SKELETON *skel)
|
|
|
|
{
|
2016-11-19 00:42:20 +00:00
|
|
|
int ch = 0;
|
2016-10-30 11:25:49 +00:00
|
|
|
while(skel->skeleton_len == 0)
|
|
|
|
{
|
|
|
|
ch = str_utf8_decode(&skel->str);
|
|
|
|
if(ch == 0)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
str_utf8_skeleton(ch, &skel->skeleton, &skel->skeleton_len);
|
|
|
|
}
|
|
|
|
skel->skeleton_len--;
|
|
|
|
if(skel->skeleton != NULL)
|
|
|
|
{
|
|
|
|
ch = *skel->skeleton;
|
|
|
|
skel->skeleton++;
|
|
|
|
}
|
|
|
|
return ch;
|
|
|
|
}
|
|
|
|
|
2018-03-06 17:41:18 +00:00
|
|
|
int str_utf8_to_skeleton(const char *str, int *buf, int buf_len)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
struct SKELETON skel;
|
|
|
|
str_utf8_skeleton_begin(&skel, str);
|
|
|
|
for(i = 0; i < buf_len; i++)
|
|
|
|
{
|
|
|
|
int ch = str_utf8_skeleton_next(&skel);
|
|
|
|
if(ch == 0)
|
|
|
|
{
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
buf[i] = ch;
|
|
|
|
}
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
|
2016-10-30 11:25:49 +00:00
|
|
|
int str_utf8_comp_confusable(const char *str1, const char *str2)
|
|
|
|
{
|
|
|
|
struct SKELETON skel1;
|
|
|
|
struct SKELETON skel2;
|
|
|
|
|
|
|
|
str_utf8_skeleton_begin(&skel1, str1);
|
|
|
|
str_utf8_skeleton_begin(&skel2, str2);
|
|
|
|
|
|
|
|
while(1)
|
|
|
|
{
|
|
|
|
int ch1 = str_utf8_skeleton_next(&skel1);
|
|
|
|
int ch2 = str_utf8_skeleton_next(&skel2);
|
|
|
|
|
|
|
|
if(ch1 == 0 || ch2 == 0)
|
|
|
|
return ch1 != ch2;
|
|
|
|
|
|
|
|
if(ch1 != ch2)
|
|
|
|
return 1;
|
2014-11-24 16:09:01 +00:00
|
|
|
}
|
|
|
|
}
|