538 lines
13 KiB
C++
538 lines
13 KiB
C++
#include "stdafx.h"
|
|
#ifdef _COUNTRY_TL_
|
|
|
|
#include "Thailand.h"
|
|
#include <kfile/KFileManager.h>
|
|
|
|
namespace LocalizationTL
|
|
{
|
|
|
|
// Namespace-scope TEWordBreak instance; only compiled when _COUNTRY_TL_ is defined.
TEWordBreak g_TEWordBreak;
|
|
|
|
void TEWordBreak::LoadDic()
|
|
{
|
|
std::string strFile1 = KFileManager::Instance().CreateTemporaryFileFromResource( "tewordbreak.txt" );
|
|
std::string strFile2 = KFileManager::Instance().CreateTemporaryFileFromResource( "iptdict.txt" );
|
|
std::string strFile3 = KFileManager::Instance().CreateTemporaryFileFromResource( "BaseChar.txt" );
|
|
|
|
int i=0;
|
|
FILE *fd;
|
|
FILE *fp;
|
|
FILE *fs;
|
|
|
|
if((fs=fopen(strFile1.c_str(), "r"))==NULL)
|
|
{
|
|
//PostQuitMessage(0);
|
|
}
|
|
while(!feof(fs))
|
|
{
|
|
fgets(Buffer,126,fs);
|
|
x_sc2[i] = checkchar(Buffer);
|
|
i++;
|
|
}
|
|
fclose(fs);
|
|
|
|
i = 0;
|
|
|
|
if((fd=fopen(strFile2.c_str(), "r"))==NULL)
|
|
{
|
|
//PostQuitMessage(0);
|
|
}
|
|
while(!feof(fd))
|
|
{
|
|
fgets(Buffer,126,fd);
|
|
Buffer[strlen(Buffer)-1] = '\0';
|
|
Dic[i] = Buffer;
|
|
//Dic[i].at(Dic[i].length()-1) = NULL;
|
|
i++;
|
|
}
|
|
fclose(fd);
|
|
|
|
if((fp=fopen(strFile3.c_str(), "r"))==NULL)
|
|
{
|
|
//PostQuitMessage(0);
|
|
}
|
|
while(!feof(fp))
|
|
{
|
|
fgets(Buffer,126,fp);
|
|
Buffer[strlen(Buffer)-1] = '\0';
|
|
BaseChar += Buffer;
|
|
}
|
|
fclose(fp);
|
|
|
|
KFileManager::Instance().DeleteTemporaryFile( "tewordbreak.txt" );
|
|
KFileManager::Instance().DeleteTemporaryFile( "iptdict.txt" );
|
|
KFileManager::Instance().DeleteTemporaryFile( "BaseChar.txt" );
|
|
}
|
|
|
|
// Re-flows `buf` into RSBuf and returns it. While copying, the running width
// `ob` (measured with B_strlen) is tracked; whenever adding the next piece would
// push `ob` past `n_ch`, `wcut` is emitted before that piece and `ob` restarts
// from the width of the piece. Input whose total B_strlen is <= n_ch is returned
// unchanged.
//
// The function works on member state that SearchDic() also modifies:
//   o    - read offset into buf
//   ob   - width of the current output line
//   l, q - candidate length and candidate end offset (q = o + l)
//   p    - strlen(buf)
//   m, j - loop counters, also overwritten to make the loops stop
//   abuf - lookup key handed to SearchDic()
//
// NOTE(review): all byte tests below compare against negative literals, so they
// only behave as written when plain `char` is signed.
string TEWordBreak::BreakString(const char *buf)
|
|
{
|
|
RSBuf = "";
|
|
o=0;
|
|
ob=0;
|
|
l=0;
|
|
q=0;
|
|
p=strlen(buf);
|
|
|
|
p_buf = buf;
|
|
|
|
string b_test;
|
|
b_test = buf;
|
|
// Short input: nothing to wrap, hand the text back as-is.
if(B_strlen(b_test)<=n_ch) RSBuf = b_test;
|
|
else
|
|
{
|
|
for(m=0;m<p;m++)
|
|
{
|
|
int c_po;
|
|
// Bytes still unread.
c_po = p-o;
|
|
// Candidate length is capped at x_sc2[37] (presumably the longest dictionary
// entry length - confirm against tewordbreak.txt).
if(c_po>=x_sc2[37]) l=x_sc2[37];
|
|
else l=c_po;
|
|
int r = l;
|
|
// Try candidates starting at offset o; l shrinks on every pass.
for(j=0;j<r;j++)
|
|
{
|
|
q=o+l;
|
|
// If the byte at the candidate end is in -25..-19 (0xE7..0xED) or -48..-38
// (0xD0..0xDA), shrink by one, and by one more if the new end byte is in those
// ranges too. Presumably this avoids ending a candidate right before a
// combining mark - confirm.
if((buf[q]>=-25 && buf[q] <=-19) || (buf[q]>=-48 && buf[q] <=-38))
|
|
{
|
|
l--;
|
|
q=o+l;
|
|
if((buf[q]>=-25 && buf[q] <=-19) || (buf[q]>=-48 && buf[q] <=-38))
|
|
{
|
|
l--;
|
|
q=o+l;
|
|
}
|
|
}
|
|
// Unconditional shrink for this pass (q is not recomputed here).
l--;
|
|
// Candidate exhausted: copy the single byte at o, emitting wcut first when the
// line would overflow.
if(l<=0)
|
|
{
|
|
string bdata;
|
|
bdata = buf[o];
|
|
ob += B_strlen(bdata);
|
|
if(ob>n_ch)
|
|
{
|
|
RSBuf += wcut+buf[o];
|
|
ob=B_strlen(bdata);
|
|
}
|
|
else RSBuf += buf[o];
|
|
o++;
|
|
// NOTE(review): j=10 looks like a loop-exit hack; it ends the inner loop only
// when r <= 11. There is no break/continue, so the rest of this pass still runs
// with the advanced o.
j=10;
|
|
}
|
|
abuf = "";
|
|
// t_o_t selects the handling below: 2 = dictionary lookup, 3 = single byte
// (1 belonged to the disabled Latin-letter branch).
int t_o_t = 0;
|
|
|
|
/*if((buf[o]>=65 && buf[o]<=90) || (buf[o]>=97 && buf[o]<=122))
|
|
{
|
|
int n2 = 10-ob;
|
|
if(n2
|
|
for(int n=o;n<o+10-ob;n++)
|
|
{
|
|
if((buf[n]>=65 && buf[n]<=90) || (buf[n]>=97 && buf[n]<=122))
|
|
{
|
|
abuf += buf[n];
|
|
}
|
|
else
|
|
{
|
|
n=p;
|
|
}
|
|
}
|
|
|
|
t_o_t = 1;
|
|
}*/
|
|
// Leading byte in -95..-28 (0xA1..0xE4): build the lookup key from [o, q).
if((buf[o]>=-95 && buf[o]<=-28))
|
|
{
|
|
for(int n=o;n<q;n++)
|
|
{
|
|
if((buf[n]>=-95 && buf[n]<=-19))
|
|
{
|
|
abuf += buf[n];
|
|
}
|
|
else
|
|
{
|
|
// Byte outside -95..-19: shrink l and stop collecting (n=q ends the loop).
l--;
|
|
n=q;
|
|
}
|
|
}
|
|
|
|
t_o_t = 2;
|
|
}
|
|
// Any other leading byte is handled as a single byte.
else
|
|
{
|
|
abuf += buf[o];
|
|
|
|
t_o_t = 3;
|
|
}
|
|
|
|
if(t_o_t==3)
|
|
{
|
|
// Line feed (10): copy it and restart the line width.
if(buf[o]==10)
|
|
{
|
|
ob = 0;
|
|
RSBuf += buf[o];
|
|
o++;
|
|
j=10;
|
|
}
|
|
else
|
|
{
|
|
// Same single-byte copy-with-wrap as in the l<=0 case above.
string bdata;
|
|
bdata = buf[o];
|
|
ob += B_strlen(bdata);
|
|
if(ob>n_ch)
|
|
{
|
|
RSBuf += wcut+buf[o];
|
|
ob=B_strlen(bdata);
|
|
}
|
|
else RSBuf += buf[o];
|
|
o++;
|
|
j=10;
|
|
// Input consumed: push m to the end so the outer loop stops.
if(o>=p) m=strlen(buf);
|
|
}
|
|
}
|
|
else if(t_o_t==2)
|
|
{
|
|
// Choose the Dic index range from the leading byte; consecutive x_sc2 entries
// are the range boundaries. SearchDic() appends abuf to RSBuf and advances
// o/ob (and sets j/m) only when abuf equals a Dic entry in that range.
// Leading bytes in -95..-28 with no branch here (e.g. -93, -91, -81, -52)
// perform no lookup on this pass.
if(buf[o]==-95) SearchDic(0,x_sc2[0]-1);
|
|
else if(buf[o]==-94) SearchDic(x_sc2[0],x_sc2[1]-1);
|
|
else if(buf[o]==-92) SearchDic(x_sc2[1],x_sc2[2]-1);
|
|
else if(buf[o]==-90 || buf[o]==-89) SearchDic(x_sc2[2],x_sc2[3]-1);
|
|
else if(buf[o]==-88) SearchDic(x_sc2[3],x_sc2[4]-1);
|
|
else if(buf[o]==-87) SearchDic(x_sc2[4],x_sc2[5]-1);
|
|
else if(buf[o]==-86) SearchDic(x_sc2[5],x_sc2[6]-1);
|
|
else if(buf[o]==-85 || buf[o]==-84) SearchDic(x_sc2[6],x_sc2[7]-1);
|
|
else if(buf[o]==-83 || buf[o]==-82 || buf[o]==-80 || buf[o]==-77) SearchDic(x_sc2[7],x_sc2[8]-1);
|
|
else if(buf[o]==-76) SearchDic(x_sc2[8],x_sc2[9]-1);
|
|
else if(buf[o]==-75) SearchDic(x_sc2[9],x_sc2[10]-1);
|
|
else if(buf[o]==-74) SearchDic(x_sc2[10],x_sc2[11]-1);
|
|
else if(buf[o]==-73) SearchDic(x_sc2[11],x_sc2[12]-1);
|
|
else if(buf[o]==-72) SearchDic(x_sc2[12],x_sc2[13]-1);
|
|
else if(buf[o]==-71) SearchDic(x_sc2[13],x_sc2[14]-1);
|
|
else if(buf[o]==-70) SearchDic(x_sc2[14],x_sc2[15]-1);
|
|
else if(buf[o]==-69) SearchDic(x_sc2[15],x_sc2[16]-1);
|
|
else if(buf[o]==-68) SearchDic(x_sc2[16],x_sc2[17]-1);
|
|
else if(buf[o]==-67) SearchDic(x_sc2[17],x_sc2[18]-1);
|
|
else if(buf[o]==-66) SearchDic(x_sc2[18],x_sc2[19]-1);
|
|
else if(buf[o]==-65) SearchDic(x_sc2[19],x_sc2[20]-1);
|
|
else if(buf[o]==-64) SearchDic(x_sc2[20],x_sc2[21]-1);
|
|
else if(buf[o]==-63) SearchDic(x_sc2[21],x_sc2[22]-1);
|
|
else if(buf[o]==-62) SearchDic(x_sc2[22],x_sc2[23]-1);
|
|
else if(buf[o]==-61) SearchDic(x_sc2[23],x_sc2[24]-1);
|
|
else if(buf[o]==-60 || buf[o]==-59 || buf[o]==-58) SearchDic(x_sc2[24],x_sc2[25]-1);
|
|
else if(buf[o]==-57) SearchDic(x_sc2[25],x_sc2[26]-1);
|
|
else if(buf[o]==-56) SearchDic(x_sc2[26],x_sc2[27]-1);
|
|
else if(buf[o]==-55 || buf[o]==-54) SearchDic(x_sc2[27],x_sc2[28]-1);
|
|
else if(buf[o]==-53) SearchDic(x_sc2[28],x_sc2[29]-1);
|
|
else if(buf[o]==-51) SearchDic(x_sc2[29],x_sc2[30]-1);
|
|
else if(buf[o]==-50) SearchDic(x_sc2[30],x_sc2[31]-1);
|
|
else if(buf[o]==-49 || buf[o]==-32) SearchDic(x_sc2[31],x_sc2[32]-1);
|
|
else if(buf[o]==-31) SearchDic(x_sc2[32],x_sc2[33]-1);
|
|
else if(buf[o]==-30) SearchDic(x_sc2[33],x_sc2[34]-1);
|
|
else if(buf[o]==-29) SearchDic(x_sc2[34],x_sc2[35]-1);
|
|
else if(buf[o]==-28) SearchDic(x_sc2[35],x_sc2[36]-1);
|
|
}
|
|
/*else if(t_o_t==1)
|
|
{
|
|
o += abuf.length();
|
|
ob += B_strlen(abuf);
|
|
if(ob>n_ch)
|
|
{
|
|
RSBuf += wcut+abuf;
|
|
ob=B_strlen(abuf);
|
|
}
|
|
else
|
|
{
|
|
RSBuf += abuf;
|
|
}
|
|
j=10;
|
|
if(o>=p) m=strlen(buf);
|
|
}*/
|
|
}
|
|
// Input consumed: push m to the end so the outer loop stops.
if(o>=p) m=strlen(buf);
|
|
}
|
|
}
|
|
return RSBuf;
|
|
}
|
|
|
|
// Returns the number of bytes of `x` that count towards the line width.
//   - byte 10 (line feed) never counts;
//   - bytes -45, 32 and -96 always count;
//   - any other byte counts when BaseChar.find() reports a position > 0.
// NOTE(review): a byte found at BaseChar index 0 is therefore NOT counted, and
// "not found" only works because npos converts to a negative int. Kept as-is;
// confirm whether index 0 is meant to be excluded.
// The negative literals assume plain `char` is signed.
int TEWordBreak::B_strlen(string x)
{
	int counted = 0;

	for(size_t idx = 0; idx < x.length(); ++idx)
	{
		const char ch = x.at(idx);

		if(ch == 10)
		{
			continue;
		}

		if(ch == -45 || ch == 32 || ch == -96)
		{
			++counted;
			continue;
		}

		const int pos = BaseChar.find(ch, 0);
		if(pos > 0)
		{
			++counted;
		}
	}

	return counted;
}
|
|
|
|
// Returns true when `x` is non-empty and consists solely of the ASCII letters
// A-Z / a-z; returns false for an empty string or at the first other byte.
bool TEWordBreak::E_strcmp(string x)
{
	if(x.length() == 0)
	{
		return false;
	}

	for(size_t pos = 0; pos < x.length(); ++pos)
	{
		const char ch = x[pos];
		const bool isUpper = (ch >= 65 && ch <= 90);
		const bool isLower = (ch >= 97 && ch <= 122);

		if(!isUpper && !isLower)
		{
			return false;
		}
	}

	return true;
}
|
|
|
|
void TEWordBreak::SearchDic(int x, int y)
|
|
{
|
|
int x2 = 0;
|
|
for(int i=x;i<=y;i++)
|
|
{
|
|
if(abuf.length() == (Dic[i].length()))
|
|
{
|
|
for(size_t a=0;a<abuf.length();a++)
|
|
{
|
|
if(abuf.at(a) == Dic[i].at(a))
|
|
{
|
|
x2++;
|
|
}
|
|
else
|
|
{
|
|
a=abuf.length();
|
|
x2 = 0;
|
|
}
|
|
|
|
if(x2==abuf.length())
|
|
{
|
|
o += abuf.length();
|
|
ob += B_strlen(abuf);
|
|
if(ob>n_ch)
|
|
{
|
|
RSBuf += wcut+abuf;
|
|
ob=B_strlen(abuf);
|
|
}
|
|
else
|
|
{
|
|
RSBuf += abuf;
|
|
}
|
|
j=10;
|
|
i=y+1;
|
|
if(o>=p) m=p;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Parses the unsigned decimal number at the start of `x` and returns it.
// Parsing stops at the first non-digit, so a trailing line ending (as left by
// fgets) is ignored, and a final line without a line ending keeps its last digit.
// Returns 0 when `x` is empty or does not start with a digit, and also when the
// number has more than 9 digits (treated as malformed so the result always fits
// in an int).
//
// The previous implementation always discarded the last character, handled only
// 1-5 characters, and read uninitialised memory for anything else.
int TEWordBreak::checkchar(string x)
{
	const size_t kMaxDigits = 9;

	int value = 0;
	size_t digits = 0;

	for(size_t i = 0; i < x.length(); ++i)
	{
		const char ch = x[i];
		if(ch < '0' || ch > '9')
		{
			break;
		}

		++digits;
		if(digits > kMaxDigits)
		{
			return 0;
		}

		value = value * 10 + (ch - '0');
	}

	return value;
}
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
// Lookup tables used by IsComposible() and CharNextTh().
// NOTE(review): class and state names look like the WTT 2.0 Thai input/output
// sequence-check tables (CONS = consonant, LV/FV = leading/following vowel,
// BV/AV = below/above vowel, BD/AD = below/above diacritic) - confirm.
namespace THAI_CHARNEXT
{
	// Character class of a byte; the numeric value is the row/column index
	// into both STATE_CHECK tables.
	enum CLASS {
		CTRL,	//  0
		NON,	//  1
		CONS,	//  2
		LV,		//  3
		FV1,	//  4
		FV2,	//  5
		FV3,	//  6
		BV1,	//  7
		BV2,	//  8
		BD,		//  9
		TONE,	// 10
		AD1,	// 11
		AD2,	// 12
		AD3,	// 13
		AV1,	// 14
		AV2,	// 15
		AV3		// 16
	};

	// Class of every byte value 0x00..0xFF, sixteen entries per row.
	CLASS CLASS_TABLE[256] = {
		/* 0x00 */ CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL,
		/* 0x10 */ CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL,
		/* 0x20 */ NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,
		/* 0x30 */ NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,
		/* 0x40 */ NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,
		/* 0x50 */ NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,
		/* 0x60 */ NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,
		/* 0x70 */ NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  CTRL,
		/* 0x80 */ CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL,
		/* 0x90 */ CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL,
		/* 0xA0 */ NON,  CONS, CONS, CONS, CONS, CONS, CONS, CONS, CONS, CONS, CONS, CONS, CONS, CONS, CONS, CONS,
		/* 0xB0 */ CONS, CONS, CONS, CONS, CONS, CONS, CONS, CONS, CONS, CONS, CONS, CONS, CONS, CONS, CONS, CONS,
		/* 0xC0 */ CONS, CONS, CONS, CONS, FV3,  CONS, FV3,  CONS, CONS, CONS, CONS, CONS, CONS, CONS, CONS, NON,
		/* 0xD0 */ FV1,  AV2,  FV1,  FV1,  AV1,  AV3,  AV2,  AV3,  BV1,  BV2,  BD,   NON,  NON,  NON,  NON,  NON,
		/* 0xE0 */ LV,   LV,   LV,   LV,   LV,   FV2,  NON,  AD2,  TONE, TONE, TONE, TONE, AD1,  AD1,  AD3,  NON,
		/* 0xF0 */ NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  NON,  CTRL,
	};

	// Tables consulted by IsComposible().
	namespace INPUT
	{
		// Result of checking a (previous, current) class pair.
		enum TYPE { A, C, S, R, X };

		// COMPOSIBLE[mode][TYPE]: whether a pair result is allowed in the given
		// mode (0..2). Mode 0 allows everything, mode 1 disallows R, mode 2
		// disallows S and R.
		bool COMPOSIBLE[3][5] = {
			/* mode 0 */ { true, true, true,  true,  true },
			/* mode 1 */ { true, true, true,  false, true },
			/* mode 2 */ { true, true, false, false, true },
		};

		// STATE_CHECK[previous class][current class].
		TYPE STATE_CHECK[17][17] = {
			/*          CTRL NON CONS LV FV1 FV2 FV3 BV1 BV2 BD TONE AD1 AD2 AD3 AV1 AV2 AV3 */
			/* CTRL */ { X,  A,  A,  A,  R,  R,  R,  R,  R,  R,  R,  R,  R,  R,  R,  R,  R },
			/* NON  */ { X,  A,  A,  A,  R,  R,  R,  R,  R,  R,  R,  R,  R,  R,  R,  R,  R },
			/* CONS */ { X,  A,  A,  A,  A,  S,  A,  C,  C,  C,  C,  C,  C,  C,  C,  C,  C },
			/* LV   */ { X,  S,  A,  S,  S,  S,  S,  R,  R,  R,  R,  R,  R,  R,  R,  R,  R },
			/* FV1  */ { X,  S,  A,  S,  A,  S,  A,  R,  R,  R,  R,  R,  R,  R,  R,  R,  R },
			/* FV2  */ { X,  A,  A,  A,  A,  S,  A,  R,  R,  R,  R,  R,  R,  R,  R,  R,  R },
			/* FV3  */ { X,  A,  A,  A,  S,  A,  S,  R,  R,  R,  R,  R,  R,  R,  R,  R,  R },
			/* BV1  */ { X,  A,  A,  A,  A,  S,  A,  R,  R,  R,  C,  C,  R,  R,  R,  R,  R },
			/* BV2  */ { X,  A,  A,  A,  S,  S,  A,  R,  R,  R,  C,  R,  R,  R,  R,  R,  R },
			/* BD   */ { X,  A,  A,  A,  S,  S,  A,  R,  R,  R,  R,  R,  R,  R,  R,  R,  R },
			/* TONE */ { X,  A,  A,  A,  A,  A,  A,  R,  R,  R,  R,  R,  R,  R,  R,  R,  R },
			/* AD1  */ { X,  A,  A,  A,  S,  S,  A,  R,  R,  R,  R,  R,  R,  R,  R,  R,  R },
			/* AD2  */ { X,  A,  A,  A,  S,  S,  A,  R,  R,  R,  R,  R,  R,  R,  R,  R,  R },
			/* AD3  */ { X,  A,  A,  A,  S,  S,  A,  R,  R,  R,  R,  R,  R,  R,  R,  R,  R },
			/* AV1  */ { X,  A,  A,  A,  S,  S,  A,  R,  R,  R,  C,  C,  R,  R,  R,  R,  R },
			/* AV2  */ { X,  A,  A,  A,  S,  S,  A,  R,  R,  R,  C,  R,  R,  R,  R,  R,  R },
			/* AV3  */ { X,  A,  A,  A,  S,  S,  A,  R,  R,  R,  C,  R,  C,  R,  R,  R,  R },
		};
	}

	// Table consulted by CharNextTh().
	namespace OUTPUT
	{
		// Result of checking a (previous, current) class pair; CharNextTh()
		// keeps advancing while the result is C.
		enum TYPE { N, C, X };

		// STATE_CHECK[previous class][current class].
		TYPE STATE_CHECK[17][17] = {
			/*          CTRL NON CONS LV FV1 FV2 FV3 BV1 BV2 BD TONE AD1 AD2 AD3 AV1 AV2 AV3 */
			/* CTRL */ { X,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N },
			/* NON  */ { X,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N },
			/* CONS */ { X,  N,  N,  N,  C,  C,  C,  C,  C,  C,  C,  C,  C,  C,  C,  C,  C },
			/* LV   */ { X,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N },
			/* FV1  */ { X,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N },
			/* FV2  */ { X,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N },
			/* FV3  */ { X,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N },
			/* BV1  */ { X,  N,  N,  N,  N,  N,  N,  N,  N,  N,  C,  C,  N,  N,  N,  N,  N },
			/* BV2  */ { X,  N,  N,  N,  N,  N,  N,  N,  N,  N,  C,  N,  N,  N,  N,  N,  N },
			/* BD   */ { X,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N },
			/* TONE */ { X,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N },
			/* AD1  */ { X,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N },
			/* AD2  */ { X,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N },
			/* AD3  */ { X,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N,  N },
			/* AV1  */ { X,  N,  N,  N,  N,  N,  N,  N,  N,  N,  C,  C,  N,  N,  N,  N,  N },
			/* AV2  */ { X,  N,  N,  N,  N,  N,  N,  N,  N,  N,  C,  N,  N,  N,  N,  N,  N },
			/* AV3  */ { X,  N,  N,  N,  N,  N,  N,  N,  N,  N,  C,  N,  C,  N,  N,  N,  N },
		};
	}
}
|
|
|
|
bool IsComposible(unsigned char prev, unsigned char curr, int mode)
|
|
{
|
|
using namespace THAI_CHARNEXT;
|
|
using namespace INPUT;
|
|
|
|
if(mode > 2) {
|
|
return false;
|
|
} else {
|
|
CLASS prev_class = CLASS_TABLE[prev];
|
|
CLASS curr_class = CLASS_TABLE[curr];
|
|
TYPE state_type = STATE_CHECK[prev_class][curr_class];
|
|
|
|
return COMPOSIBLE[mode][state_type];
|
|
}
|
|
}
|
|
|
|
// Thai occupies the Unicode range 0x0E00~0x0E7F and maps directly onto the Thai byte values:
//   Thai character -> Unicode : (char + 0x0E00) - 0xA0 = char + 0x0D60
//   Unicode -> Thai character : (wchar_t - 0x0E00) + 0xA0 = wchar_t - 0x0D60
|
|
|
|
const char* CharNextTh(const char* lpsz)
|
|
{
|
|
using namespace THAI_CHARNEXT;
|
|
using namespace OUTPUT;
|
|
|
|
const BYTE* stream = (const BYTE*)(lpsz);
|
|
|
|
while(STATE_CHECK[CLASS_TABLE[stream[0]]][CLASS_TABLE[stream[1]]] == C) ++stream;
|
|
|
|
return (const char*)(stream+1);
|
|
}
|
|
|
|
const char* CharPrevTh(const char* lpszStart, const char* lpszCurrent)
|
|
{
|
|
while(lpszStart < lpszCurrent) {
|
|
|
|
const char* lpszNext = CharNextTh(lpszStart);
|
|
|
|
if(lpszNext < lpszCurrent) {
|
|
lpszStart = lpszNext;
|
|
} else {
|
|
return lpszStart;
|
|
}
|
|
}
|
|
|
|
return lpszStart;
|
|
}
|
|
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Returns true when every byte of `str` is in the 7-bit range 0x00..0x7F, i.e.
// the string contains no byte with the high bit set. An empty string yields
// true. Note that digits and punctuation also pass this test.
//
// The byte is examined as unsigned char so the result does not depend on
// whether plain `char` is signed, and the scan stops at the first high byte.
bool isOnlyEnglishWord(const std::string& str)
{
	for (size_t i=0; i<str.length(); i++)
	{
		if (static_cast<unsigned char>(str[i]) >= 0x80)
		{
			return false;
		}
	}
	return true;
}
|
|
|
|
int GetCursorLength(const wchar_t* wstr, int len, int cursorpos)
|
|
{
|
|
char buf[20000] = {0, };
|
|
WORD wCodePage = 874;
|
|
int nLength = ::WideCharToMultiByte(wCodePage, 0, wstr, len, NULL, 0, NULL, NULL); // 저장된 문자열 길이
|
|
::WideCharToMultiByte(wCodePage, 0, wstr, len, buf, nLength, NULL, NULL);
|
|
|
|
// 한 글자가 몇바이트 까지인가 체크해서
|
|
const char* nextChar = CharNextTh(&(buf[cursorpos-1]));
|
|
return (nextChar - buf + 1) - cursorpos;
|
|
}
|
|
|
|
}
|
|
|
|
#endif |