#include "stdafx.h"
#ifdef _COUNTRY_TL_
#include "Thailand.h"
// NOTE(review): the header name of the following #include is missing from this
// copy of the file (it looks like an angle-bracket include whose name was lost).
#include
namespace LocalizationTL
{

TEWordBreak g_TEWordBreak;

/// Removes a single trailing '\n' from a NUL-terminated line, if there is one.
/// An empty string, or a line without a newline, is left untouched.
static void StripTrailingNewline(char* line)
{
    const size_t length = strlen(line);
    if (length > 0 && line[length - 1] == '\n')
    {
        line[length - 1] = '\0';
    }
}

/**
 * Loads the three word-break data files into this object.
 *
 * Each resource is extracted to a temporary file through KFileManager, read
 * line by line into Buffer, and the temporary files are deleted afterwards:
 *   - "tewordbreak.txt": every line is converted with checkchar() and stored
 *                        in x_sc2[], in file order.
 *   - "iptdict.txt":     every line, without its newline, becomes one Dic[] entry.
 *   - "BaseChar.txt":    all lines, without their newlines, are appended to BaseChar.
 *
 * A file that cannot be opened is skipped and its destination is left as it was;
 * the remaining files are still loaded and all temporary files are still deleted.
 *
 * NOTE(review): x_sc2[] and Dic[] are declared outside this file, so their
 * capacity cannot be checked here - confirm the data files cannot overflow them.
 */
void TEWordBreak::LoadDic()
{
    // Maximum number of bytes fgets() may store per call (including the NUL).
    const int kMaxLine = 126;

    std::string strFile1 = KFileManager::Instance().CreateTemporaryFileFromResource( "tewordbreak.txt" );
    std::string strFile2 = KFileManager::Instance().CreateTemporaryFileFromResource( "iptdict.txt" );
    std::string strFile3 = KFileManager::Instance().CreateTemporaryFileFromResource( "BaseChar.txt" );

    // Every loop below tests the fgets() result instead of feof(): feof() only
    // becomes true after a read has already failed, so checking it first would
    // run the loop body once more on stale buffer contents.

    FILE* fs = fopen(strFile1.c_str(), "r");
    if (fs != NULL)
    {
        int i = 0;
        while (fgets(Buffer, kMaxLine, fs) != NULL)
        {
            x_sc2[i] = checkchar(Buffer);
            i++;
        }
        fclose(fs);
    }
    // else: //PostQuitMessage(0);

    FILE* fd = fopen(strFile2.c_str(), "r");
    if (fd != NULL)
    {
        int i = 0;
        while (fgets(Buffer, kMaxLine, fd) != NULL)
        {
            StripTrailingNewline(Buffer);
            Dic[i] = Buffer;
            //Dic[i].at(Dic[i].length()-1) = NULL;
            i++;
        }
        fclose(fd);
    }
    // else: //PostQuitMessage(0);

    FILE* fp = fopen(strFile3.c_str(), "r");
    if (fp != NULL)
    {
        while (fgets(Buffer, kMaxLine, fp) != NULL)
        {
            StripTrailingNewline(Buffer);
            BaseChar += Buffer;
        }
        fclose(fp);
    }
    // else: //PostQuitMessage(0);

    KFileManager::Instance().DeleteTemporaryFile( "tewordbreak.txt" );
    KFileManager::Instance().DeleteTemporaryFile( "iptdict.txt" );
    KFileManager::Instance().DeleteTemporaryFile( "BaseChar.txt" );
}

/**
 * Builds RSBuf from buf and returns it.
 *
 * When B_strlen(buf) is not greater than n_ch the input is returned unchanged.
 * Otherwise the text is consumed from offset o: a running count ob is kept, and
 * whenever adding the next piece makes ob exceed n_ch, wcut is inserted in front
 * of that piece and ob restarts from the piece's own B_strlen(). A byte with
 * value 10 resets ob to 0. Runs starting with a byte in [-95, -28] are handed to
 * SearchDic() with a Dic[] index range chosen from x_sc2[] by the first byte.
 *
 * NOTE(review): parts of this function's text are missing from this copy of the
 * file (see the markers below); the statements are reproduced exactly as found
 * and must be restored from an intact copy before this can compile.
 */
string TEWordBreak::BreakString(const char *buf)
{
    RSBuf = "";
    o=0;
    ob=0;
    l=0;
    q=0;
    p=strlen(buf);
    p_buf = buf;
    string b_test;
    b_test = buf;
    if(B_strlen(b_test)<=n_ch)
        RSBuf = b_test;
    else
    {
        // NOTE(review): text is missing between "m" and "=x_sc2[37]" on the next line.
        for(m=0;m=x_sc2[37]) l=x_sc2[37]; else l=c_po;
        int r = l;
        // NOTE(review): text is missing between "j" and "=-25" on the next line.
        for(j=0;j=-25 && buf[q] <=-19) || (buf[q]>=-48 && buf[q] <=-38))
        {
            l--;
            q=o+l;
            if((buf[q]>=-25 && buf[q] <=-19) || (buf[q]>=-48 && buf[q] <=-38))
            {
                l--;
                q=o+l;
            }
        }
        l--;
        if(l<=0)
        {
            // Emit the single byte at o, inserting wcut first if the count overflows n_ch.
            string bdata;
            bdata = buf[o];
            ob += B_strlen(bdata);
            if(ob>n_ch)
            {
                RSBuf += wcut+buf[o];
                ob=B_strlen(bdata);
            }
            else
                RSBuf += buf[o];
            o++;
            j=10;
        }
        abuf = "";
        int t_o_t = 0;
        /*if((buf[o]>=65 && buf[o]<=90) || (buf[o]>=97 && buf[o]<=122)) { int n2 = 10-ob; if(n2 for(int n=o;n=65 && buf[n]<=90) || (buf[n]>=97 &&
        buf[n]<=122)) { abuf += buf[n]; } else { n=p; } } t_o_t = 1; }*/
        if((buf[o]>=-95 && buf[o]<=-28))
        {
            // NOTE(review): text is missing between "n" and "=-95" on the next line.
            for(int n=o;n=-95 && buf[n]<=-19))
            {
                abuf += buf[n];
            }
            else
            {
                l--;
                n=q;
            }
            }
            t_o_t = 2;
        }
        else
        {
            abuf += buf[o];
            t_o_t = 3;
        }
        if(t_o_t==3)
        {
            if(buf[o]==10)
            {
                // Byte value 10 restarts the running count.
                ob = 0;
                RSBuf += buf[o];
                o++;
                j=10;
            }
            else
            {
                string bdata;
                bdata = buf[o];
                ob += B_strlen(bdata);
                if(ob>n_ch)
                {
                    RSBuf += wcut+buf[o];
                    ob=B_strlen(bdata);
                }
                else
                    RSBuf += buf[o];
                o++;
                j=10;
                if(o>=p) m=strlen(buf);
            }
        }
        else if(t_o_t==2)
        {
            // The first byte of the run selects which slice of Dic[] is searched;
            // x_sc2[k] holds the slice boundaries.
            if(buf[o]==-95) SearchDic(0,x_sc2[0]-1);
            else if(buf[o]==-94) SearchDic(x_sc2[0],x_sc2[1]-1);
            else if(buf[o]==-92) SearchDic(x_sc2[1],x_sc2[2]-1);
            else if(buf[o]==-90 || buf[o]==-89) SearchDic(x_sc2[2],x_sc2[3]-1);
            else if(buf[o]==-88) SearchDic(x_sc2[3],x_sc2[4]-1);
            else if(buf[o]==-87) SearchDic(x_sc2[4],x_sc2[5]-1);
            else if(buf[o]==-86) SearchDic(x_sc2[5],x_sc2[6]-1);
            else if(buf[o]==-85 || buf[o]==-84) SearchDic(x_sc2[6],x_sc2[7]-1);
            else if(buf[o]==-83 || buf[o]==-82 || buf[o]==-80 || buf[o]==-77) SearchDic(x_sc2[7],x_sc2[8]-1);
            else if(buf[o]==-76) SearchDic(x_sc2[8],x_sc2[9]-1);
            else if(buf[o]==-75) SearchDic(x_sc2[9],x_sc2[10]-1);
            else if(buf[o]==-74) SearchDic(x_sc2[10],x_sc2[11]-1);
            else if(buf[o]==-73) SearchDic(x_sc2[11],x_sc2[12]-1);
            else if(buf[o]==-72) SearchDic(x_sc2[12],x_sc2[13]-1);
            else if(buf[o]==-71) SearchDic(x_sc2[13],x_sc2[14]-1);
            else if(buf[o]==-70) SearchDic(x_sc2[14],x_sc2[15]-1);
            else if(buf[o]==-69) SearchDic(x_sc2[15],x_sc2[16]-1);
            else if(buf[o]==-68) SearchDic(x_sc2[16],x_sc2[17]-1);
            else if(buf[o]==-67) SearchDic(x_sc2[17],x_sc2[18]-1);
            else if(buf[o]==-66) SearchDic(x_sc2[18],x_sc2[19]-1);
            else if(buf[o]==-65) SearchDic(x_sc2[19],x_sc2[20]-1);
            else if(buf[o]==-64) SearchDic(x_sc2[20],x_sc2[21]-1);
            else if(buf[o]==-63) SearchDic(x_sc2[21],x_sc2[22]-1);
            else if(buf[o]==-62) SearchDic(x_sc2[22],x_sc2[23]-1);
            else if(buf[o]==-61) SearchDic(x_sc2[23],x_sc2[24]-1);
            else if(buf[o]==-60 || buf[o]==-59 || buf[o]==-58) SearchDic(x_sc2[24],x_sc2[25]-1);
            else if(buf[o]==-57) SearchDic(x_sc2[25],x_sc2[26]-1);
            else if(buf[o]==-56) SearchDic(x_sc2[26],x_sc2[27]-1);
            else if(buf[o]==-55 || buf[o]==-54) SearchDic(x_sc2[27],x_sc2[28]-1);
            else if(buf[o]==-53) SearchDic(x_sc2[28],x_sc2[29]-1);
            else if(buf[o]==-51) SearchDic(x_sc2[29],x_sc2[30]-1);
            else if(buf[o]==-50) SearchDic(x_sc2[30],x_sc2[31]-1);
            else if(buf[o]==-49 || buf[o]==-32) SearchDic(x_sc2[31],x_sc2[32]-1);
            else if(buf[o]==-31) SearchDic(x_sc2[32],x_sc2[33]-1);
            else if(buf[o]==-30) SearchDic(x_sc2[33],x_sc2[34]-1);
            else if(buf[o]==-29) SearchDic(x_sc2[34],x_sc2[35]-1);
            else if(buf[o]==-28) SearchDic(x_sc2[35],x_sc2[36]-1);
        }
        /*else if(t_o_t==1) { o += abuf.length(); ob += B_strlen(abuf); if(ob>n_ch) { RSBuf += wcut+abuf; ob=B_strlen(abuf); } else { RSBuf += abuf; } j=10; if(o>=p) m=strlen(buf); }*/
        }
        if(o>=p) m=strlen(buf);
        }
    }
    return RSBuf;
}

/**
 * Returns a count y derived from the characters of x.
 *
 * NOTE(review): the loop header and the start of its condition are missing from
 * this copy of the file; the remaining text is reproduced exactly as found.
 */
int TEWordBreak::B_strlen(string x)
{
    int y=0;
    int len=0;
    char x2;
    for(size_t c=0;c0)
    {
        y++;
    }
    }
    return y;
}

/**
 * Returns a flag that is set while characters of x fall in 'A'..'Z' / 'a'..'z'
 * (65..90, 97..122) and cleared, ending the scan, on the first one that does not.
 *
 * NOTE(review): the loop header is missing from this copy of the file; the
 * remaining text is reproduced exactly as found.
 */
bool TEWordBreak::E_strcmp(string x)
{
    bool a=false;
    int y=0;
    for(size_t z=0;z=65 && x[z]<=90) || (x[z]>=97 && x[z]<=122))
    {
        a=true;
    }
    else
    {
        z=x.length()+1;
        a=false;
    }
    }
    return a;
}

/**
 * Looks through Dic[x..y] for an entry whose length equals abuf's; on the visible
 * success path abuf is appended to RSBuf (preceded by wcut when ob exceeds n_ch)
 * and the search loop is ended by setting i past y.
 *
 * NOTE(review): the comparison loop and the start of the success branch are
 * missing from this copy of the file; the remaining text is reproduced exactly
 * as found.
 */
void TEWordBreak::SearchDic(int x, int y)
{
    int x2 = 0;
    for(int i=x;i<=y;i++)
    {
        if(abuf.length() == (Dic[i].length()))
        {
            for(size_t a=0;an_ch)
            {
                RSBuf += wcut+abuf;
                ob=B_strlen(abuf);
            }
            else
            {
                RSBuf += abuf;
            }
            j=10;
            i=y+1;
            if(o>=p) m=p;
        }
    }
    }
    }
}

// NOTE(review): from "for(size_t i=0;i" onward the text below no longer belongs
// to checkchar(): the rest of checkchar() and the beginning of a different
// function (one that returns COMPOSIBLE[mode][state_type]) are missing from this
// copy of the file. Reproduced exactly as found.
int TEWordBreak::checkchar(string x)
{
    int x2[5];
    for(size_t i=0;i 2)
    {
        return false;
    }
    else
    {
        CLASS prev_class = CLASS_TABLE[prev];
        CLASS curr_class = CLASS_TABLE[curr];
        TYPE state_type = STATE_CHECK[prev_class][curr_class];
        return COMPOSIBLE[mode][state_type];
    }
}

// Unicode range of Thai : 0x0E00~0x0E7F (Direct Mapping)
// Thai Character -> Unicode (char+0x0E00)-0xA0 = char+0x0D60
// Unicode -> Thai Character (wchar_t-0x0E00)+0xA0 = wchar_t-0x0D60

/**
 * Returns a pointer to the byte following the character cell that starts at lpsz.
 *
 * The cell is extended for as long as STATE_CHECK, indexed by the classes of the
 * current byte and the byte after it, yields C; the result points one byte past
 * the last byte of the cell.
 *
 * NOTE(review): there is no check for the terminating NUL. Called with lpsz
 * pointing at '\0' this reads the byte after the terminator and returns lpsz + 1;
 * callers should only pass a pointer to a non-NUL byte - confirm at call sites.
 */
const char* CharNextTh(const char* lpsz)
{
    using namespace THAI_CHARNEXT;
    using namespace OUTPUT;
    const BYTE* stream = (const BYTE*)(lpsz);
    while(STATE_CHECK[CLASS_TABLE[stream[0]]][CLASS_TABLE[stream[1]]] == C)
        ++stream;
    return (const char*)(stream+1);
}

/**
 * Returns the start of the character cell that precedes lpszCurrent.
 *
 * Walks forward from lpszStart one cell at a time with CharNextTh() and returns
 * the start of the first cell whose end reaches or passes lpszCurrent. When
 * lpszStart is not below lpszCurrent, lpszStart itself is returned.
 */
const char* CharPrevTh(const char* lpszStart, const char* lpszCurrent)
{
    while(lpszStart < lpszCurrent)
    {
        const char* lpszNext = CharNextTh(lpszStart);
        if(lpszNext < lpszCurrent)
        {
            lpszStart = lpszNext;
        }
        else
        {
            return lpszStart;
        }
    }
    return lpszStart;
}

/////////////////////////////////////////////////////////////////////////////
bool isOnlyEnglishWord(const std::string& str) { bool isOnlyEng = true; for (size_t i=0; i