rtoss

Subversion Repositories:
Compare Path: Rev
With Path: Rev
/GreenPad/kilib/textfile.cpp @ 129  →  /GreenPad/kilib/textfile.cpp @ 130
/GreenPad/kilib/textfile.cpp
@@ -812,10 +812,26 @@
if( siz == u5sum )
return UTF5;
 
//-- chardet and MLang detection
if( app().isNewShell() )
{ // chardet works better when size > 64
if( siz > 80 )
{
cs = chardetAutoDetection( ptr, siz );
if( cs ) return cs;
}
cs = MLangAutoDetection( ptr, siz );
if( cs ) return cs;
}
else
{ // chardet is the only auto detection method
cs = chardetAutoDetection( ptr, siz );
if( cs ) return cs;
}
 
// last resort
//-- Provisional UTF-8 / Japanese EUC check
 
cs = defCs;
 
// Unless the line-break code is LF, or the data is reasonably large,
// the text is unconditionally treated as ANSI-CP.
if( bit8 && (siz>4096 || lb_==1
@@ -823,7 +839,7 @@
{
// UHC and GBK are very easily confused with EUC-JP, so when one of them
// is the default code page, EUC-JP auto-detection is turned off.
if( Jp && ::GetACP()!=UHC && ::GetACP()!=GBK && ::GetACP()!=Big5 )
if( Jp && defCs==SJIS )
{
// Check that there are no byte values that would be invalid as EUC
bool be=true;
@@ -857,6 +873,73 @@
}
}
 
 
//-- Detection result
 
return cs ? cs : defCs;
}
 
int TextFileR::MLangAutoDetection( const uchar* ptr, ulong siz )
{
int cs = 0;
#if !defined(TARGET_VER) || (defined(TARGET_VER) && TARGET_VER>310)
#ifndef NO_MLANG
app().InitModule( App::OLE );
IMultiLanguage2 *lang = NULL;
if( S_OK == ::CoCreateInstance(CLSID_CMultiLanguage, NULL, CLSCTX_ALL, IID_IMultiLanguage2, (LPVOID*)&lang ) )
{
int detectEncCount = 5;
DetectEncodingInfo detectEnc[5];
lang->DetectInputCodepage(MLDETECTCP_DBCS, 0, (char *)(ptr), (INT *)(&siz), detectEnc, &detectEncCount); // 2 ugly C-cast here
 
// MLang fine tunes
if ( detectEncCount > 1 && detectEnc[0].nCodePage == 1252 ) // sometimes it gives multiple results with 1252 in the first
{
if ( detectEncCount == 2 && detectEnc[1].nCodePage == 850 ) // seems to be wrongly detected
{
cs = 0;
}
else
{
cs = detectEnc[detectEncCount-1].nCodePage; // always use last one
}
}
else if ( detectEncCount > 1 && detectEnc[0].nCodePage > 950 ) // non asian codepage in first
{
int highestConfidence = 0;
for(int x=0;x<detectEncCount;x++)
{
if(highestConfidence < detectEnc[x].nConfidence)
{
highestConfidence = detectEnc[x].nConfidence; // use codepage with highest Confidence
cs = detectEnc[x].nCodePage;
}
}
}
else
{
cs = detectEnc[0].nCodePage;
}
 
# ifdef MLANG_DEBUG
TCHAR tmp[10];
::wsprintf(tmp,TEXT("%d"),cs);
::MessageBox(NULL,tmp,TEXT("MLangDetect"),0);
# endif
 
if (cs == 20127) cs = 0; // 20127 == ASCII, 0 = unknown
 
if (lang)
lang->Release();
}
#endif //NO_MLANG
#endif //TARGET_VER
return cs;
}
 
int TextFileR::chardetAutoDetection( const uchar* ptr, ulong siz )
{
int cs = 0;
#ifndef NO_CHARDET
// function calls
int (__cdecl*chardet_create)(chardet_t*) = 0;
@@ -915,67 +998,9 @@
}
# undef STR2CP
#endif //NO_CHARDET
 
#if !defined(TARGET_VER) || (defined(TARGET_VER) && TARGET_VER>310)
#ifndef NO_MLANG
app().InitModule( App::OLE );
IMultiLanguage2 *lang = NULL;
if( S_OK == ::CoCreateInstance(CLSID_CMultiLanguage, NULL, CLSCTX_ALL, IID_IMultiLanguage2, (LPVOID*)&lang ) )
{
int detectEncCount = 5;
DetectEncodingInfo detectEnc[5];
lang->DetectInputCodepage(MLDETECTCP_DBCS, 0, (char *)(ptr), (INT *)(&siz), detectEnc, &detectEncCount); // 2 ugly C-cast here
 
// MLang fine tunes
if ( detectEncCount > 1 && detectEnc[0].nCodePage == 1252 ) // sometimes it gives multiple results with 1252 in the first
{
if ( detectEncCount == 2 && detectEnc[1].nCodePage == 850 ) // seems to be wrongly detected
{
cs = defCs;
}
else
{
cs = detectEnc[detectEncCount-1].nCodePage; // always use last one
}
}
else if ( detectEncCount > 1 && detectEnc[0].nCodePage > 950 ) // non asian codepage in first
{
int highestConfidence = 0;
for(int x=0;x<detectEncCount;x++)
{
if(highestConfidence < detectEnc[x].nConfidence)
{
highestConfidence = detectEnc[x].nConfidence; // use codepage with highest Confidence
cs = detectEnc[x].nCodePage;
}
}
}
else
{
cs = detectEnc[0].nCodePage;
}
 
# ifdef MLANG_DEBUG
TCHAR tmp[10];
::wsprintf(tmp,TEXT("%d"),cs);
::MessageBox(NULL,tmp,TEXT("MLangDetect"),0);
# endif
 
if (cs == 20127 || !cs) cs = defCs; // 20127 == ASCII, 0 = unknown
 
if (lang)
lang->Release();
}
#endif //NO_MLANG
#endif //TARGET_VER
 
//-- Detection result
 
return cs;
}
 
 
 
//=========================================================================
// Common interface for text file output
//=========================================================================