diff options
| -rw-r--r-- | README | 7 | ||||
| -rw-r--r-- | ToDo | 8 | ||||
| -rw-r--r-- | engines/Makefile.am | 4 | ||||
| -rw-r--r-- | engines/librcd.c (renamed from engines/russian.c) | 4 | ||||
| -rw-r--r-- | examples/example2.c | 4 | ||||
| -rw-r--r-- | examples/rcc-example.xml | 4 | ||||
| -rw-r--r-- | examples/rcc.xml | 2 | ||||
| -rw-r--r-- | src/engine.c | 48 | ||||
| -rw-r--r-- | src/engine.h | 1 | ||||
| -rw-r--r-- | src/internal.h | 11 | ||||
| -rw-r--r-- | src/librcc.c | 47 | ||||
| -rw-r--r-- | src/librcc.h | 35 | ||||
| -rw-r--r-- | src/lngconfig.c | 37 | ||||
| -rw-r--r-- | src/lngconfig.h | 2 | ||||
| -rw-r--r-- | src/lngrecode.c | 47 | ||||
| -rw-r--r-- | src/rccconfig.c | 7 | ||||
| -rw-r--r-- | src/rccconfig.h | 1 | ||||
| -rw-r--r-- | src/rcciconv.c | 2 | ||||
| -rw-r--r-- | src/rcclocale.h | 3 | ||||
| -rw-r--r-- | src/recode.c | 358 | ||||
| -rw-r--r-- | ui/rccnames.c | 2 | 
21 files changed, 416 insertions, 218 deletions
| @@ -0,0 +1,7 @@ + +Language Autodetection +---------------------- + + +Translation +----------- @@ -10,6 +10,14 @@      - Revise locking subsystem      - Libtranslate can leave translated message partly in old language. This causes problems        because of recoding from UTF8 to Current language. (With UTF-8 encoding should be Okey). +    - Lating languages. If in the string all characters < 0x7F then we have one of the Latin +    languages? +    - Statistic approach of language detection. +    - LibRCD autolearning using db4 +	+ Charset detection +	+ Language detection (same as charsets, but for UTF8...) +	    * Consider word recognition based on probability +	+ Autolearning is triggered by large enough dictionary words  1.x: diff --git a/engines/Makefile.am b/engines/Makefile.am index 404cc32..678fc8b 100644 --- a/engines/Makefile.am +++ b/engines/Makefile.am @@ -3,8 +3,8 @@ lib_LTLIBRARIES = libwestern.la  libdir = $(pkgdatadir)/engines  if HAVE_RCD -lib_LTLIBRARIES += librussian.la -librussian_la_SOURCES = russian.c +lib_LTLIBRARIES += librcd.la +librussian_la_SOURCES = librcd.c  librussian_la_LDFLAGS = -module -avoid-version -export-symbols-regex "rccGetInfo"  endif diff --git a/engines/russian.c b/engines/librcd.c index 0df145c..c24d244 100644 --- a/engines/russian.c +++ b/engines/librcd.c @@ -9,11 +9,11 @@ static rcc_autocharset_id AutoengineRussian(rcc_engine_context ctx, const char *  }  static rcc_engine russian_engine = { -    "Russian", NULL, NULL, &AutoengineRussian, {"CP1251","KOI8-R","UTF-8","IBM866", NULL} +    "LibRCD", NULL, NULL, &AutoengineRussian, {"CP1251","KOI8-R","UTF-8","IBM866", NULL}  };  static rcc_engine ukrainian_engine = { -    "Russian", NULL, NULL, &AutoengineRussian, {"CP1251","KOI8-U","UTF-8","IBM865", NULL} +    "LibRCD", NULL, NULL, &AutoengineRussian, {"CP1251","KOI8-U","UTF-8","IBM865", NULL}  };  rcc_engine *rccGetInfo(const char *lang) { diff --git a/examples/example2.c b/examples/example2.c index 5ef3efb..2083fcc 100644 --- a/examples/example2.c +++ b/examples/example2.c @@ -15,7 +15,7 @@ int main(int argc, char *argv[]) {      rcc_class classes[] = {  	{ "input", RCC_CLASS_STANDARD, NULL, NULL, "Input Encoding", 0 }, -	{ "output", RCC_CLASS_STANDARD, "LC_CTYPE", NULL, "Output Encoding", 0 }, +	{ "output", RCC_CLASS_TRANSLATE_LOCALE, "LC_CTYPE", NULL, "Output Encoding", 0 },  	{ NULL }      }; @@ -24,7 +24,7 @@ int main(int argc, char *argv[]) {      rccInit();      rccInitDefaultContext(NULL, 0, 0, classes, 0);      rccInitDb4(NULL, "example", 0); -    rccSetOption(NULL, RCC_OPTION_TRANSLATE, RCC_OPTION_TRANSLATE_FULL); +    rccSetOption(NULL, RCC_OPTION_TRANSLATE, RCC_OPTION_TRANSLATE_SKIP_PARRENT);      current_language_id = rccGetCurrentLanguage(NULL);      english_language_id = rccGetLanguageByName(NULL, "en"); diff --git a/examples/rcc-example.xml b/examples/rcc-example.xml index 6bad1f3..8dc068c 100644 --- a/examples/rcc-example.xml +++ b/examples/rcc-example.xml @@ -11,7 +11,7 @@  	    <FullName>Russian</FullName>  	    <FullName locale="ru">Русский</FullName>  	    <Engines> -		<Engine>russian</Engine> +		<Engine>librcd</Engine>  	    </Engines>  	    <Charsets>  		<Charset>UTF-8</Charset> @@ -25,7 +25,7 @@  	<Language name="uk">  	    <FullName>Ukrainian</FullName>  	    <Engines> -		<Engine>russian</Engine> +		<Engine>librcd</Engine>  	    </Engines>  	    <Charsets>  		<Charset>UTF-8</Charset> diff --git a/examples/rcc.xml b/examples/rcc.xml index 50d2ee2..562f38a 100644 --- a/examples/rcc.xml +++ b/examples/rcc.xml @@ -13,6 +13,7 @@  	<Language name="de">  	    <FullName>German</FullName>  	    <Charsets> +		<Charset>ISO8859-1</Charset>  		<Charset>UTF-8</Charset>  	    </Charsets>  	    <Engines> @@ -22,6 +23,7 @@  	<Language name="fr">  	    <FullName>French</FullName>  	    <Charsets> +		<Charset>ISO8859-1</Charset>  		<Charset>UTF-8</Charset>  	    </Charsets>  	    <Engines> diff --git a/src/engine.c b/src/engine.c index 8058faf..f9c2284 100644 --- a/src/engine.c +++ b/src/engine.c @@ -151,3 +151,51 @@ rcc_context rccEngineGetRccContext(rcc_engine_context ctx) {      return ctx->config->ctx;  } + +#define bit(i) (1<<i) + +static int CheckWestern(const unsigned char *buf, int len) { +    long i,j; +    int bytes=0; + +    if (!len) len = strlen(buf); +    for (i=0;i<len;i++) { +	if (bytes>0) { +		    // Western is 0x100-0x17e +	    if ((buf[i]&0xC0)==0x80) bytes--; +	    else return 0; +	} else { +	    if (buf[i]<128) continue; +	     +	    for (j=6;j>=0;j--) +		if ((buf[i]&bit(j))==0) break; + +	    if ((j==0)||(j==6)) return 0; + +	    bytes=6-j; +	    if (bytes==1) { +		// Western Languages (C2-C3) +		if ((buf[i]!=0xC2)&&(buf[i]!=0xC3)) return 0; +	    } else return 0; +	} +    } +    return 1; +} + + +rcc_autocharset_id rccEngineDetectCharset(rcc_engine_context ctx, const char *buf, size_t len) { +    rcc_autocharset_id utf; + +    if (CheckWestern(buf, len)) { +	utf=rccConfigGetAutoCharsetByName(ctx->config, "UTF-8"); +	if (utf != (rcc_autocharset_id)-1) return utf; +	utf=rccConfigGetAutoCharsetByName(ctx->config, "UTF8"); +	if (utf != (rcc_autocharset_id)-1) return utf; +	utf=rccConfigGetAutoCharsetByName(ctx->config, "UTF_8"); +	return utf; +    } +     +    if ((ctx)&&(ctx->func)) return ctx->func(ctx, buf, len); +    return (rcc_autocharset_id)-1; +} + diff --git a/src/engine.h b/src/engine.h index 445e962..96e6db6 100644 --- a/src/engine.h +++ b/src/engine.h @@ -38,5 +38,6 @@ void rccEngineFreeContext(rcc_engine_context engine_ctx);  int rccEngineConfigure(rcc_engine_context ctx);  rcc_charset_id rccAutoengineRussian(rcc_engine_context ctx, const char *buf, int len); +rcc_autocharset_id rccEngineDetectCharset(rcc_engine_context ctx, const char *buf, size_t len);  #endif /* _RCC_ENGINE_H */ diff --git a/src/internal.h b/src/internal.h index d5797fc..089311f 100644 --- a/src/internal.h +++ b/src/internal.h @@ -28,11 +28,20 @@  #include "rcciconv.h"  #include "rccstring.h"  #include "rccmutex.h" +#include "rcclocale.h"  typedef rcc_language_id rcc_language_parrent_list[RCC_MAX_LANGUAGE_PARRENTS]; +struct rcc_language_internal_t { +    rcc_language language; +    rcc_language_id parrents[RCC_MAX_LANGUAGE_PARRENTS + 1]; +    unsigned char latin; +}; +typedef struct rcc_language_internal_t rcc_language_internal; +typedef rcc_language_internal *rcc_language_internal_ptr; +  struct rcc_context_t {      char locale_variable[RCC_MAX_VARIABLE_CHARS+1]; @@ -43,8 +52,8 @@ struct rcc_context_t {      unsigned int max_languages;      unsigned int n_languages; +    rcc_language_internal *ilang;      rcc_language_ptr *languages; -    rcc_language_parrent_list *language_parrents;      rcc_language_config configs;      unsigned int max_classes; diff --git a/src/librcc.c b/src/librcc.c index 208fcb3..c27c47d 100644 --- a/src/librcc.c +++ b/src/librcc.c @@ -140,7 +140,7 @@ rcc_context rccCreateContext(const char *locale_variable, unsigned int max_langu      rcc_context ctx;      rcc_language_ptr *languages; -    rcc_language_parrent_list *language_parrents; +    rcc_language_internal *ilang;      rcc_class_ptr *classes;      rcc_language_config configs;      rcc_iconv *from; @@ -167,18 +167,18 @@ rcc_context rccCreateContext(const char *locale_variable, unsigned int max_langu      languages = (rcc_language_ptr*)malloc((max_languages+1)*sizeof(rcc_language_ptr));      classes = (rcc_class_ptr*)malloc((max_classes+1)*sizeof(rcc_class_ptr));      from = (rcc_iconv*)malloc((max_classes)*sizeof(rcc_iconv)); -    language_parrents = (rcc_language_parrent_list*)malloc((max_languages+1)*sizeof(rcc_language_parrent_list)); +    ilang = (rcc_language_internal*)malloc((max_languages+1)*sizeof(rcc_language_internal));      mutex = rccMutexCreate();      configs = (rcc_language_config)malloc((max_languages)*sizeof(struct rcc_language_config_t)); -    if ((!ctx)||(!languages)||(!classes)||(!mutex)||(!language_parrents)) { +    if ((!ctx)||(!languages)||(!classes)||(!mutex)||(!from)||(!ilang)||(!mutex)) {  	if (mutex) rccMutexFree(mutex);  	if (from) free(from);  	if (configs) free(configs);  	if (classes) free(classes);  	if (languages) free(languages); -	if (language_parrents) free(language_parrents); +	if (ilang) free(ilang);  	if (ctx) free(ctx);  	return NULL;      } @@ -193,8 +193,7 @@ rcc_context rccCreateContext(const char *locale_variable, unsigned int max_langu      for (i=0;rcc_default_aliases[i].alias;i++)  	rccRegisterLanguageAlias(ctx, rcc_default_aliases + i); -    ctx->language_parrents = language_parrents; -    for (i=0;i<max_languages;i++) language_parrents[i][0] = (rcc_language_id)-1; +    ctx->ilang = ilang;      ctx->languages = languages;      ctx->max_languages = max_languages; @@ -306,7 +305,7 @@ void rccFreeContext(rcc_context ctx) {  	    free(ctx->configs);  	}  	if (ctx->classes) free(ctx->classes); -	if (ctx->language_parrents) free(ctx->language_parrents); +	if (ctx->ilang) free(ctx->ilang);  	if (ctx->languages) free(ctx->languages);  	if (ctx->mutex) rccMutexFree(ctx->mutex);  	free(ctx); @@ -365,6 +364,7 @@ int rccUnlockConfiguration(rcc_context ctx, unsigned int lock_code) {  }  rcc_language_id rccRegisterLanguage(rcc_context ctx, rcc_language *language) { +    unsigned int i;      if (!ctx) {  	if (rcc_default_ctx) ctx = rcc_default_ctx;  	else return (rcc_language_id)-1; @@ -373,7 +373,21 @@ rcc_language_id rccRegisterLanguage(rcc_context ctx, rcc_language *language) {      if (ctx->configuration_lock) return (rcc_language_id)-1;      if (ctx->n_languages == ctx->max_languages) return (rcc_language_id)-1; -    ctx->languages[ctx->n_languages++] = language; +     +    memcpy(ctx->ilang + ctx->n_languages, language, sizeof(rcc_language)); +    ctx->ilang[ctx->n_languages].parrents[0] = (rcc_language_id)-1; +    ctx->ilang[ctx->n_languages].latin = 0; +     +    for (i=0;language->charsets[i];i++) +	if ((strstr(language->charsets[i],"8859"))&&(language->charsets[i][strlen(language->charsets[i])-1]=='1')) { +	    ctx->ilang[ctx->n_languages].latin = 1; +	    break; +	} + +    if ((i==1)&&(!language->charsets[1])&&(rccIsUTF8(language->charsets[0]))) +	    ctx->ilang[ctx->n_languages].latin = 1; + +    ctx->languages[ctx->n_languages++] = (rcc_language_ptr)(ctx->ilang + ctx->n_languages);      ctx->languages[ctx->n_languages] = NULL;      if (!ctx->current_language) @@ -388,6 +402,10 @@ rcc_charset_id rccLanguageRegisterCharset(rcc_language *language, rcc_charset ch      if ((!language)||(!charset)) return (rcc_charset_id)-1;      for (i=0;language->charsets[i];i++);      if (i>=RCC_MAX_CHARSETS) return (rcc_charset_id)-1; + +    if ((strstr(charset,"8859"))&&(charset[strlen(charset)-1]=='1'))  +	((rcc_language_internal*)language)->latin = 1; +          language->charsets[i++] = charset;      language->charsets[i] = NULL;      return i-1; @@ -443,7 +461,7 @@ rcc_relation_id rccRegisterLanguageRelation(rcc_context ctx, rcc_language_relati      if (language_id == (rcc_language_id)-1) return (rcc_relation_id)-1; -    list = ctx->language_parrents[language_id]; +    list = ((rcc_language_internal*)ctx->languages[language_id])->parrents;      language_id = rccGetLanguageByName(ctx, parrent);      if (language_id == (rcc_language_id)-1) return (rcc_relation_id)0; @@ -478,6 +496,8 @@ rcc_class_id rccRegisterClass(rcc_context ctx, rcc_class *cl) {  rcc_class_type rccGetClassType(rcc_context ctx, rcc_class_id class_id) { +    rcc_class_type clt; +          if (!ctx) {  	if (rcc_default_ctx) ctx = rcc_default_ctx;  	else return RCC_CLASS_INVALID; @@ -485,7 +505,14 @@ rcc_class_type rccGetClassType(rcc_context ctx, rcc_class_id class_id) {      if ((class_id<0)||(class_id>=ctx->n_classes)) return RCC_CLASS_INVALID; -    return ctx->classes[class_id]->class_type; +    /*DS: temporary solution */ +     +    clt = ctx->classes[class_id]->class_type; +     +    if ((!strcasecmp(ctx->classes[class_id]->name, "out"))&&(clt == RCC_CLASS_STANDARD)) +	clt = RCC_CLASS_TRANSLATE_LOCALE; +     +    return clt;  } diff --git a/src/librcc.h b/src/librcc.h index 63a6f80..0529682 100644 --- a/src/librcc.h +++ b/src/librcc.h @@ -13,13 +13,6 @@  #define RCC_MAX_ALIASES 64  #define RCC_MAX_CLASSES 16 -#define RCC_MAX_ERRORS 3 - -#define RCC_MAX_CHARSET_CHARS 16 -#define RCC_MAX_LANGUAGE_CHARS 16 -#define RCC_MAX_VARIABLE_CHARS 16 - -  /* ID's */  /**    * Language ID.  @@ -301,7 +294,10 @@ typedef enum rcc_class_type_t {      RCC_CLASS_INVALID = 0,	/**< Invalid value */      RCC_CLASS_STANDARD,		/**< Standard class */      RCC_CLASS_KNOWN,		/**< Class encoding is known and no autodetection should be performed */ -    RCC_CLASS_FS		/**< Class strings are representing file names */ +    RCC_CLASS_FS,		/**< Class strings are representing file names */ +    RCC_CLASS_TRANSLATE_LOCALE,	/**< It is permited to translate class strings to current Locale Language in rccTo */ +    RCC_CLASS_TRANSLATE_CURRENT,/**< It is permited to translate class strings to Current Language in rccTo */ +    RCC_CLASS_TRANSLATE_FROM,	/**< It is permited to translate class strings to Current Language in rccFrom */  } rcc_class_type;  /** @@ -390,22 +386,13 @@ typedef int rcc_option_value;    */  #define RCC_OPTION_LEARNING_FLAG_LEARN 2 -/** -  * Switch translation off. -  */ -#define RCC_OPTION_TRANSLATE_OFF 0 -/** -  * Translate data to english language (Current language don't matter). -  */ -#define RCC_OPTION_TRANSLATE_TO_ENGLISH 1 -/** -  * Skip translation of the english text. -  */ -#define RCC_OPTION_TRANSLATE_SKIP_ENGLISH 2 -/** -  * Translate whole data to the current language. -  */ -#define RCC_OPTION_TRANSLATE_FULL 3 +typedef enum rcc_option_translate_t { +    RCC_OPTION_TRANSLATE_OFF = 0,  	/**< Switch translation off. */ +    RCC_OPTION_TRANSLATE_TO_ENGLISH, 	/**< Translate data to english language (Current language don't matter). */ +    RCC_OPTION_TRANSLATE_SKIP_RELATED, 	/**< Skip translation of the text's between related languages. */ +    RCC_OPTION_TRANSLATE_SKIP_PARRENT, 	/**< Skip translation of the text's from parrent languages (from english). */ +    RCC_OPTION_TRANSLATE_FULL 		/**< Translate whole data to the current language */ +} rcc_option_translate;  /**    * List of options available diff --git a/src/lngconfig.c b/src/lngconfig.c index f9d1d6d..7e5a428 100644 --- a/src/lngconfig.c +++ b/src/lngconfig.c @@ -353,7 +353,7 @@ rcc_speller rccConfigGetSpeller(rcc_language_config config) {  	if (config->speller) language_id = rccConfigGetLanguage(config);  	else language_id = (rcc_language_id)-1; -	if (language_id != (rcc_language_id)-1) parrents = config->ctx->language_parrents[language_id]; +	if (language_id != (rcc_language_id)-1) parrents = ((rcc_language_internal*)config->language)->parrents;  	else parrents = NULL;  	if (parrents) { @@ -508,10 +508,12 @@ rcc_charset_id rccConfigGetCurrentCharset(rcc_language_config config, rcc_class_      if (config->default_charset[class_id]) return config->default_charset[class_id]; -    charset_id = rccConfigGetLocaleCharset(config, defvalue); -    if ((charset_id != 0)&&(charset_id != (rcc_charset_id)-1)) { -	config->default_charset[class_id] = charset_id; -	return charset_id; +    if (cl->defvalue) { +	charset_id = rccConfigGetLocaleCharset(config, defvalue); +	if ((charset_id != 0)&&(charset_id != (rcc_charset_id)-1)) { +	    config->default_charset[class_id] = charset_id; +	    return charset_id; +	}      }      if (cl->defvalue) { @@ -537,7 +539,7 @@ rcc_charset_id rccConfigGetCurrentCharset(rcc_language_config config, rcc_class_  	    }      }	 -    charset_id = rccConfigGetLocaleUnicodeCharset(config, defvalue); +    charset_id = rccConfigGetLocaleCharset(config, defvalue);      if ((charset_id != 0)&&(charset_id != (rcc_charset_id)-1)) {  	config->default_charset[class_id] = charset_id;  	return charset_id; @@ -634,6 +636,7 @@ int rccConfigSetCharsetByName(rcc_language_config config, rcc_class_id class_id,  rcc_charset_id rccConfigGetLocaleCharset(rcc_language_config config, const char *locale_variable) {      const char *lv;      rcc_language_id language_id; +    char lang[RCC_MAX_CHARSET_CHARS+1];      char stmp[RCC_MAX_CHARSET_CHARS+1];      if ((!config)||(!config->language)) return (rcc_charset_id)-1; @@ -642,29 +645,17 @@ rcc_charset_id rccConfigGetLocaleCharset(rcc_language_config config, const char      language_id = rccGetLanguageByName(config->ctx, config->language->sn);      if (language_id != (rcc_language_id)-1) { -	if (!rccLocaleGetLanguage(stmp, lv, RCC_MAX_CHARSET_CHARS)) { -	    if (!strcmp(config->language->sn, stmp)) { -		if (!rccLocaleGetCharset(stmp, lv, RCC_MAX_CHARSET_CHARS)) -		    return rccConfigGetCharsetByName(config, stmp); -	    } +	if (!rccLocaleGetCharset(stmp, lv, RCC_MAX_CHARSET_CHARS)) { +	    if (rccIsUnicode(stmp))  +		return rccConfigGetCharsetByName(config, stmp); +	    if ((!rccLocaleGetLanguage(lang, lv, RCC_MAX_CHARSET_CHARS))&&(!strcmp(config->language->sn, lang))) +		return rccConfigGetCharsetByName(config, stmp);  	}      }       return (rcc_charset_id)-1;  } -rcc_charset_id rccConfigGetLocaleUnicodeCharset(rcc_language_config config, const char *locale_variable) { -    char stmp[RCC_MAX_CHARSET_CHARS+1]; - -    if ((!config)||(!config->language)) return (rcc_charset_id)-1; - -    if (!rccLocaleGetCharset(stmp, locale_variable?locale_variable:config->ctx->locale_variable, RCC_MAX_CHARSET_CHARS)) { -	if (rccIsUTF8(stmp)) return rccConfigGetCharsetByName(config, stmp); -    }  -	 -    return (rcc_charset_id)-1; -} -  int rccConfigConfigure(rcc_language_config config) {      int err;      rcc_context ctx;     diff --git a/src/lngconfig.h b/src/lngconfig.h index edfc782..b9e9a6b 100644 --- a/src/lngconfig.h +++ b/src/lngconfig.h @@ -47,8 +47,6 @@ void rccConfigClear(rcc_language_config config);  int rccConfigConfigure(rcc_language_config config); -rcc_charset_id rccConfigGetLocaleUnicodeCharset(rcc_language_config config, const char *locale_variable); -  const char *rccConfigGetAutoCharsetName(rcc_language_config config, rcc_autocharset_id charset_id);  rcc_autocharset_id rccConfigGetAutoCharsetByName(rcc_language_config config, const char *name); diff --git a/src/lngrecode.c b/src/lngrecode.c index aef8e24..4b4f298 100644 --- a/src/lngrecode.c +++ b/src/lngrecode.c @@ -7,8 +7,38 @@  #include "internal.h"  #include "fs.h" +static rcc_autocharset_id rccConfigDetectCharsetInternal(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len) { +    int err; +    rcc_context ctx; +    rcc_class_type class_type; +    rcc_autocharset_id autocharset_id; +     +    if ((!buf)||(!config)) return (rcc_autocharset_id)-1; +     +    ctx = config->ctx; + +    err = rccConfigConfigure(config); +    if (err) return (rcc_autocharset_id)-1; +     +    class_type = rccGetClassType(ctx, class_id); +    if ((class_type != RCC_CLASS_FS)||((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_TITLES)))) { +	rccMutexLock(config->mutex); +	autocharset_id = rccEngineDetectCharset(&config->engine_ctx, buf, len); +	rccMutexUnLock(config->mutex); +	return autocharset_id; +    } +     +    return (rcc_autocharset_id)-1; +} + + +rcc_autocharset_id rccConfigDetectCharset(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len) { +    return rccConfigDetectCharsetInternal(config, class_id, buf, len); +} +  rcc_string rccConfigSizedFrom(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len) {      rcc_context ctx; +    rcc_class_type class_type;      rcc_string result;      rcc_option_value usedb4;      rcc_autocharset_id charset_id; @@ -30,7 +60,10 @@ rcc_string rccConfigSizedFrom(rcc_language_config config, rcc_class_id class_id,  	}      } -    charset_id = rccConfigDetectCharset(config, class_id, buf, len); +    class_type = rccGetClassType(ctx, class_id); + +    if (class_type == RCC_CLASS_KNOWN) charset_id = (rcc_autocharset_id)-1; +    else charset_id = rccConfigDetectCharset(config, class_id, buf, len);      if (charset_id != (rcc_autocharset_id)-1)  	charset = rccConfigGetAutoCharsetName(config, charset_id);      else @@ -71,6 +104,7 @@ char *rccConfigSizedTo(rcc_language_config config, rcc_class_id class_id, rcc_co  char *rccConfigSizedRecode(rcc_language_config config, rcc_class_id from, rcc_class_id to, const char *buf, size_t len, size_t *rlen) {      rcc_context ctx; +    rcc_class_type class_type;      rcc_string result;      rcc_option_value usedb4;      rcc_autocharset_id charset_id; @@ -97,7 +131,10 @@ char *rccConfigSizedRecode(rcc_language_config config, rcc_class_id from, rcc_cl  	}      } -    charset_id = rccConfigDetectCharset(config, from, buf, len); +    class_type = rccGetClassType(ctx, from); + +    if (class_type == RCC_CLASS_KNOWN) charset_id = (rcc_autocharset_id)-1; +    else charset_id = rccConfigDetectCharset(config, from, buf, len);      if (charset_id != (rcc_autocharset_id)-1)  	fromcharset = rccConfigGetAutoCharsetName(config, charset_id);      else @@ -115,6 +152,7 @@ char *rccConfigSizedRecode(rcc_language_config config, rcc_class_id from, rcc_cl  char *rccConfigSizedRecodeToCharset(rcc_language_config config, rcc_class_id class_id, const char *charset, rcc_const_string buf, size_t len, size_t *rlen) {      rcc_context ctx; +    rcc_class_type class_type;      rcc_string result;      rcc_option_value usedb4;      rcc_autocharset_id charset_id; @@ -141,7 +179,10 @@ char *rccConfigSizedRecodeToCharset(rcc_language_config config, rcc_class_id cla  	}      } -    charset_id = rccConfigDetectCharset(config, class_id, buf, len); +    class_type = rccGetClassType(ctx, class_id); + +    if (class_type == RCC_CLASS_KNOWN) charset_id = (rcc_autocharset_id)-1; +    else charset_id = rccConfigDetectCharset(config, class_id, buf, len);      if (charset_id != (rcc_autocharset_id)-1)  	ocharset = rccConfigGetAutoCharsetName(config, charset_id);      else diff --git a/src/rccconfig.c b/src/rccconfig.c index a54b778..5fecb6b 100644 --- a/src/rccconfig.c +++ b/src/rccconfig.c @@ -127,7 +127,7 @@ rcc_language rcc_default_languages_embeded[RCC_MAX_LANGUAGES + 1] = {  rcc_option_value_name rcc_sn_boolean[] = { "OFF", "ON", NULL };  rcc_option_value_name rcc_sn_learning[] = { "OFF", "ON", "RELEARN", "LEARN", NULL };  rcc_option_value_name rcc_sn_clo[] = { "ALL", "CONFIGURED_AND_AUTO", "CONFIGURED_ONLY", NULL }; -rcc_option_value_name rcc_sn_translate[] = { "OFF", "TO_ENGLISH", "SKIP_ENGLISH", "FULL", NULL }; +rcc_option_value_name rcc_sn_translate[] = { "OFF", "TO_ENGLISH", "SKIP_RELATED", "SKIP_PARRENT", "FULL", NULL };  rcc_option_description rcc_option_descriptions[RCC_MAX_OPTIONS+1];  rcc_option_description rcc_option_descriptions_embeded[RCC_MAX_OPTIONS+1] = { @@ -197,6 +197,11 @@ int rccIsUTF8(const char *name) {      return 1;  } +int rccIsUnicode(const char *name) { +    if ((!name)||(strncasecmp(name, "UTF",3)&&strncasecmp(name, "UCS",3))) return 0; +    return 1; +} +  unsigned int rccDefaultDropLanguageRelations(const char *lang) {      unsigned long i, j;      for (i=0,j=0;rcc_default_relations[i].lang;i++) { diff --git a/src/rccconfig.h b/src/rccconfig.h index fe7b912..7361910 100644 --- a/src/rccconfig.h +++ b/src/rccconfig.h @@ -38,5 +38,6 @@ rcc_language_id rccDefaultGetLanguageByName(const char *name);  unsigned int rccDefaultDropLanguageRelations(const char *lang);  int rccIsUTF8(const char *name); +int rccIsUnicode(const char *name);  #endif /* _RCC_CONFIG_H */ diff --git a/src/rcciconv.c b/src/rcciconv.c index 93278a7..b518cd7 100644 --- a/src/rcciconv.c +++ b/src/rcciconv.c @@ -7,6 +7,8 @@  #include "internal.h"  #include "rcciconv.h" +#define RCC_MAX_ERRORS 3 +  static void rccIConvCopySymbol(char **in_buf, int *in_left, char **out_buf, int *out_left) {      if ((out_left>0)&&(in_left>0)) {  /*	(**out_buf)=(**in_buf); diff --git a/src/rcclocale.h b/src/rcclocale.h index dc2c4e7..b6832ed 100644 --- a/src/rcclocale.h +++ b/src/rcclocale.h @@ -1,5 +1,8 @@  #ifndef _RCC_LOCALE_H  #define _RCC_LOCALE_H +#define RCC_MAX_CHARSET_CHARS 16 +#define RCC_MAX_LANGUAGE_CHARS 16 +#define RCC_MAX_VARIABLE_CHARS 16  #endif /* _RCC_LOCALE_H */ diff --git a/src/recode.c b/src/recode.c index 27dff92..ee9ac53 100644 --- a/src/recode.c +++ b/src/recode.c @@ -21,10 +21,17 @@  #define RCC_ACCEPTABLE_PROBABILITY	0  #define RCC_ACCEPTABLE_LENGTH		3 -static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len, rcc_string *retstring) { +typedef enum rcc_detect_language_confidence_t { +    RCC_DETECT_LANGUAGE_CONFIDENCE_UNSURE = 0, +    RCC_DETECT_LANGUAGE_CONFIDENCE_ALMOST, +    RCC_DETECT_LANGUAGE_CONFIDENCE_SURE, +    RCC_DETECT_LANGUAGE_CONFIDENCE_CACHED +} rcc_detect_language_confidence; + +static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len, rcc_string *retstring, rcc_detect_language_confidence *confidence) {      rcc_speller speller = NULL; -    unsigned long i, nlanguages; -    rcc_language_config config, config0 = NULL; +    long i, nlanguages; +    rcc_language_config config, config0 = NULL, config1 = NULL;      rcc_string recoded;      unsigned char *utf8;      size_t j, mode; @@ -48,6 +55,9 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c      unsigned long k;      rcc_language_id *parrents;      size_t chars = 0; +    char llang[RCC_MAX_LANGUAGE_CHARS]; +    rcc_language_id locale_lang; +    unsigned char defstep = 0;      unsigned long accepted_nonenglish_langs = 0; @@ -61,6 +71,7 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c  	        english_lang = rccStringGetLanguage(recoded);  	        if (retstring) *retstring = recoded;  		else free(recoded); +		if (confidence) *confidence = RCC_DETECT_LANGUAGE_CONFIDENCE_CACHED;  	        return english_lang;  	    }  	} @@ -72,17 +83,33 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c      english_lang = rccGetLanguageByName(ctx, rcc_english_language_sn); -    for (i=0;i<nlanguages;i++) { -	if (i) config = rccGetUsableConfig(ctx, (rcc_language_id)i); -	else config = rccGetCurrentConfig(ctx); -	if (!config) continue; -	 +    for (i=0;i<nlanguages;(defstep>1)?i++:i) {  	if (i) { -	    if (config==config0) continue; -	} else config0=config; +	    config = rccGetUsableConfig(ctx, (rcc_language_id)i); +	    if ((!config)||(config==config0)||(config==config1)) continue; +	} else { +	    switch (defstep) { +		case 0: +		    config = rccGetCurrentConfig(ctx); +		    config0 = config; +		break; +		case 1: +		    if (!rccLocaleGetLanguage(llang ,ctx->locale_variable, RCC_MAX_LANGUAGE_CHARS)) { +			locale_lang = rccGetLanguageByName(ctx, llang); +			config = rccGetConfig(ctx, locale_lang); +		    } else config = NULL; +		    config1 = config; +		break; +		default: +		    config = NULL; +	    } +	    defstep++; +	    if ((!config)||(config0==config1)) continue; +	} +	  	if (bestfixlang != (rcc_language_id)-1) { -	    parrents = ctx->language_parrents[i]; +	    parrents = ((rcc_language_internal*)config->language)->parrents;  	    for (k = 0;parrents[k] != (rcc_language_id)-1;k++)  		if (parrents[k] == bestfixlang) break; @@ -192,6 +219,8 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c  	if (english_string) free(english_string);  	if (retstring) *retstring = best_string;  	else if (best_string) free(best_string); + +	if (confidence) *confidence = RCC_DETECT_LANGUAGE_CONFIDENCE_SURE;          return bestlang;      }  @@ -199,6 +228,8 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c  	if (best_string) free(best_string);  	if (retstring) *retstring = english_string;  	else if (english_string) free(english_string); +	 +	if (confidence) *confidence = RCC_DETECT_LANGUAGE_CONFIDENCE_SURE;          return english_lang;      } @@ -206,6 +237,8 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c  	if (english_string) free(english_string);  	if (retstring) *retstring = best_string;  	else if (best_string) free(best_string); + +	if (confidence) *confidence = RCC_DETECT_LANGUAGE_CONFIDENCE_ALMOST;          return bestlang;      }  @@ -213,6 +246,8 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c  	if (best_string) free(best_string);	  	if (retstring) *retstring = english_string;  	else if (english_string) free(english_string); + +	if (confidence) *confidence = RCC_DETECT_LANGUAGE_CONFIDENCE_ALMOST;          return english_lang;      }  @@ -220,89 +255,152 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c  	if (english_string) free(english_string);  	if (retstring) *retstring = best_string;  	else if (best_string) free(best_string); + +	if (confidence) *confidence = RCC_DETECT_LANGUAGE_CONFIDENCE_UNSURE;          return bestlang;      } else if (best_string) free(best_string);      if ((english_res > RCC_ACCEPTABLE_PROBABILITY)&&(english_longest > RCC_ACCEPTABLE_LENGTH)) {  	if (retstring) *retstring = english_string;  	else if (english_string) free(english_string); + +	if (confidence) *confidence = RCC_DETECT_LANGUAGE_CONFIDENCE_UNSURE;          return english_lang;      } else if (english_string) free(english_string);      return (rcc_language_id)-1;  } -  rcc_language_id rccDetectLanguage(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len) {      if (!ctx) {  	if (rcc_default_ctx) ctx = rcc_default_ctx;  	else return -1;      } -    return rccDetectLanguageInternal(ctx, class_id, buf, len, NULL); +    return rccDetectLanguageInternal(ctx, class_id, buf, len, NULL, NULL);  } -static rcc_autocharset_id rccConfigDetectCharsetInternal(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len) { -    int err; -    rcc_context ctx; -    rcc_class_type class_type; -    rcc_engine_ptr engine; -    rcc_autocharset_id autocharset_id; -     -    if ((!buf)||(!config)) return (rcc_autocharset_id)-1; +static int rccIsParrentLanguage(rcc_language_config config, rcc_language_id parrent) { +    unsigned int i; +    rcc_language_id language; +    rcc_language_id *list; + +    language = rccConfigGetLanguage(config); +    if (parrent == language) return 1; -    ctx = config->ctx; +    list = ((rcc_language_internal*)config->language)->parrents; +    for (i=0;list[i] != (rcc_language_id)-1;i++) +        if  (list[i] == parrent) return 1; -    err = rccConfigConfigure(config); -    if (err) return (rcc_autocharset_id)-1; +    return 0; +} + + +static int rccAreRelatedLanguages(rcc_language_config c1, rcc_language_config c2) { +    rcc_language_id l1, l2; + +    l1 = rccConfigGetLanguage(c1); +    l2 = rccConfigGetLanguage(c2); -    class_type = rccGetClassType(ctx, class_id); -    if ((class_type != RCC_CLASS_FS)||((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_TITLES)))) { -	rccMutexLock(config->mutex); -	engine = rccConfigGetCurrentEnginePointer(config); -	if ((engine)&&(engine->func)) autocharset_id = engine->func(&config->engine_ctx, buf, len); -	else autocharset_id = (rcc_autocharset_id)-1; -	rccMutexUnLock(config->mutex); -	return autocharset_id; -    } +    if (rccIsParrentLanguage(c1, l2)) return 1; +    if (rccIsParrentLanguage(c2, l1)) return 1; -    return (rcc_autocharset_id)-1; +    return 0;  } -rcc_autocharset_id rccConfigDetectCharset(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len) { -    return rccConfigDetectCharsetInternal(config, class_id, buf, len); -} +static char *rccRecodeTranslate(rcc_language_config *config, rcc_class_id class_id, const char *utfstring) { +    rcc_context ctx; +    rcc_language_config curconfig; +     +    rcc_option_value translate; +    rcc_class_type ctype; +    rcc_language_id language_id, english_language_id, current_language_id; -static int rccAreLanguagesRelated(rcc_context ctx, rcc_language_id l1, rcc_language_id l2, rcc_language_id skip) { -    unsigned int i; -    rcc_language_id *list; +    char llang[RCC_MAX_LANGUAGE_CHARS]; -    if ((l1 == skip)||(l2 == skip)) return 0; +    rcc_translate trans, entrans; -    if (l1 == l2) return 1; +    char *translated; + +    ctx = (*config)->ctx; + +    translate = rccGetOption(ctx, RCC_OPTION_TRANSLATE); +    if (translate == RCC_OPTION_TRANSLATE_OFF) return NULL; + +    ctype = rccGetClassType(ctx, class_id); +    if ((ctype != RCC_CLASS_TRANSLATE_LOCALE)&&(ctype != RCC_CLASS_TRANSLATE_CURRENT)&&(ctype != RCC_CLASS_TRANSLATE_FROM)) return NULL; + +    language_id = rccConfigGetLanguage(*config);	 +	 +    english_language_id = rccGetLanguageByName(ctx, rcc_english_language_sn); -    list = ctx->language_parrents[l1]; -    for (i=0;list[i] != (rcc_language_id)-1;i++) -        if  (list[i] == l2) return 1; +    if (translate == RCC_OPTION_TRANSLATE_TO_ENGLISH) { +	current_language_id = english_language_id ; +    } else { +	if (ctype == RCC_CLASS_TRANSLATE_LOCALE) { +	    if (!rccLocaleGetLanguage(llang ,ctx->locale_variable, RCC_MAX_LANGUAGE_CHARS)) +		current_language_id = rccGetLanguageByName(ctx, llang); +	    else  +		current_language_id = (rcc_language_id)-1; +	} else  +	    current_language_id = rccGetCurrentLanguage(ctx); +    } +	 +    if (current_language_id == (rcc_language_id)-1) return NULL; +    if (language_id == current_language_id) return NULL; -    list = ctx->language_parrents[l2]; -    for (i=0;list[i] != (rcc_language_id)-1;i++) -        if  (list[i] == l1) return 1; +    curconfig = rccGetConfig(ctx, current_language_id); +    if (!curconfig) return NULL; -    return 0; +    if (rccConfigConfigure(curconfig)) return NULL; +     +    if (translate == RCC_OPTION_TRANSLATE_SKIP_RELATED) { +	if (rccAreRelatedLanguages(curconfig, *config)) return NULL; +    } +     +    if (translate == RCC_OPTION_TRANSLATE_SKIP_PARRENT) { +	if (rccIsParrentLanguage(curconfig, language_id)) return NULL; +    } + +    trans = rccConfigGetTranslator(*config, current_language_id); +    if (trans) { +        translated = rccTranslate(trans, utfstring); +        if (translated) { +            if ((!((rcc_language_internal*)curconfig->language)->latin)&&(rccIsASCII(translated))) { +	        free(translated); +	        translated = NULL; +	    } +	} +    } else translated = NULL; +     +    if ((!translated)&&(current_language_id != english_language_id)&&(!rccAreRelatedLanguages(*config, curconfig))) { +	curconfig = rccGetConfig(ctx, english_language_id); +	if (!curconfig) return NULL; +	if (rccConfigConfigure(curconfig)) return NULL; +	 +	entrans = rccConfigGetEnglishTranslator(*config); +	if (entrans) translated = rccTranslate(entrans, utfstring); +    } + +    if (translated) *config = curconfig; +    return translated;  }  rcc_string rccSizedFrom(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len) {      int err;      size_t ret; +    rcc_language_config config;      rcc_language_id language_id, detected_language_id;      rcc_autocharset_id charset_id;      rcc_iconv icnv = NULL;      rcc_string result; +    rcc_class_type class_type;      rcc_option_value usedb4;      const char *charset; +    char *translate = NULL; +    rcc_detect_language_confidence confidence;      if (!ctx) {  	if (rcc_default_ctx) ctx = rcc_default_ctx; @@ -318,29 +416,38 @@ rcc_string rccSizedFrom(rcc_context ctx, rcc_class_id class_id, const char *buf,      if (language_id == (rcc_language_id)-1) return NULL;      if (!strcasecmp(ctx->languages[language_id]->sn, rcc_disabled_language_sn)) return NULL; - +    class_type = rccGetClassType(ctx, class_id);      usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE); -/* -    if (usedb4&RCC_OPTION_LEARNING_FLAG_USE) { -	result = rccDb4GetKey(ctx->db4ctx, buf, len); -	if (result) { -	     if (rccStringFixID(result, ctx)) free(result); -	     else return result; -	} -    } -     -    if (rccGetOption(ctx, RCC_OPTION_AUTODETECT_LANGUAGE)) { -	detected_language_id = rccDetectLanguageInternal(ctx, class_id, buf, len); -	if (detected_language_id != (rcc_language_id)-1) -	    language_id = detected_language_id; -    } -*/ -    detected_language_id = rccDetectLanguageInternal(ctx, class_id, buf, len, &result); +    detected_language_id = rccDetectLanguageInternal(ctx, class_id, buf, len, &result, &confidence);      if (detected_language_id != (rcc_language_id)-1) {  #ifdef RCC_DEBUG_LANGDETECT -	printf("Language %i(%s): %s\n", rccStringGetLanguage(result), rccStringGetLanguage(result)?rccGetLanguageName(ctx, rccStringGetLanguage(result)):"", result); +	    printf("Language %i(%s): %s\n", rccStringGetLanguage(result), rccStringGetLanguage(result)?rccGetLanguageName(ctx, rccStringGetLanguage(result)):"", result);  #endif /* RCC_DEBUG_LANGDETECT */ + +	if ((result)&&(rccGetOption(ctx, RCC_OPTION_TRANSLATE))&&(class_type == RCC_CLASS_TRANSLATE_FROM)) { +	    rccMutexLock(ctx->mutex); +	    config = rccGetCurrentConfig(ctx); +	    translate = rccRecodeTranslate(&config, class_id, rccStringGetString(result)); +	    rccMutexUnLock(ctx->mutex); +	     +	    if (translate) { +		language_id = rccConfigGetLanguage(config); +		free(result); +		result = rccCreateString(language_id, translate, 0); +	    } +	} + + +	if ((result)&& +	    (usedb4&RCC_OPTION_LEARNING_FLAG_LEARN)&& +	    (confidence!=RCC_DETECT_LANGUAGE_CONFIDENCE_CACHED)&& +	    ((language_id==detected_language_id)||(confidence!=RCC_DETECT_LANGUAGE_CONFIDENCE_UNSURE))&& +	    (!rccStringSetLang(result, ctx->languages[language_id]->sn))) { + +	    rccDb4SetKey(ctx->db4ctx, buf, len, result); +	} +  	return result;      } @@ -349,7 +456,8 @@ rcc_string rccSizedFrom(rcc_context ctx, rcc_class_id class_id, const char *buf,      if (err) return NULL;      rccMutexLock(ctx->mutex); -    charset_id = rccDetectCharset(ctx, class_id, buf, len); +    if (class_type == RCC_CLASS_KNOWN) charset_id = (rcc_autocharset_id)-1; +    else charset_id = rccDetectCharset(ctx, class_id, buf, len);      if (charset_id != (rcc_autocharset_id)-1) {  	icnv = ctx->iconv_auto[charset_id];  	if (rccGetOption(ctx, RCC_OPTION_AUTOENGINE_SET_CURRENT)) { @@ -362,10 +470,24 @@ rcc_string rccSizedFrom(rcc_context ctx, rcc_class_id class_id, const char *buf,      if (icnv) {  	ret = rccIConvInternal(ctx, icnv, buf, len);  	if (ret == (size_t)-1) return NULL; -	result = rccCreateString(language_id, ctx->tmpbuffer, ret); +	 +	if ((rccGetOption(ctx, RCC_OPTION_TRANSLATE))&&(rccGetClassType(ctx, class_id) == RCC_CLASS_TRANSLATE_FROM)) { +	    config = rccGetCurrentConfig(ctx); +	    translate = rccRecodeTranslate(&config , class_id, ctx->tmpbuffer); +	    if (translate) language_id = rccConfigGetLanguage(config); +	} +	 +	result = rccCreateString(language_id, translate?translate:ctx->tmpbuffer, translate?0:ret);      } else { -	result = rccCreateString(language_id, buf, len); +	if ((rccGetOption(ctx, RCC_OPTION_TRANSLATE))&&(rccGetClassType(ctx, class_id) == RCC_CLASS_TRANSLATE_FROM)) { +	    config = rccGetCurrentConfig(ctx); +	    translate = rccRecodeTranslate(&config , class_id, buf); +	    if (translate) language_id = rccConfigGetLanguage(config); +	} + +	result = rccCreateString(language_id, translate?translate:buf, translate?0:len);      } +      rccMutexUnLock(ctx->mutex);      if ((result)&&(usedb4&RCC_OPTION_LEARNING_FLAG_LEARN)) { @@ -385,13 +507,7 @@ char *rccSizedTo(rcc_context ctx, rcc_class_id class_id, rcc_const_string buf, s      char *translated = NULL;      rcc_language_config config;      rcc_language_id language_id; -    rcc_language_id current_language_id; -    rcc_language_id english_language_id;      rcc_class_type class_type; -    rcc_option_value translate; -    rcc_translate trans, entrans; -    const char *langname; -    unsigned char english_source;      rcc_iconv icnv;      if (!ctx) { @@ -414,74 +530,10 @@ char *rccSizedTo(rcc_context ctx, rcc_class_id class_id, rcc_const_string buf, s      if (err) return NULL;      class_type = rccGetClassType(ctx, class_id); -    translate = rccGetOption(ctx, RCC_OPTION_TRANSLATE); -    langname = rccGetLanguageName(ctx, language_id); -    if (strcasecmp(langname, rcc_english_language_sn)) english_source = 0; -    else english_source = 1; -     -    if ((class_type != RCC_CLASS_FS)&&((translate==RCC_OPTION_TRANSLATE_FULL)||((translate)&&(!english_source)))) { -	english_language_id = rccGetLanguageByName(ctx, rcc_english_language_sn); -	 +    if (((class_type == RCC_CLASS_TRANSLATE_LOCALE)||(class_type == RCC_CLASS_TRANSLATE_CURRENT))&&(rccGetOption(ctx, RCC_OPTION_TRANSLATE))) {  	rccMutexLock(ctx->mutex); -	 -	current_language_id = rccGetCurrentLanguage(ctx); -	if (current_language_id != language_id) { -	    if (translate != RCC_OPTION_TRANSLATE_TO_ENGLISH) { -		trans = rccConfigGetTranslator(config, current_language_id); -		if (trans) { -		    translated = rccTranslate(trans, utfstring); -		    if (translated) { -			if ((current_language_id != english_language_id)&&(rccIsASCII(translated))) { -			    /* Ffrench to german (no umlauts) => not related -			       english to german (no umlauts) => skiping english relations -			       DS: Problem if we have relation between french and german  */ -			    if (rccAreLanguagesRelated(ctx, language_id, current_language_id, english_language_id)) { -				free(translated); -				translated = NULL; -				translate = 0; -			    } -			} -		    } -		    if (translated) { -			language_id = current_language_id; -		     -			config = rccGetConfig(ctx, language_id); -			if (!config) { -			    rccMutexUnLock(ctx->mutex); -			    free(translated); -			    return NULL; -			} - -			err = rccConfigConfigure(config); -			if (err) { -			    rccMutexUnLock(ctx->mutex); -			    free(translated); -			    return NULL; -			} -		    }  -		} -	    } -	     -	    if ((translate == RCC_OPTION_TRANSLATE_TO_ENGLISH)||((translate)&&(!translated)&&(!english_language_id == current_language_id)&&(!rccAreLanguagesRelated(ctx, language_id, current_language_id, (rcc_language_id)-1)))) { -		entrans = rccConfigGetEnglishTranslator(config); -		if (entrans) { -		    translated = rccTranslate(config->entrans, utfstring); -/* -		    config = rccGetConfig(ctx, language_id); -		    if (!config) { -			rccMutexUnLock(ctx->mutex); -			return translated; -		    } - -		    err = rccConfigConfigure(config); -		    if (err) { -			rccMutexUnLock(ctx->mutex); -			return translated; -		    }*/ -		} -	    } -	} +	translated = rccRecodeTranslate(&config, class_id, utfstring);  	rccMutexUnLock(ctx->mutex);      } @@ -492,7 +544,7 @@ char *rccSizedTo(rcc_context ctx, rcc_class_id class_id, rcc_const_string buf, s  	    return result;  	}      } - +          rccMutexLock(ctx->mutex);      rccMutexLock(config->mutex);      icnv =  config->iconv_to[class_id]; @@ -536,10 +588,14 @@ char *rccSizedRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const      if ((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_NAMES))) goto recoding;      if (rccGetOption(ctx, RCC_OPTION_LEARNING_MODE)) goto recoding;      if (rccGetOption(ctx, RCC_OPTION_AUTODETECT_LANGUAGE)) goto recoding; -    if (rccGetOption(ctx, RCC_OPTION_TRANSLATE)) goto recoding; +    if ((rccGetOption(ctx, RCC_OPTION_TRANSLATE))&&((class_type == RCC_CLASS_TRANSLATE_LOCALE)||(class_type == RCC_CLASS_TRANSLATE_CURRENT))) goto recoding; + +    class_type = rccGetClassType(ctx, from); +    if ((rccGetOption(ctx, RCC_OPTION_TRANSLATE))&&(class_type == RCC_CLASS_TRANSLATE_FROM)) goto recoding;      rccMutexLock(ctx->mutex); -    from_charset_id = rccDetectCharset(ctx, from, buf, len); +    if (class_type == RCC_CLASS_KNOWN) from_charset_id = (rcc_autocharset_id)-1; +    else from_charset_id = rccDetectCharset(ctx, from, buf, len);      if (from_charset_id != (rcc_charset_id)-1) {  	from_charset = rccGetAutoCharsetName(ctx, from_charset_id);  	to_charset = rccGetCurrentCharsetName(ctx, to); @@ -606,6 +662,18 @@ char *rccFS(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char *fsp  	    rccMutexUnLock(config->mutex);  	    rccMutexUnLock(ctx->mutex);  	} else result = NULL; + +	if (!result) { +	    config = rccGetCurrentConfig(ctx); +	    if (config) { +		rccMutexLock(ctx->mutex); +		rccMutexLock(config->mutex); +		result = rccFS3(config, to, prefix, rccStringGetString(string)); +		rccMutexUnLock(config->mutex); +		rccMutexUnLock(ctx->mutex); +	    } +	} +  	free(string);      } else result = NULL; diff --git a/ui/rccnames.c b/ui/rccnames.c index d18f524..8b5b4a0 100644 --- a/ui/rccnames.c +++ b/ui/rccnames.c @@ -32,7 +32,7 @@ rcc_name rcc_default_language_names_embeded[RCC_MAX_LANGUAGES+1] = {  rcc_option_value_name rcc_default_option_boolean_names[] = { "Off", "On", NULL };  rcc_option_value_name rcc_default_option_learning_names[] = { "Off", "On", "Relearn", "Learn", NULL };  rcc_option_value_name rcc_default_option_clo_names[] = { "All Languages", "Configured / AutoEngine", "Configured Only", NULL }; -rcc_option_value_name rcc_default_option_translate_names[] = { "Off", "Translate to English", "Skip English Translation", "Full", NULL }; +rcc_option_value_name rcc_default_option_translate_names[] = { "Off", "Translate to English", "Skip Translation between Related Languages", "Skip Translation from Parrent Languages", "Full", NULL };  rcc_option_name rcc_default_option_names[RCC_MAX_OPTIONS+1];  rcc_option_name rcc_default_option_names_embeded[RCC_MAX_OPTIONS+1] = { | 
