diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/Makefile.am | 5 | ||||
| -rw-r--r-- | src/librcc.h | 119 | ||||
| -rw-r--r-- | src/lng.c | 45 | ||||
| -rw-r--r-- | src/lng.h | 2 | ||||
| -rw-r--r-- | src/lngconfig.c | 229 | ||||
| -rw-r--r-- | src/lngconfig.h | 7 | ||||
| -rw-r--r-- | src/rccconfig.c | 27 | ||||
| -rw-r--r-- | src/rccconfig.h | 5 | ||||
| -rw-r--r-- | src/rccexternal.c | 2 | ||||
| -rw-r--r-- | src/rcciconv.c | 5 | ||||
| -rw-r--r-- | src/rcciconv.h | 2 | ||||
| -rw-r--r-- | src/rccspell.c | 63 | ||||
| -rw-r--r-- | src/rccspell.h | 29 | ||||
| -rw-r--r-- | src/rccstring.c | 8 | ||||
| -rw-r--r-- | src/rccstring.h | 1 | ||||
| -rw-r--r-- | src/rcctranslate.c | 10 | ||||
| -rw-r--r-- | src/recode.c | 233 | 
17 files changed, 723 insertions, 69 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index baa08a4..4ba3c35 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -12,6 +12,7 @@ librcc_la_SOURCES = librcc.c \      fake_enca.h fake_rcd.h \      rccenca.c rccenca.h \      rccdb4.c rccdb4.h \ +    rccspell.c rccspell.h \      engine.c engine.h \      rccstring.c rccstring.h \      rccxml.c rccxml.h \ @@ -22,7 +23,7 @@ librcc_la_SOURCES = librcc.c \      internal.h  include_HEADERS = librcc.h -AM_CPPFLAGS = -I../src -DLIBRCC_DATA_DIR=\"${pkgdatadir}\" @XML_INCLUDES@ @DLOPEN_INCLUDES@ @RCD_INCLUDES@ @ENCA_INCLUDES@ @BDB_INCLUDES@ -librcc_la_LIBADD = @XML_LIBS@ @DLOPEN_LIBS@ @RCD_LIBS@ @ENCA_LIBS@ @BDB_LIBS@ +AM_CPPFLAGS = -I../src -DLIBRCC_DATA_DIR=\"${pkgdatadir}\" @XML_INCLUDES@ @DLOPEN_INCLUDES@ @RCD_INCLUDES@ @ENCA_INCLUDES@ @BDB_INCLUDES@ @ASPELL_CFLAGS@ +librcc_la_LIBADD = @XML_LIBS@ @DLOPEN_LIBS@ @RCD_LIBS@ @ENCA_LIBS@ @BDB_LIBS@ @ASPELL_LIBS@  librcc_la_LDFLAGS = -version-info @LIBRCC_VERSION_INFO@ diff --git a/src/librcc.h b/src/librcc.h index 52e6be4..d08937e 100644 --- a/src/librcc.h +++ b/src/librcc.h @@ -364,6 +364,23 @@ typedef int rcc_option_value;  #define RCC_OPTION_LEARNING_FLAG_LEARN 2  /** +  * Switch translation off. +  */ +#define RCC_OPTION_TRANSLATE_OFF 0 +/** +  * Translate data to english language (Current language don't matter). +  */ +#define RCC_OPTION_TRANSLATE_TO_ENGLISH 1 +/** +  * Skip translation of the english text. +  */ +#define RCC_OPTION_TRANSLATE_SKIP_ENGLISH 2 +/** +  * Translate whole data to the current language. +  */ +#define RCC_OPTION_TRANSLATE_FULL 3 + +/**    * List of options available    */  typedef enum rcc_option_t { @@ -371,8 +388,9 @@ typedef enum rcc_option_t {      RCC_OPTION_AUTODETECT_FS_TITLES,	/**< Detect titles of #RCC_CLASS_FS classes */      RCC_OPTION_AUTODETECT_FS_NAMES,	/**< Try to find encoding of #RCC_CLASS_FS by accessing fs */      RCC_OPTION_CONFIGURED_LANGUAGES_ONLY, /**< Use only configured languages or languages with auto-engines */ -    RCC_OPTION_TRANSLATE,		/**< Translate #rcc_string if it's language differs from current one */      RCC_OPTION_AUTOENGINE_SET_CURRENT,	/**< If enabled autodetection engine will set current charset */ +    RCC_OPTION_AUTODETECT_LANGUAGE,	/**< Enables language detection */ +    RCC_OPTION_TRANSLATE,		/**< Translate #rcc_string if it's language differs from current one */      RCC_MAX_OPTIONS  } rcc_option; @@ -970,6 +988,26 @@ int rccTranslateSetTimeout(rcc_translate translate, unsigned long us);  char *rccTranslate(rcc_translate translate, const char *buf);  /* recode.c */ + +/** +  * Tries to detect language of string +  * @param ctx is working context ( or default one if NULL supplied ) +  * @param class_id is encoding class +  * @param buf is original string (perhaps not zero terminated) +  * @param len is exact size of string or 0. In the last case the size is determined using 'strlen' function. +  * @result is language_id or -1 if autodetection is failed +  */ +rcc_language_id rccDetectLanguage(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len); +/** +  * Tries to detect charset of string +  * @param ctx is working context ( or default one if NULL supplied ) +  * @param class_id is encoding class +  * @param buf is original string (perhaps not zero terminated) +  * @param len is exact size of string or 0. In the last case the size is determined using 'strlen' function. +  * @result is auto_charset_id or -1 if autodetection is failed +  */ +int rccDetectCharset(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len); +  /**    * Recode string from specified encoding class to #rcc_string. Encoding detection engines and    * recoding cache are used (if possible) to detect original 'buf' encoding. Otherwise the  @@ -1079,7 +1117,7 @@ char *rccSizedRecodeToCharset(rcc_context ctx, rcc_class_id class_id, const char    * @param rlen in rlen the size of recoded string will be returned.    * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory.    */ -char *rccSizedRecodeFromCharset(rcc_context ctx, rcc_class_id class_id, const char *charset, const char *buf, size_t len, size_t *rlen); +rcc_string rccSizedRecodeFromCharset(rcc_context ctx, rcc_class_id class_id, const char *charset, const char *buf, size_t len, size_t *rlen);  /**    * Recode string between specified encodings.     * @@ -1094,6 +1132,77 @@ char *rccSizedRecodeFromCharset(rcc_context ctx, rcc_class_id class_id, const ch  char *rccSizedRecodeCharsets(rcc_context ctx, const char *from, const char *to, const char *buf, size_t len, size_t *rlen); +/** +  * Tries to detect charset of string +  * @param config is language configuration +  * @param class_id is encoding class +  * @param buf is original string (perhaps not zero terminated) +  * @param len is exact size of string or 0. In the last case the size is determined using 'strlen' function. +  * @result is auto_charset_id or -1 if autodetection is failed +  */ +rcc_autocharset_id rccConfigDetectCharset(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len); + +/** +  * Recode string from specified encoding class to #rcc_string. Encoding detection engines and +  * recoding cache are used (if possible) to detect original 'buf' encoding. Otherwise the  +  * preconfigured encoding of class is assumed. +  * +  * @param config is language configuration +  * @param class_id is encoding class +  * @param buf is original string (perhaps not zero terminated) +  * @param len is exact size of string or 0. In the last case the size is determined using 'strlen' function. +  * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory. +  */ +rcc_string rccConfigSizedFrom(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len); +/** +  * Recode string from #rcc_string to specified encoding class. If encoding class is of  +  * 'File System' type, the autoprobing for file names can be performed. In the other cases +  * the rcc_string will be recoded in preconfigured class encoding. +  * +  * @param config is language configuration +  * @param class_id is encoding class +  * @param buf is original zero terminated string +  * @param rlen in rlen the size of recoded string will be returned. +  * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory. +  */ +char *rccConfigSizedTo(rcc_language_config config, rcc_class_id class_id, rcc_const_string buf, size_t *rlen); +/** +  * Recode string between different encoding classes. The conversion is relays on rccConfigSizedFrom +  * and rccConfigSizedTo functions. +  * @see rccConfigSizedFrom +  * @see rccConfigSizedTo +  * +  * @param config is language configuration +  * @param from is source encoding class +  * @param to is destination encoding class +  * @param buf is original string (perhaps not zero terminated) +  * @param len is exact size of string or 0. In the last case the size is determined using 'strlen' function. +  * @param rlen in rlen the size of recoded string will be returned. +  * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory. +  */ +char *rccConfigSizedRecode(rcc_language_config config, rcc_class_id from, rcc_class_id to, const char *buf, size_t len, size_t *rlen); +/** +  * Recode string from specified encoding to #rcc_string.  +  * +  * @param config is language configuration +  * @param charset is source encoding +  * @param buf is original string (perhaps not zero terminated) +  * @param len is exact size of string or 0. In the last case the size is determined using 'strlen' function. +  * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory. +  */ +rcc_string rccConfigSizedRecodeFromCharset(rcc_language_config config, rcc_class_id class_id, const char *charset, const char *buf, size_t len, size_t *rlen); +/** +  * Recode string from #rcc_string to specified encoding.  +  * +  * @param config is language configuration +  * @param charset is destination encoding +  * @param buf is original zero terminated string +  * @param rlen in rlen the size of recoded string will be returned. +  * @result is recoded string or NULL if recoding is not required or failed. It is up to the caller to free memory. +  */ +char *rccConfigSizedRecodeToCharset(rcc_language_config config, rcc_class_id class_id, const char *charset, rcc_const_string buf, size_t len, size_t *rlen); + +  #define rccFrom(ctx, class_id, buf) rccSizedFrom(ctx, class_id, buf, 0)  #define rccTo(ctx, class_id, buf) rccSizedTo(ctx, class_id, buf, NULL)  #define rccRecode(ctx, from, to, buf) rccSizedRecode(ctx, from, to, buf, 0, NULL) @@ -1104,6 +1213,12 @@ char *rccSizedRecodeCharsets(rcc_context ctx, const char *from, const char *to,  #define rccRecodeFromCharset(ctx, class_id, charset, buf) rccSizedRecodeFromCharset(ctx, class_id, charset, buf, 0, NULL)  #define rccRecodeCharsets(ctx, from, to, buf) rccSizedRecodeCharsets(ctx, from, to, buf, 0, NULL) +#define rccConfigFrom(ctx, class_id, buf) rccConfigSizedFrom(ctx, class_id, buf, 0) +#define rccConfigTo(ctx, class_id, buf) rccConfigSizedTo(ctx, class_id, buf, NULL) +#define rccConfigRecode(ctx, from, to, buf) rccConfigSizedRecode(ctx, from, to, buf, 0, NULL) +#define rccConfigRecodeToCharset(ctx, class_id, charset, buf) rccConfigSizedRecodeToCharset(ctx, class_id, charset, buf, 0, NULL) +#define rccConfigRecodeFromCharset(ctx, class_id, charset, buf) rccConfigSizedRecodeFromCharset(ctx, class_id, charset, buf, 0, NULL) +  /*******************************************************************************  ******************************** Options ***************************************  *******************************************************************************/ @@ -36,11 +36,39 @@ rcc_language_id rccGetLanguageByName(rcc_context ctx, const char *name) {      return (rcc_language_id)-1;  } -static rcc_language_id rccGetDefaultLanguage(rcc_context ctx) { -    unsigned int i; +int rccCheckLanguageUsability(rcc_context ctx, rcc_language_id language_id) { +    rcc_language_config config;      rcc_option_value clo;      rcc_engine_ptr *engines; -    rcc_language_config config; +    rcc_charset *charsets; + +    if (!ctx) { +	if (rcc_default_ctx) ctx = rcc_default_ctx; +	else return 0; +    } +    if (language_id>=ctx->n_languages) return 0; + +    language_id = rccGetRealLanguage(ctx, language_id); +     +    clo = rccGetOption(ctx, RCC_OPTION_CONFIGURED_LANGUAGES_ONLY); +    if (clo) { +	config = rccCheckConfig(ctx, (rcc_language_id)language_id); +	if ((!config)||(!config->configured)) { +	    charsets = ctx->languages[language_id]->charsets; +	    if ((charsets[0])&&(charsets[1])&&(charsets[2])) { +		if (clo == 1) { +		    engines = ctx->languages[language_id]->engines; +		    if ((!engines[0])||(!engines[1])) return 0; +		} else return 0; +	    } +	} +    } +    return 1; +} + + +static rcc_language_id rccGetDefaultLanguage(rcc_context ctx) { +    unsigned int i;      char stmp[RCC_MAX_LANGUAGE_CHARS+1];      if (ctx->default_language) return ctx->default_language; @@ -48,16 +76,7 @@ static rcc_language_id rccGetDefaultLanguage(rcc_context ctx) {      if (!rccLocaleGetLanguage(stmp, ctx->locale_variable, RCC_MAX_LANGUAGE_CHARS)) {      	for (i=0;ctx->languages[i];i++) {  	    if (!strcmp(ctx->languages[i]->sn, stmp)) { -		clo = rccGetOption(ctx, RCC_OPTION_CONFIGURED_LANGUAGES_ONLY); -		if (clo) { -		    config = rccCheckConfig(ctx, (rcc_language_id)i); -		    if ((!config)||(!config->configured)) { -			if (clo == 1) { -			    engines = ctx->languages[i]->engines; -			    if ((!engines[0])||(!engines[1])) break; -			} else break; -		    } -		} +		if (!rccCheckLanguageUsability(ctx, (rcc_language_id)i)) break;  		ctx->default_language = (rcc_language_id)i;  		return (rcc_language_id)i;  	    } @@ -4,6 +4,8 @@  #include "internal.h"  #include "lngconfig.h" + +int rccCheckLanguageUsability(rcc_context ctx, rcc_language_id language_id);  rcc_language_ptr rccGetLanguagePointer(rcc_context ctx, rcc_language_id language_id);  #define rccGetCurrentEnginePointer(ctx) rccConfigGetCurrentEnginePointer(ctx->current_config) diff --git a/src/lngconfig.c b/src/lngconfig.c index c50ee74..26d0779 100644 --- a/src/lngconfig.c +++ b/src/lngconfig.c @@ -2,9 +2,12 @@  #include <stdlib.h>  #include <string.h> +#include "../config.h" +  #include "internal.h"  #include "rccconfig.h"  #include "rcclocale.h" +#include "lng.h"  rcc_engine_ptr rccConfigGetEnginePointer(rcc_language_config config, rcc_engine_id engine_id) {      unsigned int i; @@ -165,6 +168,7 @@ int rccConfigInit(rcc_language_config config, rcc_context ctx) {      config->fsiconv = NULL;      config->trans = NULL; +    config->entrans = NULL;      config->ctx = ctx;      config->language = NULL; @@ -172,6 +176,7 @@ int rccConfigInit(rcc_language_config config, rcc_context ctx) {      config->engine = -1;      config->default_charset = dcharsets;      config->configured = 0; +    config->speller = NULL;      config->iconv_to = iconv_to;      config->configure = 1; @@ -204,6 +209,10 @@ void rccConfigClear(rcc_language_config config) {  	    rccTranslateClose(config->trans);  	    config->trans = NULL;  	} +	if (config->entrans) { +	    rccTranslateClose(config->entrans); +	    config->entrans = NULL; +	}  	if (config->iconv_to) {  	    free(config->iconv_to);  	    config->iconv_to = NULL; @@ -216,31 +225,55 @@ void rccConfigClear(rcc_language_config config) {  	    free(config->default_charset);  	    config->default_charset = NULL;  	} +	if (config->speller) { +	    rccSpellerFree(config->speller); +	    config->speller = NULL; +	}      }  } -rcc_language_config rccCheckConfig(rcc_context ctx, rcc_language_id language_id) { -    rcc_language_id new_language_id; -     -    new_language_id = rccGetRealLanguage(ctx, language_id); -    if ((new_language_id == (rcc_language_id)-1)||(new_language_id != language_id)) return NULL; -    if (!ctx->configs[language_id].charset) return NULL; -    if (!strcasecmp(ctx->languages[language_id]->sn, "off")) return NULL; +static rcc_language_config rccGetConfigPointer(rcc_context ctx, rcc_language_id language_id, rcc_language_id *r_language_id) { + +    language_id = rccGetRealLanguage(ctx, language_id); +    if (!strcasecmp(ctx->languages[language_id]->sn, rcc_disabled_language_sn)) return NULL; +    if (r_language_id) *r_language_id = language_id;      return ctx->configs + language_id;  } +rcc_language_config rccCheckConfig(rcc_context ctx, rcc_language_id language_id) { +    rcc_language_config config; + +    config = rccGetConfigPointer(ctx, language_id, NULL); +    if ((config)&&(!config->charset)) return NULL; + +    return config; +} + + +rcc_language_config rccGetUsableConfig(rcc_context ctx, rcc_language_id language_id) { +    rcc_language_config config; + +    config = rccGetConfigPointer(ctx, language_id, &language_id); +    if (config) { +	if (!rccCheckLanguageUsability(ctx, language_id)) return NULL; +	if ((!config->charset)&&(rccConfigInit(config, ctx))) return NULL; +	config->language = ctx->languages[language_id]; +    } + +    return config; +} +  rcc_language_config rccGetConfig(rcc_context ctx, rcc_language_id language_id) { -    language_id = rccGetRealLanguage(ctx, language_id); -    if (language_id == (rcc_language_id)-1) return NULL; -    if (!strcasecmp(ctx->languages[language_id]->sn, "off")) return NULL; -     -    if (!ctx->configs[language_id].charset) { -	if (rccConfigInit(ctx->configs+language_id, ctx)) return NULL; -    }     +    rcc_language_config config; -    ctx->configs[language_id].language = ctx->languages[language_id]; -    return ctx->configs + language_id; +    config = rccGetConfigPointer(ctx, language_id, &language_id); +    if (config) { +	if ((!config->charset)&&(rccConfigInit(config, ctx))) return NULL; +	config->language = ctx->languages[language_id]; +    } + +    return config;  }  rcc_language_config rccGetConfigByName(rcc_context ctx, const char *name) { @@ -261,6 +294,15 @@ rcc_language_config rccGetCurrentConfig(rcc_context ctx) {      return rccGetConfig(ctx, language_id);  } +rcc_speller rccConfigGetSpeller(rcc_language_config config) { +    if (!config) return NULL; +     +    if (config->speller) return config->speller; + +    config->speller = rccSpellerCreate(config->language->sn); +    return config->speller; +} +  rcc_engine_id rccConfigGetSelectedEngine(rcc_language_config config) {      if (!config) return (rcc_engine_id)-1; @@ -532,6 +574,161 @@ int rccConfigConfigure(rcc_language_config config) {      return 0;  } + +rcc_string rccConfigSizedFrom(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len) { +    rcc_context ctx; +    rcc_string result; +    rcc_option_value usedb4; +    rcc_autocharset_id charset_id; +    const char *charset; + + +    if (!config) return NULL; +    ctx = config->ctx; + +    if (rccStringSizedCheck(buf, len)) return NULL; +     +    usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE); + +    if (usedb4&RCC_OPTION_LEARNING_FLAG_USE) { +	result = rccDb4GetKey(ctx->db4ctx, buf, len); +	if (result) { +	     if (rccStringFixID(result, ctx)) free(result); +	     else return result; +	} +    } + +    charset_id = rccConfigDetectCharset(config, class_id, buf, len); +    if (charset_id != (rcc_autocharset_id)-1) +	charset = rccConfigGetAutoCharsetName(config, charset_id); +    else +	charset = rccConfigGetCurrentCharsetName(config, class_id); +     +    if (charset) { +	result = rccSizedFromCharset(ctx, charset, buf, len); +	if (result) rccStringChangeID(result, rccGetLanguageByName(ctx, config->language->sn)); +	return result; +    } +     +    return NULL; +} + +char *rccConfigSizedTo(rcc_language_config config, rcc_class_id class_id, rcc_const_string buf, size_t *rlen) { +    rcc_context ctx; +    const char *charset; + +    if (!config) return NULL; +    ctx = config->ctx; + +    charset = rccConfigGetCurrentCharsetName(config, class_id); + +    if (charset) +	return rccSizedToCharset(ctx, charset, buf, rlen); +     +    return NULL; +} + + +char *rccConfigSizedRecode(rcc_language_config config, rcc_class_id from, rcc_class_id to, const char *buf, size_t len, size_t *rlen) { +    rcc_context ctx; +    rcc_string result; +    rcc_option_value usedb4; +    rcc_autocharset_id charset_id; +    rcc_string stmp; +    const char *tocharset, *fromcharset; + + +    if (!config) return NULL; +    ctx = config->ctx; + +    if (rccStringSizedCheck(buf, len)) return NULL; +     +    usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE); + +    if (usedb4&RCC_OPTION_LEARNING_FLAG_USE) { +	stmp = rccDb4GetKey(ctx->db4ctx, buf, len); +	if (stmp) { +	     if (rccStringFixID(stmp, ctx)) free(stmp); +	     else { +		result = rccConfigSizedTo(config, to, stmp, rlen); +		free(stmp); +		return result; +	    } +	} +    } + +    charset_id = rccConfigDetectCharset(config, from, buf, len); +    if (charset_id != (rcc_autocharset_id)-1) +	fromcharset = rccConfigGetAutoCharsetName(config, charset_id); +    else +	fromcharset = rccConfigGetCurrentCharsetName(config, from); +     +    tocharset = rccConfigGetCurrentCharsetName(config, to); +     +    if ((fromcharset)&&(tocharset)) +	return rccSizedRecodeCharsets(ctx, fromcharset, tocharset, buf, len, rlen); + +    return NULL; + +} + + +char *rccConfigSizedRecodeToCharset(rcc_language_config config, rcc_class_id class_id, const char *charset, rcc_const_string buf, size_t len, size_t *rlen) { +    rcc_context ctx; +    rcc_string result; +    rcc_option_value usedb4; +    rcc_autocharset_id charset_id; +    rcc_string stmp; +    const char *ocharset; + + +    if (!config) return NULL; +    ctx = config->ctx; + +    if (rccStringSizedCheck(buf, len)) return NULL; +     +    usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE); + +    if (usedb4&RCC_OPTION_LEARNING_FLAG_USE) { +	stmp = rccDb4GetKey(ctx->db4ctx, buf, len); +	if (stmp) { +	     if (rccStringFixID(stmp, ctx)) free(stmp); +	     else { +		result = rccSizedToCharset(ctx, charset, stmp, rlen); +		free(stmp); +	        return result; +	    } +	} +    } + +    charset_id = rccConfigDetectCharset(config, class_id, buf, len); +    if (charset_id != (rcc_autocharset_id)-1) +	ocharset = rccConfigGetAutoCharsetName(config, charset_id); +    else +	ocharset = rccConfigGetCurrentCharsetName(config, class_id); +     +    if (ocharset) +	return rccSizedRecodeCharsets(ctx, ocharset, charset, buf, len, rlen); + +    return NULL; +} + +char *rccConfigSizedRecodeFromCharset(rcc_language_config config, rcc_class_id class_id, const char *charset, const char *buf, size_t len, size_t *rlen) { +    rcc_context ctx; +    const char *ocharset; + +    if (!config) return NULL; +    ctx = config->ctx; + +    ocharset = rccConfigGetCurrentCharsetName(config, class_id); + +    if (ocharset) +	return rccSizedRecodeCharsets(ctx, charset, ocharset, buf, len, rlen); + +    return NULL; +} + +  /*      rcc_option_value options[RCC_MAX_OPTIONS]; diff --git a/src/lngconfig.h b/src/lngconfig.h index 92cc050..9d23139 100644 --- a/src/lngconfig.h +++ b/src/lngconfig.h @@ -3,6 +3,7 @@  #include "rcciconv.h"  #include "rcctranslate.h" +#include "rccspell.h"  struct rcc_language_config_t {      rcc_context ctx; @@ -17,8 +18,10 @@ struct rcc_language_config_t {      unsigned char configured; +    rcc_speller speller;      rcc_translate trans;      rcc_language_id translang; +    rcc_translate entrans;      rcc_iconv fsiconv;  }; @@ -30,9 +33,13 @@ rcc_engine_ptr rccConfigCheckEnginePointer(rcc_language_config config, rcc_engin  rcc_engine_ptr rccConfigGetCurrentEnginePointer(rcc_language_config config);  rcc_engine_ptr rccConfigCheckCurrentEnginePointer(rcc_language_config config); +rcc_speller rccConfigGetSpeller(rcc_language_config config); +  int rccConfigInit(rcc_language_config config, rcc_context ctx);  void rccConfigClear(rcc_language_config config); +rcc_language_config rccGetUsableConfig(rcc_context ctx, rcc_language_id language_id); +  int rccConfigConfigure(rcc_language_config config);  rcc_charset_id rccConfigGetLocaleUnicodeCharset(rcc_language_config config, const char *locale_variable); diff --git a/src/rccconfig.c b/src/rccconfig.c index ed6d30a..f820606 100644 --- a/src/rccconfig.c +++ b/src/rccconfig.c @@ -12,13 +12,18 @@ rcc_language_alias rcc_default_aliases[] = {      { NULL, NULL}  }; +const char rcc_default_language_sn[] = "default"; +const char rcc_disabled_language_sn[] = "Off"; +const char rcc_english_language_sn[] = "en"; +const char rcc_disabled_engine_sn[] = "Off";  const char rcc_default_charset[] = "Default"; +  const char rcc_utf8_charset[] = "UTF-8";  const char rcc_engine_nonconfigured[] = "Default";  const char rcc_option_nonconfigured[] = "DEFAULT";  rcc_engine rcc_default_engine = { -    "Off", NULL, NULL, NULL, {NULL} +    rcc_disabled_engine_sn, NULL, NULL, NULL, {NULL}  };  rcc_engine rcc_russian_engine = { @@ -32,11 +37,11 @@ rcc_engine rcc_ukrainian_engine = {  rcc_language rcc_default_languages[RCC_MAX_LANGUAGES + 1];  rcc_language rcc_default_languages_embeded[RCC_MAX_LANGUAGES + 1] = { -{"default", {rcc_default_charset, NULL}, { +{rcc_default_language_sn, {rcc_default_charset, NULL}, {      &rcc_default_engine,      NULL  }}, -{"off", {rcc_default_charset, NULL}, { +{rcc_disabled_language_sn, {rcc_default_charset, NULL}, {      &rcc_default_engine,      NULL  }}, @@ -112,14 +117,28 @@ rcc_language rcc_default_languages_embeded[RCC_MAX_LANGUAGES + 1] = {  rcc_option_value_name rcc_sn_boolean[] = { "OFF", "ON", NULL };  rcc_option_value_name rcc_sn_learning[] = { "OFF", "ON", "RELEARN", "LEARN", NULL };  rcc_option_value_name rcc_sn_clo[] = { "ALL", "CONFIGURED_AND_AUTO", "CONFIGURED_ONLY", NULL }; +rcc_option_value_name rcc_sn_translate[] = { "OFF", "TO_ENGLISH", "SKIP_ENGLISH", "FULL", NULL };  rcc_option_description rcc_option_descriptions[RCC_MAX_OPTIONS+1];  rcc_option_description rcc_option_descriptions_embeded[RCC_MAX_OPTIONS+1] = { +#ifdef HAVE_DB_H      {RCC_OPTION_LEARNING_MODE, 1, { RCC_OPTION_RANGE_TYPE_MENU, 0, 3, 1 }, RCC_OPTION_TYPE_STANDARD,  "LEARNING_MODE", rcc_sn_learning }, +#else +    {RCC_OPTION_LEARNING_MODE, 1, { RCC_OPTION_RANGE_TYPE_MENU, 0, 3, 1 }, RCC_OPTION_TYPE_INVISIBLE,  "LEARNING_MODE", rcc_sn_learning }, +#endif /* HAVE_DB_H */      {RCC_OPTION_AUTODETECT_FS_NAMES, 1, { RCC_OPTION_RANGE_TYPE_BOOLEAN, 0, 0, 0}, RCC_OPTION_TYPE_STANDARD,  "AUTODETECT_FS_NAMES", rcc_sn_boolean},      {RCC_OPTION_AUTODETECT_FS_TITLES, 1, { RCC_OPTION_RANGE_TYPE_BOOLEAN, 0, 0, 0}, RCC_OPTION_TYPE_INVISIBLE, "AUTODETECT_FS_TITLES", rcc_sn_boolean},      {RCC_OPTION_CONFIGURED_LANGUAGES_ONLY, 1, { RCC_OPTION_RANGE_TYPE_MENU, 0, 2, 1}, RCC_OPTION_TYPE_INVISIBLE, "CONFIGURED_LANGUAGES_ONLY", rcc_sn_clo}, -    {RCC_OPTION_TRANSLATE, 0, { RCC_OPTION_RANGE_TYPE_BOOLEAN, 0, 0, 0}, RCC_OPTION_TYPE_STANDARD, "TRANSLATE", rcc_sn_boolean }, +#ifdef HAVE_ASPELL +    {RCC_OPTION_AUTODETECT_LANGUAGE, 0, { RCC_OPTION_RANGE_TYPE_BOOLEAN, 0, 0, 0}, RCC_OPTION_TYPE_STANDARD, "AUTODETECT_LANGUAGE", rcc_sn_boolean}, +#else +    {RCC_OPTION_AUTODETECT_LANGUAGE, 0, { RCC_OPTION_RANGE_TYPE_BOOLEAN, 0, 0, 0}, RCC_OPTION_TYPE_INVISIBLE, "AUTODETECT_LANGUAGE", rcc_sn_boolean}, +#endif  +#ifdef HAVE_LIBTRANSLATE +    {RCC_OPTION_TRANSLATE, 0, { RCC_OPTION_RANGE_TYPE_MENU, 0, 3, 1}, RCC_OPTION_TYPE_STANDARD, "TRANSLATE", rcc_sn_translate }, +#else +    {RCC_OPTION_TRANSLATE, 0, { RCC_OPTION_RANGE_TYPE_MENU, 0, 3, 1}, RCC_OPTION_TYPE_INVISIBLE, "TRANSLATE", rcc_sn_translate }, +#endif /* HAVE_LIBTRANSLATE */      {RCC_OPTION_AUTOENGINE_SET_CURRENT, 0, { RCC_OPTION_RANGE_TYPE_BOOLEAN, 0, 0, 0}, RCC_OPTION_TYPE_STANDARD, "AUTOENGINE_SET_CURRENT", rcc_sn_boolean },      {RCC_MAX_OPTIONS}  }; diff --git a/src/rccconfig.h b/src/rccconfig.h index b94a39b..8e794ba 100644 --- a/src/rccconfig.h +++ b/src/rccconfig.h @@ -6,6 +6,11 @@  #undef RCC_DEBUG  #define RCC_LOCALE_VARIABLE "LC_CTYPE" +extern const char rcc_default_language_sn[]; +extern const char rcc_english_language_sn[]; +extern const char rcc_disabled_language_sn[]; +extern const char rcc_disabled_engine_sn[]; +  extern rcc_language_alias rcc_default_aliases[];  extern const char rcc_default_charset[];  extern const char rcc_utf8_charset[]; diff --git a/src/rccexternal.c b/src/rccexternal.c index 16b3667..4a09948 100644 --- a/src/rccexternal.c +++ b/src/rccexternal.c @@ -153,7 +153,7 @@ int rccExternalConnect(unsigned char module) {      fd_set fdcon;      if (pid == (pid_t)-1) return -1; - +          sock = socket(PF_UNIX, SOCK_STREAM, 0);      if (sock<=0) return -1; diff --git a/src/rcciconv.c b/src/rcciconv.c index d9903de..93278a7 100644 --- a/src/rcciconv.c +++ b/src/rcciconv.c @@ -48,6 +48,11 @@ void rccIConvClose(rcc_iconv icnv) {      }  } +int rccIConvGetError(rcc_iconv icnv) { +    if ((!icnv)||(icnv->icnv == (iconv_t)-1)) return -1; +    return 0; +} +  size_t rccIConvRecode(rcc_iconv icnv, char *outbuf, size_t outsize, const char *buf, size_t size) {      char *in_buf, *out_buf, err;      int in_left, out_left; diff --git a/src/rcciconv.h b/src/rcciconv.h index 0070696..1520534 100644 --- a/src/rcciconv.h +++ b/src/rcciconv.h @@ -8,6 +8,8 @@ struct rcc_iconv_t {  };  typedef struct rcc_iconv_t rcc_iconv_s; +int rccIConvGetError(rcc_iconv icnv); +  size_t rccIConvInternal(rcc_context ctx, rcc_iconv icnv, const char *buf, size_t len);  /**  diff --git a/src/rccspell.c b/src/rccspell.c new file mode 100644 index 0000000..c54e267 --- /dev/null +++ b/src/rccspell.c @@ -0,0 +1,63 @@ +#include <stdio.h> +#include <stdlib.h> + +#include "rccspell.h" + +rcc_speller rccSpellerCreate(const char *lang) { +#ifdef HAVE_ASPELL +    rcc_speller rccspeller; +    AspellSpeller *speller = NULL; +    AspellConfig *config; +    AspellCanHaveError *possible_err; + +    if (!lang) return NULL; + +    rccspeller = (rcc_speller)malloc(sizeof(rcc_speller_s)); +    if (!rccspeller) return rccspeller; +     +    config = new_aspell_config(); + +    if (config) {     +	if (aspell_config_replace(config, "encoding", "utf-8")&&aspell_config_replace(config, "master", lang)) { +	    possible_err = new_aspell_speller(config); +	    if (aspell_error_number(possible_err) == 0) { +		speller = to_aspell_speller(possible_err); +	    } +	} +	delete_aspell_config(config); +    } +     +    rccspeller->speller = speller; +    return rccspeller; +#else  +    return NULL; +#endif /* HAVE_ASPELL */ +} + +void rccSpellerFree(rcc_speller rccspeller) { +#ifdef HAVE_ASPELL +    if ((rccspeller)&&(rccspeller->speller)) +	delete_aspell_speller(rccspeller->speller); +    free(rccspeller); +#endif /* HAVE_ASPELL */ +} + +int rccSpellerGetError(rcc_speller rccspeller) { +    if ((!rccspeller)||(!rccspeller->speller)) return -1; +    return 0; +} + +int rccSpellerSized(rcc_speller speller, const char *word, size_t len) { +#ifdef HAVE_ASPELL +    int res; +     +    if (rccSpellerGetError(speller)) return 0; +    res = aspell_speller_check(speller->speller, word, len?len:-1); +    return res<0?0:res;     +#endif /* HAVE_ASPELL */ +    return 0; +} + +int rccSpeller(rcc_speller speller, const char *word) { +    return rccSpellerSized(speller, word, 0); +} diff --git a/src/rccspell.h b/src/rccspell.h new file mode 100644 index 0000000..49e39f4 --- /dev/null +++ b/src/rccspell.h @@ -0,0 +1,29 @@ +#ifndef _RCC_SPELL_H +#define _RCC_SPELL_H + +#include "../config.h" + +#ifdef HAVE_ASPELL +#include <aspell.h> +#endif /* HAVE_ASPELL */ + +struct rcc_speller_t { +#ifdef HAVE_ASPELL +    struct AspellSpeller *speller; +#else  +    void *speller; +#endif /* HAVE_ASPELL */ +}; + +typedef struct rcc_speller_t *rcc_speller; +typedef struct rcc_speller_t rcc_speller_s; + +rcc_speller rccSpellerCreate(const char *lang); +void rccSpellerFree(rcc_speller speller); + +int rccSpellerGetError(rcc_speller rccspeller); + +int rccSpellerSized(rcc_speller speller, const char *word, size_t len); +int rccSpeller(rcc_speller speller, const char *word); + +#endif /* _RCC_SPELL_H */ diff --git a/src/rccstring.c b/src/rccstring.c index d6c6805..9c4c19f 100644 --- a/src/rccstring.c +++ b/src/rccstring.c @@ -58,6 +58,14 @@ int rccStringFixID(rcc_string string, rcc_context ctx) {      return 0;  } +int rccStringChangeID(rcc_string string, rcc_language_id language_id) { +    if ((!string)&&(language_id != (rcc_language_id)-1)) return -1; +     +    ((rcc_string_header*)string)->language_id = language_id; +    return 0; +} + +  void rccStringFree(rcc_string str) {      if (str) free(str);  } diff --git a/src/rccstring.h b/src/rccstring.h index 3c5d8d7..e9e9734 100644 --- a/src/rccstring.h +++ b/src/rccstring.h @@ -16,6 +16,7 @@ void rccStringFree(rcc_string str);  int rccStringSetLang(rcc_string string, const char *sn);  int rccStringFixID(rcc_string string, rcc_context ctx); +int rccStringChangeID(rcc_string string, rcc_language_id language_id);  #ifdef HAVE_STRNLEN  # ifndef strnlen diff --git a/src/rcctranslate.c b/src/rcctranslate.c index 3bbd916..d7bb4e4 100644 --- a/src/rcctranslate.c +++ b/src/rcctranslate.c @@ -66,18 +66,22 @@ int rccTranslateSetTimeout(rcc_translate translate, unsigned long us) {  char *rccTranslate(rcc_translate translate, const char *buf) {  #ifdef HAVE_LIBTRANSLATE -    size_t i;      rcc_external_command_s resp;      size_t err, len;      char *buffer; - -    if ((!translate)||(!buf)) return NULL; +/* +    size_t i; +*/ +    if ((!translate)||(!buf)) return NULL; + +/*          if (!strcmp(translate->prefix.to, "en")) {  	for (i=0;buf[i];i++)   	    if ((unsigned char)buf[i]>0x7F) break;  	if (!buf[i]) return NULL;      } +*/      if (translate->sock == -1) {  	translate->sock = rccExternalConnect(RCC_EXTERNAL_MODULE_LIBRTRANSLATE); diff --git a/src/recode.c b/src/recode.c index c44095c..7e12343 100644 --- a/src/recode.c +++ b/src/recode.c @@ -2,6 +2,8 @@  #include <stdlib.h>  #include <string.h> +#include "../config.h" +  #include "internal.h"  #include "rcciconv.h"  #include "fs.h" @@ -10,19 +12,140 @@  #include "rccconfig.h"  #include "rccdb4.h"  #include "rcctranslate.h" +#include "rccspell.h" + +#define isSpace(ch) ((ch<0x7F)&&((ch<'A')||(ch>'z')||((ch>'Z')&&(ch<'a')))) +#define RCC_REQUIRED_PROBABILITY	0.66 + +rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len, rcc_string *retstring) { +    rcc_speller speller; +    unsigned long i, nlanguages; +    rcc_language_config config, config0 = NULL; +    rcc_string recoded; +    unsigned char *utf8; +    size_t j, mode; +    unsigned long words, english, result; +    unsigned char english_mode, english_word = 1; +    rcc_language_id english_lang = (rcc_language_id)-1; +    double res, english_res = 0; +    rcc_option_value usedb4; +     + +    usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE); + +    if (usedb4&RCC_OPTION_LEARNING_FLAG_USE) { +	recoded = rccDb4GetKey(ctx->db4ctx, buf, len); +	if (recoded) { +	     if (rccStringFixID(recoded, ctx)) free(recoded); +	     else { +	        english_lang = rccStringGetLanguage(recoded); +	        if (retstring) *retstring = recoded; +		else free(recoded); +	        return english_lang; +	    } +	} +    } +     +    if (!rccGetOption(ctx, RCC_OPTION_AUTODETECT_LANGUAGE)) return (rcc_language_id)-1; + +    nlanguages = ctx->n_languages; + +    for (i=0;i<nlanguages;i++) { +	config = rccGetUsableConfig(ctx, (rcc_language_id)i); +	if (!config) continue; + +	if (i) { +	    if (config==config0) continue; +	} else config0=config; +	 +	speller = rccConfigGetSpeller(config); +	if (rccSpellerGetError(speller)) continue; + +	recoded = rccConfigSizedFrom(config, class_id, buf, len); +	if (!recoded) continue; +	 +	if (!strcasecmp(config->language->sn, rcc_english_language_sn)) english_mode = 1; +	else english_mode = 0; +	 +	utf8 = (char*)rccStringGetString(recoded); +	for (result=0,english=0,words=0,mode=0,j=0;utf8[j];j++) { +	    if (isSpace(utf8[j])) { +		if (mode) { +		    if ((!english_mode)&&(english_word)) english++; +		    result+=rccSpellerSized(speller, utf8 + mode - 1, j - mode + 1)?1:0; +		    words++; +		    mode = 0; +		} else continue; +	    } else { +		if (mode) { +		    if (utf8[j]>0x7F) english_word = 0; +		} else { +		    mode = j + 1; +		    english_word = 1; +		} +	    } +	} +	if (mode) { +	    result+=rccSpeller(speller, utf8 + mode - 1)?1:0; +	    words++; +	} +	 +	if (english_mode) { +	    english_res = 1.*result/words; +	    english_lang = (rcc_language_id)i;     +	} else if (words) { +	    res = 1.*result/words; +	    if (res > RCC_REQUIRED_PROBABILITY) { +		if (retstring) *retstring = recoded; +		else free(recoded); +		return (rcc_language_id)i; +	    } +	    if (words > english) { +		res = 1.*(result - english)/(words - english); +		if (res > RCC_REQUIRED_PROBABILITY) { +		    if (retstring) *retstring = recoded; +		    else free(recoded); +		    return (rcc_language_id)i; +		} +	    } +	} +	 +	free(recoded); +    } + +    if (english_res > RCC_REQUIRED_PROBABILITY) { +        if (retstring) { +	    *retstring = rccCreateString(english_lang, buf, len); +	} +        return english_lang; +    } +     +    return (rcc_language_id)-1; +} +rcc_language_id rccDetectLanguage(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len) { +    if (!ctx) { +	if (rcc_default_ctx) ctx = rcc_default_ctx; +	else return -1; +    } +     +    return rccDetectLanguageInternal(ctx, class_id, buf, len, NULL); +} -static rcc_autocharset_id rccIConvAuto(rcc_context ctx, rcc_class_id class_id, const char *buf, int len) { +rcc_autocharset_id rccConfigDetectCharset(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len) { +    rcc_context ctx;      rcc_class_type class_type;      rcc_engine_ptr engine; -    if (!buf) return (rcc_autocharset_id)-1; +    if ((!buf)||(!config)) return (rcc_autocharset_id)-1; +     +    ctx = config->ctx;      class_type = rccGetClassType(ctx, class_id);      if ((class_type != RCC_CLASS_FS)||((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_TITLES)))) { -	engine = rccGetCurrentEnginePointer(ctx); +	engine = rccConfigGetCurrentEnginePointer(config);  	if ((!engine)||(!engine->func)) return (rcc_autocharset_id)-1;  	return engine->func(&ctx->engine_ctx, buf, len);      } @@ -30,16 +153,26 @@ static rcc_autocharset_id rccIConvAuto(rcc_context ctx, rcc_class_id class_id, c      return (rcc_autocharset_id)-1;  } +int rccDetectCharset(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len) { +    if (!ctx) { +	if (rcc_default_ctx) ctx = rcc_default_ctx; +	else return -1; +    } + +    return rccConfigDetectCharset(ctx->current_config, class_id, buf, len); +} + +  rcc_string rccSizedFrom(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len) {      int err;      size_t ret; -    rcc_language_id language_id; +    rcc_language_id language_id, detected_language_id;      rcc_autocharset_id charset_id;      rcc_iconv icnv = NULL;      rcc_string result;      rcc_option_value usedb4;      const char *charset; - +          if (!ctx) {  	if (rcc_default_ctx) ctx = rcc_default_ctx;  	else return NULL; @@ -52,10 +185,11 @@ rcc_string rccSizedFrom(rcc_context ctx, rcc_class_id class_id, const char *buf,      language_id = rccGetCurrentLanguage(ctx);      if (language_id == (rcc_language_id)-1) return NULL; -    if (!strcasecmp(ctx->languages[language_id]->sn, "off")) return NULL; +    if (!strcasecmp(ctx->languages[language_id]->sn, rcc_disabled_language_sn)) return NULL; -    usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE); +    usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE); +/*      if (usedb4&RCC_OPTION_LEARNING_FLAG_USE) {  	result = rccDb4GetKey(ctx->db4ctx, buf, len);  	if (result) { @@ -63,11 +197,22 @@ rcc_string rccSizedFrom(rcc_context ctx, rcc_class_id class_id, const char *buf,  	     else return result;  	}      } +     +    if (rccGetOption(ctx, RCC_OPTION_AUTODETECT_LANGUAGE)) { +	detected_language_id = rccDetectLanguageInternal(ctx, class_id, buf, len); +	if (detected_language_id != (rcc_language_id)-1) +	    language_id = detected_language_id; +    } +*/ +     +    detected_language_id = rccDetectLanguageInternal(ctx, class_id, buf, len, &result); +    if (detected_language_id != (rcc_language_id)-1) return result; +          err = rccConfigure(ctx);      if (err) return NULL; -    charset_id = rccIConvAuto(ctx, class_id, buf, len); +    charset_id = rccDetectCharset(ctx, class_id, buf, len);      if (charset_id != (rcc_autocharset_id)-1) {  	icnv = ctx->iconv_auto[charset_id];  	if (rccGetOption(ctx, RCC_OPTION_AUTOENGINE_SET_CURRENT)) { @@ -105,6 +250,9 @@ char *rccSizedTo(rcc_context ctx, rcc_class_id class_id, rcc_const_string buf, s      rcc_language_id language_id;      rcc_language_id current_language_id;      rcc_class_type class_type; +    rcc_option_value translate; +    const char *langname; +    unsigned char english_source;      rcc_iconv icnv;      if (!ctx) { @@ -127,33 +275,60 @@ char *rccSizedTo(rcc_context ctx, rcc_class_id class_id, rcc_const_string buf, s      if (err) return NULL;      class_type = rccGetClassType(ctx, class_id); -    if ((class_type != RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_TRANSLATE))) { +    translate = rccGetOption(ctx, RCC_OPTION_TRANSLATE); +     +    langname = rccGetLanguageName(ctx, language_id); +    if (strcasecmp(langname, rcc_english_language_sn)) english_source = 0; +    else english_source = 1; +     +    if ((class_type != RCC_CLASS_FS)&&((translate==RCC_OPTION_TRANSLATE_FULL)||((translate)&&(!english_source)))) {  	current_language_id = rccGetCurrentLanguage(ctx);  	if (current_language_id != language_id) {  	    if ((config->trans)&&(config->translang != current_language_id)) {  		rccTranslateClose(config->trans);  		config->trans = NULL;  	    } -	    if (!config->trans) { -		config->trans = rccTranslateOpen(rccGetLanguageName(ctx, language_id), rccGetLanguageName(ctx, current_language_id)); -		config->translang = current_language_id; +	     +	    if (translate != RCC_OPTION_TRANSLATE_TO_ENGLISH) { +		if (!config->trans) { +		    config->trans = rccTranslateOpen(rccGetLanguageName(ctx, language_id), rccGetLanguageName(ctx, current_language_id)); +		    config->translang = current_language_id; +		} + +		if (config->trans) { +		    translated = rccTranslate(config->trans, utfstring); +		    if (translated) { +			language_id = current_language_id; +		     +			config = rccGetConfig(ctx, language_id); +			if (!config) { +			    free(translated); +			    return NULL; +			} + +			err = rccConfigConfigure(config); +			if (err) { +			    free(translated); +			    return NULL; +			} +		    }  +		}  	    } -	    if (config->trans) { -		translated = rccTranslate(config->trans, utfstring); -		if (translated) { -		    language_id = current_language_id; +	     +	    if ((translate == RCC_OPTION_TRANSLATE_TO_ENGLISH)||((config->trans)&&(!translated))) { +		puts("entrans"); +		if (!config->entrans) { +		    config->entrans = rccTranslateOpen(rccGetLanguageName(ctx, language_id), rcc_english_language_sn); +		} +		if (config->entrans) { +		    translated = rccTranslate(config->entrans, utfstring); +  		    config = rccGetConfig(ctx, language_id); -		    if (!config) { -			free(translated); -			return NULL; -		    } +		    if (!config) return translated;  		    err = rccConfigConfigure(config); -		    if (err) { -			free(translated); -			return NULL; -		    } +		    if (err) return translated;  		}  	    }  	} @@ -183,7 +358,7 @@ char *rccSizedTo(rcc_context ctx, rcc_class_id class_id, rcc_const_string buf, s      icnv =  config->iconv_to[class_id];      if (icnv) { -	newlen = rccIConvInternal(ctx, icnv, translated?translated:utfstring, newlen); +	newlen = rccIConvInternal(ctx, icnv, translated?translated:utfstring, translated?0:newlen);  	if (translated) free(translated);  	if (newlen == (size_t)-1) return NULL; @@ -237,7 +412,7 @@ char *rccSizedRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const      err = rccConfigure(ctx);      if (err) return NULL; -    from_charset_id = rccIConvAuto(ctx, from, buf, len); +    from_charset_id = rccDetectCharset(ctx, from, buf, len);      if (from_charset_id != (rcc_charset_id)-1) {  	from_charset = rccGetAutoCharsetName(ctx, from_charset_id);  	to_charset = rccGetCurrentCharsetName(ctx, to); @@ -385,13 +560,15 @@ char *rccSizedRecodeToCharset(rcc_context ctx, rcc_class_id class_id, const char      return extracted;  } -/* Convert to class_id from Charset */ +/* Convert to class_id from Charset. +Usage of this function assuming the knowledge about the incoming string.  +The charset as well as the language. So no detection (DB4,Aspell) of language +will be performed. */  char *rccSizedRecodeFromCharset(rcc_context ctx, rcc_class_id class_id, const char *charset, const char *buf, size_t len, size_t *rlen) {      size_t res;      rcc_iconv icnv;      rcc_string str;      char *extracted; -          if (!charset) return NULL;  | 
