diff options
| -rw-r--r-- | INSTALL | 37 | ||||
| -rw-r--r-- | NEWS | 6 | ||||
| -rw-r--r-- | README | 162 | ||||
| -rw-r--r-- | ToDo | 63 | ||||
| -rw-r--r-- | examples/rcc-gtk-config.c | 2 | ||||
| -rw-r--r-- | external/rccexternal.c | 42 | ||||
| -rw-r--r-- | src/librcc.h | 10 | ||||
| -rw-r--r-- | src/lngconfig.c | 59 | ||||
| -rw-r--r-- | src/rccconfig.c | 11 | ||||
| -rw-r--r-- | src/rccconfig.h | 3 | ||||
| -rw-r--r-- | src/recode.c | 47 | ||||
| -rw-r--r-- | ui/rccnames.c | 2 | 
12 files changed, 382 insertions, 62 deletions
| @@ -0,0 +1,37 @@ +Dependencies +============ +    LibRCC depends on the LibXML2 library. However, it requires some other  +    libraries to provide the following services. +     +    * LibRCD and Enca libraries are used to provide encoding autodetection. +    * DB4 is used to cache translations and recodings.  +    * Aspell is required for language autodetection. +    * LibTranslate is required for translation.  +	o The Libtranslate uses online services to translate the text. In the  +	default version there is no way to limit translation time. The  +	LibRCC will respect the maximum time for recoding in any case, but +	nevertheless it is a good idea to use the patched version of Libtranslate, +	which provides an API call for time-limited translation. The patch can be +	downloaded from the RusXMMS(http://RusXMMS.sf.net) project page. + +    The configure script will report which options are available. + + +Build +===== +    LibRCC uses the standard GNU autoconf build system. To build a LibRCC release +    you should type: +	./configure +	make +     +    Installation can be performed using the following command: +	make install +     +    The configure script accepts the following options: +    --enable-force-dynamic-engines: +	Will force encoding autodetection to load Enca and LibRCD libraries +	dynamically, rather than compiling them in. +    --disable-libtranslate: +	Will not compile support for language translation, +	even if the Libtranslate library is available. +	
\ No newline at end of file @@ -0,0 +1,6 @@ +LibRCC-0.2.0: +    + Language AutoDetection +    + Language Translation +    + Language Transliteration for Russian, Ukrainian and Languages supported by IConv. +    + Support for per-class Charset Configuration + @@ -1,7 +1,161 @@ +RusXMMS Project +=============== -Language Autodetection ----------------------- +Originally, the project was aimed to provide means to work with multiple encodings  +of the same language through adapting encoding of ID3 tags, M3U and PLS playlists  +(including file names) to local settings on-the-fly. Both the tag reading and  +writing back using any selected encoding was supported. +Nowadays there are library available providing the same functionality for almost +any program with just a few lines of code. The library is not limited to ID3 tags, +it can be useful for any program working with small titles or file names in  +different languages and encodings. The patches for several music players, ID3 tag +libraries and some other programs are available on the project page. +The Abilities of LibRCC Library +=============================== -Translation ------------ +    * Language Autodetection +    * On the fly translation between languages, using online-services! +    * Encoding Autodetection for most of European Languages1 +    * Support for encoding detection plugins (besides Enca and LibRCD) +    * Recoding/translation of multi-language playlists! +    * Cache to speed-up re-recoding. +    * Possibility to configure new languages and encodings. +    * Shared configuration file. For example mentioned TagLib and LibID3 patches +    do not have their own user interface, but will utilize the same recoding  +    configuration as XMMS. +    * As well the separate program for configuration adjustment is available. +    * GTK/GTK2 UI Library: you can add properties page to your GTK application  +    with 3 lines of code. 
+    * Menu localization opportunity + +The Available Patches +===================== + +    * RusXMMS: Visualization and editing of the whole range of ID3 tags using any +    of eight byte or unicode encodings. Support for playlists with non-english  +    filenames. The translation of foreign languages to english or locale one is  +    supported as well. The embeded properties page. The patch makes XMMS the best  +    player to  work with ID3 titles. +    * TagLib: Visualization and editing of ID3 v.1 and v.2 titles. Any TagLib  +    based application will correctly work with ID3 tags out of the box. The  +    properties page can be added to application with several lines of code.  +    Additionaly, after applying the patch, the 'tagwriter' program from the  +    TagLib examples can be used to convert titles of all your MP3 files to  +    unicode ID3 v.2 tags just using command: "tagwriter *.mp3". +    * LibID3: Visualization of ID3 v.1 and v.2 titles. Any LibID3 based  +    application will correctly display (but not edit) ID3 tags out of the box. +    * Mpg123: Visualization of ID3 v.1 titles. +    * GFtp: Recoding file names between FTP servers using different encodings. +    * Unzip: Recoding file names from Windows created archives. + + +Gratitudes +========== +    * Me ;) +    * Michael Shigorin - Ideas and great help in wiping bugs +    * Dmitry A. Koptev - Slackware packages +    * IPE, ForschungsZentrum, Karlsruhe +    * CRD, Yerevan Physics Institute +    * Georgian and Spanish winemakers :) + +Important Notes +=============== + +1. It have much more sense to report problems here, then just claiming nothing is  +   working on miscellaneous forums and mailing lists. +2. If you want patches presented here to be included in the correspondent project  +   trees, please, ask authors. The same thing should be concerned about inclusion  +   of LibRCC and LibRCD in the official Gentoo portage tree. +3. 
Most of the patches will modify configure.in and Makefile.am files, so the full +   autoconfiguration should be performed.  +   You should run "aclocal; automake; autoconf" prior to using configure script. +4. Output encoding normally must correspond to current "LC_CTYPE" locale. If  +   you would set it to another value without really knowing what are you doing,  +   it can raise problems. + +Preferences +=========== + +    * Current Language. The English, Russian, Ukrainian, Belarussian, Bulgarian, +    Czech, Estonian, Croatian, Hungarian, Lithuanian, Latvian, Polish, Slovak,  +    Slovenian and Chinese are embeded in the library. To get other languages you  +    should configure them in the "/etc/rcc.xml" or in user-defined configuration  +    "~/.rcc/rcc.xml". By default the language will be determined using LC_CTYPE  +    environmental variable. +    * Current encoding for supported encoding classes. For RusXMMS the following +    classes are defined: +          o ID3 Encoding +          o ID3 v.2 Encoding (uses ID3 by default) +          o PlayList Encoding (uses ID3 by default) +          o Encoding for Filenames in the Playlists (defaults to FS encoding) +          o FileSystem Encoding (uses locale encoding by default) +          o Output Encoding (uses locale encoding by default) +    The default encoding will be resolved using: +          o The unicode encoding selected for english language. +          o The encoding of the parrent class if any. +          o The unicode encoding defined by locale variable or any locale encoding  +	  in the case of locale language is used. +          o First available usable encoding. +    * Encoding autodetection engine. First available is used by default. 
+    * Mode for recoding cache: +          o Off: Do not use recoding cache +          o On: Use recoding cache to find out encoding and language +          o ReLearn: Fill recoding cache with detected values +          o Learn: Try to use recoding cache to find out encoding. If there are  +	  no cached encoding for current title try to detect it and store in the  +	  cache. +    * Autodetect File Name: If option is switched on the encoding of the file  +    will be resolved using search over file system. +    * Autoengine Set Current Encoding: Encoding autodetection engine will  +    automatically set detected encoding to be used by default. +    * Autodetect Language: Try to autodetect used language. Quite slow. +    * Translate Text: Translate text from detected language to the locale  +    language. Very slow and requires internet connection. The different modes is  +    available. In the full mode the string will be translated to the current  +    locale language. If translation to locale language is failed, the string will +    be translated to english. With "Skip Related" and "Skip parrent" options the +    translation between related languages will be omited (The language is  +    considered to be parrent language in the case then it is expected to have  +    words from that language in the strings of child language. The english  +    language is considered to be parrent language for any other.).  +    With "Translate to English" option it is possible to translate all strings  +    to english. +    * Since the translation is slow, it is possible to limit maximum time used to +    recode/translate string. In this case if translation in the specified amount  +    of time is not finished, untranslated string will be returned. However, the  +    string will be queued for translation and in the next access the translated  +    and cached value will be returned. 
+    * Additionally, for RusXMMS only it is possible to specify the font used by the shade +    form of the xmms playlist. + +Using Multi-Language Playlist +============================= +There are two ways of using multi-language playlists. The first is to use  +Language autodetection: + +    * The UTF-8 locale should be set. +    * The LibRCC should be compiled with aspell support. +    * The aspell dictionaries for all languages used should be installed. + +However, this is quite slow, and erroneous in cases when mostly non-dictionary  +words are used. The second option is to use the recoding cache: + +    * The UTF-8 locale should be set. +    * Select "Learn" mode for recoding caching policy in the preferences prior +    to loading new files. +    * Afterwards the titles for the loaded files will be recoded correctly  +    whenever the recoding caching is enabled. + +Using Language Translation +========================== +It is possible to translate titles to your language using libtranslate.  +The LibRCC should be compiled with LibTranslate support (it is a good  +idea to use the patched libtranslate with the possibility to limit the maximum amount of  +time spent on translation) and you should have an internet connection. Since  +libtranslate utilizes online translation services it takes a lot of time to get +a translation. To solve this problem, translation queueing and caching are used. +If translation does not finish within the specified amount of time, an error will be returned to  +the caller and the string will be queued for translation. The translated string will be +stored in the DB4 cache and will be returned to the caller on the following requests +for translation. @@ -1,40 +1,39 @@  0.3.x:      - Buffer managment:  	+ SetBufferSize ( 0 - autogrow ) -    - Language autodetection and translation improvements -	+ Look on ofline translation libraries and other possibilities to improove  -	translation and language detection. 
-	+ Implement ispell support -	+ Configurable timeouts -    - Move all recoding functionality on rccConfig level -    - Revise locking subsystem -    - Libtranslate can leave translated message partly in old language. This causes problems -      because of recoding from UTF8 to Current language. (With UTF-8 encoding should be Okey). -    - Lating languages. If in the string all characters < 0x7F then we have one of the Latin -    languages? -    - Statistic approach of language detection. -    - LibRCD autolearning using db4 -	+ Charset detection -	+ Language detection (same as charsets, but for UTF8...) -	    * Consider word recognition based on probability -	+ Autolearning is triggered by large enough dictionary words -    - Configurable common classes +    - Move all recoding functionality on the rccConfig Level +    - Revise Locking Subsystem +    - Load class configurations from the XML files. -1.x: -    - Common encodings: -	+ Provide way to add to all languages several default Unicode encodings (UTF8, UTF16, UTF16BE) -	+ Special type of classes to select only from Unicode encodings (or even just specified subset of encodings) -	+ Special pluggable encodings. For example translate to english. -	    * rccToEncoding(current_language, *new_language, buf, size)? -	    * rccFromEncoding(current_language, utf8_language, buf, size)? -	    * Code some options in charset name. (SpecialEncodingPrefix_Encoding_EncodingOptions) -    - Recoding options: -	+ Skip Translation -    - Switch to Get/Ref/UnRef system + +0.4.x: +    - Language and Encoding autodetection improvements. +	+ LibRCD should use DB4 with statistic for different languages +	+ The statistic should be gathered using: +	    * Aspell dictionaries. +	    * Special program getting text on the standard input. +	    * From LibRCC when language is preciesely detected. +	+ The LibRCD engine should be used to fast language detection as well. 
+	    * Just analyze output UTF8 string +	+ Add ispell support +    - Translation improvements +	+ Look if there are any offline translation libraries available. +	+ Use stardict (or other dictionary) to translate on per-word basis. +	+ Try to translate to first parent encoding if translation to the current one fails. +	+ Transliterate translation mode + +0.5.x: +    - Special encoding. +	+ Instead of IConv call considered function. +	    * For example: Transliterate +	    * For example: Translate to English +	+ The options for encoding should be passed as a part of encoding name. +	    * Develop naming conventions +	+ Pluggable special encodings. +	 +1.0.x: +    - Switch to Get/Ref/UnRef calls.      - Drop down 'Class' keywords in all 'ClassCharset' function. Make it default behaviour.  on request:      - Multibyte(not-UTF8) support for FS classes -    - If there are neccessity in western-european language relating. -	+ Check for correctness between related western-european languages while  -	invalid translation checking (rccTo). Can be done with rccSpeller. 
diff --git a/examples/rcc-gtk-config.c b/examples/rcc-gtk-config.c index 9a3f988..da73608 100644 --- a/examples/rcc-gtk-config.c +++ b/examples/rcc-gtk-config.c @@ -16,7 +16,7 @@ static rcc_class classes[] = {      { "ftp", RCC_CLASS_STANDARD, NULL, NULL, "FTP Encoding", 0 },      { "http", RCC_CLASS_STANDARD, NULL, NULL, "HTTP Encoding", 0 },      { "ssh", RCC_CLASS_STANDARD, NULL, NULL, "SSH Encoding", 0 }, -    { "out", RCC_CLASS_STANDARD, "LC_CTYPE", NULL, NULL, 0 }, +    { "out", RCC_CLASS_STANDARD, "LC_CTYPE", NULL, "Output Encoding", 0 },      { NULL }  }; diff --git a/external/rccexternal.c b/external/rccexternal.c index 47f628a..292ee5d 100644 --- a/external/rccexternal.c +++ b/external/rccexternal.c @@ -1,6 +1,7 @@  #include <stdio.h>  #include <stdlib.h>  #include <string.h> +#include <errno.h>  #include "../config.h" @@ -24,19 +25,37 @@  #ifdef HAVE_SYS_UN_H  # include <sys/un.h>  #endif /* HAVE_SYS_UN_H */ +#ifdef HAVE_SYS_TIME_H +# include <sys/time.h> +#endif /* JAVE_SYS_TIME_H */ + +#ifdef HAVE_SIGNAL_H +# include <signal.h> +#endif /* HAVE_SIGNAL_H */  #include <glib/gthread.h>  #include "../src/rccexternal.h"  #include "rcclibtranslate.h" +#define RCC_EXIT_CHECK_TIMEOUT 10 /* seconds */ +  int main() { +#ifdef HAVE_SIGNAL_H +    struct sigaction act; +#endif /* HAVE_PWD_H */ +    int err; +    struct timeval tv; +    fd_set fdcon; +      int s, sd;      char addr[376];      const char *rcc_home_dir;      struct sockaddr_un mysock, clisock;      socklen_t socksize; +     +    pid_t parentpid;      pid_t mypid;      unsigned char loopflag = 1; @@ -49,6 +68,8 @@ int main() {      struct passwd *pw;  #endif /* HAVE_PWD_H */ + +    parentpid = getppid();          mypid = getpid();      rcc_home_dir = getenv ("HOME"); @@ -78,13 +99,34 @@ int main() {      mysock.sun_path[sizeof(mysock.sun_path)-1]=0;      unlink(addr); +      if (bind(s,(struct sockaddr*)&mysock,sizeof(mysock))==-1) return -1;      if (listen(s,1)<0) {  	unlink(addr);  	return -1;      
} +#ifdef HAVE_SIGNAL_H +    act.sa_handler = SIG_IGN; +    sigemptyset(&act.sa_mask); +    act.sa_flags = 0; +    sigaction(SIGPIPE,&act,NULL); +    sigaction(SIGINT,&act,NULL); +#endif /* HAVE_SIGNAL_H */ +      while (loopflag) { +	tv.tv_sec = RCC_EXIT_CHECK_TIMEOUT; +	tv.tv_usec = 0; + +	FD_ZERO(&fdcon); +	FD_SET(s, &fdcon); +	 +	err = select(s+1, &fdcon, NULL, NULL, &tv); +	if (err<=0) { +	    if (getppid() != parentpid) break; +	    continue; +	} +  	sd = accept(s,(struct sockaddr*)&clisock,&socksize);  	if (sd < 0) continue; diff --git a/src/librcc.h b/src/librcc.h index 9b064d1..98ca1a6 100644 --- a/src/librcc.h +++ b/src/librcc.h @@ -427,6 +427,7 @@ typedef int rcc_option_value;  typedef enum rcc_option_translate_t {      RCC_OPTION_TRANSLATE_OFF = 0,  	/**< Switch translation off. */ +    RCC_OPTION_TRANSLATE_TRANSLITERATE,	/**< Transliterate data. */      RCC_OPTION_TRANSLATE_TO_ENGLISH, 	/**< Translate data to english language (Current language don't matter). */      RCC_OPTION_TRANSLATE_SKIP_RELATED, 	/**< Skip translation of the text's between related languages. */      RCC_OPTION_TRANSLATE_SKIP_PARRENT, 	/**< Skip translation of the text's from parrent languages (from english). */ @@ -821,7 +822,7 @@ rcc_charset_id rccConfigGetClassCharsetByName(rcc_language_config config, rcc_cl    * Checks if charset is disabled for the specified class.    * @param config is language configuration    * @param class_id is class id. -  * @param charset is charset name. +  * @param charset_id is charset id.    * @return 1 if charset is disabled, 0 if charset is enabled, -1 in the case of error.    */  int rccConfigIsDisabledCharset(rcc_language_config config, rcc_class_id class_id, rcc_charset_id charset_id); @@ -885,10 +886,13 @@ const char *rccConfigGetSelectedCharsetName(rcc_language_config config, rcc_clas  /**    * Return current encoding_id. The default value will be resolved to paticular encoding id.     
* The following procedure is used to detect default encoding: +  *	- If a Unicode encoding is selected for the same class in the English language configuration, return this encoding.    *	- If the parrent class is defined in #defcharset, - return current encoding of parrent class. -  *	- If the locale variable is defined in #defcharset and config language coincide with locale language, use locale encoding. +  *	- If the locale variable is defined in #defcharset and either the config language coincides with the locale language or a Unicode encoding is defined, use the locale encoding.    *	- If the default value for config language is defined in #defvalue return that default value. -  *	- Return language with id 0. Normally this should be dummy language which indicates that RCC library is not used. +  *	- If the default value for all languages is defined in #defvalue return that default value. +  *	- If either the config language coincides with the locale language or a Unicode locale is used, return the locale encoding. +  *	- Return the first non-disabled encoding in the list.    
*    * @param config is language configuration    * @param class_id is encoding class diff --git a/src/lngconfig.c b/src/lngconfig.c index 20aff63..631abd1 100644 --- a/src/lngconfig.c +++ b/src/lngconfig.c @@ -567,9 +567,11 @@ const char *rccConfigGetSelectedCharsetName(rcc_language_config config, rcc_clas  }  rcc_charset_id rccConfigGetCurrentCharset(rcc_language_config config, rcc_class_id class_id) { +    rcc_language_config enconfig;      unsigned int i, max;      rcc_charset_id charset_id;      rcc_charset_id all_charset_id = (rcc_language_id)-1; +    const char *charset;      rcc_class_default_charset *defcharset;      const char *lang; @@ -582,10 +584,19 @@ rcc_charset_id rccConfigGetCurrentCharset(rcc_language_config config, rcc_class_      const char *defvalue;      if ((!config)||(!config->ctx)||(class_id<0)||(class_id>=config->ctx->n_classes)) return -1; -     +      charset_id = config->charset[class_id];      if (charset_id) return charset_id; +    enconfig = rccGetConfigByName(config->ctx, rcc_english_language_sn); +    if ((enconfig)&&(enconfig!=config)) { +	charset_id = enconfig->charset[class_id]; +	if (charset_id) { +	    charset = rccConfigGetClassCharsetName(enconfig, class_id, charset_id); +	    if ((charset)&&(rccIsUnicode(charset))) return charset_id; +	} +    } +          if (!config->language) return (rcc_charset_id)-1;      else language = config->language; @@ -598,23 +609,27 @@ rcc_charset_id rccConfigGetCurrentCharset(rcc_language_config config, rcc_class_  	    if (!strcmp(classes[i]->name, defvalue))   		return rccConfigGetCurrentCharset(config, i);   	} -    } else defvalue = config->ctx->locale_variable; +    }      if (config->default_charset[class_id]) return config->default_charset[class_id];      if (cl->defvalue) {  	charset_id = rccConfigGetLocaleClassCharset(config, class_id, defvalue);  	if ((charset_id != 0)&&(charset_id != (rcc_charset_id)-1)) { -	    config->default_charset[class_id] = charset_id; -	    return charset_id; 
+	    if (!rccConfigIsDisabledCharset(config, class_id, charset_id)) { +		config->default_charset[class_id] = charset_id; +		return charset_id; +	    }  	}      }      if (cl->defvalue) {  	charset_id = rccConfigGetClassCharsetByName(config, class_id, defvalue);  	if ((charset_id != 0)&&(charset_id != (rcc_charset_id)-1)) { -	    config->default_charset[class_id] = charset_id; -	    return charset_id; +	    if (!rccConfigIsDisabledCharset(config, class_id, charset_id)) { +		config->default_charset[class_id] = charset_id; +		return charset_id; +	    }  	}      } @@ -626,9 +641,17 @@ rcc_charset_id rccConfigGetCurrentCharset(rcc_language_config config, rcc_class_  		if (!strcasecmp(lang, defcharset[i].lang)) {  		    charset_id = rccConfigGetClassCharsetByName(config, class_id, defcharset[i].charset);  		    if ((charset_id != 0)&&(charset_id != (rcc_charset_id)-1)) { -			config->default_charset[class_id] = charset_id; -			return charset_id; -		    } else break; +			if (!rccConfigIsDisabledCharset(config, class_id, charset_id)) { +			    config->default_charset[class_id] = charset_id; +			    return charset_id; +			} else { +			    all_charset_id = (rcc_charset_id)-1; +			    break; +			} +		    } else { +			all_charset_id = (rcc_charset_id)-1; +			break; +		    }  		} else if (!strcasecmp(rcc_default_all, defcharset[i].lang)) {  		    charset_id = rccConfigGetClassCharsetByName(config, class_id, defcharset[i].charset);  		    if ((charset_id != 0)&&(charset_id != (rcc_charset_id)-1)) { @@ -638,20 +661,26 @@ rcc_charset_id rccConfigGetCurrentCharset(rcc_language_config config, rcc_class_  	    }  	    if (all_charset_id != (rcc_language_id)-1) { -		config->default_charset[class_id] = all_charset_id; -		return all_charset_id; +		if (!rccConfigIsDisabledCharset(config, class_id, all_charset_id)) { +		    config->default_charset[class_id] = all_charset_id; +		    return all_charset_id; +		}  	    }      }	 -    charset_id = rccConfigGetLocaleClassCharset(config, 
class_id, defvalue); +    charset_id = rccConfigGetLocaleClassCharset(config, class_id, config->ctx->locale_variable);      if ((charset_id != 0)&&(charset_id != (rcc_charset_id)-1)) { -	config->default_charset[class_id] = charset_id; -	return charset_id; +	if (!rccConfigIsDisabledCharset(config, class_id, charset_id)) { +	    config->default_charset[class_id] = charset_id; +	    return charset_id; +	}      }      max = rccConfigGetClassCharsetNumber(config, class_id);      for (i = 1; i< max; i++) -	if (!rccConfigIsDisabledCharset(config, class_id, (rcc_charset_id)i)) return (rcc_charset_id)i; +	if (!rccConfigIsDisabledCharset(config, class_id, (rcc_charset_id)i)) { +	    return (rcc_charset_id)i; +	}      return (rcc_charset_id)-1;  } diff --git a/src/rccconfig.c b/src/rccconfig.c index 0752ee3..ae47a63 100644 --- a/src/rccconfig.c +++ b/src/rccconfig.c @@ -31,6 +31,8 @@ const char rcc_default_all[] = "all";  const char rcc_default_language_sn[] = "default";  const char rcc_disabled_language_sn[] = "Off";  const char rcc_english_language_sn[] = "en"; +const char rcc_russian_language_sn[] = "ru"; +const char rcc_ukrainian_language_sn[] = "uk";  const char rcc_disabled_engine_sn[] = "Off";  const char rcc_default_charset[] = "Default"; @@ -61,18 +63,18 @@ rcc_language rcc_default_languages_embeded[RCC_MAX_LANGUAGES + 1] = {      &rcc_default_engine,      NULL  }}, -{"en", {rcc_default_charset, rcc_utf8_charset, NULL}, { +{rcc_english_language_sn, {rcc_default_charset, rcc_utf8_charset, "ISO8859-1", NULL}, {      &rcc_default_engine,      NULL  }}, -{"ru", {rcc_default_charset,"KOI8-R","CP1251",rcc_utf8_charset,"IBM866","MACCYRILLIC","ISO8859-5", NULL}, { +{rcc_russian_language_sn, {rcc_default_charset,"KOI8-R","CP1251",rcc_utf8_charset,"IBM866","MACCYRILLIC","ISO8859-5", NULL}, {      &rcc_default_engine,  #ifdef RCC_RCD_SUPPORT      &rcc_russian_engine,  #endif /* RCC_RCD_SUPPORT */      NULL  }}, -{"uk", 
{rcc_default_charset,"KOI8-U","CP1251",rcc_utf8_charset,"IBM855","MACCYRILLIC","ISO8859-5","CP1125", NULL}, { +{rcc_ukrainian_language_sn, {rcc_default_charset,"KOI8-U","CP1251",rcc_utf8_charset,"IBM855","MACCYRILLIC","ISO8859-5","CP1125", NULL}, {      &rcc_default_engine,  #ifdef RCC_RCD_SUPPORT      &rcc_ukrainian_engine, @@ -129,11 +131,10 @@ rcc_language rcc_default_languages_embeded[RCC_MAX_LANGUAGES + 1] = {  }},  {NULL}  }; -  rcc_option_value_name rcc_sn_boolean[] = { "OFF", "ON", NULL };  rcc_option_value_name rcc_sn_learning[] = { "OFF", "ON", "RELEARN", "LEARN", NULL };  rcc_option_value_name rcc_sn_clo[] = { "ALL", "CONFIGURED_AND_AUTO", "CONFIGURED_ONLY", NULL }; -rcc_option_value_name rcc_sn_translate[] = { "OFF", "TO_ENGLISH", "SKIP_RELATED", "SKIP_PARRENT", "FULL", NULL }; +rcc_option_value_name rcc_sn_translate[] = { "OFF", "TRANSLITERATE", "TO_ENGLISH", "SKIP_RELATED", "SKIP_PARRENT", "FULL", NULL };  rcc_option_description rcc_option_descriptions[RCC_MAX_OPTIONS+1];  rcc_option_description rcc_option_descriptions_embeded[RCC_MAX_OPTIONS+1] = { diff --git a/src/rccconfig.h b/src/rccconfig.h index f7f70dd..8b5ac0d 100644 --- a/src/rccconfig.h +++ b/src/rccconfig.h @@ -10,6 +10,9 @@  extern const char rcc_default_all[];  extern const char rcc_default_language_sn[];  extern const char rcc_english_language_sn[]; +extern const char rcc_russian_language_sn[]; +extern const char rcc_ukrainian_language_sn[]; +  extern const char rcc_disabled_language_sn[];  extern const char rcc_disabled_engine_sn[]; diff --git a/src/recode.c b/src/recode.c index a528481..9e19078 100644 --- a/src/recode.c +++ b/src/recode.c @@ -322,7 +322,9 @@ static char *rccRecodeTranslate(rcc_language_config *config, rcc_class_id class_      rcc_translate trans, entrans; +    unsigned int i;      char *translated; +    unsigned char change_case;      ctx = (*config)->ctx; @@ -336,7 +338,7 @@ static char *rccRecodeTranslate(rcc_language_config *config, rcc_class_id class_      
english_language_id = rccGetLanguageByName(ctx, rcc_english_language_sn); -    if (translate == RCC_OPTION_TRANSLATE_TO_ENGLISH) { +    if ((translate == RCC_OPTION_TRANSLATE_TO_ENGLISH)||(translate == RCC_OPTION_TRANSLATE_TRANSLITERATE)) {  	current_language_id = english_language_id ;      } else {  	if (ctype == RCC_CLASS_TRANSLATE_LOCALE) { @@ -356,6 +358,49 @@ static char *rccRecodeTranslate(rcc_language_config *config, rcc_class_id class_      if (rccConfigConfigure(curconfig)) return NULL; +    if (translate == RCC_OPTION_TRANSLATE_TRANSLITERATE) { +	if (!strcasecmp((*config)->language->sn, rcc_russian_language_sn)) { +	    translated = rccSizedRecodeCharsets(ctx, "UTF-8", "KOI8-R", utfstring, 0, NULL); +	    if (!translated) return NULL; +	    for (i=0;translated[i];i++) { +		if (translated[i]&0x80) change_case = 1; +		else change_case = 0; +		 +		translated[i]=translated[i]&0x7F; +		if (change_case) { +	    	    if ((translated[i]<'Z')&&(translated[i]>'A')) +			translated[i]=translated[i]-'A'+'a'; +		    else if ((translated[i]<'z')&&(translated[i]>'a')) +			translated[i]=translated[i]-'a'+'A'; +		} +	    } +	    *config = curconfig; +	    return translated; +	} +	if (!strcasecmp((*config)->language->sn, rcc_ukrainian_language_sn)) { +	    translated = rccSizedRecodeCharsets(ctx, "UTF-8", "KOI8-U", utfstring, 0, NULL); +	    if (!translated) return NULL; +	    for (i=0;translated[i];i++) { +		if (translated[i]&0x80) change_case = 1; +		else change_case = 0; +		 +		translated[i]=translated[i]&0x7F; +		if (change_case) { +	    	    if ((translated[i]<'Z')&&(translated[i]>'A')) +			translated[i]=translated[i]-'A'+'a'; +		    else if ((translated[i]<'z')&&(translated[i]>'a')) +			translated[i]=translated[i]-'a'+'A'; +		} +	    } +	    *config = curconfig; +	    return translated; +	} + +	translated = rccSizedRecodeCharsets(ctx, "UTF-8", "US-ASCII//TRANSLIT", utfstring, 0, NULL); +	if (translated) *config = curconfig; +	return translated; +    } +          if 
(translate == RCC_OPTION_TRANSLATE_SKIP_RELATED) {  	if (rccAreRelatedLanguages(curconfig, *config)) return NULL;      } diff --git a/ui/rccnames.c b/ui/rccnames.c index d3d54d7..7f4f912 100644 --- a/ui/rccnames.c +++ b/ui/rccnames.c @@ -32,7 +32,7 @@ rcc_name rcc_default_language_names_embeded[RCC_MAX_LANGUAGES+1] = {  rcc_option_value_name rcc_default_option_boolean_names[] = { "Off", "On", NULL };  rcc_option_value_name rcc_default_option_learning_names[] = { "Off", "On", "Relearn", "Learn", NULL };  rcc_option_value_name rcc_default_option_clo_names[] = { "All Languages", "Configured / AutoEngine", "Configured Only", NULL }; -rcc_option_value_name rcc_default_option_translate_names[] = { "Off", "Translate to English", "Skip Translation between Related Languages", "Skip Translation from Parrent Languages", "Full", NULL }; +rcc_option_value_name rcc_default_option_translate_names[] = { "Off", "Transliterate", "Translate to English", "Skip Translation between Related Languages", "Skip Translation from Parrent Languages", "Full", NULL };  rcc_option_name rcc_default_option_names[RCC_MAX_OPTIONS+1];  rcc_option_name rcc_default_option_names_embeded[RCC_MAX_OPTIONS+1] = { | 
