API Improvements

- Removed 'rlen' return parameters where not necessary for multibyte encodings - Two versions of recode functions: rccRecode -> rccRecode, rccSizedRecode - Class Types: CONST, SKIP_SAVELOAD - New recode functions: rccToCharset, rccFromCharset - More new recode functions: rccRecodeToCharset, rccRecodeFromCharset, rccRecodeCharsets - New function: rccGetCompiledConfiguration - All warnings are fixed - Perform "File Name" search only if there are non-ISO8859-1 chars in the name. - Do not copy invalid characters; skip them. - Fixed error in rccRecode with 'Recoding Cache' switched on. - Strip leading and trailing spaces in rccDB4 get/set
author: Suren A. Chilingaryan <csa@dside.dyndns.org> 2005-07-18 15:22:28 +0000
committer: Suren A. Chilingaryan <csa@dside.dyndns.org> 2005-07-18 15:22:28 +0000
commit: 537c4b33fdf6e143243d5a0d286eeb247362e806 (patch)
tree: d8a94cfaa4a71ffc826b7d8176c54445369539f3 /src/recode.c
parent: 4032f92867e5570f130e4175b3b4fb61240f9752 (diff)
download: librcc-537c4b33fdf6e143243d5a0d286eeb247362e806.tar.gz
librcc-537c4b33fdf6e143243d5a0d286eeb247362e806.tar.bz2
librcc-537c4b33fdf6e143243d5a0d286eeb247362e806.tar.xz
librcc-537c4b33fdf6e143243d5a0d286eeb247362e806.zip
1 files changed, 175 insertions, 26 deletions
diff --git a/src/recode.c b/src/recode.c
index 2164a4d..435e1a6 100644
--- a/src/recode.c
+++ b/src/recode.c
@@ -13,27 +13,27 @@
 
 
 
-static rcc_charset_id rccIConvAuto(rcc_context ctx, rcc_class_id class_id, const char *buf, int len) {
+static rcc_autocharset_id rccIConvAuto(rcc_context ctx, rcc_class_id class_id, const char *buf, int len) {
     rcc_class_type class_type;
     rcc_engine_ptr engine;
     
-    if (!buf) return (rcc_charset_id)-1;
-
+    if (!buf) return (rcc_autocharset_id)-1;
+    
     class_type = rccGetClassType(ctx, class_id);
-    if ((class_type == RCC_CLASS_STANDARD)||((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_TITLES)))) {
+    if ((class_type != RCC_CLASS_FS)||((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_TITLES)))) {
 	engine = rccGetCurrentEnginePointer(ctx);
-	if ((!engine)||(!engine->func)) return (rcc_charset_id)-1;
+	if ((!engine)||(!engine->func)) return (rcc_autocharset_id)-1;
 	return engine->func(&ctx->engine_ctx, buf, len);
     }
     
-    return (rcc_charset_id)-1;
+    return (rcc_autocharset_id)-1;
 }
 
-rcc_string rccFrom(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len, size_t *rlen) {
+rcc_string rccSizedFrom(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len) {
     int err;
     size_t ret;
     rcc_language_id language_id;
-    rcc_charset_id charset_id;
+    rcc_autocharset_id charset_id;
     rcc_iconv icnv = NULL;
     rcc_string result;
     rcc_option_value usedb4;
@@ -64,15 +64,15 @@ rcc_string rccFrom(rcc_context ctx, rcc_class_id class_id, const char *buf, size
     language_id = rccGetCurrentLanguage(ctx);
 
     charset_id = rccIConvAuto(ctx, class_id, buf, len);
-    if (charset_id != (rcc_charset_id)-1) icnv = ctx->iconv_auto[charset_id];
+    if (charset_id != (rcc_autocharset_id)-1) icnv = ctx->iconv_auto[charset_id];
     else icnv = ctx->iconv_from[class_id];
 
     if (icnv) {
 	ret = rccIConv(ctx, icnv, buf, len);
 	if (ret == (size_t)-1) return NULL;
-	result = rccCreateString(language_id, ctx->tmpbuffer, ret, rlen);
+	result = rccCreateString(language_id, ctx->tmpbuffer, ret);
     } else {
-	result = rccCreateString(language_id, buf, len, rlen);
+	result = rccCreateString(language_id, buf, len);
     }
 
     if ((result)&&(usedb4&RCC_OPTION_LEARNING_FLAG_LEARN)) {
@@ -84,7 +84,7 @@ rcc_string rccFrom(rcc_context ctx, rcc_class_id class_id, const char *buf, size
     return result;
 }
 
-char *rccTo(rcc_context ctx, rcc_class_id class_id, const rcc_string buf, size_t len, size_t *rlen) {
+char *rccSizedTo(rcc_context ctx, rcc_class_id class_id, const rcc_string buf, size_t *rlen) {
     int err;
     size_t newlen;
     char *result;
@@ -92,7 +92,6 @@ char *rccTo(rcc_context ctx, rcc_class_id class_id, const rcc_string buf, size_t
     const char *utfstring;
     rcc_language_config config;
     rcc_language_id language_id;
-    rcc_charset_id charset_id;
     rcc_class_type class_type;
     rcc_iconv icnv;
 
@@ -102,7 +101,7 @@ char *rccTo(rcc_context ctx, rcc_class_id class_id, const rcc_string buf, size_t
     }
     if ((class_id<0)||(class_id>=ctx->n_classes)||(!buf)) return NULL;
 
-    newlen = rccStringSizedCheck((const char*)buf, len);
+    newlen = rccStringCheck((const char*)buf);
     if (!newlen) return NULL;
 
     language_id = rccStringGetLanguage(buf);
@@ -117,6 +116,12 @@ char *rccTo(rcc_context ctx, rcc_class_id class_id, const rcc_string buf, size_t
 
     class_type = rccGetClassType(ctx, class_id);
     if ((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_NAMES))) {
+	    if (rccIsASCII(utfstring)) {
+		result = rccStringExtractString(buf);
+		if ((result)&&(rlen)) *rlen = strlen(result);
+		return result;
+	    }
+
 	    name = (char*)utfstring;
 	    prefix = NULL;
 	    
@@ -127,17 +132,18 @@ char *rccTo(rcc_context ctx, rcc_class_id class_id, const rcc_string buf, size_t
 		    if (prefix) free(prefix);
 		    free(name);
 		}
-		if ((rlen)&&(result)) *rlen = strlen(result);
+		if (rlen) *rlen = strlen(result);
 		return result;
 	    }
     }
 
     icnv =  config->iconv_to[class_id];
     if (icnv) {
-	newlen = rccIConv(ctx, icnv, rccStringGetString(buf), len?newlen:0);
+	newlen = rccIConv(ctx, icnv, rccStringGetString((const char*)buf), newlen);
 	if (newlen == (size_t)-1) return NULL;
 
-	result = rccCreateResult(ctx, newlen, rlen);
+	result = rccCreateResult(ctx, newlen);
+	if (rlen) *rlen = newlen;
     } else {
 	result = rccStringExtractString(buf);
 	if (rlen) *rlen = newlen;
@@ -146,13 +152,13 @@ char *rccTo(rcc_context ctx, rcc_class_id class_id, const rcc_string buf, size_t
     return result;
 }
 
-char *rccRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char *buf, size_t len, size_t *rlen) {
-    size_t nlen;
+char *rccSizedRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char *buf, size_t len, size_t *rlen) {
     rcc_string stmp;
     char *result;
     const char *from_charset, *to_charset;
     rcc_charset_id from_charset_id, to_charset_id;
     rcc_class_type class_type;
+    rcc_option_value usedb4;
 
     if (!ctx) {
 	if (rcc_default_ctx) ctx = rcc_default_ctx;
@@ -163,6 +169,20 @@ char *rccRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char
     class_type = rccGetClassType(ctx, to);
     if ((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_NAMES))) goto recoding;
     if (rccGetOption(ctx, RCC_OPTION_LEARNING_MODE)&RCC_OPTION_LEARNING_FLAG_LEARN) goto recoding;
+
+    usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE);
+    if (usedb4&RCC_OPTION_LEARNING_FLAG_USE) {
+	stmp = rccDb4GetKey(ctx->db4ctx, buf, len);
+	if (stmp) {
+	     if (rccStringFixID(stmp, ctx)) free(stmp);
+	     else {
+		result = rccSizedTo(ctx, to, stmp, rlen);
+		free(stmp);
+		return result;
+	    }
+	}
+    }
+
     
     from_charset_id = rccIConvAuto(ctx, from, buf, len);
     if (from_charset_id != (rcc_charset_id)-1) {
@@ -174,11 +194,11 @@ char *rccRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char
 	to_charset_id = rccGetCurrentCharset(ctx, to);
 	if (from_charset_id == to_charset_id) return NULL;
     }
-
+    
 recoding:    
-    stmp = rccFrom(ctx, from, buf, len, &nlen);
+    stmp = rccSizedFrom(ctx, from, buf, len);
     if (stmp) {
-	result = rccTo(ctx, to, stmp, nlen, rlen);
+	result = rccSizedTo(ctx, to, stmp, rlen);
 	free(stmp);
 	return result;
     } 
@@ -192,7 +212,6 @@ char *rccFS(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char *fsp
     char *prefix = (char*)path, *name = (char*)filename; /*DS*/
     rcc_string string;
 
-    char *stmp;
     char *result = NULL;
 
     if (!ctx) {
@@ -209,16 +228,16 @@ char *rccFS(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char *fsp
 	if (err < 0) return NULL;
 	
 	if (err&1) {
-	    if (err&2) return NULL;
 	    if (rccGetOption(ctx, RCC_OPTION_LEARNING_MODE)&RCC_OPTION_LEARNING_FLAG_LEARN) {
-	        string = rccFrom(ctx, from, name, 0, NULL);
+	        string = rccFrom(ctx, from, name);
 		if (string) free(string);
 	    }
+	    if (err&2) return NULL;
 	    return name;
 	}
     }
 
-    string = rccFrom(ctx, from, name, 0, NULL);
+    string = rccFrom(ctx, from, name);
     if (string) {
 	config = rccGetConfig(ctx, rccStringGetLanguage(string));
 	if (config) result = rccFS3(config, to, prefix, rccStringGetString(string));
@@ -233,3 +252,133 @@ char *rccFS(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char *fsp
     }
     return result;
 }
+
+
+rcc_string rccSizedFromCharset(rcc_context ctx, const char *charset, const char *buf, size_t len) {
+    rcc_iconv icnv;
+    rcc_language_config config;
+    rcc_language_id language_id;
+    size_t res;
+    
+    if ((!buf)||(!charset)) return NULL;
+    
+    language_id = rccGetCurrentLanguage(ctx);
+    if ((language_id == (rcc_language_id)-1)||(language_id == 0)) return NULL;
+    config = rccGetConfig(ctx, language_id);
+    if (!config) return NULL;
+    
+    icnv = rccIConvOpen("UTF-8", charset);
+    if (icnv) {
+	res = rccIConv(ctx, icnv, buf, len);
+	rccIConvClose(icnv);
+	if (res == (size_t)-1) return NULL;
+	return rccCreateString(language_id, ctx->tmpbuffer, res);
+    } 
+    return rccCreateString(language_id, buf, len);
+}
+
+char *rccSizedToCharset(rcc_context ctx, const char *charset, const rcc_string buf, size_t *rlen) {
+    rcc_iconv icnv;
+    size_t res;
+    
+    if ((!buf)||(!charset)) return NULL;
+
+    res = rccStringCheck(buf);
+    if (!res) return NULL;
+    
+    icnv = rccIConvOpen(charset, "UTF-8");
+    if (icnv) {
+	res = rccIConv(ctx, icnv, rccStringGetString(buf), res);
+	rccIConvClose(icnv);
+	if (res == (size_t)-1) return NULL;
+	
+	if (rlen) *rlen = res;
+	return rccCreateResult(ctx, res);
+    } 
+
+    if (rlen) *rlen = res;
+    return rccStringExtractString(buf);
+}
+
+/* Convert from class_id to Charset */
+char *rccSizedRecodeToCharset(rcc_context ctx, rcc_class_id class_id, const char *charset, const rcc_string buf, size_t len, size_t *rlen) {
+    size_t res;
+    rcc_iconv icnv;
+    const char *str;
+    char *utf8, *extracted;
+
+    if (!charset) return NULL;
+    
+    utf8 = rccSizedFrom(ctx, class_id, buf, len);
+    if (!utf8) return utf8;
+    
+    str = rccStringGetString(utf8);
+
+    icnv = rccIConvOpen(charset, "UTF-8");
+    if (icnv) {
+	res = rccIConv(ctx, icnv, str, 0);
+	rccIConvClose(icnv);
+	free(utf8);
+
+	if (res == (size_t)-1) return NULL;
+	if (rlen) *rlen = res;
+	return rccCreateResult(ctx, res);
+    }
+    
+    extracted = rccStringExtractString(utf8);
+    free(utf8);
+
+    if ((rlen)&&(extracted)) *rlen = strlen(extracted);
+    return extracted;
+}
+
+/* Convert to class_id from Charset */
+char *rccSizedRecodeFromCharset(rcc_context ctx, rcc_class_id class_id, const char *charset, const char *buf, size_t len, size_t *rlen) {
+    size_t res;
+    rcc_iconv icnv;
+    rcc_string str;
+    char *extracted;
+    
+
+    if (!charset) return NULL;
+    
+    icnv = rccIConvOpen("UTF-8", charset);
+    if (icnv) {
+	res = rccIConv(ctx, icnv, buf, len);
+	rccIConvClose(icnv);
+
+	if (res == (size_t)-1) return NULL;
+
+	str = rccCreateString(rccGetCurrentLanguage(ctx), ctx->tmpbuffer, res);
+    } else str = rccCreateString(rccGetCurrentLanguage(ctx), buf, len);
+
+    if (!str) return NULL;
+
+    extracted = rccSizedTo(ctx, class_id, str, rlen);
+    free(str);
+
+    return extracted;
+}
+
+char *rccSizedRecodeCharsets(rcc_context ctx, const char *from, const char *to, const char *buf, size_t len, size_t *rlen) {
+    char *str;
+    size_t res;
+    rcc_iconv icnv;
+
+    icnv = rccIConvOpen(to, from);
+    if (!icnv) return NULL;
+
+    res = rccIConv(ctx, icnv, buf, len);
+    rccIConvClose(icnv);
+    
+    if (res == (size_t)-1) return NULL;
+    
+    
+
+    str = (char*)malloc((res+1)*sizeof(char));
+    if (!str) return NULL;
+    memcpy(str, ctx->tmpbuffer, res);
+    if (rlen) *rlen = res;
+
+    return str;
+}
author	Suren A. Chilingaryan <csa@dside.dyndns.org>	2005-07-18 15:22:28 +0000
committer	Suren A. Chilingaryan <csa@dside.dyndns.org>	2005-07-18 15:22:28 +0000
commit	537c4b33fdf6e143243d5a0d286eeb247362e806 (patch)
tree	d8a94cfaa4a71ffc826b7d8176c54445369539f3 /src/recode.c
parent	4032f92867e5570f130e4175b3b4fb61240f9752 (diff)
download	librcc-537c4b33fdf6e143243d5a0d286eeb247362e806.tar.gz librcc-537c4b33fdf6e143243d5a0d286eeb247362e806.tar.bz2 librcc-537c4b33fdf6e143243d5a0d286eeb247362e806.tar.xz librcc-537c4b33fdf6e143243d5a0d286eeb247362e806.zip