summary | refs | log | tree | commit | diff | stats
path: root/src/recode.c
diff options
context:
space:
mode:
authorSuren A. Chilingaryan <csa@dside.dyndns.org>2005-07-18 15:22:28 +0000
committerSuren A. Chilingaryan <csa@dside.dyndns.org>2005-07-18 15:22:28 +0000
commit537c4b33fdf6e143243d5a0d286eeb247362e806 (patch)
treed8a94cfaa4a71ffc826b7d8176c54445369539f3 /src/recode.c
parent4032f92867e5570f130e4175b3b4fb61240f9752 (diff)
downloadlibrcc-537c4b33fdf6e143243d5a0d286eeb247362e806.tar.gz
librcc-537c4b33fdf6e143243d5a0d286eeb247362e806.tar.bz2
librcc-537c4b33fdf6e143243d5a0d286eeb247362e806.tar.xz
librcc-537c4b33fdf6e143243d5a0d286eeb247362e806.zip
API Improvements
- Removed 'rlen' return parameters where they are not necessary for multibyte encodings - Two versions of recode functions: rccRecode -> rccRecode, rccSizedRecode - Class Types: CONST, SKIP_SAVELOAD - New recode functions: rccToCharset, rccFromCharset - More new recode functions: rccRecodeToCharset, rccRecodeFromCharset, rccRecodeCharsets - New function: rccGetCompiledConfiguration - All warnings are fixed - Perform "File Name" search only if there are non-ISO8859-1 chars in the name. - Do not copy invalid characters; skip them. - Fixed error in rccRecode with 'Recoding Cache' switched On. - Strip leading and trailing spaces in rccDB4 get/set
Diffstat (limited to 'src/recode.c')
-rw-r--r--src/recode.c201
1 files changed, 175 insertions, 26 deletions
diff --git a/src/recode.c b/src/recode.c
index 2164a4d..435e1a6 100644
--- a/src/recode.c
+++ b/src/recode.c
@@ -13,27 +13,27 @@
-static rcc_charset_id rccIConvAuto(rcc_context ctx, rcc_class_id class_id, const char *buf, int len) {
+static rcc_autocharset_id rccIConvAuto(rcc_context ctx, rcc_class_id class_id, const char *buf, int len) {
rcc_class_type class_type;
rcc_engine_ptr engine;
- if (!buf) return (rcc_charset_id)-1;
-
+ if (!buf) return (rcc_autocharset_id)-1;
+
class_type = rccGetClassType(ctx, class_id);
- if ((class_type == RCC_CLASS_STANDARD)||((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_TITLES)))) {
+ if ((class_type != RCC_CLASS_FS)||((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_TITLES)))) {
engine = rccGetCurrentEnginePointer(ctx);
- if ((!engine)||(!engine->func)) return (rcc_charset_id)-1;
+ if ((!engine)||(!engine->func)) return (rcc_autocharset_id)-1;
return engine->func(&ctx->engine_ctx, buf, len);
}
- return (rcc_charset_id)-1;
+ return (rcc_autocharset_id)-1;
}
-rcc_string rccFrom(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len, size_t *rlen) {
+rcc_string rccSizedFrom(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len) {
int err;
size_t ret;
rcc_language_id language_id;
- rcc_charset_id charset_id;
+ rcc_autocharset_id charset_id;
rcc_iconv icnv = NULL;
rcc_string result;
rcc_option_value usedb4;
@@ -64,15 +64,15 @@ rcc_string rccFrom(rcc_context ctx, rcc_class_id class_id, const char *buf, size
language_id = rccGetCurrentLanguage(ctx);
charset_id = rccIConvAuto(ctx, class_id, buf, len);
- if (charset_id != (rcc_charset_id)-1) icnv = ctx->iconv_auto[charset_id];
+ if (charset_id != (rcc_autocharset_id)-1) icnv = ctx->iconv_auto[charset_id];
else icnv = ctx->iconv_from[class_id];
if (icnv) {
ret = rccIConv(ctx, icnv, buf, len);
if (ret == (size_t)-1) return NULL;
- result = rccCreateString(language_id, ctx->tmpbuffer, ret, rlen);
+ result = rccCreateString(language_id, ctx->tmpbuffer, ret);
} else {
- result = rccCreateString(language_id, buf, len, rlen);
+ result = rccCreateString(language_id, buf, len);
}
if ((result)&&(usedb4&RCC_OPTION_LEARNING_FLAG_LEARN)) {
@@ -84,7 +84,7 @@ rcc_string rccFrom(rcc_context ctx, rcc_class_id class_id, const char *buf, size
return result;
}
-char *rccTo(rcc_context ctx, rcc_class_id class_id, const rcc_string buf, size_t len, size_t *rlen) {
+char *rccSizedTo(rcc_context ctx, rcc_class_id class_id, const rcc_string buf, size_t *rlen) {
int err;
size_t newlen;
char *result;
@@ -92,7 +92,6 @@ char *rccTo(rcc_context ctx, rcc_class_id class_id, const rcc_string buf, size_t
const char *utfstring;
rcc_language_config config;
rcc_language_id language_id;
- rcc_charset_id charset_id;
rcc_class_type class_type;
rcc_iconv icnv;
@@ -102,7 +101,7 @@ char *rccTo(rcc_context ctx, rcc_class_id class_id, const rcc_string buf, size_t
}
if ((class_id<0)||(class_id>=ctx->n_classes)||(!buf)) return NULL;
- newlen = rccStringSizedCheck((const char*)buf, len);
+ newlen = rccStringCheck((const char*)buf);
if (!newlen) return NULL;
language_id = rccStringGetLanguage(buf);
@@ -117,6 +116,12 @@ char *rccTo(rcc_context ctx, rcc_class_id class_id, const rcc_string buf, size_t
class_type = rccGetClassType(ctx, class_id);
if ((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_NAMES))) {
+ if (rccIsASCII(utfstring)) {
+ result = rccStringExtractString(buf);
+ if ((result)&&(rlen)) *rlen = strlen(result);
+ return result;
+ }
+
name = (char*)utfstring;
prefix = NULL;
@@ -127,17 +132,18 @@ char *rccTo(rcc_context ctx, rcc_class_id class_id, const rcc_string buf, size_t
if (prefix) free(prefix);
free(name);
}
- if ((rlen)&&(result)) *rlen = strlen(result);
+ if (rlen) *rlen = strlen(result);
return result;
}
}
icnv = config->iconv_to[class_id];
if (icnv) {
- newlen = rccIConv(ctx, icnv, rccStringGetString(buf), len?newlen:0);
+ newlen = rccIConv(ctx, icnv, rccStringGetString((const char*)buf), newlen);
if (newlen == (size_t)-1) return NULL;
- result = rccCreateResult(ctx, newlen, rlen);
+ result = rccCreateResult(ctx, newlen);
+ if (rlen) *rlen = newlen;
} else {
result = rccStringExtractString(buf);
if (rlen) *rlen = newlen;
@@ -146,13 +152,13 @@ char *rccTo(rcc_context ctx, rcc_class_id class_id, const rcc_string buf, size_t
return result;
}
-char *rccRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char *buf, size_t len, size_t *rlen) {
- size_t nlen;
+char *rccSizedRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char *buf, size_t len, size_t *rlen) {
rcc_string stmp;
char *result;
const char *from_charset, *to_charset;
rcc_charset_id from_charset_id, to_charset_id;
rcc_class_type class_type;
+ rcc_option_value usedb4;
if (!ctx) {
if (rcc_default_ctx) ctx = rcc_default_ctx;
@@ -163,6 +169,20 @@ char *rccRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char
class_type = rccGetClassType(ctx, to);
if ((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_NAMES))) goto recoding;
if (rccGetOption(ctx, RCC_OPTION_LEARNING_MODE)&RCC_OPTION_LEARNING_FLAG_LEARN) goto recoding;
+
+ usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE);
+ if (usedb4&RCC_OPTION_LEARNING_FLAG_USE) {
+ stmp = rccDb4GetKey(ctx->db4ctx, buf, len);
+ if (stmp) {
+ if (rccStringFixID(stmp, ctx)) free(stmp);
+ else {
+ result = rccSizedTo(ctx, to, stmp, rlen);
+ free(stmp);
+ return result;
+ }
+ }
+ }
+
from_charset_id = rccIConvAuto(ctx, from, buf, len);
if (from_charset_id != (rcc_charset_id)-1) {
@@ -174,11 +194,11 @@ char *rccRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char
to_charset_id = rccGetCurrentCharset(ctx, to);
if (from_charset_id == to_charset_id) return NULL;
}
-
+
recoding:
- stmp = rccFrom(ctx, from, buf, len, &nlen);
+ stmp = rccSizedFrom(ctx, from, buf, len);
if (stmp) {
- result = rccTo(ctx, to, stmp, nlen, rlen);
+ result = rccSizedTo(ctx, to, stmp, rlen);
free(stmp);
return result;
}
@@ -192,7 +212,6 @@ char *rccFS(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char *fsp
char *prefix = (char*)path, *name = (char*)filename; /*DS*/
rcc_string string;
- char *stmp;
char *result = NULL;
if (!ctx) {
@@ -209,16 +228,16 @@ char *rccFS(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char *fsp
if (err < 0) return NULL;
if (err&1) {
- if (err&2) return NULL;
if (rccGetOption(ctx, RCC_OPTION_LEARNING_MODE)&RCC_OPTION_LEARNING_FLAG_LEARN) {
- string = rccFrom(ctx, from, name, 0, NULL);
+ string = rccFrom(ctx, from, name);
if (string) free(string);
}
+ if (err&2) return NULL;
return name;
}
}
- string = rccFrom(ctx, from, name, 0, NULL);
+ string = rccFrom(ctx, from, name);
if (string) {
config = rccGetConfig(ctx, rccStringGetLanguage(string));
if (config) result = rccFS3(config, to, prefix, rccStringGetString(string));
@@ -233,3 +252,133 @@ char *rccFS(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char *fsp
}
return result;
}
+
+/*
+ * Convert `len` bytes of `buf`, given in encoding `charset`, into an
+ * rcc_string built by rccCreateString with the context's current language id.
+ *
+ * The converter is opened as rccIConvOpen("UTF-8", charset), i.e. the target
+ * is UTF-8 and the source is `charset`.
+ *
+ * Returns NULL when `buf` or `charset` is NULL, when the current language id
+ * is (rcc_language_id)-1 or 0, when rccGetConfig finds no configuration for
+ * that language, or when rccIConv reports (size_t)-1. When no converter could
+ * be opened, the input bytes are handed to rccCreateString unconverted.
+ *
+ * NOTE(review): `ctx` is dereferenced (ctx->tmpbuffer) without a NULL /
+ * default-context fallback in this function - confirm callers never pass NULL.
+ */
+rcc_string rccSizedFromCharset(rcc_context ctx, const char *charset, const char *buf, size_t len) {
+ rcc_iconv icnv;
+ rcc_language_config config;
+ rcc_language_id language_id;
+ size_t res;
+
+ if ((!buf)||(!charset)) return NULL;
+
+ language_id = rccGetCurrentLanguage(ctx);
+ if ((language_id == (rcc_language_id)-1)||(language_id == 0)) return NULL;
+ config = rccGetConfig(ctx, language_id); /* only tested for existence; not used further below */
+ if (!config) return NULL;
+
+ icnv = rccIConvOpen("UTF-8", charset);
+ if (icnv) {
+ res = rccIConv(ctx, icnv, buf, len); /* converted bytes are read from ctx->tmpbuffer below */
+ rccIConvClose(icnv); /* converter is one-shot: closed before the result is inspected */
+ if (res == (size_t)-1) return NULL;
+ return rccCreateString(language_id, ctx->tmpbuffer, res);
+ }
+ return rccCreateString(language_id, buf, len); /* no converter: wrap the raw input as-is */
+}
+/*
+ * Convert the rcc_string `buf` into encoding `charset` and return a newly
+ * produced plain string; if `rlen` is non-NULL it receives a size (see below).
+ *
+ * The converter is opened as rccIConvOpen(charset, "UTF-8"), i.e. the target
+ * is `charset` and the source is UTF-8.
+ *
+ * Returns NULL when `buf` or `charset` is NULL, when rccStringCheck(buf)
+ * returns 0, or when rccIConv reports (size_t)-1.
+ *
+ * With a converter, *rlen is the value returned by rccIConv and the result
+ * comes from rccCreateResult(ctx, res). Without one, *rlen is the value
+ * returned by rccStringCheck and the result is rccStringExtractString(buf).
+ * In both cases *rlen is written before the result is created, so it is set
+ * even if the final call returns NULL.
+ */
+char *rccSizedToCharset(rcc_context ctx, const char *charset, const rcc_string buf, size_t *rlen) {
+ rcc_iconv icnv;
+ size_t res;
+
+ if ((!buf)||(!charset)) return NULL;
+
+ res = rccStringCheck(buf); /* a zero result is treated as failure */
+ if (!res) return NULL;
+
+ icnv = rccIConvOpen(charset, "UTF-8");
+ if (icnv) {
+ res = rccIConv(ctx, icnv, rccStringGetString(buf), res); /* res is reused: input size in, rccIConv result out */
+ rccIConvClose(icnv);
+ if (res == (size_t)-1) return NULL;
+
+ if (rlen) *rlen = res;
+ return rccCreateResult(ctx, res);
+ }
+
+ if (rlen) *rlen = res; /* no converter: report the rccStringCheck value */
+ return rccStringExtractString(buf);
+}
+
+/* Convert from class_id to Charset.
+ *
+ * `buf` (`len` bytes) is first turned into an intermediate rcc_string with
+ * rccSizedFrom(ctx, class_id, buf, len); the text of that string is then
+ * converted with a converter opened as rccIConvOpen(charset, "UTF-8").
+ *
+ * Returns NULL when `charset` is NULL, when rccSizedFrom fails, or when
+ * rccIConv reports (size_t)-1. When no converter could be opened, the result
+ * is rccStringExtractString() of the intermediate string and *rlen (if
+ * requested and the extraction succeeded) is its strlen(). The intermediate
+ * string is freed on every path after it has been created.
+ *
+ * NOTE(review): `buf` is declared `const rcc_string` but is passed to
+ * rccSizedFrom, whose parameter is `const char *` - it appears to hold raw
+ * class-encoded bytes rather than an rcc_string; confirm against the header.
+ */
+char *rccSizedRecodeToCharset(rcc_context ctx, rcc_class_id class_id, const char *charset, const rcc_string buf, size_t len, size_t *rlen) {
+ size_t res;
+ rcc_iconv icnv;
+ const char *str;
+ char *utf8, *extracted;
+
+ if (!charset) return NULL;
+
+ utf8 = rccSizedFrom(ctx, class_id, buf, len); /* intermediate rcc_string, owned here */
+ if (!utf8) return utf8;
+
+ str = rccStringGetString(utf8);
+
+ icnv = rccIConvOpen(charset, "UTF-8");
+ if (icnv) {
+ res = rccIConv(ctx, icnv, str, 0); /* NOTE(review): length 0 presumably means "measure the NUL-terminated input" - confirm in rccIConv */
+ rccIConvClose(icnv);
+ free(utf8); /* str points into utf8 and must not be used past this line */
+
+ if (res == (size_t)-1) return NULL;
+ if (rlen) *rlen = res;
+ return rccCreateResult(ctx, res);
+ }
+
+ extracted = rccStringExtractString(utf8);
+ free(utf8);
+
+ if ((rlen)&&(extracted)) *rlen = strlen(extracted);
+ return extracted;
+}
+
+/* Convert to class_id from Charset.
+ *
+ * `buf` (`len` bytes, encoded in `charset`) is converted with a converter
+ * opened as rccIConvOpen("UTF-8", charset), wrapped into an rcc_string with
+ * the context's current language via rccCreateString, and then passed to
+ * rccSizedTo(ctx, class_id, str, rlen), whose return value is returned.
+ *
+ * Returns NULL when `charset` is NULL, when rccIConv reports (size_t)-1, or
+ * when rccCreateString returns NULL. When no converter could be opened, the
+ * raw input bytes are wrapped unconverted. `rlen` is only written by
+ * rccSizedTo. The intermediate rcc_string is freed before returning.
+ *
+ * NOTE(review): `ctx` is dereferenced (ctx->tmpbuffer) without a NULL /
+ * default-context fallback in this function - confirm callers never pass NULL.
+ */
+char *rccSizedRecodeFromCharset(rcc_context ctx, rcc_class_id class_id, const char *charset, const char *buf, size_t len, size_t *rlen) {
+ size_t res;
+ rcc_iconv icnv;
+ rcc_string str;
+ char *extracted;
+
+
+ if (!charset) return NULL;
+
+ icnv = rccIConvOpen("UTF-8", charset);
+ if (icnv) {
+ res = rccIConv(ctx, icnv, buf, len); /* converted bytes are read from ctx->tmpbuffer below */
+ rccIConvClose(icnv);
+
+ if (res == (size_t)-1) return NULL;
+
+ str = rccCreateString(rccGetCurrentLanguage(ctx), ctx->tmpbuffer, res);
+ } else str = rccCreateString(rccGetCurrentLanguage(ctx), buf, len); /* no converter: wrap the raw input as-is */
+
+ if (!str) return NULL;
+
+ extracted = rccSizedTo(ctx, class_id, str, rlen);
+ free(str);
+
+ return extracted;
+}
+
+/*
+ * Convert `len` bytes of `buf` directly from charset `from` to charset `to`.
+ *
+ * The converter is opened as rccIConvOpen(to, from). The converted bytes are
+ * taken from ctx->tmpbuffer and copied into a freshly malloc'ed buffer that
+ * the caller must free(). If `rlen` is non-NULL it receives the number of
+ * converted bytes (the terminator is not counted).
+ *
+ * Returns NULL when the converter cannot be opened, when rccIConv reports
+ * (size_t)-1, or when the allocation fails.
+ */
+char *rccSizedRecodeCharsets(rcc_context ctx, const char *from, const char *to, const char *buf, size_t len, size_t *rlen) {
+ char *str;
+ size_t res;
+ rcc_iconv icnv;
+
+ icnv = rccIConvOpen(to, from);
+ if (!icnv) return NULL;
+
+ res = rccIConv(ctx, icnv, buf, len);
+ rccIConvClose(icnv);
+
+ if (res == (size_t)-1) return NULL;
+
+ /* One extra byte is reserved for the terminator written below. */
+ str = (char*)malloc((res+1)*sizeof(char));
+ if (!str) return NULL;
+ memcpy(str, ctx->tmpbuffer, res);
+ /* Only res bytes were copied, so terminate explicitly; otherwise the
+ * caller would read an uninitialised byte past the converted data. */
+ str[res] = 0;
+ if (rlen) *rlen = res;
+
+ return str;
+}