From 537c4b33fdf6e143243d5a0d286eeb247362e806 Mon Sep 17 00:00:00 2001 From: "Suren A. Chilingaryan" Date: Mon, 18 Jul 2005 15:22:28 +0000 Subject: API Improvements - Removed 'rlen' return parameters where not necessary for multibyte encodings - Two versions of recode functions: rccRecode -> rccRecode, rccSizedRecode - Class Types: CONST, SKIP_SAVELOAD - New recode functions: rccToCharset, rccFromCharset - More new recode functions: rccRecodeToCharset, rccRecodeFromCharset, rccRecodeCharsets - New function: rccGetCompiledConfiguration - All warnings are fixed - Perform "File Name" search only if there are non ISO8859-1 chars in the name. - Do not copy invalid characters; skip them. - Fixed error in rccRecode with 'Recoding Cache' switched On. - Strip leading and trailing spaces in rccDB4 get/set --- src/recode.c | 201 +++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 175 insertions(+), 26 deletions(-) (limited to 'src/recode.c') diff --git a/src/recode.c b/src/recode.c index 2164a4d..435e1a6 100644 --- a/src/recode.c +++ b/src/recode.c @@ -13,27 +13,27 @@ -static rcc_charset_id rccIConvAuto(rcc_context ctx, rcc_class_id class_id, const char *buf, int len) { +static rcc_autocharset_id rccIConvAuto(rcc_context ctx, rcc_class_id class_id, const char *buf, int len) { rcc_class_type class_type; rcc_engine_ptr engine; - if (!buf) return (rcc_charset_id)-1; - + if (!buf) return (rcc_autocharset_id)-1; + class_type = rccGetClassType(ctx, class_id); - if ((class_type == RCC_CLASS_STANDARD)||((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_TITLES)))) { + if ((class_type != RCC_CLASS_FS)||((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_TITLES)))) { engine = rccGetCurrentEnginePointer(ctx); - if ((!engine)||(!engine->func)) return (rcc_charset_id)-1; + if ((!engine)||(!engine->func)) return (rcc_autocharset_id)-1; return engine->func(&ctx->engine_ctx, buf, len); } - return (rcc_charset_id)-1; + 
return (rcc_autocharset_id)-1; } -rcc_string rccFrom(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len, size_t *rlen) { +rcc_string rccSizedFrom(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len) { int err; size_t ret; rcc_language_id language_id; - rcc_charset_id charset_id; + rcc_autocharset_id charset_id; rcc_iconv icnv = NULL; rcc_string result; rcc_option_value usedb4; @@ -64,15 +64,15 @@ rcc_string rccFrom(rcc_context ctx, rcc_class_id class_id, const char *buf, size language_id = rccGetCurrentLanguage(ctx); charset_id = rccIConvAuto(ctx, class_id, buf, len); - if (charset_id != (rcc_charset_id)-1) icnv = ctx->iconv_auto[charset_id]; + if (charset_id != (rcc_autocharset_id)-1) icnv = ctx->iconv_auto[charset_id]; else icnv = ctx->iconv_from[class_id]; if (icnv) { ret = rccIConv(ctx, icnv, buf, len); if (ret == (size_t)-1) return NULL; - result = rccCreateString(language_id, ctx->tmpbuffer, ret, rlen); + result = rccCreateString(language_id, ctx->tmpbuffer, ret); } else { - result = rccCreateString(language_id, buf, len, rlen); + result = rccCreateString(language_id, buf, len); } if ((result)&&(usedb4&RCC_OPTION_LEARNING_FLAG_LEARN)) { @@ -84,7 +84,7 @@ rcc_string rccFrom(rcc_context ctx, rcc_class_id class_id, const char *buf, size return result; } -char *rccTo(rcc_context ctx, rcc_class_id class_id, const rcc_string buf, size_t len, size_t *rlen) { +char *rccSizedTo(rcc_context ctx, rcc_class_id class_id, const rcc_string buf, size_t *rlen) { int err; size_t newlen; char *result; @@ -92,7 +92,6 @@ char *rccTo(rcc_context ctx, rcc_class_id class_id, const rcc_string buf, size_t const char *utfstring; rcc_language_config config; rcc_language_id language_id; - rcc_charset_id charset_id; rcc_class_type class_type; rcc_iconv icnv; @@ -102,7 +101,7 @@ char *rccTo(rcc_context ctx, rcc_class_id class_id, const rcc_string buf, size_t } if ((class_id<0)||(class_id>=ctx->n_classes)||(!buf)) return NULL; - newlen = 
rccStringSizedCheck((const char*)buf, len); + newlen = rccStringCheck((const char*)buf); if (!newlen) return NULL; language_id = rccStringGetLanguage(buf); @@ -117,6 +116,12 @@ char *rccTo(rcc_context ctx, rcc_class_id class_id, const rcc_string buf, size_t class_type = rccGetClassType(ctx, class_id); if ((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_NAMES))) { + if (rccIsASCII(utfstring)) { + result = rccStringExtractString(buf); + if ((result)&&(rlen)) *rlen = strlen(result); + return result; + } + name = (char*)utfstring; prefix = NULL; @@ -127,17 +132,18 @@ char *rccTo(rcc_context ctx, rcc_class_id class_id, const rcc_string buf, size_t if (prefix) free(prefix); free(name); } - if ((rlen)&&(result)) *rlen = strlen(result); + if (rlen) *rlen = strlen(result); return result; } } icnv = config->iconv_to[class_id]; if (icnv) { - newlen = rccIConv(ctx, icnv, rccStringGetString(buf), len?newlen:0); + newlen = rccIConv(ctx, icnv, rccStringGetString((const char*)buf), newlen); if (newlen == (size_t)-1) return NULL; - result = rccCreateResult(ctx, newlen, rlen); + result = rccCreateResult(ctx, newlen); + if (rlen) *rlen = newlen; } else { result = rccStringExtractString(buf); if (rlen) *rlen = newlen; @@ -146,13 +152,13 @@ char *rccTo(rcc_context ctx, rcc_class_id class_id, const rcc_string buf, size_t return result; } -char *rccRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char *buf, size_t len, size_t *rlen) { - size_t nlen; +char *rccSizedRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char *buf, size_t len, size_t *rlen) { rcc_string stmp; char *result; const char *from_charset, *to_charset; rcc_charset_id from_charset_id, to_charset_id; rcc_class_type class_type; + rcc_option_value usedb4; if (!ctx) { if (rcc_default_ctx) ctx = rcc_default_ctx; @@ -163,6 +169,20 @@ char *rccRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char class_type = rccGetClassType(ctx, to); if ((class_type == 
RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_NAMES))) goto recoding; if (rccGetOption(ctx, RCC_OPTION_LEARNING_MODE)&RCC_OPTION_LEARNING_FLAG_LEARN) goto recoding; + + usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE); + if (usedb4&RCC_OPTION_LEARNING_FLAG_USE) { + stmp = rccDb4GetKey(ctx->db4ctx, buf, len); + if (stmp) { + if (rccStringFixID(stmp, ctx)) free(stmp); + else { + result = rccSizedTo(ctx, to, stmp, rlen); + free(stmp); + return result; + } + } + } + from_charset_id = rccIConvAuto(ctx, from, buf, len); if (from_charset_id != (rcc_charset_id)-1) { @@ -174,11 +194,11 @@ char *rccRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char to_charset_id = rccGetCurrentCharset(ctx, to); if (from_charset_id == to_charset_id) return NULL; } - + recoding: - stmp = rccFrom(ctx, from, buf, len, &nlen); + stmp = rccSizedFrom(ctx, from, buf, len); if (stmp) { - result = rccTo(ctx, to, stmp, nlen, rlen); + result = rccSizedTo(ctx, to, stmp, rlen); free(stmp); return result; } @@ -192,7 +212,6 @@ char *rccFS(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char *fsp char *prefix = (char*)path, *name = (char*)filename; /*DS*/ rcc_string string; - char *stmp; char *result = NULL; if (!ctx) { @@ -209,16 +228,16 @@ char *rccFS(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char *fsp if (err < 0) return NULL; if (err&1) { - if (err&2) return NULL; if (rccGetOption(ctx, RCC_OPTION_LEARNING_MODE)&RCC_OPTION_LEARNING_FLAG_LEARN) { - string = rccFrom(ctx, from, name, 0, NULL); + string = rccFrom(ctx, from, name); if (string) free(string); } + if (err&2) return NULL; return name; } } - string = rccFrom(ctx, from, name, 0, NULL); + string = rccFrom(ctx, from, name); if (string) { config = rccGetConfig(ctx, rccStringGetLanguage(string)); if (config) result = rccFS3(config, to, prefix, rccStringGetString(string)); @@ -233,3 +252,133 @@ char *rccFS(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char *fsp } return 
result; } + + +rcc_string rccSizedFromCharset(rcc_context ctx, const char *charset, const char *buf, size_t len) { + rcc_iconv icnv; + rcc_language_config config; + rcc_language_id language_id; + size_t res; + + if ((!buf)||(!charset)) return NULL; + + language_id = rccGetCurrentLanguage(ctx); + if ((language_id == (rcc_language_id)-1)||(language_id == 0)) return NULL; + config = rccGetConfig(ctx, language_id); + if (!config) return NULL; + + icnv = rccIConvOpen("UTF-8", charset); + if (icnv) { + res = rccIConv(ctx, icnv, buf, len); + rccIConvClose(icnv); + if (res == (size_t)-1) return NULL; + return rccCreateString(language_id, ctx->tmpbuffer, res); + } + return rccCreateString(language_id, buf, len); +} + +char *rccSizedToCharset(rcc_context ctx, const char *charset, const rcc_string buf, size_t *rlen) { + rcc_iconv icnv; + size_t res; + + if ((!buf)||(!charset)) return NULL; + + res = rccStringCheck(buf); + if (!res) return NULL; + + icnv = rccIConvOpen(charset, "UTF-8"); + if (icnv) { + res = rccIConv(ctx, icnv, rccStringGetString(buf), res); + rccIConvClose(icnv); + if (res == (size_t)-1) return NULL; + + if (rlen) *rlen = res; + return rccCreateResult(ctx, res); + } + + if (rlen) *rlen = res; + return rccStringExtractString(buf); +} + +/* Convert from class_id to Charset */ +char *rccSizedRecodeToCharset(rcc_context ctx, rcc_class_id class_id, const char *charset, const rcc_string buf, size_t len, size_t *rlen) { + size_t res; + rcc_iconv icnv; + const char *str; + char *utf8, *extracted; + + if (!charset) return NULL; + + utf8 = rccSizedFrom(ctx, class_id, buf, len); + if (!utf8) return utf8; + + str = rccStringGetString(utf8); + + icnv = rccIConvOpen(charset, "UTF-8"); + if (icnv) { + res = rccIConv(ctx, icnv, str, 0); + rccIConvClose(icnv); + free(utf8); + + if (res == (size_t)-1) return NULL; + if (rlen) *rlen = res; + return rccCreateResult(ctx, res); + } + + extracted = rccStringExtractString(utf8); + free(utf8); + + if ((rlen)&&(extracted)) *rlen = 
strlen(extracted); + return extracted; +} + +/* Convert to class_id from Charset */ +char *rccSizedRecodeFromCharset(rcc_context ctx, rcc_class_id class_id, const char *charset, const char *buf, size_t len, size_t *rlen) { + size_t res; + rcc_iconv icnv; + rcc_string str; + char *extracted; + + + if (!charset) return NULL; + + icnv = rccIConvOpen("UTF-8", charset); + if (icnv) { + res = rccIConv(ctx, icnv, buf, len); + rccIConvClose(icnv); + + if (res == (size_t)-1) return NULL; + + str = rccCreateString(rccGetCurrentLanguage(ctx), ctx->tmpbuffer, res); + } else str = rccCreateString(rccGetCurrentLanguage(ctx), buf, len); + + if (!str) return NULL; + + extracted = rccSizedTo(ctx, class_id, str, rlen); + free(str); + + return extracted; +} + +char *rccSizedRecodeCharsets(rcc_context ctx, const char *from, const char *to, const char *buf, size_t len, size_t *rlen) { + char *str; + size_t res; + rcc_iconv icnv; + + icnv = rccIConvOpen(to, from); + if (!icnv) return NULL; + + res = rccIConv(ctx, icnv, buf, len); + rccIConvClose(icnv); + + if (res == (size_t)-1) return NULL; + + + + str = (char*)malloc((res+1)*sizeof(char)); + if (!str) return NULL; + memcpy(str, ctx->tmpbuffer, res); + if (rlen) *rlen = res; + + return str; +} -- cgit v1.2.3