diff options
author | Suren A. Chilingaryan <csa@dside.dyndns.org> | 2013-04-26 09:03:42 +0200 |
---|---|---|
committer | Suren A. Chilingaryan <csa@dside.dyndns.org> | 2013-04-26 09:03:42 +0200 |
commit | 77707bc1f75c237d1e4ecdab3fbf567a8d0ac3e8 (patch) | |
tree | 18d5dc9ba400ec95d58747666bd50a42f8a98d61 /src | |
parent | 0d488beb76dfc46883e23c52ccf4ac5d2f3726a4 (diff) | |
download | librcc-77707bc1f75c237d1e4ecdab3fbf567a8d0ac3e8.tar.gz librcc-77707bc1f75c237d1e4ecdab3fbf567a8d0ac3e8.tar.bz2 librcc-77707bc1f75c237d1e4ecdab3fbf567a8d0ac3e8.tar.xz librcc-77707bc1f75c237d1e4ecdab3fbf567a8d0ac3e8.zip |
Language autodetection: prefer English over languages without any non-parent words (English is the global parent)
Diffstat (limited to 'src')
-rw-r--r-- | src/recode.c | 12 |
1 files changed, 6 insertions, 6 deletions
diff --git a/src/recode.c b/src/recode.c index ac6e11b..3b1bc02 100644 --- a/src/recode.c +++ b/src/recode.c @@ -172,12 +172,12 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c mode = 0; } else continue; } else { - if (mode) { - if (utf8[j]>0x7F) english_word = 0; - } else { + if (!mode) { mode = j + 1; english_word = 1; } + + if (utf8[j]>0x7F) english_word = 0; } } @@ -200,7 +200,7 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c words++; } - + if (english_mode) { if (english_string) free(english_string); @@ -256,7 +256,7 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c return english_lang; } - if ((bestres > RCC_ACCEPTABLE_PROBABILITY)&&(bestlongest > RCC_ACCEPTABLE_LENGTH)) { + if ((bestres > RCC_ACCEPTABLE_PROBABILITY)&&(bestlongest > RCC_ACCEPTABLE_LENGTH)&&(bestown>0)) { if (english_string) free(english_string); if (retstring) *retstring = best_string; else if (best_string) free(best_string); @@ -265,7 +265,7 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c return bestlang; } - if ((is_english_string)&&(english_res > RCC_ACCEPTABLE_PROBABILITY)&&(english_longest > RCC_ACCEPTABLE_LENGTH)) { + if ((is_english_string)&&(((english_res > RCC_ACCEPTABLE_PROBABILITY)&&(english_longest > RCC_ACCEPTABLE_LENGTH))||(!bestown))) { if (best_string) free(best_string); if (retstring) *retstring = english_string; else if (english_string) free(english_string); |