summaryrefslogtreecommitdiffstats
path: root/src/fake_enca.h
diff options
context:
space:
mode:
authorSuren A. Chilingaryan <csa@dside.dyndns.org>2005-06-16 23:14:30 +0000
committerSuren A. Chilingaryan <csa@dside.dyndns.org>2005-06-16 23:14:30 +0000
commit3aa2acb1aa6931d9a5cab87fe9bef94086e25d16 (patch)
treec3b86c2f004e7a8498efbe41e72f42d81acde9ea /src/fake_enca.h
downloadlibrcc-3aa2acb1aa6931d9a5cab87fe9bef94086e25d16.tar.gz
librcc-3aa2acb1aa6931d9a5cab87fe9bef94086e25d16.tar.bz2
librcc-3aa2acb1aa6931d9a5cab87fe9bef94086e25d16.tar.xz
librcc-3aa2acb1aa6931d9a5cab87fe9bef94086e25d16.zip
Initial Import
Diffstat (limited to 'src/fake_enca.h')
-rw-r--r--src/fake_enca.h289
1 files changed, 289 insertions, 0 deletions
diff --git a/src/fake_enca.h b/src/fake_enca.h
new file mode 100644
index 0000000..4483efb
--- /dev/null
+++ b/src/fake_enca.h
@@ -0,0 +1,289 @@
+/* This header file is in the public domain. */
+#ifndef ENCA_H
+#define ENCA_H
+
+#include <stdlib.h>
+/* According to autoconf stdlib may not be enough for size_t */
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/* Enumerated types */
+
+/**
+ * EncaSurface:
+ * @ENCA_SURFACE_EOL_CR: End-of-lines are represented with CR's.
+ * @ENCA_SURFACE_EOL_LF: End-of-lines are represented with LF's.
+ * @ENCA_SURFACE_EOL_CRLF: End-of-lines are represented with CRLF's.
+ * @ENCA_SURFACE_EOL_MIX: Several end-of-line types, mixed.
+ * @ENCA_SURFACE_EOL_BIN: End-of-line concept not applicable (binary data).
+ * @ENCA_SURFACE_MASK_EOL: Mask for end-of-line surfaces.
+ * @ENCA_SURFACE_PERM_21: Odd and even bytes swapped.
+ * @ENCA_SURFACE_PERM_4321: Reversed byte sequence in 4byte words.
+ * @ENCA_SURFACE_PERM_MIX: Chunks with both endianess, concatenated.
+ * @ENCA_SURFACE_MASK_PERM: Mask for permutation surfaces.
+ * @ENCA_SURFACE_QP: Quoted printables.
+ * @ENCA_SURFACE_REMOVE: Recode `remove' surface.
+ * @ENCA_SURFACE_UNKNOWN: Unknown surface.
+ * @ENCA_SURFACE_MASK_ALL: Mask for all bits, withnout #ENCA_SURFACE_UNKNOWN.
+ *
+ * Surface flags.
+ **/
+typedef enum { /*< flags >*/
+ ENCA_SURFACE_EOL_CR = 1 << 0,
+ ENCA_SURFACE_EOL_LF = 1 << 1,
+ ENCA_SURFACE_EOL_CRLF = 1 << 2,
+ ENCA_SURFACE_EOL_MIX = 1 << 3,
+ ENCA_SURFACE_EOL_BIN = 1 << 4,
+ ENCA_SURFACE_MASK_EOL = (ENCA_SURFACE_EOL_CR
+ | ENCA_SURFACE_EOL_LF
+ | ENCA_SURFACE_EOL_CRLF
+ | ENCA_SURFACE_EOL_MIX
+ | ENCA_SURFACE_EOL_BIN),
+ ENCA_SURFACE_PERM_21 = 1 << 5,
+ ENCA_SURFACE_PERM_4321 = 1 << 6,
+ ENCA_SURFACE_PERM_MIX = 1 << 7,
+ ENCA_SURFACE_MASK_PERM = (ENCA_SURFACE_PERM_21
+ | ENCA_SURFACE_PERM_4321
+ | ENCA_SURFACE_PERM_MIX),
+ ENCA_SURFACE_QP = 1 << 8,
+ ENCA_SURFACE_REMOVE = 1 << 13,
+ ENCA_SURFACE_UNKNOWN = 1 << 14,
+ ENCA_SURFACE_MASK_ALL = (ENCA_SURFACE_MASK_EOL
+ | ENCA_SURFACE_MASK_PERM
+ | ENCA_SURFACE_QP
+ | ENCA_SURFACE_REMOVE)
+} EncaSurface;
+
+/**
+ * EncaNameStyle:
+ * @ENCA_NAME_STYLE_ENCA: Default, implicit charset name in Enca.
+ * @ENCA_NAME_STYLE_RFC1345: RFC 1345 charset name.
+ * @ENCA_NAME_STYLE_CSTOCS: Cstocs charset name.
+ * @ENCA_NAME_STYLE_ICONV: Iconv charset name.
+ * @ENCA_NAME_STYLE_HUMAN: Human comprehensible description.
+ *
+ * Charset naming styles and conventions.
+ **/
+typedef enum {
+ ENCA_NAME_STYLE_ENCA,
+ ENCA_NAME_STYLE_RFC1345,
+ ENCA_NAME_STYLE_CSTOCS,
+ ENCA_NAME_STYLE_ICONV,
+ ENCA_NAME_STYLE_HUMAN
+} EncaNameStyle;
+
+/**
+ * EncaCharsetFlags:
+ * @ENCA_CHARSET_7BIT: Characters are represented with 7bit characters.
+ * @ENCA_CHARSET_8BIT: Characters are represented with bytes.
+ * @ENCA_CHARSET_16BIT: Characters are represented with 2byte words.
+ * @ENCA_CHARSET_32BIT: Characters are represented with 4byte words.
+ * @ENCA_CHARSET_FIXED: One characters consists of one fundamental piece.
+ * @ENCA_CHARSET_VARIABLE: One character consists of variable number of
+ * fundamental pieces.
+ * @ENCA_CHARSET_BINARY: Charset is binary from ASCII viewpoint.
+ * @ENCA_CHARSET_REGULAR: Language dependent (8bit) charset.
+ * @ENCA_CHARSET_MULTIBYTE: Multibyte charset.
+ *
+ * Charset properties.
+ *
+ * Flags %ENCA_CHARSET_7BIT, %ENCA_CHARSET_8BIT, %ENCA_CHARSET_16BIT,
+ * %ENCA_CHARSET_32BIT tell how many bits a `fundamental piece' consists of.
+ * This is different from bits per character; r.g. UTF-8 consists of 8bit
+ * pieces (bytes), but character can be composed from 1 to 6 of them.
+ **/
+typedef enum { /*< flags >*/
+ ENCA_CHARSET_7BIT = 1 << 0,
+ ENCA_CHARSET_8BIT = 1 << 1,
+ ENCA_CHARSET_16BIT = 1 << 2,
+ ENCA_CHARSET_32BIT = 1 << 3,
+ ENCA_CHARSET_FIXED = 1 << 4,
+ ENCA_CHARSET_VARIABLE = 1 << 5,
+ ENCA_CHARSET_BINARY = 1 << 6,
+ ENCA_CHARSET_REGULAR = 1 << 7,
+ ENCA_CHARSET_MULTIBYTE = 1 << 8
+} EncaCharsetFlags;
+
+/**
+ * EncaErrno:
+ * @ENCA_EOK: OK.
+ * @ENCA_EINVALUE: Invalid value (usually of an option).
+ * @ENCA_EEMPTY: Sample is empty.
+ * @ENCA_EFILTERED: After filtering, (almost) nothing remained.
+ * @ENCA_ENOCS8: Mulitibyte tests failed and language contains no 8bit charsets.
+ * @ENCA_ESIGNIF: Too few significant characters.
+ * @ENCA_EWINNER: No clear winner.
+ * @ENCA_EGARBAGE: Sample is garbage.
+ *
+ * Error codes.
+ **/
+typedef enum {
+ ENCA_EOK = 0,
+ ENCA_EINVALUE,
+ ENCA_EEMPTY,
+ ENCA_EFILTERED,
+ ENCA_ENOCS8,
+ ENCA_ESIGNIF,
+ ENCA_EWINNER,
+ ENCA_EGARBAGE
+} EncaErrno;
+
+/**
+ * ENCA_CS_UNKNOWN:
+ *
+ * Unknown character set id.
+ *
+ * Use enca_charset_is_known() to check for unknown charset instead of direct
+ * comparsion.
+ **/
+#define ENCA_CS_UNKNOWN (-1)
+
+/**
+ * ENCA_NOT_A_CHAR:
+ *
+ * Not-a-character in unicode tables.
+ **/
+#define ENCA_NOT_A_CHAR 0xffff
+
+/* Published (opaque) typedefs */
+typedef struct _EncaAnalyserState *EncaAnalyser;
+
+/* Public (transparent) typedefs */
+typedef struct _EncaEncoding EncaEncoding;
+
+/**
+ * EncaEncoding:
+ * @charset: Numeric charset identifier.
+ * @surface: Surface flags.
+ *
+ * Encoding, i.e. charset and surface.
+ *
+ * This is what enca_analyse() and enca_analyse_const() return.
+ *
+ * The @charset field is an opaque numerical charset identifier, which has no
+ * meaning outside Enca library.
+ * You will probably want to use it only as enca_charset_name() argument.
+ * It is only guaranteed not to change meaning
+ * during program execution time; change of its interpretation (e.g. due to
+ * addition of new charsets) is not considered API change.
+ *
+ * The @surface field is a combination of #EncaSurface flags. You may want
+ * to ignore it completely; you should use enca_set_interpreted_surfaces()
+ * to disable weird surfaces then.
+ **/
+struct _EncaEncoding { int charset; EncaSurface surface; };
+
+void (*enca_set_multibyte) (EncaAnalyser analyser, int multibyte);
+void (*enca_set_interpreted_surfaces) (EncaAnalyser analyser, int interpreted_surfaces);
+void (*enca_set_ambiguity) (EncaAnalyser analyser, int ambiguity);
+void (*enca_set_filtering) (EncaAnalyser analyser, int filtering);
+void (*enca_set_garbage_test) (EncaAnalyser analyser, int garabage_test);
+void (*enca_set_termination_strictness) (EncaAnalyser analyser, int termination_strictness);
+int (*enca_set_significant) (EncaAnalyser analyser, size_t significant);
+int (*enca_set_threshold) (EncaAnalyser analyser, double threshold);
+const char* (*enca_charset_name) (int charset, EncaNameStyle whatname);
+int* (*enca_get_language_charsets) (const char *langname, size_t *n);
+EncaAnalyser (*enca_analyser_alloc) (const char *langname);
+void (*enca_analyser_free) (EncaAnalyser analyser);
+EncaEncoding (*enca_analyse_const) (EncaAnalyser analyser,const unsigned char *buffer, size_t size);
+
+/**
+ * enca_charset_is_known:
+ * @cs: Charset id.
+ *
+ * Expands to nonzero when the charset is known (i.e. it's not
+ * ENCA_CS_UNKNOWN).
+ **/
+#define enca_charset_is_known(cs) \
+ ((cs) != ENCA_CS_UNKNOWN)
+
+/**
+ * enca_charset_is_7bit:
+ * @cs: Charset id.
+ *
+ * Expands to nonzero when characters are represented with 7bit characters.
+ **/
+#define enca_charset_is_7bit(cs) \
+ (enca_charset_properties(cs) & ENCA_CHARSET_7BIT)
+
+/**
+ * enca_charset_is_8bit:
+ * @cs: Charset id.
+ *
+ * Expands to nonzero when characters are represented with bytes.
+ **/
+#define enca_charset_is_8bit(cs) \
+ (enca_charset_properties(cs) & ENCA_CHARSET_8BIT)
+
+/**
+ * enca_charset_is_16bit:
+ * @cs: Charset id.
+ *
+ * Expands to nonzero when characters are represented with 2byte words.
+ **/
+#define enca_charset_is_16bit(cs) \
+ (enca_charset_properties(cs) & ENCA_CHARSET_16BIT)
+
+/**
+ * enca_charset_is_32bit:
+ * @cs: Charset id.
+ *
+ * Expands to nonzero when characters are represented with 4byte words.
+ **/
+#define enca_charset_is_32bit(cs) \
+ (enca_charset_properties(cs) & ENCA_CHARSET_32BIT)
+
+/**
+ * enca_charset_is_fixed:
+ * @cs: Charset id.
+ *
+ * Expands to nonzero when one characters consists of one fundamental piece.
+ **/
+#define enca_charset_is_fixed(cs) \
+ (enca_charset_properties(cs) & ENCA_CHARSET_FIXED)
+
+/**
+ * enca_charset_is_variable:
+ * @cs: Charset id.
+ *
+ * Expands to nonzero when one character consists of variable number of
+ * fundamental pieces.
+ **/
+#define enca_charset_is_variable(cs) \
+ (enca_charset_properties(cs) & ENCA_CHARSET_VARIABLE)
+
+/**
+ * enca_charset_is_binary:
+ * @cs: Charset id.
+ *
+ * Expands to nonzero when charset is binary from ASCII viewpoint.
+ **/
+#define enca_charset_is_binary(cs) \
+ (enca_charset_properties(cs) & ENCA_CHARSET_BINARY)
+
+/**
+ * enca_charset_is_regular:
+ * @cs: Charset id.
+ *
+ * Expands to nonzero when charset is language dependent (8bit) charset.
+ **/
+#define enca_charset_is_regular(cs) \
+ (enca_charset_properties(cs) & ENCA_CHARSET_REGULAR)
+
+/**
+ * enca_charset_is_multibyte:
+ * @cs: Charset id.
+ *
+ * Expands to nonzero when charset is multibyte.
+ **/
+#define enca_charset_is_multibyte(cs) \
+ (enca_charset_properties(cs) & ENCA_CHARSET_MULTIBYTE)
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif