Commit fa3fa475 authored by Simon Middleton's avatar Simon Middleton
Browse files

Added new file 'languages.h' with some ISO639 language codes.

Added a default language field to each encoding (using above codes).
Added a max char size field to each encoding.
Tidied up some of the reencoders' behaviour when the output ptr is NULL.
Fixed a load of charset numbers which were wrong.
New UTF8 function to skip multiple characters in a string.
Fixed RISC OS build which was out of date.

Version 0.04. Tagged as 'Unicode-0_04'
...@@ -81,10 +81,7 @@ OBJS = autojp.o \ ...@@ -81,10 +81,7 @@ OBJS = autojp.o \
utf8.o \ utf8.o \
encoding.o \ encoding.o \
iso2022.o \ iso2022.o \
koi8r.o \ eightbit.o \
microsoft.o \
acorn.o \
apple.o \
shiftjis.o \ shiftjis.o \
bigfive.o \ bigfive.o \
enc_utf8.o \ enc_utf8.o \
...@@ -98,10 +95,7 @@ OBJSZ = \ ...@@ -98,10 +95,7 @@ OBJSZ = \
oz.utf8 \ oz.utf8 \
oz.encoding \ oz.encoding \
oz.iso2022 \ oz.iso2022 \
oz.koi8r \ oz.eightbit \
oz.microsoft \
oz.acorn \
oz.apple \
oz.shiftjis \ oz.shiftjis \
oz.bigfive \ oz.bigfive \
oz.enc_utf8 \ oz.enc_utf8 \
...@@ -115,10 +109,7 @@ OBJSD = \ ...@@ -115,10 +109,7 @@ OBJSD = \
od.utf8 \ od.utf8 \
od.encoding \ od.encoding \
od.iso2022 \ od.iso2022 \
od.koi8r \ od.eightbit \
od.microsoft \
od.acorn \
od.apple \
od.shiftjis \ od.shiftjis \
od.bigfive \ od.bigfive \
od.enc_utf8 \ od.enc_utf8 \
...@@ -126,9 +117,9 @@ OBJSD = \ ...@@ -126,9 +117,9 @@ OBJSD = \
od.enc_utf16 \ od.enc_utf16 \
od.enc_ucs4 od.enc_ucs4
HDRS = autojp.h charsets.h encoding.h iso10646.h unictype.h utf8.h HDRS = autojp.h charsets.h encoding.h iso10646.h languages.h unictype.h utf8.h
LIBS = ${TARGET} ${TARGETD} LIBS = ${TARGET} ${TARGETD} ${TARGETZ}
# #
# Rule patterns # Rule patterns
...@@ -173,6 +164,7 @@ export_hdrs: ${HDRS} dirs ...@@ -173,6 +164,7 @@ export_hdrs: ${HDRS} dirs
${CP} h.charsets ${UNICODELIBDIR}.h.* ${CPFLAGS} ${CP} h.charsets ${UNICODELIBDIR}.h.* ${CPFLAGS}
${CP} h.encoding ${UNICODELIBDIR}.h.* ${CPFLAGS} ${CP} h.encoding ${UNICODELIBDIR}.h.* ${CPFLAGS}
${CP} h.iso10646 ${UNICODELIBDIR}.h.* ${CPFLAGS} ${CP} h.iso10646 ${UNICODELIBDIR}.h.* ${CPFLAGS}
${CP} h.languages ${UNICODELIBDIR}.h.* ${CPFLAGS}
${CP} h.unictype ${UNICODELIBDIR}.h.* ${CPFLAGS} ${CP} h.unictype ${UNICODELIBDIR}.h.* ${CPFLAGS}
${CP} h.utf8 ${UNICODELIBDIR}.h.* ${CPFLAGS} ${CP} h.utf8 ${UNICODELIBDIR}.h.* ${CPFLAGS}
@echo ${COMPONENT}: export complete (hdrs) @echo ${COMPONENT}: export complete (hdrs)
...@@ -180,9 +172,9 @@ export_hdrs: ${HDRS} dirs ...@@ -180,9 +172,9 @@ export_hdrs: ${HDRS} dirs
export_libs: ${LIBS} dirs export_libs: ${LIBS} dirs
${CP} ${TARGET} ${UNICODELIBDIR}.o.* ${CPFLAGS} ${CP} ${TARGET} ${UNICODELIBDIR}.o.* ${CPFLAGS}
${CP} ${TARGETD} ${UNICODELIBDIR}.o.* ${CPFLAGS} ${CP} ${TARGETD} ${UNICODELIBDIR}.o.* ${CPFLAGS}
${CP} ${TARGETZ} ${UNICODELIBDIR}.o.* ${CPFLAGS}
@echo ${COMPONENT}: export complete (libs) @echo ${COMPONENT}: export complete (libs)
# ${CP} ${TARGETZ} ${UNICODELIBDIR}.o.* ${CPFLAGS}
local_dirs: local_dirs:
${MKDIR} o ${MKDIR} o
...@@ -202,7 +194,14 @@ unictype.c: mkunictype data.UNIDATA2 ...@@ -202,7 +194,14 @@ unictype.c: mkunictype data.UNIDATA2
mkunictype: mkunictype.o ${CLIB} mkunictype: mkunictype.o ${CLIB}
${LD} -o $@ mkunictype.o ${CLIB} ${LD} -o $@ mkunictype.o ${CLIB}
unictype.o oz.unictype od.unictype: unictype.c unictype.o od.unictype oz.unictype: unictype.c
links:
./mklinks
echo Made links
#od.unictype: unictype.c
# ${CC} ${CCFLAGS} -DROM=0 -DDEBUG=1 -o $@ unictype.c
# #
# Final link # Final link
......
/* (0.03) /* (0.04)
* *
* This file is automatically maintained by srccommit, do not edit manually. * This file is automatically maintained by srccommit, do not edit manually.
* *
*/ */
#define Module_MajorVersion_CMHG 0.03 #define Module_MajorVersion_CMHG 0.04
#define Module_MinorVersion_CMHG #define Module_MinorVersion_CMHG
#define Module_Date_CMHG 12 Nov 1997 #define Module_Date_CMHG 21 Nov 1997
#define Module_MajorVersion "0.03" #define Module_MajorVersion "0.04"
#define Module_MinorVersion "" #define Module_MinorVersion ""
#define Module_Date "12 Nov 1997" #define Module_Date "21 Nov 1997"
...@@ -98,7 +98,7 @@ static int ucs4_write(EncodingPriv *e, UCS4 u, char **pucs4, int *bufsize) ...@@ -98,7 +98,7 @@ static int ucs4_write(EncodingPriv *e, UCS4 u, char **pucs4, int *bufsize)
{ {
char *ucs4; char *ucs4;
if ((*bufsize -= 4) < 0) if ((*bufsize -= 4) < 0 || !pucs4)
return 0; return 0;
ucs4 = *pucs4; ucs4 = *pucs4;
......
...@@ -129,7 +129,7 @@ static int utf16_write(EncodingPriv *e, UCS4 u, char **putf16, int *bufsize) ...@@ -129,7 +129,7 @@ static int utf16_write(EncodingPriv *e, UCS4 u, char **putf16, int *bufsize)
c = 0xFFFD; c = 0xFFFD;
} }
if ((*bufsize -= (cc ? 4 : 2)) < 0) if ((*bufsize -= (cc ? 4 : 2)) < 0 || !putf16)
return 0; return 0;
*(*putf16)++ = (c >> 8) & 0xff; *(*putf16)++ = (c >> 8) & 0xff;
......
...@@ -106,7 +106,7 @@ static int utf8_write(EncodingPriv *e, UCS4 u, char **utf8, int *bufsize) ...@@ -106,7 +106,7 @@ static int utf8_write(EncodingPriv *e, UCS4 u, char **utf8, int *bufsize)
{ {
int len = UTF8_codelen(u); int len = UTF8_codelen(u);
if ((*bufsize -= len) < 0) if ((*bufsize -= len) < 0 || !utf8)
return 0; return 0;
*utf8 = UCS4_to_UTF8(*utf8, u); *utf8 = UCS4_to_UTF8(*utf8, u);
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include "enc_ucs4.h" #include "enc_ucs4.h"
#include "charsets.h" #include "charsets.h"
#include "languages.h"
#include "VersionNum" #include "VersionNum"
...@@ -52,17 +53,17 @@ static char version[] = "Unicode library " Module_MajorVersion " " Module_Date " ...@@ -52,17 +53,17 @@ static char version[] = "Unicode library " Module_MajorVersion " " Module_Date "
static EncList enclist[] = static EncList enclist[] =
{ {
{ 3, "/US-ASCII/", (EncodingPriv *)ENC_ascii, NULL }, { csASCII /* 3 */, 1, "/US-ASCII/", lang_ENGLISH, (EncodingPriv *)ENC_ascii, NULL },
{ 4, "/ISO-8859-1/ISO-IR-101/", (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x41\x1B\x2F\x50" /* Select Latin-1 right half and G3 supplement */ }, { csISOLatin1 /* 4 */, 1, "/ISO-8859-1/ISO-IR-101/", lang_ENGLISH, (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x41\x1B\x2F\x50" /* Select Latin-1 right half and G3 supplement */ },
{ 5, "/ISO-8859-2/ISO-IR-102/", (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x42\x1B\x2F\x50" }, /* Select Latin-2 right half */ { csISOLatin2 /* 5 */, 1, "/ISO-8859-2/ISO-IR-102/", lang_ENGLISH, (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x42\x1B\x2F\x50" }, /* Select Latin-2 right half */
{ 6, "/ISO-8859-3/", (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x43\x1B\x2F\x50" }, /* Select Latin-3 right half */ { csISOLatin3 /* 6 */, 1, "/ISO-8859-3/", lang_ENGLISH, (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x43\x1B\x2F\x50" }, /* Select Latin-3 right half */
{ 7, "/ISO-8859-4/", (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x44\x1B\x2F\x50" }, /* Select Latin-4 right half */ { csISOLatin4 /* 7 */, 1, "/ISO-8859-4/", lang_ENGLISH, (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x44\x1B\x2F\x50" }, /* Select Latin-4 right half */
{ 8, "/ISO-8859-5/", (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x4C" }, /* Select Cyrillic right half */ { csISOLatinCyrillic /* 8 */, 1, "/ISO-8859-5/", lang_RUSSIAN, (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x4C" }, /* Select Cyrillic right half */
{ 10, "/ISO-8859-7/", (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x46" }, /* Select Greek right half */ { csISOLatinGreek /* 10 */, 1, "/ISO-8859-7/", lang_GREEK, (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x46" }, /* Select Greek right half */
{ 11, "/ISO-8859-8/", (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x48" }, /* Select Hebrew right half */ { csISOLatinHebrew /* 11 */, 1, "/ISO-8859-8/", lang_HEBREW, (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x48" }, /* Select Hebrew right half */
{ 12, "/ISO-8859-9/", (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x4D\x1B\x2F\x50" }, /* Select Latin-5 right half */ { csISOLatin5 /* 12 */, 1, "/ISO-8859-9/", lang_ENGLISH, (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x4D\x1B\x2F\x50" }, /* Select Latin-5 right half */
{ 13, "/ISO-8859-10/", (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x56\x1B\x2E\x58" }, /* Select Latin-6 right half, and Sami supplement as G2 */ { csISOLatin6 /* 13 */, 1, "/ISO-8859-10/", lang_ENGLISH, (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x56\x1B\x2E\x58" }, /* Select Latin-6 right half, and Sami supplement as G2 */
{ 17, "/SHIFT_JIS/X-SJIS/", (EncodingPriv *)ENC_shiftjis }, { csShiftJIS /* 17 */, 2, "/SHIFT_JIS/X-SJIS/", lang_JAPANESE, (EncodingPriv *)ENC_shiftjis },
/* /*
* Select G0 = JIS X 0201 Roman (ESC ( J) * Select G0 = JIS X 0201 Roman (ESC ( J)
...@@ -72,7 +73,7 @@ static EncList enclist[] = ...@@ -72,7 +73,7 @@ static EncList enclist[] =
* C1 = ISO 6429 (ESC " C) * C1 = ISO 6429 (ESC " C)
* Single shift range = GR (ESC SP \) * Single shift range = GR (ESC SP \)
*/ */
{ 18, "/EUC-JP/", (EncodingPriv *)ENC_iso2022_euc, { csEUCPkdFmtJapanese /* 18 */, 2, "/EUC-JP/", lang_JAPANESE, (EncodingPriv *)ENC_iso2022_euc,
"\x1B\x28\x4A" "\x1B\x28\x4A"
"\x1B\x24\x29\x42" "\x1B\x24\x29\x42"
"\x1B\x2A\x49" "\x1B\x2A\x49"
...@@ -80,43 +81,41 @@ static EncList enclist[] = ...@@ -80,43 +81,41 @@ static EncList enclist[] =
"\x1B\x22\x43" "\x1B\x22\x43"
"\x1B\x20\x5C" }, "\x1B\x20\x5C" },
{ 37, "/ISO-2022-KR/", (EncodingPriv *)ENC_iso2022_shifts, NULL, { csISO2022KR /* 37 */, 3, "/ISO-2022-KR/", lang_KOREAN, (EncodingPriv *)ENC_iso2022_shifts, NULL,
"\x1B\x24\x29\x43" }, "\x1B\x24\x29\x43" },
{ 38, "/EUC-KR/", (EncodingPriv *)ENC_iso2022_euc, { csEUCKR /* 38 */, 0, "/EUC-KR/", lang_KOREAN, (EncodingPriv *)ENC_iso2022_euc,
"\x1B\x24\x29\x43" }, /* Select G1 = KS C 5601 */ "\x1B\x24\x29\x43" }, /* Select G1 = KS C 5601 */
{ 39, "/ISO-2022-JP/JIS_Encoding/", (EncodingPriv *)ENC_iso2022_escapes, NULL, { csISO2022JP /* 39 */, 0, "/ISO-2022-JP/JIS_Encoding/", lang_JAPANESE, (EncodingPriv *)ENC_iso2022_escapes, NULL,
"\x1B\x28\x42" "\x1B\x28\x42"
"\x1B\x28\x4A" "\x1B\x28\x4A"
"\x1B\x24\x40" "\x1B\x24\x40"
"\x1B\x24\x42" }, "\x1B\x24\x42" },
{ 40, "/ISO-2022-JP-2/", (EncodingPriv *)ENC_iso2022_escapes, NULL, { csISO2022JP2 /* 40 */, 0, "/ISO-2022-JP-2/", lang_JAPANESE, (EncodingPriv *)ENC_iso2022_escapes, NULL,
"\x1B\x28\x42" "\x1B\x28\x42"
"\x1B\x28\x4A" "\x1B\x28\x4A"
"\x1B\x24\x40" "\x1B\x24\x40"
"\x1B\x24\x42" "\x1B\x24\x42"
"\x1B\x24\x28\x43" "\x1B\x24\x28\x43"
"\x1B\x24\x28\x44" }, "\x1B\x24\x28\x44" },
{ 104, "/ISO-2022-CN/", (EncodingPriv *)ENC_iso2022 { csISO2022CN /* 104 */, 0, "/ISO-2022-CN/", lang_CHINESE, (EncodingPriv *)ENC_iso2022 },
}, { csISO2022CN_EXT /* 105 */, 0, "/ISO-2022-CN-EXT/", lang_CHINESE, (EncodingPriv *)ENC_iso2022 },
{ 105, "/ISO-2022-CN-EXT/", (EncodingPriv *)ENC_iso2022 { csUTF8 /* 106 */, 6, "/UTF-8/UNICODE-1-1-UTF-8/UNICODE-2-0-UTF-8/", lang_ENGLISH, (EncodingPriv *)ENC_utf8 }, /* More general!!! */
}, { csUCS4 /* 1001 */, 4, "/ISO-10646-UCS-4/UCS-4/", lang_ENGLISH, (EncodingPriv *)ENC_ucs4 },
{ 106, "/UTF-8/UNICODE-1-1-UTF-8/UNICODE-2-0-UTF-8/", (EncodingPriv *)ENC_utf8 }, /* More general!!! */ { csUnicode11 /* 1010 */, 8, "/UCS-2/UTF-16/ISO-10646-UCS-2/UNICODE-1-1/UNICODE-2-0/", lang_ENGLISH, (EncodingPriv *)ENC_utf16 }, /* More general!!! */
{ 1001, "/ISO-10646-UCS-4/UCS-4/", (EncodingPriv *)ENC_ucs4 }, { 2022, 0, "/ISO-2022/", lang_ENGLISH, (EncodingPriv *)ENC_iso2022 },
{ 1010, "/UCS-2/UTF-16/ISO-10646-UCS-2/UNICODE-1-1/UNICODE-2-0/", (EncodingPriv *)ENC_utf16 }, /* More general!!! */ { csGB2312 /* 2025 */, 2, "/X-EUC_CN/GB2312/CN-GB/GB_2312-80", lang_CHINESE, (EncodingPriv *)ENC_iso2022_euc,
{ 2022, "/ISO-2022/", (EncodingPriv *)ENC_iso2022 },
{ 2025, "/X-EUC_CN/GB2312/CN-GB/GB_2312-80", (EncodingPriv *)ENC_iso2022_euc,
"\x1B\x24\x29\x41" }, /* Select G1 = GB 2312-80 */ "\x1B\x24\x29\x41" }, /* Select G1 = GB 2312-80 */
{ 2026, "/CN-BIG5/BIG5/", (EncodingPriv *)ENC_bigfive }, { csBig5 /* 2026 */, 2, "/CN-BIG5/BIG5/", lang_CHINESE, (EncodingPriv *)ENC_bigfive },
{ 2027, "/MACINTOSH/", (EncodingPriv *)ENC_eightbit, "Apple.MacRoman" }, { csMacintosh /* 2027 */, 1, "/MACINTOSH/", lang_ENGLISH, (EncodingPriv *)ENC_eightbit, "Apple.MacRoman" },
{ 2084, "/KOI8-R/", (EncodingPriv *)ENC_eightbit, "KOI8-R" }, { csKOI8R /* 2084 */, 1, "/KOI8-R/", lang_RUSSIAN, (EncodingPriv *)ENC_eightbit, "KOI8-R" },
{ 2250, "/WINDOWS-1250/", (EncodingPriv *)ENC_eightbit, "Microsoft.CP1250" }, { csWindows1250 /* 2250 */, 1, "/WINDOWS-1250/", lang_ENGLISH, (EncodingPriv *)ENC_eightbit, "Microsoft.CP1250" },
{ 2252, "/WINDOWS-1252/", (EncodingPriv *)ENC_eightbit, "Microsoft.CP1252" }, { csWindows1252 /* 2252 */, 1, "/WINDOWS-1252/", lang_ENGLISH, (EncodingPriv *)ENC_eightbit, "Microsoft.CP1252" },
{ csWelsh, "/ISO-IR-182/", (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x5C" }, /* Select Welsh right half */ { csWelsh, 1, "/ISO-IR-182/", lang_WELSH, (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x5C" }, /* Select Welsh right half */
/* { 4001, "/ISO-IR-179/", (EncodingPriv *)ENC_baltic_rim }, */ /* { 4001, "/ISO-IR-179/", (EncodingPriv *)ENC_baltic_rim }, */
{ csSami, "/ISO-8859-15/ISO-IR-197/", (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x5D" }, /* Select Sami right half */ { csSami, 1, "/ISO-8859-15/ISO-IR-197/", lang_ENGLISH, (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x5D" }, /* Select Sami right half */
{ csISOLatin13, "/ISO-8859-13/", (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x59" }, /* Select Baltic Rim right half */ { csISOLatin13, 1, "/ISO-8859-13/", lang_ENGLISH, (EncodingPriv *)ENC_iso8859, "\x1B\x2D\x59" }, /* Select Baltic Rim right half */
{ csAcornLatin1, "/X-ACORN-LATIN1/", (EncodingPriv *)ENC_eightbit, "Acorn.Latin1" }, { csAcornLatin1, 1, "/X-ACORN-LATIN1/", lang_ENGLISH, (EncodingPriv *)ENC_eightbit, "Acorn.Latin1" },
{ 0, NULL, NULL } { 0, 0, NULL, NULL }
}; };
/* ----------------------------------------------------------------------------- */ /* ----------------------------------------------------------------------------- */
...@@ -176,16 +175,23 @@ static void fixup(void) ...@@ -176,16 +175,23 @@ static void fixup(void)
/* ----------------------------------------------------------------------------- */ /* ----------------------------------------------------------------------------- */
/*
 * Look up the encoding table entry whose identifier equals n.
 * The scan runs over enclist and stops at the first entry with a zero
 * identifier, which ends the table.
 * Returns the matching entry, or NULL when no entry matches.
 */
static EncList *find_enclist(int n)
{
    EncList *entry = enclist;

    while (entry->identifier != 0)
    {
        if (entry->identifier == n)
            return entry;
        entry++;
    }

    return NULL;
}
Encoding *encoding_new(int n, int for_encoding) Encoding *encoding_new(int n, int for_encoding)
{ {
struct EncList *e = enclist; struct EncList *e;
EncodingPriv *enc; EncodingPriv *enc;
fixup(); fixup();
for (e = enclist; e->identifier; e++) if ((e = find_enclist(n)) != NULL)
{
if (e->identifier == n)
{ {
int size = sizeof(EncodingPriv) + e->encoding->ws_size; int size = sizeof(EncodingPriv) + e->encoding->ws_size;
enc = encoding__alloc(size); enc = encoding__alloc(size);
...@@ -205,7 +211,6 @@ Encoding *encoding_new(int n, int for_encoding) ...@@ -205,7 +211,6 @@ Encoding *encoding_new(int n, int for_encoding)
return enc; return enc;
} }
}
return NULL; return NULL;
} }
...@@ -253,6 +258,18 @@ int encoding_write(Encoding *e, UCS4 c, char **buf, int *bufsize) ...@@ -253,6 +258,18 @@ int encoding_write(Encoding *e, UCS4 c, char **buf, int *bufsize)
return 0; return 0;
} }
/*
 * Return the max_char_size recorded for encoding number enc_num.
 * Returns 0 when enc_num has no entry in the encoding table.
 */
int encoding_max_char_size(int enc_num)
{
    EncList *entry = find_enclist(enc_num);

    if (entry == NULL)
        return 0;

    return entry->max_char_size;
}
/*
 * Return the default language string recorded for encoding number enc_num.
 * Falls back to "en" when enc_num has no entry in the encoding table.
 */
const char *encoding_default_language(int enc_num)
{
    EncList *entry = find_enclist(enc_num);

    if (entry != NULL)
        return entry->lang;

    return "en";
}
/* ----------------------------------------------------------------------------- */ /* ----------------------------------------------------------------------------- */
#define DEPTH_CUTOFF 3 #define DEPTH_CUTOFF 3
......
...@@ -650,7 +650,7 @@ static int iso2022_write_escapes(EncodingPriv *e, UCS4 u, char **ps, int *bufsiz ...@@ -650,7 +650,7 @@ static int iso2022_write_escapes(EncodingPriv *e, UCS4 u, char **ps, int *bufsiz
} }
/* check space */ /* check space */
if ((*bufsize -= out) < 0 || !*ps) if ((*bufsize -= out) < 0 || !ps)
return 0; return 0;
/* copy out and update ptr */ /* copy out and update ptr */
......
...@@ -234,6 +234,7 @@ int UTF8_seqlen(char c) ...@@ -234,6 +234,7 @@ int UTF8_seqlen(char c)
*/ */
char *UTF8_next(const char *p) char *UTF8_next(const char *p)
{ {
if (*p)
while ((*++p & 0xC0u) == 0x80u) while ((*++p & 0xC0u) == 0x80u)
continue; continue;
...@@ -250,3 +251,18 @@ char *UTF8_prev(const char *p) ...@@ -250,3 +251,18 @@ char *UTF8_prev(const char *p)
return (char *) p; return (char *) p;
} }
/*
 * Skip forward the given number of UTF8 'characters'.
 *
 * p       - position to start from
 * n_chars - number of characters to step over; a count of zero or less
 *           returns p unchanged
 *
 * Each step is delegated to UTF8_next. Returns the advanced pointer.
 */
char *UTF8_next_n(const char *p, int n_chars)
{
    const char *s = p;

    /*
     * Test '> 0' rather than plain non-zero: a negative count would never
     * count down to zero and would eventually overflow the int.
     */
    while (n_chars-- > 0)
        s = UTF8_next(s);

    return (char *)s;
}
/* eof utf8.c */
...@@ -42,7 +42,9 @@ ...@@ -42,7 +42,9 @@
#define csEUCKR 38 #define csEUCKR 38
#define csISO2022JP 39 #define csISO2022JP 39
#define csISO2022JP2 40 #define csISO2022JP2 40
#define csISO13JISC6220jp 104 #define csISO2022CN 104
#define csISO2022CN_EXT 105
#define csISO13JISC6220jp 41
#define csISO14JISC6220ro 42 #define csISO14JISC6220ro 42
#define csISO15Italian 22 #define csISO15Italian 22
#define csISO16Portuguese 43 #define csISO16Portuguese 43
...@@ -188,7 +190,8 @@ ...@@ -188,7 +190,8 @@
#define csKOI8R 2084 #define csKOI8R 2084
#define csUnicode11 1010 #define csUnicode11 1010
#define csUnicode11UTF7 103 #define csUnicode11UTF7 103
#define csJISEncoding 106 #define csUTF8 106
#define csJISEncoding 16
#define csShiftJIS 17 #define csShiftJIS 17
#define csEUCPkdFmtJapanese 18 #define csEUCPkdFmtJapanese 18
#define csEUCFixWidJapanese 19 #define csEUCFixWidJapanese 19
......
...@@ -86,4 +86,7 @@ typedef void (*encoding_free_fn)(void *ptr); ...@@ -86,4 +86,7 @@ typedef void (*encoding_free_fn)(void *ptr);
extern void encoding_set_alloc_fns(encoding_alloc_fn alloc, encoding_free_fn free); extern void encoding_set_alloc_fns(encoding_alloc_fn alloc, encoding_free_fn free);
/*
 * encoding_max_char_size returns the maximum size in bytes of one encoded
 * character for encoding number enc_num. A result of 0 means the size could
 * be huge; 0 is also returned when enc_num is not a known encoding.
 */
extern int encoding_max_char_size(int enc_num);
/*
 * encoding_default_language returns the default language code for encoding
 * number enc_num, or "en" when enc_num is not a known encoding.
 */
extern const char *encoding_default_language(int enc_num);
#endif #endif
...@@ -35,7 +35,9 @@ typedef struct EncList EncList; ...@@ -35,7 +35,9 @@ typedef struct EncList EncList;
struct EncList struct EncList
{ {
int identifier; int identifier;
int max_char_size; /* maximum size of an encoded character in bytes, 0 means could be huge */
char *names; char *names;
char *lang; /* default language for this encoding */
EncodingPriv *encoding; EncodingPriv *encoding;
char *preload; char *preload;
char *encoder_data; char *encoder_data;
......
/* Copyright 1997 Acorn Computers Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* > languages.h
*
*/
#ifndef __unicode_languages_h
#define __unicode_languages_h
/*
 * ISO 639 Language codes - a selection
 *
 * Each lang_XXX macro expands to the two-letter code as a string literal.
 * Entries are kept in alphabetical order of the code, not of the name.
 */
#define lang_AFAR "aa"
#define lang_ABKHAZIAN "ab"
#define lang_AFRIKAANS "af"
#define lang_AMHARIC "am"
#define lang_ARABIC "ar"
#define lang_ASSAMESE "as"
#define lang_AYMARA "ay"
#define lang_AZERBAIJANI "az"
#define lang_BASHKIR "ba"
#define lang_BYELORUSSIAN "be"
#define lang_BULGARIAN "bg"
#define lang_BIHARI "bh"
#define lang_BISLAMA "bi"
#define lang_BENGALI "bn"
#define lang_TIBETAN "bo"
#define lang_BRETON "br"
#define lang_CATALAN "ca"
#define lang_CORSICAN "co"
#define lang_CZECH "cs"
#define lang_WELSH "cy"
#define lang_DANISH "da"
#define lang_GERMAN "de"
#define lang_BHUTANI "dz"
#define lang_GREEK "el"
#define lang_ENGLISH "en"
#define lang_ESPERANTO "eo"
#define lang_SPANISH "es"
#define lang_ESTONIAN "et"
#define lang_BASQUE "eu"
/* Persian is "fa"; "pa" belongs to Punjabi (see lang_PUNJABI below) */
#define lang_PERSIAN_FARSI "fa"
#define lang_FINNISH "fi"
#define lang_FIJI "fj"
#define lang_FAROESE "fo"
#define lang_FRENCH "fr"
#define lang_FRISIAN "fy"
#define lang_IRISH "ga"
#define lang_SCOTS_GAELIC "gd"
#define lang_GALICIAN "gl"
#define lang_GUARANI "gn"
#define lang_GUJARATI "gu"
#define lang_HAUSA "ha"
#define lang_HINDI "hi"
#define lang_CROATIAN "hr"
#define lang_HUNGARIAN "hu"
#define lang_ARMENIAN "hy"
#define lang_INTERLINGUA "ia"
#define lang_INTERLINGUE "ie"
#define lang_INUPIAK "ik"
/* NOTE(review): "in" is the withdrawn code; ISO 639 now lists "id" */
#define lang_INDONESIAN "in"
#define lang_ICELANDIC "is"
#define lang_ITALIAN "it"
/* NOTE(review): "iw" is the withdrawn code; ISO 639 now lists "he" */
#define lang_HEBREW "iw"
#define lang_JAPANESE "ja"
/* NOTE(review): "ji" is the withdrawn code; ISO 639 now lists "yi" */
#define lang_YIDDISH "ji"
#define lang_JAVANESE "jv"
#define lang_GEORGIAN "ka"
#define lang_KAZAKH "kk"
#define lang_GREENLANDIC "kl"
#define lang_CAMBODIAN "km"
#define lang_KANNADA "kn"
#define lang_KOREAN "ko"
#define lang_KASHMIRI "ks"
#define lang_KURDISH "ku"
#define lang_KIRGHIZ "ky"
#define lang_LATIN "la"
#define lang_LINGALA "ln"
#define lang_LAOTHIAN "lo"
#define lang_LITHUANIAN "lt"
#define lang_LATVIAN "lv"
#define lang_MALAGASY "mg"
#define lang_MAORI "mi"
#define lang_MACEDONIAN "mk"
#define lang_MALAYALAM "ml"
#define lang_MONGOLIAN "mn"
#define lang_MOLDAVIAN "mo"
#define lang_MARATHI "mr"
#define lang_MALAY "ms"
#define lang_MALTESE "mt"
#define lang_BURMESE "my"
#define lang_NAURU "na"
#define lang_NEPALI "ne"
#define lang_DUTCH "nl"
#define lang_NORWEGIAN "no"
#define lang_OCCITAN "oc"
#define lang_AFAN "om"
#define lang_ORIYA "or"
#define lang_PUNJABI "pa"
#define lang_POLISH "pl"
#define lang_PASHTO "ps"
#define lang_PORTUGUESE "pt"
#define lang_QUECHUA "qu"
#define lang_RHAETO_ROMANCE "rm"
#define lang_KURUNDI "rn"
#define lang_ROMANIAN "ro"
#define lang_RUSSIAN "ru"
#define lang_KINYARWANDA "rw"
#define lang_SANSKRIT "sa"
#define lang_SINDHI "sd"
#define lang_SANGHO "sg"
#define lang_SERBO_CROATIAN "sh"
#define lang_SINGHALESE "si"
#define lang_SLOVAK "sk"
#define lang_SLOVENIAN "sl"
#define lang_SAMOAN "sm"
#define lang_SHONA "sn"
#define lang_SOMALI "so"
#define lang_ALBANIAN "sq"
#define lang_SERBIAN "sr"
#define lang_SISWATI "ss"
#define lang_SESOTHO "st"
#define lang_SUNDANESE "su"
#define lang_SWEDISH "sv"
#define lang_SWAHILI "sw"
#define lang_TAMIL "ta"
#define lang_TELUGU "te"
#define lang_TAJIK "tg"
#define lang_THAI "th"
#define lang_TIGRINYA "ti"
#define lang_TURKMEN "tk"
#define lang_TAGALOG "tl"
#define lang_SETSWANA "tn"
#define lang_TONGA "to"
#define lang_TURKISH "tr"
#define lang_TSONGA "ts"
#define lang_TATAR "tt"
#define lang_TWI "tw"
#define lang_UKRAINIAN "uk"
#define lang_URDU "ur"
#define lang_UZBEK "uz"
#define lang_VIETNAMESE "vi"
#define lang_VOLAPUK "vo"
#define lang_WOLOF "wo"
#define lang_XHOSA "xh"
#define lang_YORUBA "yo"
#define lang_CHINESE "zh"
#define lang_ZULU "zu"
#endif
/* eof languages.h */
...@@ -55,13 +55,20 @@ extern int UTF8_to_UCS4(const char *in, UCS4 *code_out); ...@@ -55,13 +55,20 @@ extern int UTF8_to_UCS4(const char *in, UCS4 *code_out);
* Note that p + UTF8_seqlen(p[0]) != UTF8_next(p) if p points to the * Note that p + UTF8_seqlen(p[0]) != UTF8_next(p) if p points to the
* middle of a sequence. * middle of a sequence.
*/ */
char *UTF8_next(const char *p); extern char *UTF8_next(const char *p);
/* /*
* UTF8_prev reverses the pointer to the previous UTF-8 code in a string. * UTF8_prev reverses the pointer to the previous UTF-8 code in a string.
* If p points to the middle of a UTF-8 sequence, it will be reversed * If p points to the middle of a UTF-8 sequence, it will be reversed
* to the start of that UTF-8 sequence. * to the start of that UTF-8 sequence.
*/ */
char *UTF8_prev(const char *p); extern char *UTF8_prev(const char *p);
/*
* UTF8_next_n advances the pointer over 'n_chars' UTF-8 characters and returns
* the new pointer. Each step is made with UTF8_next, so the note above about
* starting in the middle of a sequence applies here too.
*/
extern char *UTF8_next_n(const char *p, int n_chars);
#endif #endif
...@@ -93,6 +93,7 @@ riscos_export: riscos_libs ...@@ -93,6 +93,7 @@ riscos_export: riscos_libs
@cp -p unictype.h $(RISCOS_BUILD_EXPORT)/Lib/Unicode/ @cp -p unictype.h $(RISCOS_BUILD_EXPORT)/Lib/Unicode/
@cp -p encoding.h $(RISCOS_BUILD_EXPORT)/Lib/Unicode/ @cp -p encoding.h $(RISCOS_BUILD_EXPORT)/Lib/Unicode/
@cp -p iso10646.h $(RISCOS_BUILD_EXPORT)/Lib/Unicode/ @cp -p iso10646.h $(RISCOS_BUILD_EXPORT)/Lib/Unicode/
@cp -p languages.h $(RISCOS_BUILD_EXPORT)/Lib/Unicode/
@cp -p utf8.h $(RISCOS_BUILD_EXPORT)/Lib/Unicode/ @cp -p utf8.h $(RISCOS_BUILD_EXPORT)/Lib/Unicode/
@echo Copied libraries @echo Copied libraries
...@@ -100,6 +101,9 @@ clean: ...@@ -100,6 +101,9 @@ clean:
@-rm mkunictype @-rm mkunictype
@-rm unictype.c @-rm unictype.c
@-rm *.o @-rm *.o
@-rm *.od
@-rm *.oz
@-rm ucodelib ucodelibd ucodelibz
echo Done clean echo Done clean
realclean: clean realclean: clean
......
...@@ -21,6 +21,7 @@ wantlink ../c/unictype unictype.c ...@@ -21,6 +21,7 @@ wantlink ../c/unictype unictype.c
wantlink ../h/unictype unictype.h wantlink ../h/unictype unictype.h
wantlink ../c/mkunictype mkunictype.c wantlink ../c/mkunictype mkunictype.c
wantlink ../h/charsets charsets.h wantlink ../h/charsets charsets.h
wantlink ../h/languages languages.h
wantlink ../h/encpriv encpriv.h wantlink ../h/encpriv encpriv.h
wantlink ../h/iso10646 iso10646.h wantlink ../h/iso10646 iso10646.h
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment