Commit a4e5d90c authored by John-Mark Bell
Browse files

Add encoding_read_in_multibyte_sequence API.

Detail:
  The new API allows the client to determine if a read codec has run
  out of input in the middle of a multibyte sequence. This permits a
  client to distinguish between the two cases in which no output
  character is generated:

    a) insufficient input has been provided to complete an output character
    b) the input consisted of a complete multibyte sequence which generates
       no output (e.g. an ISO-2022 shift sequence)
Admin:
  Tested using Iconv.


Version 0.60. Tagged as 'Unicode-0_60'
parent 813b8a51
/* (0.59)
/* (0.60)
*
* This file is automatically maintained by srccommit, do not edit manually.
* Last processed by srccommit version: 1.1.
*
*/
#define Module_MajorVersion_CMHG 0.59
#define Module_MajorVersion_CMHG 0.60
#define Module_MinorVersion_CMHG
#define Module_Date_CMHG 14 Jan 2012
#define Module_Date_CMHG 06 Jan 2013
#define Module_MajorVersion "0.59"
#define Module_Version 59
#define Module_MajorVersion "0.60"
#define Module_Version 60
#define Module_MinorVersion ""
#define Module_Date "14 Jan 2012"
#define Module_Date "06 Jan 2013"
#define Module_ApplicationDate "14-Jan-12"
#define Module_ApplicationDate "06-Jan-13"
#define Module_ComponentName "Unicode"
#define Module_ComponentPath "castle/RiscOS/Sources/Lib/Unicode"
#define Module_FullVersion "0.59"
#define Module_HelpVersion "0.59 (14 Jan 2012)"
#define Module_LibraryVersionInfo "0:59"
#define Module_FullVersion "0.60"
#define Module_HelpVersion "0.60 (06 Jan 2013)"
#define Module_LibraryVersionInfo "0:60"
......@@ -114,6 +114,7 @@ static int acorn_write(EncodingPriv *e, UCS4 u, unsigned char **s, int *bufsize)
EncodingPriv enc_acorn =
{
eightbit_read,
eightbit_read_in_multibyte_sequence,
eightbit_reset,
sizeof(EightBit_Encoding) - sizeof(EncodingPriv),
eightbit_delete,
......
......@@ -141,6 +141,13 @@ static unsigned int bigfive_read(Encoding *e,
return n - count;
}
/*
 * Report whether the Big Five reader is part-way through a multibyte
 * sequence. Yields 1 when the codec's `prev` field is non-zero and 0
 * otherwise.
 * NOTE(review): `prev` presumably holds a lead byte still awaiting its
 * trail byte - confirm against bigfive_read.
 */
static int bigfive_read_in_multibyte_sequence(EncodingPriv *e)
{
    const BigFive_Encoding *codec = (const BigFive_Encoding *) e;

    return codec->prev ? 1 : 0;
}
static int bigfive_write(EncodingPriv *e, UCS4 u, unsigned char **bf, int *bufsize)
{
BigFive_Encoding *s5 = (BigFive_Encoding *) e;
......@@ -200,6 +207,7 @@ static void bigfive_delete(EncodingPriv *e)
EncodingPriv enc_bigfive =
{
bigfive_read,
bigfive_read_in_multibyte_sequence,
bigfive_reset,
sizeof(BigFive_Encoding) - sizeof(EncodingPriv),
bigfive_delete,
......
......@@ -80,6 +80,13 @@ unsigned int eightbit_read(EncodingPriv *e,
return n - count;
}
/*
 * Multibyte-sequence query for the eight-bit codecs. The answer is
 * always 0; the codec argument plays no part in it.
 */
int eightbit_read_in_multibyte_sequence(EncodingPriv *e)
{
    NOT_USED(e);

    return 0;
}
int eightbit_write(EncodingPriv *e, UCS4 u, unsigned char **s, int *bufsize)
{
EightBit_Encoding *ee = (EightBit_Encoding *) e;
......@@ -115,6 +122,7 @@ void eightbit_delete(EncodingPriv *e)
EncodingPriv enc_eightbit =
{
eightbit_read,
eightbit_read_in_multibyte_sequence,
eightbit_reset,
sizeof(EightBit_Encoding) - sizeof(EncodingPriv),
eightbit_delete,
......
......@@ -73,6 +73,13 @@ static unsigned int ascii_read(Encoding *e,
NOT_USED(e);
}
/*
 * Multibyte-sequence query for the ASCII codec. The answer is always 0;
 * the codec argument plays no part in it.
 */
static int ascii_read_in_multibyte_sequence(EncodingPriv *e)
{
    NOT_USED(e);

    return 0;
}
static int ascii_write(EncodingPriv *e, UCS4 u, unsigned char **s, int *bufsize)
{
int c;
......@@ -101,6 +108,7 @@ static int ascii_write(EncodingPriv *e, UCS4 u, unsigned char **s, int *bufsize)
EncodingPriv enc_ascii =
{
ascii_read,
ascii_read_in_multibyte_sequence,
0, /* ascii_reset, */
0,
0, /* ascii_delete */
......
......@@ -415,6 +415,13 @@ static unsigned int scsu_read(Encoding *e,
return n - count;
}
/*
 * Report whether the SCSU reader is part-way through a multibyte
 * sequence. Yields 1 when the `surrogate` field is non-zero, or when
 * `state` is neither SingleByte nor Unicode; yields 0 otherwise.
 * NOTE(review): the other states presumably mark a command whose
 * argument bytes are still outstanding - confirm against scsu_read.
 */
static int scsu_read_in_multibyte_sequence(EncodingPriv *e)
{
    const SCSU_Encoding *scsu = (const SCSU_Encoding *) e;

    if (scsu->surrogate)
    {
        return 1;
    }

    return !(scsu->state == SingleByte || scsu->state == Unicode);
}
/*
* What window offset should I use for this character?
* Returns 0 if not accessible via a standard window.
......@@ -1074,6 +1081,7 @@ static int scsu_write(EncodingPriv *e, UCS4 u, unsigned char **buf, int *bufsize
EncodingPriv enc_scsu =
{
scsu_read,
scsu_read_in_multibyte_sequence,
scsu_reset,
sizeof(SCSU_Encoding) - sizeof(EncodingPriv),
0, /* scsu_delete */
......
......@@ -58,11 +58,13 @@ static int system_reset(Encoding *e, int for_encoding)
if (alphabet == 111)
{
e->read = enc_utf8.read;
e->read_in_multibyte_sequence = enc_utf8.read_in_multibyte_sequence;
e->write = enc_utf8.write;
}
else
{
e->read = enc_system.read;
e->read_in_multibyte_sequence = enc_system.read_in_multibyte_sequence;
e->write = enc_system.write;
se->lookup = encoding_alphabet_ucs_table(alphabet);
}
......@@ -104,6 +106,13 @@ static unsigned int system_read(EncodingPriv *e,
return n - count;
}
/*
 * Multibyte-sequence query for the system-alphabet codec. The answer is
 * always 0; the codec argument plays no part in it.
 */
static int system_read_in_multibyte_sequence(EncodingPriv *e)
{
    NOT_USED(e);

    return 0;
}
static int system_write(EncodingPriv *e, UCS4 u, unsigned char **s, int *bufsize)
{
System_Encoding *se = (System_Encoding *) e;
......@@ -158,6 +167,7 @@ static void system_delete(EncodingPriv *e)
EncodingPriv enc_system =
{
system_read,
system_read_in_multibyte_sequence,
system_reset,
sizeof(System_Encoding) - sizeof(EncodingPriv),
system_delete,
......
......@@ -119,6 +119,13 @@ static unsigned int ucs4_read(Encoding *e,
return n - count;
}
/*
 * Report whether the UCS-4 reader is part-way through a character.
 * Yields 0 when the `sync` field equals 3 and 1 for any other value.
 * NOTE(review): 3 is presumably the value `sync` holds on a character
 * boundary - confirm against ucs4_read / ucs4_reset.
 */
static int ucs4_read_in_multibyte_sequence(EncodingPriv *e)
{
    const UCS4_Encoding *ucs4 = (const UCS4_Encoding *) e;

    return !(ucs4->sync == 3);
}
static unsigned char *write_be(unsigned char *p, UCS4 u)
{
*p++ = (u >> 24) & 0xff;
......@@ -181,6 +188,7 @@ static int ucs4_write(EncodingPriv *e, UCS4 u, unsigned char **pucs4, int *bufsi
EncodingPriv enc_ucs4 =
{
ucs4_read,
ucs4_read_in_multibyte_sequence,
ucs4_reset,
sizeof(UCS4_Encoding) - sizeof(EncodingPriv),
0, /* ucs4_delete */
......
......@@ -132,6 +132,13 @@ static unsigned int utf16_read(Encoding *e,
return n - count;
}
/*
 * Report whether the UTF-16 reader is part-way through a character.
 * Yields 1 when either the `sync` field or the `prev_surrogate` field
 * is non-zero, and 0 when both are zero.
 */
static int utf16_read_in_multibyte_sequence(EncodingPriv *e)
{
    const UTF16_Encoding *utf16 = (const UTF16_Encoding *) e;

    if (utf16->sync != 0)
    {
        return 1;
    }

    return utf16->prev_surrogate != 0;
}
static unsigned char *write_be(unsigned char *p, UCS2 u)
{
*p++ = u >> 8;
......@@ -212,6 +219,7 @@ static int utf16_write(EncodingPriv *e, UCS4 u, unsigned char **putf16, int *buf
EncodingPriv enc_utf16 =
{
utf16_read,
utf16_read_in_multibyte_sequence,
utf16_reset,
sizeof(UTF16_Encoding) - sizeof(EncodingPriv),
0, /* utf16_delete */
......
......@@ -188,6 +188,13 @@ static unsigned int utf7_read(Encoding *e,
return n - count;
}
/*
 * Report whether the UTF-7 reader is part-way through a character.
 * Yields 1 when `state` is Shifted with a positive `bits` count, or
 * when the `surrogate` field is non-zero; yields 0 otherwise.
 */
static int utf7_read_in_multibyte_sequence(EncodingPriv *e)
{
    const UTF7_Encoding *utf7 = (const UTF7_Encoding *) e;

    if (utf7->state == Shifted && utf7->bits > 0)
    {
        return 1;
    }

    return utf7->surrogate ? 1 : 0;
}
static int output(unsigned char c, unsigned char **out, int *outsize)
{
if ((*outsize)-- > 0)
......@@ -299,6 +306,7 @@ static int utf7_write(EncodingPriv *e, UCS4 u, unsigned char **utf7, int *bufsiz
EncodingPriv enc_utf7 =
{
utf7_read,
utf7_read_in_multibyte_sequence,
utf7_reset,
sizeof(UTF7_Encoding) - sizeof(Encoding),
0, /* utf7_delete */
......
......@@ -131,6 +131,13 @@ static unsigned int utf8_read(Encoding *e,
return n - count;
}
/*
 * Report whether the UTF-8 reader is part-way through a character.
 * Yields 1 when the `count` field is greater than zero, else 0.
 * NOTE(review): `count` presumably tracks continuation bytes still
 * expected - confirm against utf8_read.
 */
static int utf8_read_in_multibyte_sequence(EncodingPriv *e)
{
    const UTF8_Encoding *utf8 = (const UTF8_Encoding *) e;

    if (utf8->count > 0)
    {
        return 1;
    }

    return 0;
}
static int utf8_write(EncodingPriv *e, UCS4 u, unsigned char **utf8, int *bufsize)
{
UTF8_Encoding *ue = (UTF8_Encoding *) e;
......@@ -156,6 +163,7 @@ static int utf8_write(EncodingPriv *e, UCS4 u, unsigned char **utf8, int *bufsiz
EncodingPriv enc_utf8 =
{
utf8_read,
utf8_read_in_multibyte_sequence,
utf8_reset,
sizeof(UTF8_Encoding) - sizeof(EncodingPriv),
0, /* utf8_delete */
......
......@@ -270,6 +270,15 @@ unsigned encoding_read(Encoding *e, encoding_read_callback_fn ucs_out,
return 0;
}
/*
 * Public entry point: ask the codec behind `e` whether it is waiting
 * for more input to finish a multibyte sequence.
 *
 * Returns 0 when `e` is null or when its for_encoding field is not
 * encoding_READ; otherwise returns whatever the codec's own
 * read_in_multibyte_sequence hook reports.
 */
int encoding_read_in_multibyte_sequence(Encoding *e)
{
    if (!e || e->for_encoding != encoding_READ)
    {
        return 0;
    }

    return e->read_in_multibyte_sequence(e);
}
int encoding_write(Encoding *e, UCS4 c, char **buf, int *bufsize)
{
if (e && e->for_encoding != encoding_READ)
......
......@@ -721,6 +721,13 @@ static unsigned int iso2022_read(EncodingPriv *e,
return n - count;
}
/*
 * Report whether the ISO 2022 reader is part-way through a sequence.
 * Yields 1 when any of sync[_GL], sync[_GR] or esc_pending is non-zero,
 * and 0 when all three are zero.
 */
static int iso2022_read_in_multibyte_sequence(EncodingPriv *e)
{
    const ISO2022_Encoding *iso = (const ISO2022_Encoding *) e;

    if (iso->sync[_GL] != 0)
    {
        return 1;
    }

    if (iso->sync[_GR] != 0)
    {
        return 1;
    }

    return iso->esc_pending != 0;
}
/* ---------------------------------------------------------------------------------------------------- */
static int write_index(int index, unsigned char *out, int table_size, int sevenbit)
......@@ -906,6 +913,7 @@ static int iso2022_write_shifts(EncodingPriv *e, UCS4 u, unsigned char **ps, int
EncodingPriv enc_iso2022 =
{
iso2022_read,
iso2022_read_in_multibyte_sequence,
iso2022_reset,
sizeof (ISO2022_Encoding) - sizeof(EncodingPriv),
iso2022_delete,
......@@ -1018,6 +1026,7 @@ retry:
EncodingPriv enc_iso2022_euc =
{
iso2022_read,
iso2022_read_in_multibyte_sequence,
iso2022_reset,
sizeof (ISO2022_Encoding) - sizeof(EncodingPriv),
iso2022_delete,
......
......@@ -461,6 +461,13 @@ static unsigned int iso6937_read(Encoding *e,
return n - count;
}
/*
 * Report whether the ISO 6937 reader is part-way through a sequence.
 * Yields 0 when the `accent` field equals None and 1 otherwise.
 * NOTE(review): a non-None accent presumably means a non-spacing accent
 * byte has been read and its base character is awaited - confirm
 * against iso6937_read.
 */
static int iso6937_read_in_multibyte_sequence(EncodingPriv *e)
{
    const ISO6937_Encoding *iso = (const ISO6937_Encoding *) e;

    return !(iso->accent == None);
}
static int iso6937_write(EncodingPriv *e, UCS4 u, unsigned char **s, int *bufsize)
{
ISO6937_Encoding *ie = (ISO6937_Encoding *) e;
......@@ -503,6 +510,7 @@ static void iso6937_delete(EncodingPriv *e)
EncodingPriv enc_iso6937 =
{
iso6937_read,
iso6937_read_in_multibyte_sequence,
iso6937_reset,
sizeof(ISO6937_Encoding) - sizeof(EncodingPriv),
iso6937_delete,
......
......@@ -277,6 +277,13 @@ static unsigned int johab_read(Encoding *e,
return n - count;
}
/*
 * Report whether the Johab reader is part-way through a multibyte
 * sequence. Yields 1 when the codec's `prev` field is non-zero and 0
 * otherwise.
 * NOTE(review): `prev` presumably holds a lead byte still awaiting its
 * trail byte - confirm against johab_read.
 */
static int johab_read_in_multibyte_sequence(EncodingPriv *e)
{
    const Johab_Encoding *codec = (const Johab_Encoding *) e;

    return codec->prev ? 1 : 0;
}
static int ucs_hangul_to_johab(UCS4 u)
{
int initial, medial, final;
......@@ -400,6 +407,7 @@ static void johab_delete(EncodingPriv *e)
EncodingPriv enc_johab =
{
johab_read,
johab_read_in_multibyte_sequence,
johab_reset,
sizeof(Johab_Encoding) - sizeof(EncodingPriv),
johab_delete,
......
......@@ -210,6 +210,13 @@ static unsigned int shiftjis_read(Encoding *e,
return n - count;
}
/*
 * Report whether the Shift-JIS reader is part-way through a multibyte
 * sequence. Yields 1 when the codec's `prev` field is non-zero and 0
 * otherwise.
 * NOTE(review): `prev` presumably holds a lead byte still awaiting its
 * trail byte - confirm against shiftjis_read.
 */
static int shiftjis_read_in_multibyte_sequence(EncodingPriv *e)
{
    const ShiftJIS_Encoding *codec = (const ShiftJIS_Encoding *) e;

    return codec->prev ? 1 : 0;
}
static int lookup_table(UCS4 u, ShiftJIS_Encoding *sj, int *index, int *table_no)
{
int i;
......@@ -354,6 +361,7 @@ static void shiftjis_delete(EncodingPriv *e)
EncodingPriv enc_shiftjis =
{
shiftjis_read,
shiftjis_read_in_multibyte_sequence,
shiftjis_reset,
sizeof(ShiftJIS_Encoding) - sizeof(EncodingPriv),
shiftjis_delete,
......
......@@ -40,5 +40,6 @@ extern unsigned int eightbit_read(EncodingPriv *e,
const unsigned char *s,
unsigned int n,
void *handle);
extern int eightbit_read_in_multibyte_sequence(EncodingPriv *e);
extern int eightbit_write(EncodingPriv *e, UCS4 u, unsigned char **s, int *bufsize);
extern void eightbit_delete(EncodingPriv *e);
......@@ -57,6 +57,13 @@ extern unsigned encoding_read(Encoding *e,
unsigned int n,
void *handle);
/*
* Determine if the read codec has run out of input in the middle of
* a multibyte sequence. Returns true if the codec is waiting for input
* that forms part of a multibyte sequence.
*/
extern int encoding_read_in_multibyte_sequence(Encoding *e);
/*
* write the UCS4 character c to the specified buffer
*
......
......@@ -41,6 +41,8 @@ typedef unsigned int (*encoding_read_fn)(EncodingPriv *e,
unsigned int n,
void *handle);
/*
 * Type of the per-codec hook held in EncodingPriv's
 * read_in_multibyte_sequence slot: it is passed the codec and returns
 * an int.
 */
typedef int (*encoding_read_in_multibyte_sequence_fn)(EncodingPriv *e);
typedef int (*encoding_write_fn)(EncodingPriv *e, UCS4 c, unsigned char **buf, int *bufsize);
typedef struct EncList EncList;
......@@ -66,6 +68,7 @@ struct EncodingPriv
{
/* values set up by the encoding scheme */
encoding_read_fn read;
encoding_read_in_multibyte_sequence_fn read_in_multibyte_sequence;
encoding_reset_fn reset;
int ws_size;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment