Merge of bug fixes from NCBrowser tree.

Detail: Buffer overrun fixed; some buffer counting problems fixed too. There are now helpful initialisation and tidy-up routines you can call too (called encoding_initialise and encoding_tidyup). Admin: I've built this with cc 5.45 in the basic build environment - it built OK. This source code now matches that in NCBrowser 5.28. Version 0.47. Tagged as 'Unicode-0_47'

Merge of bug fixes from NCBrowser tree.
Detail: Buffer overrun fixed; some buffer counting problems fixed too. There are now helpful initialisation and tidy-up routines you can call too (called encoding_initialise and encoding_tidyup). Admin: I've built this with cc 5.45 in the basic build environment - it built OK. This source code now matches that in NCBrowser 5.28. Version 0.47. Tagged as 'Unicode-0_47'
0524cabb · Stewart Brodie · 71647d81 · 0524cabb · 0524cabb · 0524cabb
Commit 0524cabb authored 22 years ago by Stewart Brodie
17 changed files
--- a/VersionNum
+++ b/VersionNum
-/* (0.46)
+/* (0.47)
 *
 * This file is automatically maintained by srccommit, do not edit manually.
+ * Last processed by srccommit version: 1.64.
 *
 */
-#define Module_MajorVersion_CMHG        0.46
+#define Module_MajorVersion_CMHG        0.47
 #define Module_MinorVersion_CMHG        
-#define Module_Date_CMHG                13 Oct 2000
+#define Module_Date_CMHG                10 Jun 2002

-#define Module_MajorVersion             "0.46"
-#define Module_Version                  46
+#define Module_MajorVersion             "0.47"
+#define Module_Version                  47
 #define Module_MinorVersion             ""
-#define Module_Date                     "13 Oct 2000"
+#define Module_Date                     "10 Jun 2002"

-#define Module_ApplicationDate2         "13-Oct-00"
-#define Module_ApplicationDate4         "13-Oct-2000"
+#define Module_ApplicationDate2         "10-Jun-02"
+#define Module_ApplicationDate4         "10-Jun-2002"

-#define Module_FullVersion              "0.46"
-#define Module_HelpVersion              "0.46 (13 Oct 2000)"
+#define Module_ComponentName            "Unicode"
+#define Module_ComponentPath            "RiscOS/Sources/Lib/Unicode"
+
+#define Module_FullVersion              "0.47"
+#define Module_HelpVersion              "0.47 (10 Jun 2002)"
+#define Module_LibraryVersionInfo       "0:47"
--- a/c/autojp
+++ b/c/autojp
@@ -58,7 +58,7 @@
 #define SS2         142

 #if DEBUG
-static char *states[] =
+static const char *states[] =
 {
    "HAD_NONE",
    "HAD_ESC",

--- a/c/bigfive
+++ b/c/bigfive
@@ -128,8 +128,14 @@ static unsigned int bigfive_read(Encoding *e,
        }

        if (ucs_out)
+        {
            if (ucs_out(handle, u))
+            {
+                /* Character has been used, so ensure its counted */
+                count--;
                break;
+            }
+        }
    }

    return n - count;
@@ -182,7 +188,6 @@ static int bigfive_write(EncodingPriv *e, UCS4 u, unsigned char **bf, int *bufsi
 	*(*bf)++ = cc;

    return 1;
-    s5 = s5;
 }

 static void bigfive_delete(EncodingPriv *e)

--- a/c/eightbit
+++ b/c/eightbit
@@ -70,7 +70,11 @@ unsigned int eightbit_read(EncodingPriv *e,

        if (ucs_out)
            if (ucs_out(handle, u))
+            {
+                /* Character has been used, so ensure its counted */
+                count--;
                break;
+            }
    }

    return n - count;

--- a/c/enc_ascii
+++ b/c/enc_ascii
@@ -61,7 +61,11 @@ static unsigned int ascii_read(Encoding *e,

        if (ucs_out)
            if (ucs_out(handle, u))
+            {
+                /* Character has been used, so ensure its counted */
+                count--;
                break;
+            }
    }

    return n - count;
@@ -90,7 +94,8 @@ static int ascii_write(EncodingPriv *e, UCS4 u, unsigned char **s, int *bufsize)
    (*s)++;

    return 1;
-    e = e;
+
+    NOT_USED(e);
 }

 EncodingPriv enc_ascii =

--- a/c/enc_scsu
+++ b/c/enc_scsu
@@ -405,7 +405,11 @@ static unsigned int scsu_read(Encoding *e,

        if (ucs_out)
            if (ucs_out(handle, u))
+            {
+                /* Character has been used, so ensure its counted */
+                count--;
                break;
+            }
    }

    return n - count;

--- a/c/enc_system
+++ b/c/enc_system
@@ -71,7 +71,7 @@ static int system_reset(Encoding *e, int for_encoding)
 }

 static unsigned int system_read(EncodingPriv *e,
-          		        encoding_read_callback_fn ucs_out,
+        		        encoding_read_callback_fn ucs_out,
                                const unsigned char *s,
                                unsigned int n,
                                void *handle)
@@ -94,7 +94,11 @@ static unsigned int system_read(EncodingPriv *e,

        if (ucs_out)
            if (ucs_out(handle, u))
+            {
+                /* Character has been used, so ensure its counted */
+                count--;
                break;
+            }
    }

    return n - count;

--- a/c/enc_ucs4
+++ b/c/enc_ucs4
@@ -109,7 +109,11 @@ static unsigned int ucs4_read(Encoding *e,

        if (ucs_out)
            if (ucs_out(handle, u))
+            {
+                /* Character has been used, so ensure its counted */
+                count--;
                break;
+            }
    }

    return n - count;

--- a/c/enc_utf16
+++ b/c/enc_utf16
@@ -122,7 +122,11 @@ static unsigned int utf16_read(Encoding *e,

        if (ucs_out)
            if (ucs_out(handle, u))
+            {
+                /* The character has been used, so ensure its counted */
+                count--;
                break;
+            }
    }

    return n - count;
@@ -217,4 +221,3 @@ EncodingPriv enc_utf16 =
    0,
    0
 };
-
--- a/c/enc_utf7
+++ b/c/enc_utf7
@@ -178,7 +178,11 @@ static unsigned int utf7_read(Encoding *e,

        if (ucs_out)
            if (ucs_out(handle, u))
+            {
+                /* Character has been used, so ensure its counted */
+                count--;
                break;
+            }
    }

    return n - count;

--- a/c/enc_utf8
+++ b/c/enc_utf8
@@ -80,7 +80,11 @@ static unsigned int utf8_read(Encoding *e,
            {
                if (ucs_out)
                    if (ucs_out(handle, 0xFFFD))
+                    {
+                        /* Character has been used, so ensure its counted */
+                        count--;
                        break;
+                    }

                ue->count = 0;

@@ -113,7 +117,11 @@ static unsigned int utf8_read(Encoding *e,

        if (ucs_out)
            if (ucs_out(handle, u))
+            {
+                /* Character has been used, so ensure its counted */
+                count--;
                break;
+            }
    }

    return n - count;

--- a/c/encoding
+++ b/c/encoding
@@ -321,6 +321,7 @@ struct table_info

 static encoding_table enc_table_list = NULL;

+
 UCS2 *encoding_table_ptr(encoding_table t)
 {
    return t ? t->table : NULL;
@@ -362,6 +363,31 @@ static void encoding_table_free(encoding_table t)
    }
 }

+/*
+ * startup and closedown funcs
+ */
+
+void encoding_initialise(void)
+{
+    enc_table_list = NULL;
+}
+
+void encoding_tidyup(void)
+{
+    encoding_table t;
+
+    t = enc_table_list;
+    while (t)
+    {
+	encoding_table next = t->next;
+        encoding_table_free(t);
+	t = next;
+    }
+
+    enc_table_list = NULL;
+}
+
+
 /* Remove tables that are unused and are further down the list than
 * the the given 'start_depth'. */


--- a/c/iso2022
+++ b/c/iso2022
@@ -652,7 +652,11 @@ static unsigned int iso2022_read(EncodingPriv *e,

            if (ucs_out)
                if (ucs_out(handle, u))
+                {
+                    /* Character has been used, so ensure its counted */
+                    count--;
                    break;
+                }
        }
    }


--- a/c/johab
+++ b/c/johab
@@ -265,7 +265,11 @@ static unsigned int johab_read(Encoding *e,

        if (ucs_out)
            if (ucs_out(handle, u))
+            {
+                /* Character has been used, so ensure its counted */
+                count--;
                break;
+            }
    }

    return n - count;

--- a/c/shiftjis
+++ b/c/shiftjis
@@ -200,7 +200,11 @@ static unsigned int shiftjis_read(Encoding *e,

        if (ucs_out)
            if (ucs_out(handle, u))
+            {
+                /* Character has been used, so ensure its counted */
+                count--;
                break;
+            }
    }

    return n - count;
@@ -327,7 +331,6 @@ static int shiftjis_write(EncodingPriv *e, UCS4 u, unsigned char **sjis, int *bu
 	*(*sjis)++ = cc;

    return 1;
-    sj = sj;
 }

 static void shiftjis_delete(EncodingPriv *e)

--- a/c/utf8
+++ b/c/utf8
@@ -256,8 +256,16 @@ int UTF8_seqlen(char cc)
 char *UTF8_next(const char *p)
 {
    if (*p)
-	while ((*++p & 0xC0u) == 0x80u)
-	    continue;
+    {
+        /* check for single bytes character to avoid looking at the
+         * next byte which may not be in valid memory if not in a
+         * null-terminated string */
+        if (*p < 0xC0u)
+            p++;
+        else
+            while ((*++p & 0xC0u) == 0x80u)
+                continue;
+    }

    return (char *) p;
 }
@@ -288,7 +296,7 @@ char *UTF8_next_n(const char *p, int n_chars)

 /*
 * Count the number of utf8 'characters' in the string
- * Count only upto n_bytes or NUL.
+ * Count only up to n_bytes or NUL.
 */

 int UTF8_strlen_n(const char *p, int n_bytes)
@@ -305,7 +313,15 @@ int UTF8_strlen_n(const char *p, int n_bytes)

 int UTF8_strlen(const char *p)
 {
-    return UTF8_strlen_n(p, INT_MAX);
+    int nchars = 0;
+
+    while (*p)
+    {
+	p = UTF8_next(p);
+	nchars++;
+    }
+
+    return nchars;
 }

 /* eof utf8.c */
--- a/h/encoding
+++ b/h/encoding
@@ -35,6 +35,12 @@ typedef struct EncodingPriv Encoding;

 typedef int (*encoding_read_callback_fn)(void *handle, UCS4 c);

+/*
+ * startup and close down functions
+ */
+extern void encoding_initialise(void);
+extern void encoding_tidyup(void);
+
 /*
 * read processes the next part of a stream. s points to the data, which
 * is n bytes long. For each UCS character found, ucs_out() will be called