encoding 8.6 KB
/* Copyright 1997 Acorn Computers Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "swis.h"

#include "encpriv.h"
#include "enc_ascii.h"
#include "iso2022.h"
#include "shiftjis.h"
#include "bigfive.h"
#include "koi8r.h"
#include "microsoft.h"
#include "apple.h"
#include "acorn.h"
#include "enc_utf8.h"
#include "enc_utf16.h"
#include "enc_ucs4.h"

#include "charsets.h"

#include "VersionNum"

/* Version banner assembled from the Module_* macros supplied by "VersionNum".
 * Nothing in the code visible here reads it -- presumably it exists so the
 * string is embedded in the built library (TODO confirm). */
static char version[] = "Unicode library " Module_MajorVersion " " Module_Date " " Module_MinorVersion ;

/*
 * Table of the encodings this library knows about, terminated by an entry
 * whose identifier is 0.
 *
 * Each entry starts with:
 *   - the numeric identifier matched by encoding_new() (the values look
 *     like IANA MIBenum numbers plus private cs* constants from charsets.h
 *     -- confirm against that header);
 *   - the list of names for the encoding, in upper case, each one both
 *     preceded and followed by '/'.  encoding_number_from_name() searches
 *     for "/NAME/" with strstr(), so a name without its closing '/' can
 *     never be matched;
 *   - the encoding implementation that encoding_new() copies.
 *
 * Any further initialisers are strings of ISO 2022 escape sequences, as
 * described by the comments beside them; the members they initialise are
 * declared with EncList (not visible in this file).
 */
static EncList enclist[] =
{
 {    3, "/US-ASCII/", &enc_ascii, NULL },
 /* ISO-IR-100 is the registration for the Latin-1 right half and ISO-IR-101
  * for Latin-2 (see the IANA character sets registry) */
 {    4, "/ISO-8859-1/ISO-IR-100/", &enc_iso8859, "\x1B\x2D\x41\x1B\x2F\x50" },	/* Select Latin-1 right half and G3 supplement */
 {    5, "/ISO-8859-2/ISO-IR-101/", &enc_iso8859, "\x1B\x2D\x42\x1B\x2F\x50" },	/* Select Latin-2 right half */
 {    6, "/ISO-8859-3/", &enc_iso8859, "\x1B\x2D\x43\x1B\x2F\x50" },		/* Select Latin-3 right half */
 {    7, "/ISO-8859-4/", &enc_iso8859, "\x1B\x2D\x44\x1B\x2F\x50" },		/* Select Latin-4 right half */
 {    8, "/ISO-8859-5/", &enc_iso8859, "\x1B\x2D\x4C" },			/* Select Cyrillic right half */
 {   10, "/ISO-8859-7/", &enc_iso8859, "\x1B\x2D\x46" },			/* Select Greek right half */
 {   11, "/ISO-8859-8/", &enc_iso8859, "\x1B\x2D\x48" },			/* Select Hebrew right half */
 {   12, "/ISO-8859-9/", &enc_iso8859, "\x1B\x2D\x4D\x1B\x2F\x50" },		/* Select Latin-5 right half */
 {   13, "/ISO-8859-10/", &enc_iso8859, "\x1B\x2D\x56\x1B\x2E\x58" },		/* Select Latin-6 right half, and Sami supplement as G2 */
 {   17, "/SHIFT_JIS/X-SJIS/", &enc_shiftjis },

     /*
     * Select G0 = JIS X 0201 Roman      (ESC ( J)
     *        G1 = JIS X 0208            (ESC $ ) B)
     *        G2 = JIS X 0201 Katakana   (ESC * I)
     *        G3 = JIS X 0212            (ESC $ + D)
     *        C1 = ISO 6429              (ESC " C)
     * Single shift range = GR           (ESC SP \)
     */
 {   18, "/EUC-JP/", &enc_iso2022_euc,
		"\x1B\x28\x4A"
		"\x1B\x24\x29\x42"
		"\x1B\x2A\x49"
		"\x1B\x24\x2B\x44"
		"\x1B\x22\x43"
		"\x1B\x20\x5C" },

 {   37, "/ISO-2022-KR/", &enc_iso2022_shifts, NULL,
		"\x1B\x24\x29\x43" },
 {   38, "/EUC-KR/", &enc_iso2022_euc,
		"\x1B\x24\x29\x43" },			/* Select G1 = KS C 5601 */
 {   39, "/ISO-2022-JP/JIS_Encoding/", &enc_iso2022_escapes, NULL,
		"\x1B\x28\x42"
		"\x1B\x28\x4A"
		"\x1B\x24\x40"
		"\x1B\x24\x42" },
 {   40, "/ISO-2022-JP-2/", &enc_iso2022_escapes, NULL,
		"\x1B\x28\x42"
		"\x1B\x28\x4A"
		"\x1B\x24\x40"
		"\x1B\x24\x42"
		"\x1B\x24\x28\x43"
		"\x1B\x24\x28\x44" },
 {  104, "/ISO-2022-CN/", &enc_iso2022 },
 {  105, "/ISO-2022-CN-EXT/", &enc_iso2022 },
 {  106, "/UTF-8/UNICODE-1-1-UTF-8/UNICODE-2-0-UTF-8/", &enc_utf8 },		/* More general!!! */
 { 1001, "/ISO-10646-UCS-4/UCS-4/", &enc_ucs4 },
 { 1010, "/UCS-2/UTF-16/ISO-10646-UCS-2/UNICODE-1-1/UNICODE-2-0/", &enc_utf16 }, /* More general!!! */
 { 2022, "/ISO-2022/", &enc_iso2022 },
 /* The closing '/' is required, otherwise GB_2312-80 is unreachable by name */
 { 2025, "/X-EUC_CN/GB2312/CN-GB/GB_2312-80/", &enc_iso2022_euc,
		"\x1B\x24\x29\x41" },						/* Select G1 = GB 2312-80 */
 { 2026, "/CN-BIG5/BIG5/", &enc_bigfive },
 { 2027, "/MACINTOSH/", &enc_mac_roman },
 { 2084, "/KOI8-R/", &enc_koi8r },
 { 2250, "/WINDOWS-1250/", &enc_cp1250 },
 { 2252, "/WINDOWS-1252/", &enc_cp1252 },
 { csWelsh, "/ISO-IR-182/", &enc_iso8859, "\x1B\x2D\x5C" },			/* Select Welsh right half */
/*  { 4001, "/ISO-IR-179/", &enc_baltic_rim }, */
 { csSami, "/ISO-8859-15/ISO-IR-197/", &enc_iso8859, "\x1B\x2D\x5D" },		/* Select Sami right half */
 { csISOLatin13, "/ISO-8859-13/", &enc_iso8859, "\x1B\x2D\x59" },		/* Select Baltic Rim right half */
 { csAcornLatin1, "/X-ACORN-LATIN1/", &enc_acorn_latin1 },
 { 0, NULL, NULL }
};

/* ----------------------------------------------------------------------------- */

/*
 * Create a new encoding instance for the encoding numbered n.
 *
 * for_encoding is stored in the instance and handed to the encoding's reset
 * routine; encoding_read() only works when it is zero and encoding_write()
 * only when it is non-zero.
 *
 * Returns NULL if n is not in enclist, if the allocation fails, or if the
 * encoding's reset routine returns zero.
 */
Encoding *encoding_new(int n, int for_encoding)
{
    struct EncList *entry;
    EncodingPriv *enc;
    int size;

    /* Walk the table until we hit either n or the zero terminator */
    entry = enclist;
    while (entry->identifier && entry->identifier != n)
        entry++;

    if (!entry->identifier)
        return NULL;

    /* The private workspace lives directly after the common structure */
    size = sizeof(EncodingPriv) + entry->encoding->ws_size;
    enc = encoding__alloc(size);
    if (!enc)
        return NULL;

    /* Zero everything, then start from a copy of the encoding's template */
    memset(enc, 0, size);
    *enc = *entry->encoding;

    enc->list_entry = entry;
    enc->for_encoding = for_encoding;

    if (enc->reset && !enc->reset(enc, for_encoding))
    {
        encoding__free(enc);
        return NULL;
    }

    return enc;
}

/*
 * Dispose of an encoding instance: give the encoding's own delete_enc
 * routine (if it has one) a chance to run, then release the instance with
 * encoding__free().  Passing NULL does nothing.
 */
void encoding_delete(Encoding *e)
{
    if (e == NULL)
        return;

    if (e->delete_enc != NULL)
        e->delete_enc(e);

    encoding__free(e);
}

/*
 * Reset an encoding instance by calling its reset routine with the
 * for_encoding value it was created with.
 *
 * Returns 0 for a NULL instance, 1 if the encoding has no reset routine,
 * otherwise whatever the reset routine returns.
 */
int encoding_reset(Encoding *e)
{
    if (e == NULL)
        return 0;

    return e->reset ? e->reset(e, e->for_encoding) : 1;
}

/*
 * Pass n bytes at s to the encoding's read routine, which is given ucs_out
 * and handle to report its output.
 *
 * Only valid on an instance created with for_encoding == 0; for a NULL
 * instance or one created for encoding, nothing is read and 0 is returned.
 * Otherwise the read routine's result is returned unchanged.
 */
unsigned encoding_read(Encoding *e, encoding_read_callback_fn ucs_out,
		   const char *s,
		   unsigned int n,
		   void *handle)
{
    if (e == NULL || e->for_encoding)
        return 0;

    return e->read(e, ucs_out, s, n, handle);
}

/*
 * Pass the character c to the encoding's write routine, together with the
 * caller's buffer pointer and remaining-size variables.
 *
 * Only valid on an instance created with a non-zero for_encoding; for a
 * NULL instance or one created for decoding, nothing is written and 0 is
 * returned.  Otherwise the write routine's result is returned unchanged.
 */
int encoding_write(Encoding *e, UCS4 c, char **buf, int *bufsize)
{
    if (e == NULL || !e->for_encoding)
        return 0;

    return e->write(e, c, buf, bufsize);
}

/* ----------------------------------------------------------------------------- */

/*
 * Return 1 if c may appear in a MIME token, 0 otherwise.
 *
 * A token character is any printable, non-space ASCII character
 * (0x21..0x7E) that is not one of the "tspecials" listed below.
 */
static int mime_token_char(UCS4 c)
{
    static const char tspecials[] = "()<>@,;:\\\"/[]?=";

    if (c < 0x21 || c > 0x7E)
        return 0;

    /* c cannot be 0 here, so strchr never matches the string terminator */
    return strchr(tspecials, (int) c) == NULL;
}

/*
 * Look up an encoding by name and return its identifier from enclist.
 *
 * The name is taken to be the leading run of MIME token characters in
 * 'name' (so it ends at the first space, control character, tspecial or
 * the terminating NUL) and is compared case-insensitively for ASCII
 * letters.
 *
 * Returns 0 if name is NULL, if the temporary buffer cannot be allocated,
 * or if no table entry lists the name.
 */
int encoding_number_from_name(const char *name)
{
    const char *end;
    char *key;
    struct EncList *e;
    size_t len, i;

    if (!name)
        return 0;

    /* Find the end of the token */
    end = name;
    while (mime_token_char((unsigned char) *end))
        end++;
    len = (size_t) (end - name);

    /* Room for '/' + token + '/' + terminator */
    key = (char *) encoding__alloc(len + 3);
    if (!key) return 0;

    /*
     * Build "/TOKEN/" directly.  This avoids handing a pointer difference
     * to printf's "%.*s", which requires an int.
     * Yuck, but it's simple, and we're only dealing with ASCII.
     */
    key[0] = '/';
    for (i = 0; i < len; i++)
    {
        char ch = name[i];

        if (ch >= 'a' && ch <= 'z')
            ch -= 0x20;
        key[i + 1] = ch;
    }
    key[len + 1] = '/';
    key[len + 2] = '\0';

    /* The delimiting slashes make strstr match whole names only */
    for (e = enclist; e->identifier; e++)
        if (strstr(e->names, key))
            break;

    encoding__free(key);

    /* If nothing matched, e is the terminator, whose identifier is 0 */
    return e->identifier;
}

/* To be supplied by the application: fills 'out' with the file name to use
 * for the map file called 'leaf'.  No buffer size is passed; the caller in
 * this file, encoding_load_map_file(), provides a 1024-byte buffer. */
extern void encoding_leaf_to_path(char *out, const char *leaf);

/*
 * Load the map file called 'leaf' (turned into a full name by the
 * application's encoding_leaf_to_path()) and return a pointer to its data.
 *
 * If the file lives in ResourceFS the pointer returned is the filing
 * system's internal handle, used as a direct pointer to the data.
 * Otherwise a block of (length + 4) bytes is obtained from encoding__alloc(),
 * its first word is set to (length + 4), the file is loaded after that word
 * and a pointer to the loaded data (4 bytes into the block) is returned.
 * Either way encoding_n_table_entries() reads the word just before the
 * returned pointer.
 *
 * Returns NULL if the object is not a plain file, if any of the SWIs fail,
 * or if memory cannot be allocated.
 */
void *encoding_load_map_file(const char *leaf)
{
    int ftype = 0, flen = 0, fh = 0, filesys = 0;
    void *inthandle = NULL;
    char fname[1024];
    _kernel_oserror *e;
    void *table;

    encoding_leaf_to_path(fname, leaf);

    /* Check it's a file, and get its length */
    e = _swix(OS_File, _INR(0,1)|_OUT(0)|_OUT(4), 23, fname, &ftype, &flen);
    if (e || ftype != 1)
        return NULL;

    /* Open the file for input */
    e = _swix(OS_Find, _INR(0,1)|_OUT(0), 0x4F, fname, &fh);
    if (e)
        return NULL;

    /* Obtain the filing system number and internal handle */
    e = _swix(OS_FSControl, _INR(0,1)|_OUTR(1,2), 21, fh, &inthandle, &filesys);

    /* Close the file whether or not that worked, so the handle can't leak */
    _swix(OS_Find, _INR(0,1), 0, fh);

    if (e)
        return NULL;

    /* If it's ResourceFS, return a direct pointer */
    if ((filesys & 0xFF) == 46)
        return inthandle;

    /* Otherwise, load it, leaving room for the length word in front */
    table = encoding__alloc(flen+4);
    if (!table)
        return NULL;

    e = _swix(OS_File, _INR(0,3), 16, fname, (char *)table+4, 0);
    if (e)
    {
        encoding__free(table);
        return NULL;
    }

    /* Length word counts itself as well as the data */
    *(int *)table = flen+4;

    return (char *)table + 4;
}

/* Allocation hooks used for every allocation made in this file.  They start
 * out as the C library's malloc and free, and can be replaced by calling
 * encoding_set_alloc_fns(). */
encoding_alloc_fn encoding__alloc = malloc;
encoding_free_fn encoding__free = free;

/*
 * Replace the allocation and release routines used by this file.  The
 * pointers are stored as given, with no checking.
 */
void encoding_set_alloc_fns(encoding_alloc_fn alloc, encoding_free_fn free)
{
    /* NB: inside this function 'free' is the parameter, not the C library's */
    encoding__free = free;
    encoding__alloc = alloc;
}


/* The table size is the number of bytes, including the length word.
 * It needs converting to the number of 16-bit entries.
 */

int encoding_n_table_entries(const UCS2 *table)
{
    const int *length_word;

    if (!table)
        return 0;

    /* The word immediately before the table holds its size in bytes,
     * counting that word itself */
    length_word = (const int *)table - 1;

    /* Drop the 4-byte length word, then count 2-byte entries */
    return (*length_word - 4) / 2;
}

/*
 * Search the first n_entries entries of table for the character u.
 *
 * Returns the index of the first matching entry, or -1 if there is no
 * match or table is NULL.
 */
int encoding_lookup_in_table(UCS4 u, const UCS2 *table, int n_entries)
{
    int index;

    if (table == NULL)
        return -1;

    for (index = 0; index < n_entries; index++)
    {
        if ((UCS4) table[index] == u)
            return index;
    }

    return -1;
}

/* eof encoding.c */