/* Copyright 1998 Acorn Computers Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "Unicode/iso10646.h"

/*
 * One entry of the character-name table: a character code paired with its
 * name.
 */
typedef struct charinfo
{
    /* Character code; parse_line takes it from the first field of a line. */
    UCS4 code;
    /* NUL-terminated name: a heap copy made by parse_line, or a string
     * literal for the extra entry added by load_unidata. */
    char *name;
} charinfo;

/* Number of entries of charlist currently in use. */
int numchars=0;
/* The name table. Filled by parse_line and sorted by name in load_unidata,
 * so that UCS_from_name can use bsearch on it. */
static charinfo charlist[0x10000];

/* Error reporting hook, called with a short message. The only definition
 * visible in this file sits inside an #if 0 block, so the live one is
 * presumably supplied by another translation unit. */
void error(const char *p);

static void parse_line(char *s)
{
    char *sp = s, *dp;
    UCS4 code;
    char name[256];

    while (*sp != ';' && *sp != '\0')
        sp++;
    *sp='\0';

    code = (UCS4) strtol(s, NULL, 16);
    sp++;

    dp = name;
    while (*sp != ';' && *sp != '\0')
        *dp++=*sp++;
    *dp='\0';

    if (name[0]=='<') return;

    sp = malloc(dp+1-name);

    if (sp)
    {
        charlist[numchars].code=code;
        charlist[numchars++].name=sp;
        strcpy(sp, name);
    }
}

/*
 * qsort comparison callback: orders two charinfo entries by name, in plain
 * strcmp order. Both entries must have a non-NULL name, because the names
 * are handed straight to strcmp.
 */
static int cmp_name(const void *p1, const void *p2)
{
    const charinfo *lhs = p1;
    const charinfo *rhs = p2;

    return strcmp(lhs->name, rhs->name);
}

/*
 * bsearch comparison callback: p1 is the search key, a NUL-terminated name;
 * p2 is the charinfo entry being probed. The result follows strcmp, matching
 * the order established by cmp_name.
 */
static int cmp_name2(const void *p1, const void *p2)
{
    const char *wanted = p1;
    const charinfo *entry = p2;

    return strcmp(wanted, entry->name);
}

/*
 * load_unidata
 *
 * Read the character database in 'filename' into charlist and sort the table
 * by name, ready for lookups with bsearch.
 *
 *   filename - path of the text file to read.
 *
 * If the file cannot be opened, a message is printed with perror and the
 * process exits with status 1.
 *
 * Lines starting with ';' and lines that contain no ';' at all (blank lines,
 * for instance) are skipped; every other line is passed to parse_line.
 *
 * After reading, one extra entry with code 0xFFFFFFFF and name "-" is
 * appended, provided the table still has room for it. Note that it takes
 * part in the sort like any other entry.
 */
void load_unidata(const char *filename)
{
    const size_t capacity = sizeof charlist / sizeof charlist[0];
    FILE *f;
    char buffer[512];

    f = fopen(filename, "r");
    if (!f)
    {
        perror("fopen");
        exit(1);
    }

    while (fgets(buffer, sizeof buffer, f) != NULL)
    {
        /* A record needs at least one field separator; fgets never hands
         * back an empty string, so blank lines show up here as "\n". */
        if (buffer[0]==';' || strchr(buffer, ';') == NULL)
            continue;
        parse_line(buffer);
    }

    fclose(f);

    /* Never write past the end of the table. */
    if (numchars >= 0 && (size_t) numchars < capacity)
    {
        charlist[numchars].code=0xFFFFFFFF;
        charlist[numchars].name="-";
        numchars++;
    }

    qsort(charlist, numchars, sizeof charlist[0], cmp_name);
}

/*
 * get_choseong
 *
 * Read the leading-consonant part of a Hangul syllable name.
 *
 *   p - in/out: points at the text to parse; advanced past whatever was
 *       consumed.
 *
 * Returns the code of the matching leading jamo, 0x1100 plus its index in
 * the table below. When the text does not start with any consonant name,
 * nothing is consumed and 0x110B is returned: that is index 11, the entry
 * with the empty name.
 *
 * The initial RIEUL (U+1105) is spelled "R" in standard Hangul syllable
 * names (e.g. HANGUL SYLLABLE RA). "L" is also accepted for that jamo, as
 * earlier versions of this function recognised only that spelling.
 */
static UCS4 get_choseong(const char **p)
{
    static const char list[19][3] = { "G", "GG", "N", "D", "DD", "R", "M", "B", "BB",
                                      "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H" };

    int i;

    /* Scan from the end: every doubled consonant follows its single form in
     * the table, so "GG" is tried before "G", and so on. */
    for (i=18; i>=0; i--)
    {
        size_t len = strlen(list[i]);

        if (len == 0) continue;
        if (strncmp(*p, list[i], len)==0)
        {
            (*p)+=len;
            return i+0x1100;
        }
    }

    /* Alternative spelling of the initial RIEUL. */
    if (**p == 'L')
    {
        (*p)++;
        return 0x1105;
    }

    return 0x110B;
}

/*
 * get_jungseong
 *
 * Read the vowel part of a Hangul syllable name.
 *
 *   p - in/out: points at the text to parse; advanced past the vowel name on
 *       success, left untouched on failure.
 *
 * The longest table entry that is a prefix of the text wins, so "YAE" beats
 * "YA". Returns 0x1161 plus the index of that entry. If no entry matches,
 * error("Bad jungseong") is called and NULL_UCS4 is returned.
 */
static UCS4 get_jungseong(const char **p)
{
    static const char list[21][4] = { "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O",
                                      "WA", "WAE", "OE" , "YO", "U", "WEO", "WE", "WI",
                                      "YU", "EU", "YI", "I" };
    const char *text = *p;
    size_t best_len = 0;
    int best = -1;
    int i;

    for (i = 0; i < 21; i++)
    {
        size_t len = strlen(list[i]);

        /* Only a strictly longer match replaces the current candidate. */
        if (len > best_len && strncmp(text, list[i], len) == 0)
        {
            best_len = len;
            best = i;
        }
    }

    if (best == -1)
    {
        error("Bad jungseong");
        return NULL_UCS4;
    }

    *p = text + best_len;
    return best + 0x1161;
}

static UCS4 get_jongseong(const char **p)
{
    static const char list[27][3] = { "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM",
                                      "LB", "LS", "LT", "LP", "LH", "M", "B", "BS",
                                      "S", "SS", "NG", "J", "C", "K", "T", "P", "H" };
    int i;

    if (**p == '\0')
        return 0;

    for (i=26; i>=0; i--)
    {
        if (strncmp(*p, list[i], strlen(list[i]))==0)
        {
            (*p)+=strlen(list[i]);
            return i+0x11A8;
        }
    }

    error("Bad jungseong");
    return NULL_UCS4;
}

/*
 * hangul_syllable_code
 *
 * Convert the short-name part of a Hangul syllable name (the text after
 * "HANGUL SYLLABLE ") into the code of the precomposed syllable.
 *
 *   p - NUL-terminated text: leading consonant (may be absent), vowel, and
 *       an optional trailing consonant, with nothing after them.
 *
 * Returns the syllable code, or NULL_UCS4 after the problem has been
 * reported through error().
 */
static UCS4 hangul_syllable_code(const char *p)
{
    UCS4 L, V, T;

    L=get_choseong(&p);

    V=get_jungseong(&p);
    /* get_jungseong has already reported the problem. Stop here: carrying
     * on would feed NULL_UCS4 into the arithmetic below and could return a
     * meaningless code for input such as a lone consonant. */
    if (V==NULL_UCS4)
        return NULL_UCS4;

    T=get_jongseong(&p);

    /* No trailing consonant: 0x11A7 makes the T term below come out as 0. */
    if (T==0)
        T=0x11A7;

    /* Anything left over, including a trailing part that get_jongseong
     * could not match, makes the name invalid. */
    if (*p != '\0') {
        error("Bad hangul syllable");
        return NULL_UCS4;
    }

    /* 21 vowels per leading consonant, 28 trailing slots per vowel. */
    return ((L-0x1100)*21 + (V-0x1161))*28 + (T-0x11A7) + 0xAC00;
}

static UCS4 extract_code(const char *p)
{
    char *end;

    UCS4 c = (UCS4) strtol(p, &end, 16);
    if (*end != '\0')
    {
        error("Bad hex");
        return NULL_UCS4;
    }

    return c;
}


UCS4 UCS_from_name(const char *name)
{
    charinfo *ci;

    if (strncmp(name, "CJK UNIFIED IDEOGRAPH-", 22)==0) {
        UCS4 c=extract_code(name+22);
        if (c<0x4E00 || c>0x9FA5)
            error("Bad ideograph");
        return c;
    }

    if (strncmp(name, "HANGUL SYLLABLE ", 16)==0)
        return hangul_syllable_code(name+16);

    if (strncmp(name, "U+", 2)==0) {
        UCS4 c=extract_code(name+2);
        if (c > 0x7FFFFFFF)
            error("Bad UCS");
        return c;
    }

    ci = (charinfo *) bsearch(name, charlist, numchars, sizeof charlist[0], cmp_name2);
    if (!ci)
    {
        error("Unknown character");
        return NULL_UCS4;
    }

    return ci->code;
}

/*
 * name_from_UCS
 *
 * Translate a code into a character name.
 *
 *   u - the code to look up.
 *
 * Returns
 *   - "CJK UNIFIED IDEOGRAPH-<hex>" for codes 4E00..9FA5,
 *   - "U+<hex>" for Hangul syllables (AC00..D7A3), whose names are not
 *     generated here, and for codes that are not in the table,
 *   - otherwise the name stored in the table for the first entry with that
 *     code.
 *
 * Generated names live in a static buffer: the pointer stays valid after the
 * function returns, but the text is overwritten by the next call that has to
 * generate a name, and the function is therefore not reentrant. Generated
 * names carry no trailing newline, so they can be passed back to
 * UCS_from_name.
 */
const char *name_from_UCS(UCS4 u)
{
    /* Static, because the address is returned to the caller. */
    static char buffer[256];
    int i;

    if (u >= 0x4E00 && u <= 0x9FA5)
    {
        /* The cast keeps the argument in step with the conversion
         * specifier whatever integer type UCS4 turns out to be. */
        sprintf(buffer, "CJK UNIFIED IDEOGRAPH-%lX", (unsigned long) u);
        return buffer;
    }

    /* The table is sorted by name, not by code, hence the linear scan.
     * Hangul syllables are not looked up at all. */
    if (!(u >= 0xAC00 && u <= 0xD7A3))
    {
        for (i=0; i<numchars; i++)
            if (charlist[i].code == u)
                return charlist[i].name;
    }

    sprintf(buffer, "U+%lX", (unsigned long) u);
    return buffer;
}

#if 0
/*
 * Stand-alone test harness, compiled out by default. It supplies its own
 * error() and a main() that loads "UniData215", then reads character names
 * from standard input, one per line, and prints the code found for each.
 */

/* Print the message on stderr and return to the caller (no exit). */
void error(const char *p)
{
    fputs(p, stderr);
    fputc('\n', stderr);
}

int main(void)
{
    char buffer[512];

    load_unidata("UniData215");

    /* fgets bounds the read and reports end of input, which ends the loop. */
    while (fgets(buffer, sizeof buffer, stdin) != NULL)
    {
        /* Drop the line ending so that it is not taken as part of the name. */
        buffer[strcspn(buffer, "\r\n")] = '\0';

        printf("%08lX\n", (unsigned long) UCS_from_name(buffer));
    }

    return 0;
}

#endif