/* Copyright 1997 Acorn Computers Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/***************************************************/
/* File   : URLutils.c                             */
/*                                                 */
/* Purpose: URL manipulation for the browser.      */
/*                                                 */
/* Author : Merlyn Kline for Customer browser     */
/*          This source adapted by A.D.Hodgkinson  */
/*          from various original functions        */
/*                                                 */
/* History: 06-Feb-97: Created.                    */
/***************************************************/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include "swis.h"

#include "URI.h" /* URI handler API, in URILib:h */

#include "wimp.h"
#include "event.h"

#include "svcprint.h"
#include "Global.h"
#include "Utils.h"

#include "Fetch.h" /* (Which itself includes URLstat.h) */
#include "Filetypes.h"

#include "URLutils.h"

/* Local definitions */

#define ExtensionMatches(url, len, ext) (len) > strlen(ext) && !strncmp((url) + (len) - strlen(ext), (ext), strlen(ext))

/* Local variables */

/* Pointer to first item in queue of URLs dispatched through */
/* the URI handler (structure defined in URLutils.h)         */

static uri_queue * uri_queue_base = NULL;

/*************************************************/
/* urlutils_pathname_to_url()                    */
/*                                               */
/* Takes a pathname, and turns it into a File    */
/* URL, if it isn't one already. The pathname    */
/* that you give is altered directly, so if you  */
/* want to remember the path as well as the URL, */
/* ensure there is a second copy of it in        */
/* another buffer somewhere.                     */
/*                                               */
/* Parameters: Pointer to the pathname;          */
/*                                               */
/*             Size of the buffer the pathname   */
/*             is stored in.                     */
/*************************************************/

void urlutils_pathname_to_url(char * path, int buffersize)
{
  int len;

  len = strlen(FileMethod ProtocolSepShort); /* (urlutils.h) */

  /* If the first part of the string doesn't match the FileMethod */
  /* specifier (see URLutils.h) then insert this text and convert */
  /* the rest of the path to a URL.                               */

  if (strncmp(path, FileMethod ProtocolSepShort, len))
  {
    memmove(path + len, path, buffersize - len);
    strncpy(path, FileMethod ProtocolSepShort, len);

    /* Ensure the string is terminated */

    path[buffersize - 1] = 0;

    /* Now translate the pathname part of the URL to a Unix-style */
    /* path scheme.                                               */

    urlutils_translate_pathname(path + len);
  }

  return;
}

/*************************************************/
/* urlutils_url_to_pathname()                    */
/*                                               */
/* Takes a file:// URL, and turns it into a RISC */
/* OS pathname, if it isn't one already. The URL */
/* that you give is altered directly, so if you  */
/* want to remember the URL as well as the new   */
/* path, ensure there is a second copy of it in  */
/* another buffer somewhere.                     */
/*                                               */
/* Parameters: Pointer to the URL;               */
/*                                               */
/*             Size of the buffer the URL is     */
/*             stored in.                        */
/*************************************************/

void urlutils_url_to_pathname(char * url, int buffersize)
{
  int len;

  len = strlen(FileMethod); /* (urlutils.h) */

  /* If the first part of the string doesn't match the FileMethod */
  /* specifier (see URLutils.h) then we can't do anything.        */

  if (!url || strncmp(url, FileMethod, len)) return;

  /* Copy over the file: specifier and any '/'s */

  while (url[len] == '/') len++;

  memmove(url, url + len, buffersize - len);

  /* Convert the path component of the URL back to RISC OS style */

  urlutils_translate_pathname(url);

  return;
}

/*************************************************/
/* urlutils_translate_pathname()                 */
/*                                               */
/* Takes a RISC OS-style pathname and turns it   */
/* into a Unix-style pathname, or vice versa.    */
/*                                               */
/* The pathname you give is altered directly, so */
/* if you want to remember the path before       */
/* translation, ensure there is a second copy of */
/* in another buffer somewhere.                  */
/*                                               */
/* Parameters: Pointer to the pathname.          */
/*************************************************/

void urlutils_translate_pathname(char * path)
{
  char * p;

  p = path;

  /* Skip past any filing system separators (e.g. as in the */
  /* colons in 'ADFS::<disc>.<path>').                      */

  while (*p && *p != ':') p++;

  /* Swap '/' for '.' */

  while (*p)
  {
    if      (*p == '/') *p = '.';
    else if (*p == '.') *p = '/';

    p++;
  }

  return;
}

/*************************************************/
/* urlutils_leafname_from_url()                  */
/*                                               */
/* Returns a pointer to a string containing a    */
/* possible leafname, based upon the URL passed  */
/* into the function.                            */
/*                                               */
/* Parameters: Pointer to a URL string;          */
/*                                               */
/*             Pointer to a buffer into which to */
/*             place the leafname (not the same  */
/*             as the URL string);               */
/*                                               */
/*             Size of the buffer.               */
/*                                               */
/* Returns:    Will fill the buffer in with some */
/*             leafname, even if one could not   */
/*             be worked out from the URL.       */
/*             Returns the buffer pointer for    */
/*             convenience (even though the      */
/*             caller will almost certainly know */
/*             this).                            */
/*                                               */
/* Assumes:    Neither pointer may be NULL. The  */
/*             buffer must be at least 2 bytes   */
/*             in size. If either condition is   */
/*             not met, NULL is returned and the */
/*             buffer is left untouched.         */
/*************************************************/

char * urlutils_leafname_from_url(char * url, char * leaf, int size)
{
  int l = 0;

  if (!url || !leaf || size < 2) return NULL;

  memset(leaf, 0, size);

  /* l holds the string length if b->urlfdata exists */

  l = (int) strlen(url);

  /* If the string exists and is not null, try to extract */
  /* a leafname from it.                                  */

  if (l)
  {
    /* Set 'e' to point at the last character in the string, */
    /* and we will use 's' to point to the first character   */
    /* after the host name.                                  */

    int e     = l - 1;
    int s     = 0;
    int found = 0;

    /* We want to do the following, bearing in mind that whilst */
    /* a protocol at the start of the URL is assumed, it may be */
    /* separated by nothing more than a colon (not even ':/' or */
    /* '://'). The use of square brackets implies one or more   */
    /* analogous optional terms.                                */
    /*                                                          */
    /* a.b.c/[dir/]name.html  ->  name                          */
    /* a.b.c/dir[/]           ->  dir                           */
    /* a.b.c[/]               ->  Generic                       */
    /* a.b.c/name.txt         ->  name                          */
    /* a.b.c/name.tar.gz      ->  name/tar (as a consequence)   */
    /* a.b.c/name_1.2.1.gz    ->  name/1/2/1 (as a consequence) */
    /* a.b.c/name.html#anc    ->  anc                           */
    /*                                                          */
    /* The tricky part is determining what bit is host name and */
    /* what bit is path; it gets easier after that. First, look */
    /* for a ':'. This will either be a protocol separator, or  */
    /* a port number separator. Trouble is, you need to skip    */
    /* any '/'s after the ':' if it's the first case, so check  */
    /* if any '.'s are passed when looking for the ':'. If so,  */
    /* we're on a port number. The only possible failure case   */
    /* is a URL with no protocol specified and a single word    */
    /* host name with a port number; but then, just about every */
    /* other system will fail as the host name will be taken to */
    /* be the protocol (e.g. 'dylan:8080'). So this isn't a     */
    /* problem, really - we'd return a generic response.        */

    while (s < l && url[s] != ':')
    {
      if (url[s] == '.') found = 1;
      s++;
    }

    /* Did we run out of string? If so, go back to the start; */
    /* we may just have no protocol or port specified.        */

    if (s == l) s = 0;

    /* If we've not found any dots, may need to skip a few slashes;  */
    /* but not more than two, as three slashes (for example) implies */
    /* that no host name is present.                                 */

    if (!found)
    {
      s++; /* Skip past the ':' */

      if (url[s] == '/') s++;
      if (url[s] == '/') s++;

      /* If we ran out of string, must use a generic response */

      if (s == l) goto return_generic;
    }

    /* We're either now on the ':' separating the host name */
    /* and port number, or on the first character after any */
    /* '/'s or the ':' separating the protocol from the     */
    /* host name. In either case, we now search forward for */
    /* a single slash - the separator between host name and */
    /* path.                                                */

    while (s < l && url[s] != '/')
    {
      s++;
    }

    /* Skip past the '/' */

    if (url[s] == '/') s++;

    /* If we're run out string, go for a generic response - */
    /* e.g. 'http://www/' or just 'http://www'.             */

    if (s >= l) goto return_generic;

    /* Otherwise, we can now start searching backwards for a */
    /* usable leafname, knowing that reaching 's' is the     */
    /* exit condition. First, look for anchor names.         */

    if (strrchr(url, '#') > strrchr(url, '/')) /* Want 'a/b#c' -> 'c', but not 'a#b/c' -> 'b/c' */
    {
      while (e > s && url[e] != '#')
      {
        e--;
      }

      /* Step forward past the '#' */

      e++;

      /* If it turns out that the '#' is all that there is after */
      /* the host name, it may be possible to step back and get  */
      /* whatever comes before it; or if we hit 's' in trying,   */
      /* we'll have to return a generic response.                */

      if (e == l)
      {
        e --;

        if (e <= s) goto return_generic;
        else        goto get_directory_name;
      }

      /* Right, copy the string from this point forwards */

      strncpy(leaf, url + e, size - 1);

      /* Finished, so jump to the final stripping routine */

      goto strip_illegal_chars;
    }

    /* Right, if we have a trailing '/' or '#' (from the above code), */
    /* following a directory name, then get that directory name.      */

get_directory_name:

    if (url[e] == '/' || url[e] == '#')
    {
      int chars;

      found = e--;

      /* Find the start point of the name */

      while (e > s && url[e] != '/') e--;

      /* If e is greater than s, we're sitting on the '/' */
      /* found above, so advance past it.                 */

      if (e > s) e++;

      /* Copy the name in */

      chars = found - e;
      if (chars > size - 1) chars = size - 1;
      strncpy(leaf, url + e, chars);

      /* Finished - just need to take care of any illegal characters */

      goto strip_illegal_chars;
    }

    /* Otherwise, continue, looking for a '/'. If a '.' is found on the */
    /* way, remember the offset for the first time it is encountered.   */

    found = 0;

    while (e > s && url[e] != '/')
    {
      if (url[e] == '.' && !found) found = e;
      e--;
    }

    if (url[e] == '/')
    {
      e++;
      if (e >= l) goto return_generic;
    }

    /* We now have 'e' pointing to the start of a leafname */
    /* and possibly 'found' pointing to the start of a     */
    /* filename extension. If the latter is true, check    */
    /* that there is something between the two...          */

    if (found && found - 1 == e) goto return_generic;

    /* OK, we have a string */

    if (!found)
    {
      strncpy(leaf, url + e, size - 1);
    }
    else
    {
      int chars = found - e;

      if (chars > size - 1) chars = size - 1;
      strncpy(leaf, url + e, chars);
    }

    /* Right, that's the worst of it over...! */

    goto strip_illegal_chars;

return_generic:

    lookup_token("NoURLleaf:Index",0,0);
    strncpy(leaf, tokens, size - 1);
  }

  /* There was apparently no URL in the buffer, so offer a */
  /* neutral filename of HTMLfile.                         */

  else
  {
    lookup_token("NoURLdata:HTMLfile",0,0); /* Will put the string in the 'tokens' global buffer */
    strncpy(leaf, tokens, size - 1);
  }

strip_illegal_chars:

  /* Scan the leaf for illegal characters */

  l = 0;

  while (leaf[l])
  {
    /* A few we can replace with meaningful alternatives */

    if (leaf[l] == '.')  leaf[l] = '/';
    if (leaf[l] == '\\') leaf[l] = '/';
    if (leaf[l] == '&')  leaf[l] = '+';
    if (leaf[l] == '"')  leaf[l] = '\'';

    /* And the rest, replace with underscores */

    if (
         leaf[l] == '$' ||
         leaf[l] == '%' ||
         leaf[l] == '@' ||
         leaf[l] == '^' ||
         leaf[l] == ':' ||
         leaf[l] == '#' ||
         leaf[l] == '*' ||
         leaf[l] == '"' ||
         leaf[l] == '|'
       )
       leaf[l] = '_';

    l++;
  }

  /* Finished. */

  return leaf;
}

/*************************************************/
/* urlutils_host_name_from_url()                 */
/*                                               */
/* Extracts the host name from a given URL.      */
/*                                               */
/* Parameters: Pointer to the URL string;        */
/*                                               */
/*             Pointer to a buffer to write the  */
/*             host name into;                   */
/*                                               */
/*             Size of the buffer.               */
/*************************************************/

void urlutils_host_name_from_url(char * url, char * host, int size)
{
  char * p;

  host[0] = 0;

  /* First look for '//', as in 'http://' */

  p = strstr(url, ProtocolSeparator);

  if (p)
  {
    /* If found, copy everything after that into 'host' */

    p += 2;
    strncpy(host, p, size - 1);
    host[size - 1] = 0;

    /* Now search for a '/', as in 'http://www.acorn/', */
    /* and if found force a terminator there.           */

    p = strchr(host, '/');
    if (p) *p = 0;
  }
}

/*************************************************/
/* urlutils_filetype_from_url()                  */
/*                                               */
/* Examines a URL and returns a RISC OS filetype */
/* based on the filename extension in the URL.   */
/*                                               */
/* Parameters: Pointer to a null-terminated URL  */
/*             string.                           */
/*                                               */
/* Returns:    A RISC OS filetype.               */
/*************************************************/

int urlutils_filetype_from_url(const char * url)
{
  // For now, hard code it. In future, use a mime mapper module.

  int len;

  if (!url || !*url) return NULL;
  else len = strlen(url);

  /* Document types */

  if (ExtensionMatches(url, len, ".html"))  return FileType_HTML;
  if (ExtensionMatches(url, len, ".htm"))   return FileType_HTML;
  if (ExtensionMatches(url, len, ".txt"))   return FileType_TEXT;
  if (ExtensionMatches(url, len, ".shtml")) return FileType_HTML;
  if (ExtensionMatches(url, len, ".shtm"))  return FileType_HTML;
  if (ExtensionMatches(url, len, ".pdf"))   return FileType_PDF;
  if (ExtensionMatches(url, len, ".doc"))   return FileType_WORD;
  if (ExtensionMatches(url, len, ".ps"))    return FileType_PS;
  if (ExtensionMatches(url, len, ".eps"))   return FileType_PS;
  if (ExtensionMatches(url, len, ".wri"))   return FileType_DOS;
  if (ExtensionMatches(url, len, ".xls"))   return FileType_XLS;

  /* Images */

  if (ExtensionMatches(url, len, ".gif"))   return FileType_GIF;
  if (ExtensionMatches(url, len, ".jpg"))   return FileType_JPEG;
  if (ExtensionMatches(url, len, ".jpeg"))  return FileType_JPEG;
  if (ExtensionMatches(url, len, ".tiff"))  return FileType_TIFF;
  if (ExtensionMatches(url, len, ".tif"))   return FileType_TIFF;
  if (ExtensionMatches(url, len, ".png"))   return FileType_PNG;

  /* Archives */

  if (ExtensionMatches(url, len, ".zip"))   return FileType_ARC;
  if (ExtensionMatches(url, len, ".arc"))   return FileType_ARC;
  if (ExtensionMatches(url, len, ".spk"))   return FileType_ARC;
  if (ExtensionMatches(url, len, ".arj"))   return FileType_ARC;
  if (ExtensionMatches(url, len, ".gz"))    return FileType_GZ;
  if (ExtensionMatches(url, len, ".tar"))   return FileType_ARC;
  if (ExtensionMatches(url, len, ".zoo"))   return FileType_ARC;

  /* Sounds */

  if (ExtensionMatches(url, len, ".wav"))   return FileType_WAVE;
  if (ExtensionMatches(url, len, ".arm"))   return FileType_ARMA;
  if (ExtensionMatches(url, len, ".mod"))   return FileType_MOD;

  /* Movies */

  if (ExtensionMatches(url, len, ".mov"))   return FileType_AVI;
  if (ExtensionMatches(url, len, ".avi"))   return FileType_AVI;
  if (ExtensionMatches(url, len, ".qt"))    return FileType_AVI;
  if (ExtensionMatches(url, len, ".qtvr"))  return FileType_AVI;
  if (ExtensionMatches(url, len, ".rpl"))   return FileType_ARMO;
  if (ExtensionMatches(url, len, ".rep"))   return FileType_ARMO;

  /* Miscellaneous */

  if (ExtensionMatches(url, len, ".bin"))   return FileType_DATA;
  if (ExtensionMatches(url, len, ".dat"))   return FileType_DATA;
  if (ExtensionMatches(url, len, ".data"))  return FileType_DATA;
  if (ExtensionMatches(url, len, ".exe"))   return FileType_DOS;
  if (ExtensionMatches(url, len, ".com"))   return FileType_DOS;

  /* Otherwise, return text */

  return FileType_TEXT;
}

/*************************************************/
/* urlutils_create_hotlist_url()                 */
/*                                               */
/* Creates a URL though which a hotlist file may */
/* be fetched. This is done by looking at a      */
/* system variable 'Browse$HotlistURL'. If that  */
/* isn't set it looks at 'Browse$HotlistURIFile' */
/* which can hold the path of a URI file to      */
/* load. Lastly, it looks at the Choices file    */
/* token 'HotlistPath', where a RISC OS pathname */
/* pointing to the file should be placed. This   */
/* will be turned into a URL for fetching, so    */
/* care must be taken over the path used.        */
/*                                               */
/* Parameters: Pointer to a buffer to place the  */
/*             URL in (not in a flex block!);    */
/*                                               */
/*             Size of the buffer.               */
/*************************************************/

void urlutils_create_hotlist_url(char * buffer, int size)
{
  _kernel_oserror * e;

  /* See if the variable exists. */

  memset(buffer, 0, size);

  /* Equivalent to getenv, but the RISC OS implementation evaluates  */
  /* the system variable as an expression which we don't want; hence */
  /* the direct use of the SWI.                                      */

  e = _swix(OS_ReadVarVal,
            _INR(0,4),

            "Browse$HotlistURL", /* Variable name                      */
            buffer,              /* Buffer                             */
            size,                /* Buffer size, -1 to check it exists */
            0,                   /* Name pointer (0 for 1st call)      */
            4);                  /* Variable type (4 = literal string) */

  /* First lookup failed, so try the URI file. */

  if (e)
  {
    e = _swix(OS_ReadVarVal,
              _INR(0,4),

              "Browse$HotlistURIFile",
              buffer,
              size,
              0,
              4);

    if (e)
    {
      /* If the above gives an error, the variable doesn't exist; get */
      /* the HotlistPath string from the Messages file instead.       */

      strncpy(buffer, lookup_choice("HotlistPath",1,0), size - 1);
      urlutils_pathname_to_url(buffer, size);
    }
    else
    {
      char path[Limits_OS_Pathname];

      StrNCpy0(path, buffer);
      urlutils_load_uri_file(buffer, size, path);
    }
  }
}

/*************************************************/
/* urlutils_create_home_url()                    */
/*                                               */
/* Creates a URL though which a home page may be */
/* fetched. This is done by looking at a system  */
/* variable 'Browse$HomeURL'. If that isn't set, */
/* it looks at 'Browse$HomeURIFile', which can�  */
/* hold the path of a URI file to load. Lastly,  */
/* it looks at the Choices file token            */
/* 'HomePage'.                                   */
/*                                               */
/* Parameters: Pointer to a buffer to place the  */
/*             URL in (not in a flex block!);    */
/*                                               */
/*             Size of the buffer.               */
/*************************************************/

void urlutils_create_home_url(char * buffer, int size)
{
  _kernel_oserror * e;

  /* See if the variable exists */

  memset(buffer, 0, size);

  /* Equivalent to getenv, but the RISC OS implementation evaluates  */
  /* the system variable as an expression which we don't want; hence */
  /* the direct use of the SWI.                                      */

  e = _swix(OS_ReadVarVal,
            _INR(0,4),

            "Browse$HomeURL", /* Variable name                      */
            buffer,           /* Buffer                             */
            size,             /* Buffer size, -1 to check it exists */
            0,                /* Name pointer (0 for 1st call)      */
            4);               /* Variable type (4 = literal string) */

  /* First lookup failed, so try the URI file. */

  if (e)
  {
    e = _swix(OS_ReadVarVal,
              _INR(0,4),

              "Browse$HomeURIFile",
              buffer,
              size,
              0,
              4);

    if (e)
    {
      /* If the above gives an error, the variable doesn't exist; get */
      /* the HotlistPath string from the global choices structure     */
      /* instead.                                                     */

      strncpy(buffer, choices.home_page, size - 1);
    }
    else
    {
      char path[Limits_OS_Pathname];

      StrNCpy0(path, buffer);
      urlutils_load_uri_file(buffer, size, path);
    }
  }
}

/*************************************************/
/* urlutils_fix_url()                            */
/*                                               */
/* Takes a URL and 'fixes' it, e.g. appends a    */
/* '/' character to a URL which is missing one.  */
/* The contents of the buffer you give with the  */
/* URL inside are altered directly, so if you    */
/* want to remember the old URL, ensure there is */
/* a second copy of it in another buffer         */
/* somewhere.                                    */
/*                                               */
/* Parameters: Pointer to the URL;               */
/*                                               */
/*             Size of the buffer the URL is     */
/*             stored in.                        */
/*                                               */
/* Returns:    Pointer to the fixed URL (which   */
/*             at the moment is the buffer that  */
/*             you passed in).                   */
/*************************************************/

char * urlutils_fix_url(char * buffer, int buffersize)
{
  int len,  shl;
  int flen, fshl;
  int glen, gshl;
  int blen, plen;

  plen = strlen(ProtocolSeparator);

  shl  = strlen(HTTPmethod);
  len  = shl + plen;

  fshl = strlen(FTPmethod);
  flen = fshl + plen;

  gshl = strlen(GopherMethod);
  glen = gshl + plen;

  blen = strlen(buffer);

  /* If there's no ':' in the string, insert a protocol type */

  if (
       !strchr(buffer, ':')     &&
       blen + len  < buffersize &&
       blen + flen < buffersize
     )
  {
    /* If the site appears to be an FTP site, insert the FTP protocol */
    /* at the start. Similarly for Gopher; else insert HTTP.          */

    if (!strncmp(buffer, FTPmethod, fshl - 1)) /* -1 as we don't want to compare the ':' */
    {
      memmove(buffer + flen, buffer, buffersize - flen);
      strncpy(buffer, FTPmethod ProtocolSeparator, flen);
    }
    else if (!strncmp(buffer, GopherMethod, gshl - 1))
    {
      memmove(buffer + glen, buffer, buffersize - glen);
      strncpy(buffer, GopherMethod ProtocolSeparator, glen);
    }
    else
    {
      memmove(buffer + len, buffer, buffersize - len);
      strncpy(buffer, HTTPmethod ProtocolSeparator, len);
    }

    buffer[buffersize - 1] = 0;
  }

  /* If there are at least 2 unused bytes in the buffer, and the */
  /* front of the string matches the HTTPMethod specifier (again */
  /* this is defined at the top of this file) then search for a  */
  /* '/' character which isn't part of a '//' sequence. If none  */
  /* is found, append a '/'. This is why 2 bytes free are needed */
  /* - one for the '/', one for the string terminator.           */

  if (strlen(buffer) < buffersize - 2 && !strncmp(buffer, HTTPmethod, shl))
  {
    int i, s = 0;

    for (i = 0;
         !s && buffer[i] && (i < (buffersize - 1));
         i ++)
    {
      /* If we have a '/' but not a '//' sequence, mark this with s = 1 */

      if (buffer[i] == '/' && buffer[i + 1] != '/') s = 1;

      /* If at start of a '//' sequence, skip past it */

      else if (buffer[i] == '/') i++;
    }

    if (!s) strcat(buffer,"/");
  }

  return buffer;
}

/*************************************************/
/* urlutils_load_uri_file()                      */
/*                                               */
/* Loads a URI file. Will take ANT URL files     */
/* too. The given buffer will be filled with a   */
/* null-terminated URI from the file.            */
/*                                               */
/* The load terminates when the buffer is full   */
/* except for the last byte (to allow for a      */
/* forced terminator), or a control code is met  */
/* in the URI file. Note that the buffer is      */
/* initialised to hold null bytes before the URI */
/* file is opened.                               */
/*                                               */
/* If there is an error opening the file or the  */
/* file is empty, the contents of the buffer are */
/* undefined.                                    */
/*                                               */
/* Parameters: Pointer to the buffer;            */
/*                                               */
/*             Size of the buffer;               */
/*                                               */
/*             Pointer to the pathname of the    */
/*             URI file.                         */
/*                                               */
/* Assumes:    The buffer and path must NOT be   */
/*             the same area in memory.          */
/*************************************************/

void urlutils_load_uri_file(char * buffer, size_t size, char * path)
{
  FILE * fp;
  int    byte, counter = 0;
  int    type, found;

  /* Warning - heavy use of 'goto's coming up shortly...! */

  if (!size) return;
  memset(buffer, 0, size);

  /* Find the filetype */

  if (
       _swix(OS_File,
             _INR(0,1) | _OUT(0) | _OUT(6),

             23, /* Read catalogue info for named, stamped object */
             path,

             &found,
             &type)
     )
     return;

  /* 'found' should be 1 (a file, rather than not found, a directory, etc.). */

  if (found != 1) return;

  /* Don't load if not the correct type */

  if (type != FileType_URI && type != FileType_URL) return;

  /* Open the file */

  fp = fopen(path, "rb");

  if (fp)
  {
    byte = getc(fp);

    /* For URL files, go straight to URL reading - they're just */
    /* a terminated, or unterminated URL in a file.             */

    if (type == FileType_URL) goto urlutils_load_uri_file_read_uri;

    /* If it is a URI file, it must start with 'URI'. */

    if (byte != 'U') goto urlutils_load_uri_file_not_a_uri_file;

    byte = getc(fp);
    if (byte != 'R') goto urlutils_load_uri_file_not_a_uri_file;

    byte = getc(fp);
    if (byte != 'I') goto urlutils_load_uri_file_not_a_uri_file;

    /* Get to first character after the 'I' */

    byte = getc(fp);

    /* Now find the version number - skip white space */

urlutils_load_uri_file_skip_white_space_1:

    while (byte != EOF && byte < ' ') byte = getc(fp);

    /* If we've hit a hash, this is a comment line - terminated by white */
    /* space or end of file.                                             */

    if (byte == '#')
    {
      /* Now want to *find* the white space to skip the comment */

      while (byte != EOF && byte >= ' ') byte = getc(fp);

      /* If we're not at the end of the file, loop back and continue */
      /* skipping white space.                                       */

      if (byte != EOF) goto urlutils_load_uri_file_skip_white_space_1;
    }

    /* By now, we're either at EOF or the start of the first real entry */
    /* after 'URL', which will be the file version. URI file versions   */
    /* are guaranteed backwards compatible, so we can skip this bit.    */

    while (byte != EOF && byte >= ' ') byte = getc(fp);

    /* Now we want to get to the URI, so again, skip white space */
    /* and comment lines. After that, we'll be on the first byte */
    /* of the URI (or end of file, if it's broken).              */

urlutils_load_uri_file_skip_white_space_2:

    while (byte != EOF && byte < ' ') byte = getc(fp);

    if (byte == '#')
    {
      while (byte != EOF && byte >= ' ') byte = getc(fp);

      if (byte != EOF) goto urlutils_load_uri_file_skip_white_space_2;
    }

    /* Load the URI component - assume everything from here up */
    /* to any control chracter or EOF is part of the URI. Note */
    /* that 'byte' already contains the first character.       */

urlutils_load_uri_file_read_uri:

    while (byte != EOF && byte >= ' ' && counter < size - 1)
    {
      buffer[counter++] = byte;
      byte              = getc(fp);
    }

    fclose(fp);
  }

  return;

  /* Error condition exits */

urlutils_load_uri_file_not_a_uri_file:

  /* Not a URI / URL file */

  if (fp) fclose(fp);

  erb.errnum = Utils_Error_Custom_Message;

  StrNCpy0(erb.errmess,
           lookup_token("NotAURI:This is not a valid URI file.",
                        0,
                        0));

  show_error_ret(&erb);

  return;
}

/*************************************************/
/* urlutils_extract_uri()                        */
/*                                               */
/* Looks at a URI file loaded into a buffer, and */
/* overwrites it with the URL contents extracted */
/* from the body.                                */
/*                                               */
/* Parameters: Pointer to the buffer holding the */
/*             URI file;                         */
/*                                               */
/*             Size of the file (the buffer is   */
/*             assumed to be at least this size  */
/*             but not assumed to be larger).    */
/*                                               */
/* Returns:    Contents of the buffer are        */
/*             updated to hold a null terminated */
/*             URI.                              */
/*************************************************/

void urlutils_extract_uri(char * buffer, size_t file_size)
{
  int copy, counter;

  if (!buffer || !file_size) return;

  /* Ensure it starts with 'URI' */

  if (strncmp(buffer, "URI", 3)) goto urlutils_extract_uri_not_a_uri_file;

  /* Point to the first byte after the 'URI' indentifier */

  counter = 3;

  /* Skip white space to find the version number */

urlutils_extract_uri_skip_white_space_1:

  while (
          counter < file_size   &&
          buffer[counter]       &&
          buffer[counter] < ' '
        )
        counter++;

  /* If we find a comment, skip the comment body */

  if (buffer[counter] == '#')
  {
    while (
            counter < file_size    &&
            buffer[counter]        &&
            buffer[counter] >= ' '
          )
          counter++;

    /* Now go back to skip the white space after the comment, and */
    /* thus any other comments that follow.                       */

    if (buffer[counter]) goto urlutils_extract_uri_skip_white_space_1;
  }

  /* Skip the file version */

  while (
          counter < file_size    &&
          buffer[counter]        &&
          buffer[counter] >= ' '
        )
        counter++;

  /* Again, skip white space and comments */

urlutils_extract_uri_skip_white_space_2:

  while (
          counter < file_size   &&
          buffer[counter]       &&
          buffer[counter] < ' '
        )
        counter++;

  if (buffer[counter] == '#')
  {
    while (
            counter < file_size    &&
            buffer[counter]        &&
            buffer[counter] >= ' '
          )
          counter++;

    if (buffer[counter]) goto urlutils_extract_uri_skip_white_space_2;
  }

  /* Now we're at the URI. Copy it to the start of the buffer. */

  copy = 0;

  while (
          counter < file_size    &&
          buffer[counter]        &&
          buffer[counter] >= ' '
        )
        buffer[copy++] = buffer[counter++];

  /* Need to make sure that the string is terminated - if we're */
  /* likely to overflow the buffer, we must overwrite the last  */
  /* char with a terminator. You never know, it could be a non- */
  /* essential last character (e.g. trailing '/').              */

  if (copy && copy < file_size) buffer[copy] = 0;
  else buffer[copy - 1] = 0;

  return;

urlutils_extract_uri_not_a_uri_file:

  /* Not a URI / URL file */

  erb.errnum = Utils_Error_Custom_Message;

  StrNCpy0(erb.errmess,
           lookup_token("NotAURI:This is not a valid URI file.",
                        0,
                        0));

  show_error_ret(&erb);

  return;
}

/*************************************************/
/* urlutils_internal_extra()                     */
/*                                               */
/* Returns an offset into a given string at      */
/* which extra data in an internal URL may be    */
/* found.                                        */
/*                                               */
/* Parameters: Pointer to the URL string.        */
/*                                               */
/* Returns:    Offset for the extra data, or 0   */
/*             if none is found.                 */
/*************************************************/

int urlutils_internal_extra(char * iurl)
{
  char * extra;

  if (strncmp(iurl, Internal_URL, Int_URL_Len)) return 0;

  extra = strchr(iurl, ':');

  if (!extra) return 0;
  else extra ++;

  return (int) (extra - iurl);
}

/*************************************************/
/* urlutils_internal_tail()                      */
/*                                               */
/* Returns an offset into a given string at      */
/* which tail data (typically a URL leafname)    */
/* may be found.                                 */
/*                                               */
/* Parameters: Pointer to the URL string.        */
/*                                               */
/* Returns:    Offset for the tail data, or 0 if */
/*             none is found.                    */
/*************************************************/

int urlutils_internal_tail(char * iurl)
{
  char * tail, * extra;
  int    exoff, found = 0;

  exoff = urlutils_internal_extra(iurl);

  if (!exoff) return 0;

  extra = iurl + exoff;
  tail  = iurl + strlen(iurl); /* No '-1' here as tail is decremented early in the while loop below */

  while (tail > extra && !found)
  {
    tail--;
    if (*tail == '/') found = 1;
  }

  if (!found) return 0;
  else tail ++;

  return (int) (tail - iurl);
}

/*************************************************/
/* urlutils_set_displayed()                      */
/*                                               */
/* On the basis of a given internal URL, sets    */
/* the 'displayed' field of a given browser_data */
/* structure.                                    */
/*                                               */
/* Parameters: Pointer to a browser_data struct  */
/*             that is to be altered;            */
/*                                               */
/*             Pointer to the internal URL.      */
/*************************************************/

void urlutils_set_displayed(browser_data * b, char * iurl)
{
  if (!strncmp(iurl, Internal_URL, Int_URL_Len))
  {
    if      (!strncmp(iurl + Int_URL_Len, ForExternalHImage, strlen(ForExternalHImage))) b->displayed = Display_External_Image;
    else if (!strncmp(iurl + Int_URL_Len, ForExternalNImage, strlen(ForExternalNImage))) b->displayed = Display_External_Image;
    else if (!strncmp(iurl + Int_URL_Len, ForScrapFile,      strlen(ForScrapFile)))      b->displayed = Display_Scrap_File;
    else if (!strncmp(iurl + Int_URL_Len, ForGoBack,         strlen(ForGoBack)))         b->displayed = Display_Previous_Page;
    else if (!strncmp(iurl + Int_URL_Len, ForGoRecover,      strlen(ForGoRecover)))      b->displayed = Display_Recovered_Page;
    else if (!strncmp(iurl + Int_URL_Len, ForGoHome,         strlen(ForGoHome)))         b->displayed = Display_Home_Page;

    else b->displayed = Display_Fetched_Page;
  }
  else b->displayed = Display_Fetched_Page;
}

/*************************************************/
/* urlutils_check_protocols()                    */
/*                                               */
/* Checks a given URL to see if the fetch        */
/* protocol it specifies can be handled.         */
/*                                               */
/* Parameters: Pointer to the URL string.        */
/*                                               */
/* Returns:    1 if the URL can be handled (i.e. */
/*             the protocol at the start of the  */
/*             URL matches one that the Messages */
/*             file says a module which is       */
/*             currently running copes with),    */
/*             else 0.                           */
/*************************************************/

int urlutils_check_protocols(char * url)
{
  int  protocols = 0;
  int  i;
  char p[24];

  if (!url || (url && !*url)) return 0;

  /* Find the number of possible protocols */

  protocols = atoi(lookup_control("ProtocolMax", 1, NULL));

  /* Exit if not found / not a sensible number */

  if (protocols <= 0) return 0;

  /* Loop round all protocols */

  for (i = 1; i <= protocols; i++)
  {
    /* Look up the module name by building a MessageTrans  */
    /* token of the appropriate format, and call OS_Module */
    /* 18 (lookup module) for it; if the SWI doesn't       */
    /* raise an error, the module was found.               */

    sprintf(p, "ProtocolM%d", i);

    if (!_swix(OS_Module,
               _INR(0,1),

               18,
               lookup_control(p, 1, NULL)))
    {
      /* Module is present, so check the protocol */

      sprintf(p, "ProtocolU%d", i);

      lookup_control(p, 1, NULL);

      /* If the protocol identifier can be found... */

      if (tokens[0] != '!')
      {
        /* Compare it to the same number of characters in */
        /* the given URL. If it matches, we can deal with */
        /* the URL.                                       */

        if (!strncmp(tokens, url, strlen(tokens))) return 1;

        /* Otherwise, loop on to the next protocol... */
      }
    }
  }

  /* If we reach here, no protocol was found. */

  return 0;
}

/*************************************************/
/* urlutils_dispatch()                           */
/*                                               */
/* Puts a given URI into the URI queue and sends */
/* it out to the URI handler.                    */
/*                                               */
/* Parameters: Pointer to a browser_data struct  */
/*             for which the URI relates;        */
/*                                               */
/*             Pointer to null-terminated URI    */
/*             string (not in a movable block,   */
/*             so not, e.g., in a flex block);   */
/*                                               */
/*             URI queue flags (see URIutils.h). */
/*                                               */
/* Assumes:    That the caller has already made  */
/*             sure the URI handler is present.  */
/*************************************************/

_kernel_oserror * urlutils_dispatch(browser_data * b, char * uri, unsigned int flags)
{
  _kernel_oserror * e;
  unsigned int      return_code;
  uri_queue       * entry;

  #ifdef TRACE
    if (tl & (1u<<21)) Printf("urlutils_dispatch: Called for %p with '%s'\n",b,uri);
  #endif

  /* Claim memory for the new entry */

  entry = malloc(sizeof(uri_queue));

  /* Moan if the claim failed */

  if (!entry)
  {
    #ifdef TRACE
      if (tl & (1u<<21)) Printf("urlutils_dispatch: Memory claim for queue entry failed\n",b,uri);
    #endif

    return make_no_fetch_memory_error(15);
  }

  #ifdef TRACE
    malloccount += sizeof(uri_queue);
    if (tl & (1u<<13)) Printf("** malloccount (urlutils_dispatch): \0211%d\0217\n",malloccount);

    if (tl & (1u<<21)) Printf("urlutils_dispatch: Claimed queue entry %p\n",entry);
  #endif

  /* Fill in part of the entry */

  entry->flags = flags;
  entry->b     = b;

  /* If there are no entries, set uri_queue_base to the */
  /* address of this one. Otherwise, point this entry's */
  /* 'next' to the current base item, and point that    */
  /* item's 'prev' back to this entry. Then replace the */
  /* current base entry with this new one.              */

  entry->prev = NULL;

  if (!uri_queue_base) entry->next = NULL;
  else
  {
    entry->next          = uri_queue_base;
    uri_queue_base->prev = entry;
  }

  uri_queue_base = entry;

  /* Now call the URI handler and get a handle to fill in */
  /* the last uri_queue field.                            */

  e = uri_dispatch(URI_Dispatch_Inform,
                   uri,
                   task_handle,

                   &return_code,
                   NULL,
                   &entry->uri_handle);

  if (e)
  {
    #ifdef TRACE
      if (tl & (1u<<21)) Printf("urlutils_dispatch: Exitting with error\n");
    #endif

    return e;
  }

  /* If the request was refused complain */

  if (return_code != URI_Dispatch_RequestAccepted)
  {
    erb.errnum = Utils_Error_Custom_Message;
    StrNCpy0(erb.errmess,
             lookup_token("Refused:Cannot fetch this address as the fetch request was refused by the internal handler.",
                          0,0));

    #ifdef TRACE
      if (tl & (1u<<21)) Printf("urlutils_dispatch: Exitting with error\n");
    #endif

    return &erb;
  }

  /* Otherwise exit successfully */

  #ifdef TRACE
    if (tl & (1u<<21)) Printf("urlutils_dispatch: Successful\n");
  #endif

  return NULL;
}

/*************************************************/
/* urlutils_remove_from_queue()                  */
/*                                               */
/* Removes a specified entry from the list of    */
/* uri_queue structures, freeing the memory      */
/* allocated for it.                             */
/*                                               */
/* Parameters: The URI handle of the entry.      */
/*************************************************/

_kernel_oserror * urlutils_remove_from_queue(URI_handle_t uri_handle)
{
  uri_queue * entry = uri_queue_base;

  #ifdef TRACE
    if (tl & (1u<<21)) Printf("urlutils_remove_from_queue: Called with handle %p\n", uri_handle);
  #endif

  /* Try to find the entry */

  while (entry && entry->uri_handle != uri_handle) entry = entry->next;

  #ifdef TRACE

    /* Complain if not found */

    if (!entry)
    {
      erb.errnum = Utils_Error_Custom_Normal;
      sprintf(erb.errmess, "Can't find URI handle %p in URI queue", uri_handle);

      if (tl & (1u<<21)) Printf("urlutils_remove_from_queue: Exitting with error\n");

      return &erb;
    }

  #else

    /* Fail silently */

    if (!entry) return NULL;

  #endif

  if (entry->prev) entry->prev->next = entry->next;
  if (entry->next) entry->next->prev = entry->prev;

  if (entry == uri_queue_base) uri_queue_base = entry->next;

  #ifdef TRACE
    malloccount -= sizeof(uri_queue);
    if (tl & (1u<<13)) Printf("** malloccount (uriutils_remove_from_queue): \0212%d\0217\n",malloccount);

    if (tl & (1u<<21)) Printf("urlutils_remove_from_queue: Freeing entry %p\n",entry);
  #endif

  free (entry);

  #ifdef TRACE
    if (tl & (1u<<21)) Printf("urlutils_remove_from_queue: Successful\n");
  #endif

  return NULL;
}

/*************************************************/
/* urlutils_find_queue_entry()                   */
/*                                               */
/* Finds an entry in the list of uri_queue       */
/* structures.                                   */
/*                                               */
/* Parameters: The URI handle of the entry.      */
/*                                               */
/* Returns:    Pointer to the entry, or NULL if  */
/*             no entry with that handle could   */
/*             be found.                         */
/*************************************************/

uri_queue * urlutils_find_queue_entry(URI_handle_t uri_handle)
{
  uri_queue * entry = uri_queue_base;

  #ifdef TRACE
    if (tl & (1u<<21)) Printf("urlutils_find_queue_entry: Called with handle %p\n", uri_handle);
  #endif

  while (entry && entry->uri_handle != uri_handle) entry = entry->next;

  #ifdef TRACE
    if (tl & (1u<<21)) Printf("urlutils_find_queue_entry: Returning with entry %p\n", entry);
  #endif

  return entry;
}