/* Copyright 1997 Acorn Computers Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/***************************************************/
/* File   : SaveText.c                             */
/*                                                 */
/* Purpose: Save a web page as text.               */
/*                                                 */
/* Author : Merlyn Kline for Customer browser     */
/*          This source adapted by A.D.Hodgkinson  */
/*                                                 */
/* History: 24-Nov-97: Created.                    */
/***************************************************/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>

#include "swis.h"
#include "flex.h"

#include "HTMLLib.h" /* HTML library API, Which will include html2_ext.h, tags.h and struct.h */

#include "wimp.h"
#include "wimplib.h"
#include "event.h"

#include "svcprint.h"
#include "Global.h"
#include "FromROSLib.h"
#include "Utils.h"

#include "Fetch.h"
#include "Filetypes.h"
#include "Forms.h"
#include "Protocols.h"
#include "Reformat.h"
#include "Save.h"
#include "SaveObject.h"

#include "SaveText.h"

/* Local statics */

/* Count of link reference markers produced so far. Zeroed by savetext_save */
/* when it starts on the main document stream, incremented by               */
/* savetext_write_ref, and tested by savetext_summarise_refs.               */

static int    references         = 0;

/* The text emitted for a horizontal rule (HR) in savetext_save */

static char   savetext_hr_text[] = "\n  --------------------------------------------------------------------------\n"; /* Use [] not char *, as sizeof() is called on this entity */

/* The file being written. Opened and closed by savetext_save_text; all */
/* output goes through savetext_write_bytes, which fails if it is NULL. */

static FILE * outfile            = NULL;

/* Static function prototypes */

static int    savetext_write_bytes       (const char * s, unsigned int n);

static char * savetext_create_image_text (HStream * t, char * s, int size);
static int    savetext_write_ref         (int getsize);
static int    savetext_summarise_refs    (browser_data * b, HStream * stream, int getsize);
static int    savetext_save              (browser_data * b, HStream * stream, int getsize);

/*************************************************/
/* savetext_write_bytes()                        */
/*                                               */
/* Writes characters from a string to the file   */
/* held in the static 'outfile'. Output stops    */
/* after the requested count, or earlier if a    */
/* terminating NUL byte is met first.            */
/*                                               */
/* Parameters: Pointer to the string;            */
/*                                               */
/*             Maximum number of characters to   */
/*             write.                            */
/*                                               */
/* Returns:    1 if every character was written, */
/*             0 if the string or the file is    */
/*             missing, or a write failed.       */
/*                                               */
/* Assumes:    The file has been opened for      */
/*             writing by the caller.            */
/*************************************************/

static int savetext_write_bytes(const char * s, unsigned int n)
{
  unsigned int remaining;

  /* Nothing can be done without both a string and an open file */

  if (s == NULL || outfile == NULL) return 0;

  /* Send one character at a time, giving up at the first failure. */
  /* The NUL test means a count longer than the string is harmless. */

  for (remaining = n; remaining != 0 && *s != '\0'; remaining--, s++)
  {
    if (fputc(*s, outfile) == EOF) return 0;
  }

  return 1;
}

/*************************************************/
/* savetext_create_image_text()                  */
/*                                               */
/* Builds a bracketed placeholder for an image:  */
/* '[', the ALT text, then ']'. When there is    */
/* no ALT text a generic word looked up from     */
/* the Messages file is used if it fits.         */
/*                                               */
/* Parameters: Pointer to an HStream struct      */
/*             representing the image;           */
/*                                               */
/*             Pointer to the buffer that will   */
/*             receive the string;               */
/*                                               */
/*             Size of that buffer in bytes.     */
/*                                               */
/* Returns:    The buffer pointer passed in. The */
/*             buffer is left untouched if it is */
/*             NULL or smaller than 3 bytes.     */
/*************************************************/

static char * savetext_create_image_text(HStream * t, char * s, int size)
{
  /* We need room for at least '[', ']' and a terminator */

  if (s == NULL || size < 3) return s;

  /* Start with just the opening bracket */

  s[0] = '[';
  s[1] = '\0';

  /* Copy in as much ALT text as the rest of the buffer allows... */

  if (t->text) strncpy(&s[1], t->text, size - 1);

  /* ...then cut it off one byte short of the end, so that the */
  /* closing bracket and its terminator are certain to fit.    */

  s[size - 2] = '\0';

  /* Nothing after the bracket means there was no ALT text (or it */
  /* was empty); fall back to the generic description, provided   */
  /* that it, both brackets and the terminator fit in the buffer. */

  if (s[1] == '\0')
  {
    char * fallback = lookup_token("SaveTextImage:Image",0,0);

    if (strlen(fallback) + 2 < size) strcat(s, fallback);
  }

  /* Finish off with the closing bracket */

  strcat(s, "]");

  return s;
}

/*************************************************/
/* savetext_write_ref()                          */
/*                                               */
/* Advances the reference counter and formats a  */
/* marker for the new reference number. Either   */
/* the marker's length is returned, or it is     */
/* written to the output file.                   */
/*                                               */
/* The marker's format string is read from the   */
/* Messages file; if that looks too long for     */
/* the local buffer a plain ' [n]' is used.      */
/*                                               */
/* Parameters: 1 to return the length of the     */
/*             marker, 0 to write it to disc and */
/*             return a success flag.            */
/*************************************************/

static int savetext_write_ref(int getsize)
{
  char   marker[64];
  char * fmt = lookup_token("SaveTextRef: [Ref %%d]",0,0);
  int    digits;
  int    length;

  /* This is a new reference (anchor), so count it */

  references++;

  /* Use the Messages file format only when the format text plus */
  /* the digits of the number stay within the buffer; otherwise  */
  /* drop back to a short built-in form that always fits.        */

  digits = utils_number_length(references);

  if (digits + strlen(fmt) < sizeof(marker)) sprintf(marker, fmt, references);
  else                                       sprintf(marker, " [%d]", references);

  length = strlen(marker);

  /* Either report the length, or hand the result of the disc */
  /* output routine back to the caller.                       */

  if (getsize) return length;

  return savetext_write_bytes(marker, length);
}

/*************************************************/
/* savetext_summarise_refs()                     */
/*                                               */
/* Lists the target of every reference marked    */
/* in the body text, numbering them from 1. It   */
/* calls itself for the contents of table        */
/* cells; the heading is only produced and the   */
/* numbering only restarted for the main         */
/* document stream.                              */
/*                                               */
/* Parameters: Pointer to a browser_data struct  */
/*             relevant to the page;             */
/*                                               */
/*             Pointer to the first HStream of   */
/*             the list to scan for links;       */
/*                                               */
/*             1 to return the size of the text  */
/*             that would be written, 0 to write */
/*             it and return a success flag.     */
/*                                               */
/* Assumes:    The static 'references' count has */
/*             been set by savetext_save.        */
/*************************************************/

static int savetext_summarise_refs(browser_data * b, HStream * stream, int getsize)
{
  static int   entry  = 0; /* Number of the last reference listed; static so it carries across recursive calls */
  char         text[512];
  int          total  = 0;
  HStream    * item;
  HStream    * before = NULL;

  /* No references were marked, so there is nothing to list */

  if (references == 0) return getsize ? 0 : 1;

  /* Only the main document stream (not a table cell) gets the */
  /* separator and section title, and restarts the numbering.  */

  if (stream == b->stream)
  {
    entry = 0;

    sprintf(text,
            "\n\n==============================================================================\n\n%s\n\n",
            lookup_token("SaveTextRefs:References in this document:",0,0));

    if (getsize) total += strlen(text);
    else if (!savetext_write_bytes(text, strlen(text))) return 0;
  }

  /* Walk the token list, remembering the previous token each time */

  for (item = stream; item; before = item, item = item->next)
  {
    if (!ISBODY(item)) continue;

    /* Tables: list the references of each data or heading cell */
    /* before looking at anything that follows the table.       */

    if (item->tagno == TAG_TABLE)
    {
      table_stream * table = (table_stream *) item;
      table_row    * row;

      for (row = table->List; row; row = row->Next)
      {
        table_headdata * cell;

        for (cell = row->List; cell; cell = cell->Next)
        {
          int sub;

          if (!cell->Tag) continue;
          if (cell->Tag != TagTableData && cell->Tag != TagTableHead) continue;

          sub = savetext_summarise_refs(b, (HStream *) cell->List, getsize);

          if (getsize) total += sub;
          else if (!sub) return 0;
        }
      }
    }

    /* A token starts a new reference if it is a link and the token  */
    /* before it was not part of the same link (several tokens can   */
    /* belong to one link). This test has to match the one used by   */
    /* savetext_save when it marks references in the body, or the    */
    /* numbers here will not line up with the markers in the text.   */

    if (
         ISLINK(item)             &&
         item->tagno != TAG_TABLE &&
         (
           !before                        ||
           !ISLINK(before)                ||
           !ISBODY(before)                ||
           before->tagno  == TAG_TABLE    ||
           before->anchor != item->anchor
         )
       )
    {
      /* One line per reference: the number, the anchor, a line break */

      entry++;
      sprintf(text, "%8d. ", entry);

      if (getsize)
      {
        total += strlen(text);
        total += strlen(item->anchor);
        total++;
      }
      else
      {
        if (!savetext_write_bytes(text,         strlen(text)))         return 0;
        if (!savetext_write_bytes(item->anchor, strlen(item->anchor))) return 0;
        if (!savetext_write_bytes("\n",         1))                    return 0;
      }
    }
  }

  /* All done; give back the size that would have been written, */
  /* or flag that everything was written successfully.          */

  return getsize ? total : 1;
}

/*************************************************/
/* savetext_save()                               */
/*                                               */
/* Walks a list of HStream tokens and either     */
/* writes a plain text rendition to 'outfile'    */
/* (which must already be open) or works out     */
/* how many bytes that rendition would need.     */
/* Table cells are handled by recursion; the     */
/* reference summary is appended only for the    */
/* main document stream.                         */
/*                                               */
/* Parameters: Pointer to a browser_data struct  */
/*             relevant to the file;             */
/*                                               */
/*             Pointer to the first HStream of   */
/*             the list to output;               */
/*                                               */
/*             1 to return the size of the       */
/*             constructed file, 0 to return a   */
/*             success flag having tried to      */
/*             write the file to disc.           */
/*                                               */
/* Returns:    In size mode, the byte count; in  */
/*             write mode, 1 on success or 0 if  */
/*             a write failed.                   */
/*************************************************/

static int savetext_save(browser_data * b, HStream * stream, int getsize)
{
  HStream * curr;
  HStream * last;
  int       size = 0, s;

  curr = stream;
  last = NULL;

  /* Only zero references if we're working on the main document */
  /* stream - table cells should all add to the reference list  */

  if (stream == b->stream) references = 0;

  /* Go through the tokens */

  while (curr)
  {
    if (ISBODY(curr))
    {
      /* If we're on a line break, then write (or account for writing) */
      /* a new line, then tabs to indent to the current level of       */
      /* indentation (whatever that may be).                           */

      if (reformat_newline(curr, last, 0))
      {
        if (getsize) size += 1 + curr->indent;
        else
        {
          int i = curr->indent;

          if (!savetext_write_bytes("\n", 1)) return 0;

          /* One tab per level of indentation. The length is 1 as the */
          /* string holds a single character; this must stay in step  */
          /* with the '1 + indent' counted in size mode above.        */

          while (i)
          {
            if (!savetext_write_bytes("\t", 1)) return 0;
            i --;
          }
        }
      }

      /* Output a line break for paragraphs */

      if (curr->style & P)
      {
        if (getsize) size += 1;
        else if (!savetext_write_bytes("\n", 1)) return 0;
      }

      /* Hmm. A table. This'll be fun, then. */

      if (curr->tagno == TAG_TABLE)
      {
        table_stream   * table = (table_stream *) curr;
        table_row      * R;
        table_headdata * D;
        HStream        * attached;
        char             j[512];
        char             n[64];
        char           * l;
        int              cell = 0;

        strcpy(j, "\n[");

        /* Compile a description of the table */

        sprintf(n, "%d", table->RowSpan);
        l = lookup_token("SaveTextTable:Table - %0", 0, n);
        strcat(j, l);

        sprintf(n, "%d", table->ColSpan);
        if (table->RowSpan == 1) l = lookup_token("SaveTextRow: row and %0",   0, n);
        else                     l = lookup_token("SaveTextRows: rows and %0", 0, n);
        strcat(j, l);

        if (table->ColSpan == 1) l = lookup_token("SaveTextCol: column",   0, 0);
        else                     l = lookup_token("SaveTextCols: columns", 0, 0);
        strcat(j, l);

        strcat(j, "]\n");

        /* Output this description */

        if (getsize) size += strlen(j);
        else if (!savetext_write_bytes(j, strlen(j))) return 0;

        /* Scan the rows and cells */

        R = table->List;

        while (R)
        {
          D = R->List;

          while (D)
          {
            if (D->Tag)
            {
              switch (D->Tag)
              {
                case TagTableData:
                case TagTableHead:
                {
                  cell ++;

                  attached = (HStream *) D->List;

                  /* Compile a description of the cell; every cell */
                  /* after the first gets an extra blank line.     */

                  if (cell > 1)
                  {
                    if (getsize) size += 1;
                    else if (!savetext_write_bytes("\n", 1)) return 0;
                  }

                  if (getsize) size += 2;
                  else if (!savetext_write_bytes("\n[", 2)) return 0;

                  l = lookup_token("SaveTextCell:Row %%d, column %%d", 0, 0);

                  sprintf(j,
                          l,
                          D->RowOffs + 1,
                          D->ColOffs + 1);

                  /* Output this description */

                  if (getsize) size += strlen(j);
                  else if (!savetext_write_bytes(j, strlen(j))) return 0;

                  if (getsize) size += 3;
                  else if (!savetext_write_bytes("]\n\n", 3)) return 0;

                  /* Deal with the cell contents */

                  if (getsize) size += savetext_save(b, attached, getsize);
                  else if (!savetext_save(b, attached, getsize)) return 0;
                }
                break;
              }
            }

            D = D->Next;
          }

          R = R->Next;
        }
      }

      /* Output image data. Image-type INPUT items are caught here, */
      /* before the general INPUT handling further down.            */

      else if (
                (curr->style & IMG) ||
                (
                  curr->tagno         == TAG_INPUT &&
                  HtmlINPUTtype(curr) == inputtype_IMAGE
                )
              )
      {
        char j[128]; /* Don't really want more than about 127 characters of ALT text in one go, surely?! */

        savetext_create_image_text(curr, j, sizeof(j));

        if (getsize) size += strlen(j);
        else if (!savetext_write_bytes(j, strlen(j))) return 0;
      }

      /* For bullets, output an asterisk */

      else if (ISBULLET(curr))
      {
        if (getsize) size += 2;
        else if (!savetext_write_bytes("* ", 2)) return 0;
      }

      /* Horizontal rules */

      else if (curr->style & HR)
      {
        if (getsize) size += sizeof(savetext_hr_text) - 1; /* ('-1' as sizeof includes the terminating null byte) */
        else if (!savetext_write_bytes(savetext_hr_text, sizeof(savetext_hr_text) - 1)) return 0;
      }

      /* Text areas: ' [', the text area value, then '] '.      */
      /* The tag tested must be TEXTAREA; testing SELECT here   */
      /* would render selection lists as if they were text      */
      /* areas and leave real text areas to the generic case.   */

      else if (curr->tagno == TAG_TEXTAREA)
      {
        if (getsize) size += 4 + strlen(form_get_field(b, curr)->text);
        else
        {
          if (!savetext_write_bytes(" [",                          2))                                  return 0;
          if (!savetext_write_bytes(form_get_field(b, curr)->text, strlen(form_get_field(b, curr)->text))) return 0;
          if (!savetext_write_bytes("] ",                          2))                                  return 0;
        }
      }

      /* Selection lists - ' [', the current displayed item, */
      /* ']', then '[>] ' to represent the popup.            */

      else if (curr->tagno == TAG_SELECT)
      {
        if (getsize) size += 7 + strlen(form_get_field_text(b, curr));
        else
        {
          if (!savetext_write_bytes(" [",                         2))                                 return 0;
          if (!savetext_write_bytes(form_get_field_text(b, curr), strlen(form_get_field_text(b, curr)))) return 0;
          if (!savetext_write_bytes("][>] ",                      5))                                 return 0;
        }
      }

      /* Various INPUT field types */

      else if (curr->tagno == TAG_INPUT)
      {
        switch (HtmlINPUTtype(curr))
        {
          /* Single line normal writable */

          case inputtype_TEXT:
          {
            if (getsize) size += 4 + strlen(form_get_field(b, curr)->text);
            else
            {
              if (!savetext_write_bytes(" [",                          2))                                  return 0;
              if (!savetext_write_bytes(form_get_field(b, curr)->text, strlen(form_get_field(b, curr)->text))) return 0;
              if (!savetext_write_bytes("] ",                          2))                                  return 0;
            }
          }
          break;

          /* Single line password writable - output '*'s instead of the actual text */

          case inputtype_PASSWORD:
          {
            if (getsize) size += 4 + strlen(form_get_field(b, curr)->text);
            else
            {
              int j ,l;

              if (!savetext_write_bytes(" [", 2)) return 0;

              l = strlen(form_get_field(b, curr)->text);

              for (j = 0; j < l; j++)
              {
                if (!savetext_write_bytes("*",1)) return 0;
              }

              if (!savetext_write_bytes("] ", 2)) return 0;
            }
          }
          break;

          /* Check box; '[Y]' if selected, '[_]' if not */

          case inputtype_CHECKBOX:
          {
            if (getsize) size += 3;
            else if (!savetext_write_bytes(form_get_field(b, curr)->checked ? "[Y]" : "[_]", 3)) return 0;
          }
          break;

          /* Radio button; '(O)' if selected, '(_)' if not */

          case inputtype_RADIO:
          {
            if (getsize) size += 3;
            else if (!savetext_write_bytes(form_get_field(b, curr)->checked ? "(O)" : "(_)", 3)) return 0;
          }
          break;

          /* Hidden items give no text */

          case inputtype_HIDDEN: break;

          /* Submit / Reset buttons */

          case inputtype_SUBMIT: /* same as TYPE_RESET: no break */
          case inputtype_BUTTON: /* Again, no break              */
          case inputtype_RESET:
          {
            const char * p;

            p = form_button_text(curr);

            if (getsize) size += strlen(p) + 4;
            else
            {
              if (!savetext_write_bytes(" <", 2))         return 0;
              if (!savetext_write_bytes(p,    strlen(p))) return 0;
              if (!savetext_write_bytes("> ", 2))         return 0;
            }
          }
          break;

          /* Any other input type gives no text */

          default: break;
        }
      }

      /* For anything else, try to extract some generic text */

      else
      {
        char * d = curr->text;

        if (getsize && d) size += strlen(d);
        else if (d && !savetext_write_bytes(d, strlen(d))) return 0;
      }

      /* If we've got a link and the last item wasn't a link or */
      /* was, but to a different URL as this one, then output a */
      /* marker pointing to a reference that will be added on   */
      /* at the end of the document.                            */
      /*                                                        */
      /* Note that these conditions MUST be kept in step with   */
      /* those in savetext_summarise_refs, which actually       */
      /* writes the references pointed to by these markers.     */

      if (
           ISLINK(curr)             &&
           curr->tagno != TAG_TABLE &&
           (
             !last                        ||
             !ISLINK(last)                ||
             !ISBODY(last)                ||
             last->tagno  == TAG_TABLE    ||
             last->anchor != curr->anchor
           )
         )
      {
        s = savetext_write_ref(getsize);

        if (getsize) size += s;
        else if (!s) return 0;
      }
    }

    last = curr;
    curr = curr->next;
  }

  /* Summarise the references if we're not in a table cell */

  if (stream == b->stream)
  {
    s = savetext_summarise_refs(b, b->stream, getsize);
    if (getsize) size += s;
    else if (!s) return 0;
  }

  /* Return whatever was requested - size, or a success flag */

  return getsize ? size : 1;
}

/*************************************************/
/* savetext_save_text()                          */
/*                                               */
/* Save a given browser page as a text file,     */
/* then set the file's type to FileType_TEXT.    */
/* Nothing is done if no pathname is given.      */
/*                                               */
/* Parameters: Pointer to a browser_data struct  */
/*             relevant to the page to save as   */
/*             text;                             */
/*                                               */
/*             Pointer to the pathname to save   */
/*             to.                               */
/*                                               */
/* Returns:    NULL on success or if there was   */
/*             no pathname, else a pointer to an */
/*             error block.                      */
/*************************************************/

_kernel_oserror * savetext_save_text(browser_data * b, const char * pathname)
{
  int success = 0;

  /* If we seem to have left a file open, close it */

  if (outfile)
  {
    fclose(outfile);
    outfile = NULL;
  }

  /* Only proceed if we've got a pathname */

  if (pathname && *pathname)
  {
    save_record_path(pathname);

    outfile = fopen(pathname, "wb");

    if (!outfile) RetLastE;

    success = savetext_save(b, b->stream, 0);

    /* Closing the file flushes anything still buffered, so a */
    /* failure here means the file on disc is incomplete and  */
    /* must be reported just like a failed write.             */

    if (fclose(outfile) != 0) success = 0;
    outfile = NULL;

    if (!success) RetLastE;

    /* Set the filetype. OS_File 18 takes its parameters in R0 to */
    /* R2 only, so the input register mask must cover exactly the */
    /* three values passed here.                                  */

    return _swix(OS_File,
                 _INR(0,2),

                 18,
                 pathname,
                 FileType_TEXT);
  }

  return NULL;
}

/*************************************************/
/* savetext_text_size()                          */
/*                                               */
/* Works out how many bytes savetext_save_text   */
/* would write for the given browser, without    */
/* writing anything.                             */
/*                                               */
/* Parameters: Pointer to a browser_data struct  */
/*             relevant to the page whose text   */
/*             size is wanted.                   */
/*                                               */
/* Returns:    Size in bytes of the text file    */
/*             that would be written.            */
/*************************************************/

int savetext_text_size(browser_data * b)
{
  int bytes;

  /* Run the save routine over the main stream in size-only mode */

  bytes = savetext_save(b, b->stream, 1);

  return bytes;
}