/* Copyright 1996 Acorn Computers Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/* c.rojpeg - JPEG for use within RISC OS */

#include <stdlib.h>
#include "swis.h"
#include "commondefs.h"
#include "jinclude.h"
#include "rojpeg.h"

/**************************************************************************
*                                                                         *
*    JPEG library code.                                                   *
*                                                                         *
**************************************************************************/

#ifdef SOURCE_IS_rojpeg
/* Configuration used when SOURCE_IS_rojpeg is defined: enable the CFSI
 * (ChangeFSI) entry points further down this file and compile every
 * tracing/debug helper away to nothing.
 */
#define CFSI
#define tracef(args)     /* Nothing */
/* Two-argument assert: if x is false the program terminates via exit(y). */
#define assert(x, y)     {if (!(x)) exit(y);}
#define newline()        /* Nothing */
#define comment(ws,text) /* Nothing */
#define IFDEBUG(a)       /* Nothing */
#endif

#include "jrdjfif.c"
#include "jdhuff.c"
#include "jcconv.c"

#undef FILE_
#define FILE_ (40000)

/**************************************************************************
*                                                                         *
*    Low-level debugging output.                                          *
*                                                                         *
**************************************************************************/

void assembler_panic(decompress_info_ptr cinfo, int *regblock, int code)
/* Diagnostic hook: the assembler code calls this when it detects a problem.
 * In DEBUG builds it dumps the 16 register values in regblock and the input
 * bytes around the address held in regblock[3]; otherwise it does nothing.
 * It simply returns to the caller, which usually means no picture is painted.
 */
{
#ifndef DEBUG
  /* Nothing to report in a non-debug build - just mark the arguments as used. */
  UNUSED(cinfo);
  UNUSED(regblock);
  UNUSED(code);
#else
  int n;

  tracef("Corrupted data in Huffman stream at byte %i\n" _ (char*)regblock[3] - cinfo->input_buffer);
  tracef("cinfo=0x%x regblock=0x%x code=%i.\n" _ (int)cinfo _ (int)regblock _ code);

  /* Register dump */
  n = 0;
  while (n < 16)
  {
    tracef("R%i=0x%x\n" _ n _ regblock[n]);
    n++;
  }

  /* Ten bytes either side of the address in regblock[3] */
  tracef("Nearby input bytes:\n");
  n = -10;
  while (n < 10)
  {
    tracef("%i:0x%x " _ n _ ((char*)(regblock[3]))[n]);
    n++;
  }
  tracef("\n");
#endif
}

/**************************************************************************
*                                                                         *
*    JPEG utility functions.                                              *
*                                                                         *
**************************************************************************/

#ifdef CFSI
int do_jpeg_scan_file(char *space, int space_size, char *file_image, int image_length)
/* Simple entry sequence for use by ChangeFSI */
{
  decompress_info_ptr cinfo = (decompress_info_ptr) space;

  cinfo->workspace_size = space_size;
  cinfo->error_code = -1; /* force total reset */
  return jpeg_scan_file(cinfo, file_image, image_length, 0, 0x7fffffff, -1, -1, 0);
}

int do_jpeg_scan_file_16(char *space, int space_size, char *file_image, int image_length)
/* Simple entry sequence for use by ChangeFSI - 16bpp output. */
{
  decompress_info_ptr cinfo = (decompress_info_ptr) space;

  cinfo->workspace_size = space_size;
  cinfo->error_code = -1; /* force total reset */
  return jpeg_scan_file(cinfo, file_image, image_length, 0, 0x7fffffff, -1, -1, jopt_OUTBPP_16);
}
#else
static int palette_is_grey(int *palette, int entries); /* In c.PutScaled */

static void check_jpeg_workspace(asm_workspace *wp, int jpeg_ws_size)
/* Make sure wp->jpeg_info_ptr refers to a big enough JPEG workspace.
 *  - No workspace yet: malloc jpeg_ws_size bytes.
 *  - Workspace exists and jpeg_ws_size is non-zero: grow it by jpeg_ws_size
 *    bytes with realloc.
 *  - Workspace exists and jpeg_ws_size is zero: nothing to do.
 * Whenever memory is (re)allocated the workspace is flagged as uninitialised
 * (error_code = -1) and its recorded size is updated. Allocation failure is
 * reported through assert with ERROR_NO_MEMORY.
 */
{
  decompress_info_ptr info = wp->jpeg_info_ptr;
  int total;

  if (info == NULL)
  {
    /* First use - fresh allocation */
    tracef("Malloc requesting %x bytes of workspace\n" _ jpeg_ws_size);
    info = malloc(jpeg_ws_size);
    assert(info != NULL, ERROR_NO_MEMORY);
    wp->jpeg_info_ptr = info;
    info->error_code = -1;                 /* workspace contents are not valid yet */
    info->workspace_size = jpeg_ws_size;   /* at least this big, malloc may have rounded up */
    return;
  }

  if (jpeg_ws_size == 0) return;           /* existing workspace is already sufficient */

  /* Existing workspace must be extended */
  tracef("Realloc requesting %x extra bytes of workspace\n" _ jpeg_ws_size);
  total = jpeg_ws_size + info->workspace_size;
  info = realloc(info, total);
  assert(info != NULL, ERROR_NO_MEMORY);
  wp->jpeg_info_ptr = info;
  info->error_code = -1;                   /* workspace contents are not valid any more */
  info->workspace_size = total;
}

static int jpeg_read_mode_palette(asm_workspace *wp)
/* Read the palette into wp->newtranstable with ColourTrans_ReadPalette
 * (R0 = R1 = -1). The first call passes a null buffer to ask how many bytes
 * are needed, the second call fetches that many bytes.
 * Returns the palette size in bytes, or -1 if either SWI returned an error
 * (in which case the contents of wp->newtranstable must not be relied upon).
 */
{
  int size = 0;

  if (_swix(ColourTrans_ReadPalette, _IN(0) | _IN(1) | _IN(2) | _IN(3) | _IN(4) | _OUT(3),
                                     -1, -1, 0, 256*4, 0, &size) != NULL)
    return -1;
  tracef("need %x bytes for palette\n" _ size);
  if (_swix(ColourTrans_ReadPalette, _IN(0) | _IN(1) | _IN(2) | _IN(3) | _IN(4),
                                     -1, -1, &(wp->newtranstable[0]), size, 0) != NULL)
    return -1;
  return size;
}

static int jpeg_decompressor_opts(decompress_info_ptr cinfo, asm_workspace *wp)
/* Deduce the decompressor options (a mask of jopt_ bits) from the output
 * depth, dithering flags, printer intercept state and scale factors held in
 * wp. Side effects: may zero wp->ColourTTR, may overwrite wp->newtranstable
 * with the palette, and may set cinfo->error_code to -1 to force the
 * workspace to be reinitialised.
 *
 * If the palette cannot be read, the palette-based greyscale detection is
 * skipped rather than run on undefined data; if the mode flags cannot be read
 * they are treated as zero.
 */
{
  int opt = 0;

  /* JPEG decompression options */
  if (wp->BPP < 4)
  {
    opt |= jopt_GREY; /* always greyscale below 4bpp */
    if ((wp->save_PdriverIntercept & 2) == 0) /* printing is not on */
      wp->ColourTTR = 0; /* don't use trans table at these depths, results are naff */
  }
  if ((wp->BPP <=8) && (wp->dither_truecolour & 2))
  {
    opt |= jopt_DIFFUSE;
    wp->ColourTTR = 0;
  }
  if ((wp->save_xadd - wp->save_xdiv) * 6 <= wp->save_xdiv &&
      (wp->save_yadd - wp->save_ydiv) * 6 <= wp->save_ydiv)
    opt |= jopt_DC_ONLY; /* postage stamp - go faster, do only DC values of JPEG tiles */

  if (wp->BPP == 8)
  {
    int size = jpeg_read_mode_palette(wp); /* save palette into newtranstable area */

    if (size >= 0)
    {
      int temp = palette_is_grey(wp->newtranstable, size/4);

      if (temp) opt |= jopt_GREY;
      if (temp == 2)
      {
        opt |= jopt_OUTBPP_8GREY;
        opt &= ~jopt_DIFFUSE;
      }
    }
  }
  if ((wp->save_PdriverIntercept & 2) == 0 && (wp->dither_truecolour & 1)) /* old format palette and printing is not on */
  {
    if ((wp->dither_truecolour & 2) && wp->BPP < 16)
    {
      int size;
#ifdef DEBUG
      int loop;
#endif
      tracef("trying new shiny 8BPP plotting technique\n");
      size = jpeg_read_mode_palette(wp); /* save palette into newtranstable area */
      if (size >= 0)
      {
        tracef("created palette at %x\n" _ &(wp->newtranstable[0]));
#ifdef DEBUG
        tracef("Read palette, palette entries are:-\n");
        for(loop = 0;loop<size/4;loop++)
          tracef("Entry %d = %x\n" _ loop _ wp->newtranstable[loop]);
#endif
        if (wp->BPP == 4)
        {
          if (palette_is_grey(wp->newtranstable, size/4)) opt |= jopt_GREY;
        }
        else if (wp->BPP == 8)
        {
          int temp;

          temp = palette_is_grey(wp->newtranstable, size/4);
          if (temp) opt |= jopt_GREY;
          if (temp == 2)
          {
            opt |= jopt_OUTBPP_8GREY;
            opt &= ~jopt_DIFFUSE;
          }
        }
      }
    }

    if ((wp->BPP == 4) && !(wp->dither_truecolour & 2)) opt |= jopt_GREY;

    if (wp->BPP == 8)
    {
      if ((wp->dither_truecolour & 2) && !(opt & jopt_OUTBPP_8GREY))
      {
        opt |= jopt_OUTBPP_8;                        /* full error diffusion */
        cinfo->error_code = -1;                      /* mark the workspace entirely uninitialised */
      }
      else
      {
        int modeflags = 0;                           /* stays 0 if the SWI below fails */

        _swix(OS_ReadModeVariable, _IN(0) | _IN(1) | _OUT(2), -1, 0, &modeflags);
        if ((modeflags & (1<<7)) == 0)
          opt |= jopt_OUTBPP_8YUV;                   /* strange diffusion from YUV data */
      }
    }
    else if (wp->BPP == 16)
      opt |= jopt_OUTBPP_16;
  }

#ifdef DEBUG
  tracef("After set up, options are:\n");
  if (opt & jopt_GREY) tracef("   jopt_GREY\n");
  if (opt & jopt_DC_ONLY) tracef("   jopt_DC_ONLY\n");
  if (opt & jopt_INTERP_X) tracef("   jopt_INTERP_X\n");
  if (opt & jopt_OUTBPP_8) tracef("   jopt_OUTBPP_8\n");
  if (opt & jopt_OUTBPP_16) tracef("   jopt_OUTBPP_16\n");
  if (opt & jopt_OUTBPP_8YUV) tracef("   jopt_OUTBPP_8YUV\n");
  if (opt & jopt_DIFFUSE) tracef("   jopt_DIFFUSE\n");
  if (opt & jopt_OUTBPP_8GREY) tracef("   jopt_OUTBPP_8GREY\n");
#endif

  return opt;
}
#endif

static void init_workspace(decompress_info_ptr cinfo, int size)
/* Reset an allocated workspace to a clean state. The first 'size' bytes at
 * cinfo are zeroed, except that workspace_size and table32k survive the wipe.
 * The component-info, quantisation-table and Huffman-table pointers are then
 * aimed at the storage embedded in the structure itself. The band buffer
 * pointer is not set up here.
 */
{
  const int   kept_size  = cinfo->workspace_size;
  char *const kept_table = cinfo->table32k;
  int         n;

  memset(cinfo, 0, size);

  /* Put back the two fields that must outlive the wipe */
  cinfo->table32k = kept_table;
  cinfo->workspace_size = kept_size;

  cinfo->comp_info = &cinfo->s_cur_comp_info[0];

  /* Huffman tables: DC and AC slots share an index */
  for (n = 0; n < NUM_HUFF_TBLS; n++)
  {
    cinfo->ac_huff_tbl_ptrs[n] = &cinfo->s_ac_huff_tbl[n];
    cinfo->dc_huff_tbl_ptrs[n] = &cinfo->s_dc_huff_tbl[n];
  }

  /* Quantisation tables */
  n = 0;
  while (n < NUM_QUANT_TBLS)
  {
    cinfo->quant_tbl_ptrs[n] = (QUANT_VAL*) &cinfo->s_quant_tbl[n];
    n++;
  }
}

static void process_restart(decompress_info_ptr cinfo)
/* Consume a restart marker from the input stream. The read position should be
 * precisely at the marker: an 0xff byte, optionally followed by further 0xff
 * fill bytes, then RSTn where n equals cinfo->next_restart_num.
 *
 * On success the bit buffer is flushed, restarts_to_go is reloaded from
 * restart_interval, next_restart_num advances modulo 8 and the DC predictor
 * of every component in the scan is reset to zero.
 *
 * Anything else - including running off the end of the input buffer - is
 * reported through assert with ERROR_BAD_JPEG.
 *
 * Bytes are masked to 0..255 before comparison so the marker tests do not
 * depend on whether plain char is signed.
 */
{
  int c;
  int ci;

  /* Never read the marker from beyond the end of the file image */
  assert(cinfo->next_input_byte < cinfo->buffer_end, ERROR_BAD_JPEG);
  c = *cinfo->next_input_byte++ & 0xff;

#if 0
  tracef("Processing restart marker %i at %i bytes\n" _ cinfo->next_restart_num _ cinfo->next_input_byte - cinfo->input_buffer);
  {
    int i;
    tracef("inbuf=0x%x nbits=%i inptr=0x%x file=0x%x\n" _ cinfo->get_buffer _ cinfo->bits_left _ (int)cinfo->next_input_byte _ (int)cinfo->input_buffer);
    for (i = -10; i < 10; i++) tracef("%i:0x%x " _ i _ cinfo->next_input_byte[i]);
    tracef("\n");
  }
#endif
  assert(cinfo->bits_left <= 7, ERROR_BAD_JPEG);
  assert(c == 0xff, ERROR_BAD_JPEG);

  /* additional 0xffs allowed at this point - but stop at the end of the data */
  while (cinfo->next_input_byte < cinfo->buffer_end &&
         (*cinfo->next_input_byte & 0xff) == 0xff)
    cinfo->next_input_byte++;

  assert(cinfo->next_input_byte < cinfo->buffer_end, ERROR_BAD_JPEG);
  c = *cinfo->next_input_byte++ & 0xff;
  assert((c & 0xF8) == 0xD0, ERROR_BAD_JPEG); /* RST0..RST7 markers */
  assert((c & 7) == cinfo->next_restart_num, ERROR_BAD_JPEG); /* should be precisely the correct marker */

  /* It appears to be a correctly formed restart marker */
  cinfo->bits_left = 0;       /* flush the remaining bits */
  cinfo->get_buffer = 0;
  cinfo->restarts_to_go = cinfo->restart_interval;
  cinfo->next_restart_num = (cinfo->next_restart_num + 1) & 7;
  for (ci = 0; ci < cinfo->comps_in_scan; ci++) cinfo->last_dc_val[ci] = 0;
}

/**************************************************************************
*                                                                         *
*    Huffman.                                                             *
*                                                                         *
**************************************************************************/

static void save_huff_stream(decompress_info_ptr cinfo, huff_pointer *h)
/* Snapshot the Huffman reader state into *h so that decoding can later be
 * resumed from exactly this point (see restore_huff_stream). The position is
 * packed into one value: byte offset from the start of the input, times 32,
 * plus the count of bits still held in the bit buffer.
 */
{
  /* The packing below only works if the state is sane */
  assert(cinfo->bits_left < 32, ERROR_BAD_JPEG);
  assert(cinfo->bits_left >= 0, ERROR_BAD_JPEG);
  assert(cinfo->input_buffer < cinfo->next_input_byte, ERROR_BAD_JPEG);
  assert(cinfo->next_input_byte < cinfo->buffer_end, ERROR_BAD_JPEG);

  /* Restart bookkeeping */
  h->next_restart_num = cinfo->next_restart_num;
  h->restarts_to_go = cinfo->restarts_to_go;

  /* DC predictors of the (up to) three components */
  h->last_dc_val2 = cinfo->last_dc_val[2];
  h->last_dc_val1 = cinfo->last_dc_val[1];
  h->last_dc_val0 = cinfo->last_dc_val[0];

  /* Bit buffer contents and packed stream position */
  h->get_buffer = cinfo->get_buffer;
  h->bit_pointer = cinfo->bits_left + 32*(cinfo->next_input_byte - cinfo->input_buffer);
}

static void restore_huff_stream(decompress_info_ptr cinfo, huff_pointer *h)
/* Reinstate a Huffman reader state previously captured by save_huff_stream,
 * so that reading continues from the saved point. The restored read pointer
 * must lie strictly inside the input buffer, otherwise ERROR_FATAL is raised
 * through assert.
 */
{
  /* Restart bookkeeping */
  cinfo->next_restart_num = h->next_restart_num;
  cinfo->restarts_to_go = h->restarts_to_go;

  /* DC predictors */
  cinfo->last_dc_val[2] = h->last_dc_val2;
  cinfo->last_dc_val[1] = h->last_dc_val1;
  cinfo->last_dc_val[0] = h->last_dc_val0;

  /* Unpack the position: upper part is the byte offset, low 5 bits the bit count */
  cinfo->bits_left = h->bit_pointer & 31;
  cinfo->next_input_byte = cinfo->input_buffer + h->bit_pointer/32;
  cinfo->get_buffer = h->get_buffer;

#ifdef DEBUG
  if (cinfo->input_buffer >= cinfo->next_input_byte || cinfo->next_input_byte >= cinfo->buffer_end)
  {
    tracef("oops restore_huff_stream: 0x%x 0x%x 0x%x\n" _ (int)cinfo->input_buffer _ (int)cinfo->next_input_byte _ (int)cinfo->buffer_end);
  }
#endif
  assert(cinfo->input_buffer < cinfo->next_input_byte, ERROR_FATAL);
  assert(cinfo->next_input_byte < cinfo->buffer_end, ERROR_FATAL);
}

static void
do_huff_skip_blocks(decompress_info_ptr cinfo, JBLOCK block,
                    HUFF_TBL *dctbl, HUFF_TBL *actbl, QUANT_TBL_PTR quanttbl,
                    int *last_dc_val, int nblocks, BOOL block_per_mcu)
/* Restart-aware front end to asm_huff_skip_blocks.
 * With no restart interval in force the whole run of nblocks is handed to the
 * assembler in one call. Otherwise blocks are processed one at a time so that
 * the restart counter can be serviced: it is ticked before the first block,
 * and before every later block too when block_per_mcu is set.
 */
{
  int done;

  if (!cinfo->restart_interval)
  {
    asm_huff_skip_blocks(cinfo, block, dctbl, actbl, quanttbl, last_dc_val, nblocks);
    return;
  }

  for (done = 0; done < nblocks; done++)
  {
    if (done == 0 || block_per_mcu)
    {
      /* One unit of the restart interval is used up here */
      if (cinfo->restarts_to_go == 0) process_restart(cinfo);
      cinfo->restarts_to_go--;
    }
    asm_huff_skip_blocks(cinfo, block, dctbl, actbl, quanttbl, last_dc_val, 1);
    block += DCTSIZE2;
  }
}

static void
do_huff_decode_blocks(decompress_info_ptr cinfo, JBLOCK block,
                      HUFF_TBL *dctbl, HUFF_TBL *actbl, QUANT_TBL_PTR quanttbl,
                      int *last_dc_val, int nblocks, BOOL block_per_mcu)
/* Restart-aware front end to asm_huff_decode_blocks.
 * With no restart interval in force the whole run of nblocks is handed to the
 * assembler in one call. Otherwise blocks are processed one at a time so that
 * the restart counter can be serviced: it is ticked before the first block,
 * and before every later block too when block_per_mcu is set.
 */
{
  int done;

  if (!cinfo->restart_interval)
  {
    asm_huff_decode_blocks(cinfo, block, dctbl, actbl, quanttbl, last_dc_val, nblocks);
    return;
  }

  for (done = 0; done < nblocks; done++)
  {
    if (done == 0 || block_per_mcu)
    {
      /* One unit of the restart interval is used up here */
      if (cinfo->restarts_to_go == 0) process_restart(cinfo);
      cinfo->restarts_to_go--;
    }
    asm_huff_decode_blocks(cinfo, block, dctbl, actbl, quanttbl, last_dc_val, 1);
    block += DCTSIZE2;
  }
}

/**************************************************************************
*                                                                         *
*    Band expansion from entropy encoded data.                            *
*                                                                         *
**************************************************************************/

static int do_1_component_band(decompress_info_ptr cinfo, int line_offset)
/* Expand one band of a single-component (monochrome) image from the Huffman
 * stream into cinfo->band_buffer. Blocks wholly to the left of cinfo->xmin
 * are skipped; blocks are then produced until cinfo->xmax is reached.
 * Returns the number of lines placed in the band buffer: 1 when jopt_DC_ONLY
 * is set (one flat value per block), otherwise 8.
 */
{
  QUANT_TBL_PTR qtbl = cinfo->quant_tbl_ptrs[cinfo->cur_comp_info[0]->quant_tbl_no];
  HUFF_TBL *dctbl = cinfo->dc_huff_tbl_ptrs[cinfo->cur_comp_info[0]->dc_tbl_no];
  HUFF_TBL *actbl = cinfo->ac_huff_tbl_ptrs[cinfo->cur_comp_info[0]->ac_tbl_no];
  int *out = cinfo->band_buffer;
  int x = 0;

  tracef("in do_1_component_band\n");

  /* Left clip: hop over blocks that are not needed at all */
  if (cinfo->xmin >= 8)
  {
    int nskip = (cinfo->xmin - 8) >> 3;

    do_huff_skip_blocks(cinfo, cinfo->jblocks[0], dctbl, actbl, qtbl, &cinfo->last_dc_val[0], nskip, TRUE);
    x += 8*nskip;
    out += ((cinfo->options & jopt_OUTBPP_8GREY) ? 2 : 8) * nskip; /* words per block of output */
  }

  if (cinfo->options & jopt_DC_ONLY)
  {
    /* Only the DC term of each block is wanted. Emit one line, eight equal
     * pixels per block, with no attempt to shrink in x.
     * NOTE(review): the left-clip code above advances 2 words per block for
     * jopt_OUTBPP_8GREY but this path always writes 8 words per block -
     * confirm whether the two options can be combined for mono images.
     */
    for (; x < cinfo->xmax; x += 8, out += 8)
    {
      int grey;
      int k;

      do_huff_skip_blocks(cinfo, cinfo->jblocks[1], dctbl, actbl, qtbl, &cinfo->last_dc_val[0], 1, TRUE);
      grey = mono_convert_pixel(cinfo, cinfo->last_dc_val[0] * qtbl[0]);
      for (k = 0; k < 8; k++) out[k] = grey;
    }
    return 1;
  }

  /* Normal case: full decode, inverse DCT and pixel conversion per block */
  for (; x < cinfo->xmax; x += 8)
  {
    do_huff_decode_blocks(cinfo, cinfo->jblocks[1], dctbl, actbl, qtbl, &cinfo->last_dc_val[0], 1, TRUE);
    asm_j_rev_dct(cinfo, cinfo->jblocks[1], 1); /* result lands in jblocks[0] */
    if (cinfo->options & jopt_OUTBPP_8GREY)
    {
      asm_mono_convert_block_8(cinfo->jblocks[0], out, line_offset);
      out += 2;
    }
    else
    {
      asm_mono_convert_block(cinfo->jblocks[0], out, line_offset);
      out += 8;
    }
  }
  return 8;
}

static int do_3_component_band(decompress_info_ptr cinfo, int line_offset)
/* Expand one band of a three-component image from the Huffman stream into
 * cinfo->band_buffer. Each group read from the stream is hsamp*vsamp blocks
 * of the first component followed by one block of each of the other two,
 * where hsamp/vsamp are the first component's sampling factors.
 * Groups lying wholly left of cinfo->xmin are skipped, then groups are output
 * until cinfo->xmax is reached. line_offset is the distance, in ints, between
 * successive output lines as used when indexing outptr.
 * Returns the number of lines produced: vsamp when jopt_DC_ONLY is set,
 * otherwise 8*vsamp.
 */
{
  int width = 0;                       /* x position (in pixels) reached so far */
  int *outptr = cinfo->band_buffer;
  int nlines_fetched;
  /* Huffman and quantisation tables for each of the three scan components */
  HUFF_TBL *ydc = cinfo->dc_huff_tbl_ptrs[cinfo->cur_comp_info[0]->dc_tbl_no];
  HUFF_TBL *yac = cinfo->ac_huff_tbl_ptrs[cinfo->cur_comp_info[0]->ac_tbl_no];
  QUANT_TBL_PTR yquanttbl = cinfo->quant_tbl_ptrs[cinfo->cur_comp_info[0]->quant_tbl_no];
  HUFF_TBL *udc = cinfo->dc_huff_tbl_ptrs[cinfo->cur_comp_info[1]->dc_tbl_no];
  HUFF_TBL *uac = cinfo->ac_huff_tbl_ptrs[cinfo->cur_comp_info[1]->ac_tbl_no];
  QUANT_TBL_PTR uquanttbl = cinfo->quant_tbl_ptrs[cinfo->cur_comp_info[1]->quant_tbl_no];
  HUFF_TBL *vdc = cinfo->dc_huff_tbl_ptrs[cinfo->cur_comp_info[2]->dc_tbl_no];
  HUFF_TBL *vac = cinfo->ac_huff_tbl_ptrs[cinfo->cur_comp_info[2]->ac_tbl_no];
  QUANT_TBL_PTR vquanttbl = cinfo->quant_tbl_ptrs[cinfo->cur_comp_info[2]->quant_tbl_no];
  int vsamp = cinfo->comp_info[0].v_samp_factor;
  int hsamp = cinfo->comp_info[0].h_samp_factor;
  
  tracef("in do_3_component_band\n");
  
  while (width + 8*hsamp <= cinfo->xmin) /* skip over blocks we don't want */
  {
    /* Restart accounting happens once per group, inside the first-component
     * skip (block_per_mcu is FALSE); the other two components go straight to
     * the assembler routine.
     */
    do_huff_skip_blocks(cinfo, cinfo->jblocks[0], ydc, yac, yquanttbl, &cinfo->last_dc_val[0], hsamp*vsamp, FALSE);
    asm_huff_skip_blocks(cinfo, cinfo->jblocks[0], udc, uac, uquanttbl, &cinfo->last_dc_val[1], 1);
    asm_huff_skip_blocks(cinfo, cinfo->jblocks[0], vdc, vac, vquanttbl, &cinfo->last_dc_val[2], 1);
    width += 8*hsamp;
    /* Advance the output pointer by the number of ints one group occupies in
     * the selected output format.
     */
    if (cinfo->options & jopt_OUTBPP_16) outptr += 4*hsamp;
    else if (cinfo->options & jopt_OUTBPP_8YUV) outptr += 2*hsamp;
    else if (cinfo->options & jopt_OUTBPP_8GREY) outptr += 2*hsamp;
    else outptr += 8*hsamp;
  }
  
  if (cinfo->options & jopt_DC_ONLY) /* colour, only want pixel tile values */
  {
    while (width < cinfo->xmax) /* the data we want */
    {
      int y[4];   /* dequantised DC value of each first-component block in the group */
      int u;
      int v;
      int i;
  
      /* The first block goes through the restart-aware wrapper, the rest of
       * the group is skipped directly; after each skip last_dc_val holds that
       * block's DC term.
       */
      do_huff_skip_blocks(cinfo, cinfo->jblocks[1], ydc, yac, yquanttbl, &cinfo->last_dc_val[0], 1, FALSE);
      y[0] = cinfo->last_dc_val[0] * yquanttbl[0];
      for (i = 1; i < hsamp*vsamp; i++)
      {
        asm_huff_skip_blocks(cinfo, cinfo->jblocks[1 + i], ydc, yac, yquanttbl, &cinfo->last_dc_val[0], 1);
        y[i] = cinfo->last_dc_val[0] * yquanttbl[0];
      }
      asm_huff_skip_blocks(cinfo, cinfo->jblocks[5], udc, uac, uquanttbl, &cinfo->last_dc_val[1], 1);
      asm_huff_skip_blocks(cinfo, cinfo->jblocks[6], vdc, vac, vquanttbl, &cinfo->last_dc_val[2], 1);
      if (cinfo->options & jopt_GREY) /* greyscale output is acceptable */
      {
        tracef("about to do replicate some luma\n");
        mono_convert_pixels(y); /* results back in y[0..3] */
        if (cinfo->options & jopt_OUTBPP_8GREY)
        {
          /* Native greyscale 8bpp */
          /* Two output words per block; the second output line is only
           * written when vsamp == 2.
           */
          outptr[0] = outptr[1] = YYYTOYYYY(y[0]);
          if (hsamp == 2) outptr[2] = outptr[3] = YYYTOYYYY(y[1]);
          if (vsamp == 2)
          {
            outptr[line_offset + 0] = outptr[line_offset + 1] = YYYTOYYYY(y[hsamp == 2 ? 2 : 1]);
            if (hsamp == 2) outptr[line_offset + 2] = outptr[line_offset + 3] = YYYTOYYYY(y[3]);
          }
        }
        else
        {
          /* Output greyscale 24bpp, and dither it later if needed */
          for (i = 0; i < 8; i++)
          {
            outptr[0 + i] = y[0];
            if (hsamp == 2) outptr[8 + i] = y[1];
            if (vsamp == 2)
            {
              outptr[line_offset + 0 + i] = y[hsamp == 2 ? 2 : 1];
              if (hsamp == 2) outptr[line_offset + 8 + i] = y[3];
            }
          }
        }
      }
      else
      {
        tracef("about to do YUV to greyscale\n");
        u = cinfo->last_dc_val[1] * uquanttbl[0];
        v = cinfo->last_dc_val[2] * vquanttbl[0];
        colour_convert_pixels(y, u, v); /* results back in y[0..3] */
        /* Replicate each converted value across the 8 pixels of its block */
        for (i = 0; i < 8; i++)
        {
          outptr[i] = y[0];
          if (hsamp == 2) outptr[8+i] = y[1];
          if (vsamp == 2)
          {
            outptr[line_offset+i] = y[hsamp == 2 ? 2 : 1];
            if (hsamp == 2) outptr[line_offset+8+i] = y[3];
          }
        }
      }
      width += 8*hsamp;
      if (cinfo->options & jopt_OUTBPP_16) outptr += 4*hsamp;
      else if (cinfo->options & jopt_OUTBPP_8YUV) outptr += 2*hsamp;
      else if (cinfo->options & jopt_OUTBPP_8GREY) outptr += 2*hsamp;
      else outptr += 8*hsamp;
    }
    nlines_fetched = vsamp;
  }
  else /* colour, normal case (want all pixels) */
  {
    while (width < cinfo->xmax) /* the data we want */
    {
      /* Decode a whole group: first-component blocks into jblocks[1..],
       * the other two components into jblocks[5] and jblocks[6].
       */
      do_huff_decode_blocks(cinfo, cinfo->jblocks[1], ydc, yac, yquanttbl, &cinfo->last_dc_val[0], hsamp*vsamp, FALSE);
      asm_huff_decode_blocks(cinfo, cinfo->jblocks[5], udc, uac, uquanttbl, &cinfo->last_dc_val[1], 1);
      asm_huff_decode_blocks(cinfo, cinfo->jblocks[6], vdc, vac, vquanttbl, &cinfo->last_dc_val[2], 1);

      if (cinfo->options & jopt_GREY) /* greyscale output is acceptable */
      {
        /* Only the first component is inverse-transformed in this branch */
        tracef("about to do some grey conversion\n");
        if (cinfo->options & jopt_OUTBPP_8GREY)
        {
          asm_j_rev_dct(cinfo, cinfo->jblocks[1], hsamp*vsamp); /* output in jblocks[0..3] */
          asm_mono_convert_block_8(cinfo->jblocks[0], outptr, line_offset);
          if (hsamp == 2) asm_mono_convert_block_8(cinfo->jblocks[1], outptr + 2, line_offset);
          if (vsamp == 2)
          {
            asm_mono_convert_block_8(cinfo->jblocks[hsamp == 1 ? 1 : 2], outptr + 8*line_offset, line_offset);
            if (hsamp == 2) asm_mono_convert_block_8(cinfo->jblocks[3], outptr + 8*line_offset + 2, line_offset);
          }
        }
        else
        {
          asm_j_rev_dct(cinfo, cinfo->jblocks[1], hsamp*vsamp); /* output in jblocks[0..3] */
          asm_mono_convert_block(cinfo->jblocks[0], outptr, line_offset);
          if (hsamp == 2) asm_mono_convert_block(cinfo->jblocks[1], outptr + 8, line_offset);
          if (vsamp == 2)
          {
            asm_mono_convert_block(cinfo->jblocks[hsamp == 1 ? 1 : 2], outptr + 8*line_offset, line_offset);
            if (hsamp == 2) asm_mono_convert_block(cinfo->jblocks[3], outptr + 8*line_offset + 8, line_offset);
          }
        }
      }
      else
      {
        tracef("about to do some colour conversion\n");
        if (hsamp*vsamp == 4)
        {
          asm_j_rev_dct(cinfo, cinfo->jblocks[1], 6); /* output in jblocks[0..5] */ /* usual, speed-critical case */
          if (cinfo->options & jopt_OUTBPP_16)
            asm_colour_convert_block_16(cinfo->jblocks[0], (short int*) outptr, line_offset);
          else if (cinfo->options & jopt_OUTBPP_8YUV)
            asm_colour_convert_block_8(cinfo->jblocks[0], (char*) outptr, line_offset);
          else
            asm_colour_convert_block(cinfo->jblocks[0], outptr, line_offset);
        }
        else
        {
          /* The other two components always sit in jblocks[5] and jblocks[6],
           * so they are transformed by a separate call.
           */
          asm_j_rev_dct(cinfo, cinfo->jblocks[1], hsamp*vsamp); /* weird aspect ratio - only do DCTs we need to do */
          asm_j_rev_dct(cinfo, cinfo->jblocks[5], 2);
          colour_convert_unusual_block(cinfo->jblocks[0], outptr, line_offset, hsamp, vsamp);
        }
      }
      width += 8*hsamp;
      if (cinfo->options & jopt_OUTBPP_16) outptr += 4*hsamp;
      else if (cinfo->options & jopt_OUTBPP_8YUV) outptr += 2*hsamp;
      else if (cinfo->options & jopt_OUTBPP_8GREY) outptr += 2*hsamp;
      else outptr += 8*hsamp;
    }
    nlines_fetched = 8*vsamp;
  }

  return nlines_fetched;
}

/**************************************************************************
*                                                                         *
*    JPEG entry points.                                                   *
*                                                                         *
**************************************************************************/

/* Error-exit helpers for functions that have a 'cinfo' in scope and return an
 * int status. Each one traces the failure, records the reason (and up to two
 * arguments) in cinfo, and returns the reason code from the enclosing
 * function. They expand to a brace-enclosed block rather than a single
 * statement, so take care when using them as the body of an if that has an
 * else.
 */
#define BADFILE(reason) {tracef("bad file %d" _ reason); \
                         cinfo->error_code = reason; \
                         return reason;}
#define BADFILE1(reason,arg) {tracef("bad file %d %d" _ reason _ arg); \
                              cinfo->error_code = reason; \
                              cinfo->error_argument1 = arg; \
                              return reason;}
#define BADFILE2(reason,arg1,arg2) {tracef("bad file %d %d %d" _ reason _ arg1 _ arg2); \
                                    cinfo->error_code = reason; \
                                    cinfo->error_argument1 = arg1; cinfo->error_argument2 = arg2; \
                                    return reason;}

int jpeg_scan_file(decompress_info_ptr cinfo, char *file_image, int image_length,
                          int xmin, int xmax, int width, int height, int options)
/* Effectively the length of the workspace available is passed in as cinfo->workspace_size,
 * a bit illogical. cinfo->error_code is also important.
 */
{
  int i;
  int vsamp, hsamp;

#ifdef EMBED
  tracef("jpeg_scan_file cinfo=0x%x file_image=0x%x image_length=%i xmin=%i xmax=%i width=%i height=%i options=%i\n"
         _ (int)cinfo _ (int)file_image _ image_length _ xmin _ xmax _ width _ height _ options);
  tracef("wssize %x\n" _ cinfo->workspace_size);
#endif

  if (options & jopt_DC_ONLY)
  {
    options &= ~(jopt_OUTBPP_8 | jopt_OUTBPP_16 | jopt_OUTBPP_8YUV);
    tracef("Clearing 8YUV because of DC_ONLY flag\n");
  }
  if (xmin < 0) xmin = 0; /* xmax will be clipped to image_height, when we know it. */
  if (options & (jopt_OUTBPP_8 | jopt_OUTBPP_8YUV))
  {
    /* xmin = 0;*/ /* always have to do left part, because errors diffuse over from there */
    xmin -= 16; /* cos errors can diffuse forwards a little - clipped to 0 later */
    if (xmin < 0) xmin = 0;
    xmin &= ~15; /* round down to multiple of 16 - needed by dithering */
    xmax += 16; /* cos errors can diffuse backwards a little - clipped to image width later */
  }

#ifdef EMBED
  tracef("err %x buf %x file %x img %x\n" _ cinfo->error_code _ cinfo->input_buffer _ file_image _ image_length);
  tracef("ck1 %s im1 %x\n" _ cinfo->check1 _ ((int*)file_image)[image_length/(2*4)]);
  tracef("ck2 %s im2 %x\n" _ cinfo->check2 _ ((int*)file_image)[image_length/4 - image_length/(4*4)]);
  tracef("ck3 %s im3 %x\n" _ cinfo->check3 _ ((int*)file_image)[image_length/4 - image_length/(8*4)]);
#endif

  /* Look to see if this is precisely the same JPEG file as last time */
  if (  cinfo->error_code == 0                                               /* check for very first time, or for error last time */
     && cinfo->input_buffer == file_image                                    /* check for image in same place */
     && cinfo->buffer_end - cinfo->input_buffer == image_length              /* check length unchanged */
     && (width == -1 || cinfo->image_width == width)                         /* width OK */
     && (height == -1 || cinfo->image_height == height)                      /* height OK */
     && cinfo->check1 == ((int*)file_image)[image_length/(2*4)]                  /* Random checks on data - the /4 gets us down to char offsets */
     && cinfo->check2 == ((int*)file_image)[image_length/4 - image_length/(4*4)] /* we check a word half-way through, and two later on */
     && cinfo->check3 == ((int*)file_image)[image_length/4 - image_length/(8*4)] /* any change will perturb the whole file - we trust! */
     && cinfo->options == options
     )
  {
    tracef("This looks like the same JPEG file as last time.\n");
    if (xmax > cinfo->image_width) xmax = cinfo->image_width;
    if (xmin < cinfo->xmin || xmax > cinfo->xmax) cinfo->current_huff_pointer = -1; /* no band sufficiently loaded */
    cinfo->xmin = xmin; /* might need more, or less, than last time */
    cinfo->xmax = xmax;

#ifdef EMBED
    tracef("Exit jpeg_scan_file OK\n\n");
#endif
    return 0;
  }

  /* If called for the first time, initialise the table32k pointer. */
  if (cinfo->error_code < 0) cinfo->table32k = 0;

  /* Clear out the info struct, preserving any possible workspace size. */
  init_workspace(cinfo, cinfo->workspace_size);

  /* Set up the band buffer pointer. */
  if (cinfo->workspace_size == 0) /* there's no workspace, they just want us to scan the header */
  {
    cinfo->band_buffer = NULL;
    cinfo->band_buffer_size = (64*1024 /* JPEG width limit */) *
                              (8*2 /* 8 x vsamp max */); /* prevent E_TOO_WIDE complaints later on */
  }
  else
  {
    cinfo->band_buffer = (int*)((char*)cinfo + sizeof(struct decompress_info_struct));
    cinfo->band_buffer_size = (cinfo->workspace_size - sizeof(struct decompress_info_struct))/sizeof(int) & 0xfffffff0;
  }

  /* Set up the check words, to hope for repeated calls on this sprite */
  cinfo->check1 = ((int*)file_image)[image_length/(2*4)];
  cinfo->check2 = ((int*)file_image)[image_length/4 - image_length/(4*4)];
  cinfo->check3 = ((int*)file_image)[image_length/4 - image_length/(8*4)];

  /* Preserve pointers to the jpeg file image */
  cinfo->input_buffer = file_image;
  cinfo->next_input_byte = file_image;
  cinfo->buffer_end = file_image + image_length;

  /* Read the file header - sets various cinfo fields. */
  read_file_header(cinfo);

  /* Read the header for the first scan - sets various cinfo fields. */
  read_scan_header(cinfo);

  /* Initialise any huffman tables present. */
  for (i = 0; i < 4; i++)
  {
    fix_huff_tbl(cinfo, cinfo->dc_huff_tbl_ptrs[i]);
    fix_huff_tbl(cinfo, cinfo->ac_huff_tbl_ptrs[i]);
  }
  tracef("Huffman tables fixed.\n");
  tracef("Image starts at byte %i of JPEG data.\n" _ cinfo->next_input_byte - cinfo->input_buffer);

  /* Initialise the first huff decoding stream. */
  huff_decoder_init(cinfo);
  cinfo->current_huff_pointer = -1; /* No band currently unloaded. */
  cinfo->restarts_to_go = cinfo->restart_interval;
  cinfo->next_restart_num = 0;
  if (xmax > cinfo->image_width) xmax = cinfo->image_width;
  if (xmin > xmax) xmin = xmax; /* guard against silly input */
  cinfo->xmin = xmin;
  cinfo->xmax = xmax;
  cinfo->options = options;
  vsamp = cinfo->comp_info[0].v_samp_factor;
  hsamp = cinfo->comp_info[0].h_samp_factor;

  /* Check various limitations of our code. */
  if (cinfo->data_precision != 8) BADFILE1(E_PRE_NOT_8, cinfo->data_precision);
  if (cinfo->num_components != cinfo->comps_in_scan) BADFILE(E_MULTI_SCAN);
  if (cinfo->image_width != width && width != -1) BADFILE1(E_WIDTH_DISAGREES, cinfo->image_width);
  if (cinfo->image_height != height && height != -1) BADFILE1(E_HEIGHT_DISAGREES, cinfo->image_height);

  /* Allocate the array of pointers into the huffman codes, at the base of where the band
   * buffer currently is.
   */
  {
    int mcu_height = 8 * vsamp; /* 8 or 16 */
    int huff_array_size = sizeof(huff_pointer) * ((cinfo->image_height + mcu_height - 1)/mcu_height); /* in bytes */

    cinfo->huff_pointers = (huff_pointer*) cinfo->band_buffer;
    cinfo->band_buffer += huff_array_size/sizeof(int);
    cinfo->band_buffer_size -= huff_array_size/sizeof(int);
    if (cinfo->image_width > cinfo->band_buffer_size / mcu_height)
      BADFILE2(E_TOO_WIDE, cinfo->image_width, cinfo->band_buffer_size / mcu_height);
  }

  /* Now try the specific cases that we can do. */
  switch (cinfo->num_components)
  {
    case 1:
      tracef("num_components = 1 (Greyscale file)\n");

      if (hsamp != 1 || vsamp != 1)
        BADFILE2(E_BAD_SAMPLE, hsamp, vsamp);
      if (cinfo->jpeg_color_space == CS_GRAYSCALE)
      {
        int height = 0;
        int hpointer_index = 0;
        HUFF_TBL *dc = cinfo->dc_huff_tbl_ptrs[cinfo->cur_comp_info[0]->dc_tbl_no];
        HUFF_TBL *ac = cinfo->ac_huff_tbl_ptrs[cinfo->cur_comp_info[0]->ac_tbl_no];
        QUANT_TBL_PTR quanttbl = cinfo->quant_tbl_ptrs[cinfo->cur_comp_info[0]->quant_tbl_no];

        options &= ~(jopt_OUTBPP_16 | jopt_OUTBPP_8YUV); /* haven't got optimised colour conversion for 16bpp output */

        while (height < cinfo->image_height)
        {
          /* save the state of the huff stream. */
          save_huff_stream(cinfo, &cinfo->huff_pointers[hpointer_index]);
          hpointer_index++;
          do_huff_skip_blocks(cinfo, cinfo->jblocks[0], dc, ac, quanttbl, &cinfo->last_dc_val[0], (cinfo->image_width + 7) >> 3, TRUE);
          height += 8;
        }
      }
      else
        BADFILE1(E_COLOUR, cinfo->jpeg_color_space);
      break;

    case 3:
      tracef("num_components = 3 (YUV file)\n");

      /* We expect an MCU (minimum coding unit) of 2x2 Y blocks to each U and V block.
       * We will accept MCUs of 1x1:1:1 or 2x1:1:1 and 1x2:1:1 as well, since there
       * are examples of this.
       * JFIF allows an arbitrary ratio - seems uncecessary. A band is always 1 MCU high.
       */
      if (hsamp > 2 || vsamp > 2)
        BADFILE2(E_BAD_SAMPLE, hsamp, vsamp);
      if (cinfo->comp_info[1].h_samp_factor != 1 || cinfo->comp_info[1].v_samp_factor != 1)
        BADFILE2(E_BAD_SAMPLE, cinfo->comp_info[1].h_samp_factor, cinfo->comp_info[1].v_samp_factor);
      if (cinfo->comp_info[2].h_samp_factor != 1 || cinfo->comp_info[2].v_samp_factor != 1)
        BADFILE2(E_BAD_SAMPLE, cinfo->comp_info[2].h_samp_factor, cinfo->comp_info[2].v_samp_factor);
      if (cinfo->jpeg_color_space == CS_YCbCr)
      {
        int height = 0;
        int hpointer_index = 0;
        HUFF_TBL *ydc = cinfo->dc_huff_tbl_ptrs[cinfo->cur_comp_info[0]->dc_tbl_no];
        HUFF_TBL *yac = cinfo->ac_huff_tbl_ptrs[cinfo->cur_comp_info[0]->ac_tbl_no];
        QUANT_TBL_PTR yquanttbl = cinfo->quant_tbl_ptrs[cinfo->cur_comp_info[0]->quant_tbl_no];
        HUFF_TBL *udc = cinfo->dc_huff_tbl_ptrs[cinfo->cur_comp_info[1]->dc_tbl_no];
        HUFF_TBL *uac = cinfo->ac_huff_tbl_ptrs[cinfo->cur_comp_info[1]->ac_tbl_no];
        QUANT_TBL_PTR uquanttbl = cinfo->quant_tbl_ptrs[cinfo->cur_comp_info[1]->quant_tbl_no];
        HUFF_TBL *vdc = cinfo->dc_huff_tbl_ptrs[cinfo->cur_comp_info[2]->dc_tbl_no];
        HUFF_TBL *vac = cinfo->ac_huff_tbl_ptrs[cinfo->cur_comp_info[2]->ac_tbl_no];
        QUANT_TBL_PTR vquanttbl = cinfo->quant_tbl_ptrs[cinfo->cur_comp_info[2]->quant_tbl_no];

        if (hsamp != 2 || vsamp != 2) options &= ~(jopt_OUTBPP_16 | jopt_OUTBPP_8YUV); /* haven't got optimised colour conversion for unusual colour blocks */
  
        while (height < cinfo->image_height)
        {
          int width;

          /* save the state of the huff stream. */
          save_huff_stream(cinfo, &cinfo->huff_pointers[hpointer_index]);
          hpointer_index++;
  
          width = 0;
          while (width < cinfo->image_width)
          {
            /* Skip over the six blocks representing this square of pixels */
            do_huff_skip_blocks(cinfo, cinfo->jblocks[1], ydc, yac, yquanttbl, &cinfo->last_dc_val[0], hsamp * vsamp, FALSE);
            asm_huff_skip_blocks(cinfo, cinfo->jblocks[5], udc, uac, uquanttbl, &cinfo->last_dc_val[1], 1);
            asm_huff_skip_blocks(cinfo, cinfo->jblocks[6], vdc, vac, vquanttbl, &cinfo->last_dc_val[2], 1);
            width += 8 * hsamp;
          }
          height += 8 * vsamp;
        }
      }
      else
        BADFILE1(E_COLOUR, cinfo->jpeg_color_space);
      break;

    case 4:
      tracef("num_components = 4 (CMYK file)\n");
    default:
      BADFILE1(E_COMPONENTS, cinfo->num_components);
      break;
  }

  if (options & jopt_INTERP_X) /* interpolation requested */
  {
    int size_per_line = cinfo->band_buffer_size / (8 * hsamp);

    if (cinfo->image_width * 2 <= size_per_line)
      cinfo->error_argument1 |= jopt_INTERP_X; /* signal that we'll do it */
    else
      cinfo->options &= ~jopt_INTERP_X; /* not enough space - remember that we won't do it */
  }
  if (options & jopt_OUTBPP_8) cinfo->error_argument1 |= jopt_OUTBPP_8;
  if (options & jopt_OUTBPP_16) cinfo->error_argument1 |= jopt_OUTBPP_16;
  if (options & jopt_OUTBPP_8YUV) cinfo->error_argument1 |= jopt_OUTBPP_8YUV;
  if (options & jopt_OUTBPP_8GREY) cinfo->error_argument1 |= jopt_OUTBPP_8GREY;
  cinfo->options = options; /* in case processing above has discovered options we're not prepared to do */

#ifdef DEBUG
  tracef("cinfo->erooarg1 = %d\n" _ cinfo->error_argument1);
  tracef("After scan file, options are:\n");
  if (options & jopt_GREY) tracef("  jopt_GREY\n");
  if (options & jopt_DC_ONLY) tracef("  jopt_DC_ONLY\n");
  if (options & jopt_INTERP_X) tracef("  jopt_INTERP_X\n");
  if (options & jopt_OUTBPP_8) tracef("  jopt_OUTBPP_8\n");
  if (options & jopt_OUTBPP_16) tracef("  jopt_OUTBPP_16\n");
  if (options & jopt_OUTBPP_8YUV) tracef("  jopt_OUTBPP_8YUV\n");
  if (options & jopt_DIFFUSE) tracef("  jopt_DIFFUSE\n");
  if (options & jopt_OUTBPP_8GREY) tracef("  jopt_OUTBPP_8GREY\n");
#endif

#ifdef EMBED
  tracef("Exit jpeg_scan_file OK\n\n");
#endif
  return cinfo->error_code;
}

int *jpeg_find_line(decompress_info_ptr cinfo, int ycoord, int *palette_data)
/* Return the address, within the band buffer, of the pixels for one row of
 * the image. Called once per rendered row: usually the row is already in the
 * band buffer, but whenever the row falls in a different band from the one
 * currently held, that band is decoded (and error diffused, if requested)
 * first.
 * ycoord is RISC OS-style on entry, ie 0 is the bottom row of the image.
 */
{
  int band_shift = 2 + cinfo->comp_info[0].v_samp_factor; /* log2 of rows per band: 3 when v_samp_factor is 1, 4 when it is 2 */
  int row_stride = cinfo->band_buffer_size >> band_shift;  /* words from one output row to the next */
  int band_index;                                          /* index into cinfo->huff_pointers */
  int row_in_band;                                         /* which row of the band buffer to hand back */

  tracef("jpeg_find_line, palette data at %x\n" _ palette_data);

  /* Flip the coordinate so that 0 means the top row, which is the order in
   * which the JPEG data is stored.
   */
  ycoord = cinfo->image_height - ycoord - 1;
  assert(cinfo->band_buffer != NULL, ERROR_FATAL); /* header-only scans have no band buffer */
  assert(ycoord >= 0, ERROR_FATAL);
  assert(ycoord < cinfo->image_height, ERROR_FATAL);
  band_index = ycoord >> band_shift;

  if (band_index != cinfo->current_huff_pointer)
  {
    /* The wanted band is not the one in the buffer - decode it. */
    int rows_decoded = 0;

    cinfo->current_huff_pointer = -1; /* buffer contents are invalid until we finish successfully */
    restore_huff_stream(cinfo, &cinfo->huff_pointers[band_index]); /* rewind the huffman stream to the band start */

    /* Decode one band. Any other component count was rejected by
     * jpeg_scan_file(), so nothing is fetched for it here.
     */
    if (cinfo->num_components == 1)
      rows_decoded = do_1_component_band(cinfo, row_stride); /* Greyscale */
    else if (cinfo->num_components == 3)
      rows_decoded = do_3_component_band(cinfo, row_stride); /* YUV */

    if (cinfo->options & jopt_DIFFUSE)
    {
      int *src = cinfo->band_buffer + cinfo->xmin;
      int  remaining = (cinfo->xmax - cinfo->xmin + 15) & ~15; /* wanted width, rounded up to a multiple of 16 */
      int  available = cinfo->image_width - cinfo->xmin;       /* but never run past the image's right edge */

      /* Diffusion proceeds in units of 16 pixels starting from xmin (itself a
       * multiple of 16), so that the result for any given pixel does not
       * depend on the clipping. A very thin final strip cannot be diffused,
       * so the last unit is allowed to grow to as many as 31 pixels instead.
       */
      if (remaining > available) remaining = available;

      if (!cinfo->table32k_unavailable)
      {
        /* Ask for the 16bpp->8bpp lookup table. A failure is remembered so
         * that we never ask again - probably an old OS/ColourTrans which
         * cannot supply one.
         */
        cinfo->table32k = asm_get_table32k(palette_data);
        if (cinfo->table32k == 0) cinfo->table32k_unavailable = TRUE;
        tracef("Fetched 32k lookup table, at 0x%x\n" _ (int) cinfo->table32k);
      }

      if (cinfo->options & jopt_OUTBPP_8)
      {
        /* Output is one byte per pixel, written back into the band buffer. */
        char *dst = (char *)cinfo->band_buffer + cinfo->xmin;

        while (remaining > 0)
        {
          int chunk = remaining >= 32 ? 16 : remaining; /* fold a narrow right hand remnant into the last unit */

          tracef("calling diffuse to 8bpp code. palette = %x\n" _ palette_data);
          asm_diffuse_to_8bpp(src, chunk, dst, cinfo->table32k, rows_decoded, row_stride, palette_data);
          dst += chunk;
          src += chunk;
          remaining -= chunk;
          tracef("done diffusion, outptr = &%x, blockwidth = &%xlinelen = &%x\n" _ dst _ chunk _ remaining);
        }
      }
      else
      {
        /* Output is one word per pixel, written back in place. */
        int *dst = cinfo->band_buffer + cinfo->xmin;

        while (remaining > 0)
        {
          int chunk = remaining >= 32 ? 16 : remaining; /* fold a narrow right hand remnant into the last unit */

          tracef("calling diffuse to palette entries code. palette = %x\n" _ palette_data);
          asm_diffuse_to_24bpp(src, chunk, dst, cinfo->table32k, rows_decoded, row_stride, palette_data);
          dst += chunk;
          src += chunk;
          remaining -= chunk;
          tracef("done diffusion, outptr = &%x, blockwidth = &%xlinelen = &%x\n" _ dst _ chunk _ remaining);
        }
      }
    }

    cinfo->current_huff_pointer = band_index; /* band decoded without error - remember which one we hold */
  }

  /* Work out which row of the band buffer holds the answer. */
  if (cinfo->options & jopt_DC_ONLY)
  {
    /* DC-only output is not replicated for each of the 8 rows of a block: a
     * 16 row band holds just two distinct rows, one row_stride apart, and an
     * 8 row band holds just one.
     */
    row_in_band = (band_shift == 4 && (ycoord & 0xf) >= 8) ? 1 : 0;
  }
  else
  {
    /* Normal output - one of the 8 or 16 rows of the band. */
    row_in_band = ycoord & ((1 << band_shift) - 1);
  }

  return cinfo->band_buffer + row_in_band * row_stride;
}

/* JPEG marker codes, ie the value of the byte which follows an 0xff. */

/* Image framing */
#define M_SOI         0xd8 /* Start of image */
#define M_EOI         0xd9 /* End of image */
#define M_SOS         0xda /* Start of scan */

/* Application segments */
#define M_APP0        0xe0
#define M_APP1        0xe1

/* Start of frame: (marker & M_SOFMASK) == M_SOFANY accepts anything in the
 * range 0xc0 to 0xcf, of which only the two listed types are decoded.
 */
#define M_SOFMASK     0xf0
#define M_SOFANY      0xc0
#define M_SOF0        0xc0 /* Baseline */
#define M_SOF1        0xc1 /* Extended sequential */

int find_image_dims(char *jdata, image_dims_info *image, int *ws_size)
/* Code to find basic dimensions of a JPEG file. Not done via main scanning
 * code so that you can decide these things before you allocate the band buffer
 * etc.
 * Returns 0 if all is OK
 *         1 if this is not kosher JPEG data
 *         2 if this is valid JPEG data but cannot be rendered by SpriteExtend
 *         > 2 is an OS error pointer
 */
{
  int             iwidth, iheight;
  int             itype = 0;
  int             new_size;
#ifndef CFSI
  BOOL            load_file;
  char            buffer[1024];
  int             file_handle,
                  dataend;
  int             pixel_density;
  _kernel_oserror *e;
#endif

  tracef("Find image dims called.\n");
#ifndef CFSI
  /* Bit 0 clear means its all in memory, else load from file */
  load_file = (((unsigned)ws_size & 3) == 1);
  if (load_file) 
  {
    tracef("We think it's a filename '%s'\n" _ jdata);
    e = _swix(OS_Find, _IN(0) | _IN(1) | _OUT(0), 0x4f, jdata, &file_handle);
    if (e != 0) /* failed to open the file */
    {
      tracef("e points to err %x '%s'\n" _ e->errnum _ e->errmess);
      return ((int)e);
    }
    jdata = buffer;
    tracef("Going to scan top of handle %d into buffer = %x\n" _ file_handle _ buffer);
    e = _swix(OS_GBPB, _IN(0) | _IN(1) | _IN(2) | _IN(3) | _OUT(2),
                  4, file_handle, jdata, sizeof(buffer), &dataend);
    if (e != 0)
    {
      _swix(OS_Find, _IN(0) | _IN(1), 0, file_handle);
      return ((int)e);
    }
    tracef("Got jdata = %x, data_end = %x\n" _ jdata _ dataend);
  }
#endif
  tracef("jdata = %x\n" _ jdata);

  /* Just check there's a SOI tag, the other decoding software can look
   * at the M_APPn tags. Thus we let through JFIF and EXIF at this stage
   * See JFIF 1.02 spec
   *     EXIF 2.00 spec
   */
  if (jdata[0] != 0xff || jdata[1] != M_SOI || jdata[2] != 0xff)
  {
    tracef("Even a cursory look shows it ain't a jpeg\n");
#ifndef CFSI
    if (load_file)
    {
      e = _swix(OS_Find, _IN(0) | _IN(1), 0, file_handle);
      if (e != 0) return ((int)e);
    }
#endif
    return 1;
  }

#ifndef CFSI
  if (jdata[3] == M_APP0)
  {
    int xdens, ydens;

    tracef("pixel density is %x\n" _ jdata[13]);
    tracef("  jdata[14-17], %x %x %x %x\n" _ jdata[14] _ jdata[15] _ jdata[16] _ jdata[17]);
    switch (jdata[13]) /* from JFIF 1.02 definition, if APP0 exists the density units will be here */
    {
      case 0:
        xdens = STANDARDDPI;
        ydens = (jdata[14] << 8) + jdata[15]; /* take into account the case where the scale may be 0:0, pah! */
        if (ydens == 0) ydens = 1;
        ydens = ((jdata[16] << 8) + jdata[17]) / ydens;
        if (ydens == 0)
          ydens = STANDARDDPI;
        else
          ydens *=STANDARDDPI;
        pixel_density = (xdens << 16) | ydens;
        itype |= 0x80;    /* set bit in type word to indicate density is only a ratio */
        break;

      case 1:
        pixel_density = (jdata[14] << 24) + (jdata[15] << 16) + (jdata[16] << 8) + jdata[17];
        /* Should make pixel_density = [XdenHigh XdenLow YdenHigh YdenLow] */
        break;

      case 2:
        pixel_density = (int)(jdata[14]*CMTOINCH_NUM/CMTOINCH_DEN) << 24 + (int)(jdata[15]*CMTOINCH_NUM/CMTOINCH_DEN) << 16
                         + (int)(jdata[16]*CMTOINCH_NUM/CMTOINCH_DEN) << 8 + (int)(jdata[17]*CMTOINCH_NUM/CMTOINCH_DEN);
        break;

      default:
        if (load_file)
        {
          e = _swix(OS_Find, _IN(0) | _IN(1), 0, file_handle);
          if (e != 0) return ((int)e);
        }
        return 1;
    }
  }
  else
  {
    tracef("no APP0 tag, assuming a 1:1 xdpi to ydpi ratio\n");
    itype |= 0x80;    /* set bit in type word to indicate density is only a ratio */
    pixel_density = (STANDARDDPI << 16) | STANDARDDPI;
  }
  tracef("pixel density is %d\n" _ pixel_density);
#endif

  /* We need the image width and height in order to construct the
   * sprite header. Walk into the JPEG file until we find the SOF marker.
   */
  jdata += 2; /* skip over 0xFF, SOI(0xd8) */
  tracef("jdata = %x\n" _ jdata);
  while (jdata[0] == 0xff &&
         ((jdata[1] & M_SOFMASK) != M_SOFANY) && /* until we get to any SOF marker */
         jdata[1] != M_SOS &&
         jdata[1] != M_EOI) /* or start of scan or end of image */
  {
    jdata += (jdata[2] << 8) + jdata[3] + 2; /* skip over variable length marker */
#ifndef CFSI
    if (load_file && ((int)jdata >= dataend))  /* need to load up more data */
    {
      int   offset = (int)jdata - dataend;
      int   current;

      /* Next tag isn't in the 1k of RAM so jump forward and load another 1k
       * such that the new tag is at the start of the RAM, thus there's no
       * danger of reading the 'next offset' from off the end of buffer
       */
      tracef("Loading another chunk. jdata = %x, offset = %x\n" _ jdata _ offset);
      _swix(OS_Args, _IN(0) | _IN(1) | _OUT(2), 0, file_handle, &current);
      jdata = buffer;
      e = _swix(OS_GBPB, _IN(0) | _IN(1) | _IN(2) | _IN(3) | _IN(4) | _OUT(2),
                    3, file_handle, jdata, sizeof(buffer), current + offset, &dataend);
      if (e != 0)
      {
        _swix(OS_Find, _IN(0) | _IN(1), 0, file_handle);
        return ((int)e);
      }
      if ((int)jdata == dataend)
      {
        jdata[1] = M_EOI;
        break; /* nothing was transferred, must be EOF */
      }
    }
#endif
  }

  if (jdata[1] == M_SOF0 || jdata[1] == M_SOF1) /* found a supported SOF marker */
  {
    iheight = (jdata[5] << 8) + jdata[6];
    iwidth = (jdata[7] << 8) + jdata[8];
    itype |= jdata[9]; /* 1 for monochrome or 3 for YUV, the other state we can accept. */
    itype |= ((jdata[1] & ~M_SOFMASK) << 8); /* mix in the 4 bit SOF type */
  }
  else
  {
    tracef("Header looks like JPEG data, but not not one we recognise.\n");
    if (jdata[1] == M_SOS || jdata[1] == M_EOI) return 1;
    return 2; /* wrong sort of entropy encoding. */
  }

  /* This is pessimistic overall for files with strange Y:U:V sample ratios - don't care.
   * (estimate for the huff_pointer array is optimistic, but the band buffer
   * size is vastly pessimistic)
   */
  new_size = sizeof(struct decompress_info_struct) + 1024 +
             sizeof(huff_pointer) * (itype == 1 ? ((iheight + 7)/8) : ((iheight + 15)/16)) +
             (itype == 1 ? 8 : 16) * (iwidth + 15) * sizeof(int);
  new_size = (new_size + 3) & ~3; /* round up to a multiple of 4 */

  if (image != NULL)
  {
    image->height = iheight;
    image->width = iwidth;
    image->type = itype;
#ifndef CFSI
    image->density = pixel_density;
#endif
  }

#ifndef CFSI
  if ((unsigned)ws_size > 3)
  {
    decompress_info_ptr  cinfo;
    int                  current_size, page_size;

    ws_size = (int *)((unsigned)ws_size & ~3);
    cinfo = (decompress_info_ptr)*ws_size;
    current_size = cinfo->workspace_size;

    /* If cinfo is not NULL then it points to the current decompress_info_struct which
     * contains info about the current memory allocation
     */
    if (cinfo == NULL)
    {
      new_size = new_size + (10*1024); /* first jpeg => alloc size we need plus safety margin */
    }
    else
    {
      if (cinfo->workspace_size >= new_size)
      {
        new_size = 0; /* we already have enough memory */
      }
      else
      {
        new_size -= cinfo->workspace_size; /* size that will be added for this jpeg */
      }
    }
    _swix(OS_ReadMemMapInfo, _OUT(0), &page_size);
    new_size = (new_size + (page_size - 1)) & ~(page_size - 1);
    *ws_size = new_size;
    tracef("We need another %x bytes\n" _ new_size);
  }

  if (load_file) /* We need to close the file... */
  {
    tracef("closing file\n");
    e = _swix(OS_Find, _IN(0) | _IN(1), 0, file_handle);
    if (e != 0)
    {
      return ((int)e);
    }
  }
#else /* CFSI */
  if ((unsigned)ws_size > 1)
  {
    *ws_size = new_size;
  }
#endif

  return 0; /* all is well */
}