diff --git a/Sources/CSupport b/Sources/CSupport
index 42e22edd1551a2110688092c4153cac9f2c3a079..fc40a8b2d2e1f14a11b8f4d6bfd62ad710801951 100644
--- a/Sources/CSupport
+++ b/Sources/CSupport
@@ -16,6 +16,8 @@
 
         EXPORT  |__rt_sdiv|
         EXPORT  |__rt_udiv|
+        EXPORT  |memcpy|
+        EXPORT  |memset|
 
 |x$divide|
 |__rt_sdiv|
@@ -165,4 +167,25 @@ u_sh0   RSBS    ip, a1, a2
         MOV     a1, a3
         MOV     pc, r14
 
+|memcpy|
+; extern void *memcpy(void *a1, const void *a2, size_t a3) - forward byte-by-byte copy; a1 is never written so it is returned as-is
+        TEQ     a3, #0                  ; Z set if the count is zero, so every NE-conditional instruction below is skipped
+        MOVNE   ip, a1                  ; walk a copy of the destination pointer
+mc_0
+        LDRNEB  a4, [a2], #1            ; load one source byte, post-incrementing a2
+        STRNEB  a4, [ip], #1            ; store it, post-incrementing the destination copy
+        SUBNES  a3, a3, #1              ; count down and update the flags for the BNE
+        BNE     mc_0                    ; repeat until the count reaches zero
+        MOV     pc, lr                  ; return to caller
+        
+|memset|
+; extern void *memset(void *a1, int a2, size_t a3) - byte-by-byte fill; a1 is never written so it is returned as-is
+        TEQ     a3, #0                  ; Z set if the count is zero, so every NE-conditional instruction below is skipped
+        MOVNE   ip, a1                  ; walk a copy of the destination pointer
+ms_0
+        STRNEB  a2, [ip], #1            ; store the low byte of a2, post-incrementing the destination copy
+        SUBNES  a3, a3, #1              ; count down and update the flags for the BNE
+        BNE     ms_0                    ; repeat until the count reaches zero
+        MOV     pc, lr                  ; return to caller
+
         END
diff --git a/VersionASM b/VersionASM
index 5e0a97d3a5e237b885994afda2ff8badae56e1ee..700cd731c73894be9f19658a1d9c534e8a7f313e 100644
--- a/VersionASM
+++ b/VersionASM
@@ -11,13 +11,13 @@
                         GBLS    Module_HelpVersion
                         GBLS    Module_ComponentName
                         GBLS    Module_ComponentPath
-Module_MajorVersion     SETS    "1.36"
-Module_Version          SETA    136
+Module_MajorVersion     SETS    "1.37"
+Module_Version          SETA    137
 Module_MinorVersion     SETS    ""
-Module_Date             SETS    "14 Nov 2010"
-Module_ApplicationDate  SETS    "14-Nov-10"
+Module_Date             SETS    "22 Dec 2010"
+Module_ApplicationDate  SETS    "22-Dec-10"
 Module_ComponentName    SETS    "SprExtend"
 Module_ComponentPath    SETS    "mixed/RiscOS/Sources/Video/Render/SprExtend"
-Module_FullVersion      SETS    "1.36"
-Module_HelpVersion      SETS    "1.36 (14 Nov 2010)"
+Module_FullVersion      SETS    "1.37"
+Module_HelpVersion      SETS    "1.37 (22 Dec 2010)"
                         END
diff --git a/VersionNum b/VersionNum
index e1f01182952c7da5c8f9ab7881fa154f593ced16..ccd9869770bc49d60afc35044613434b9b9de425 100644
--- a/VersionNum
+++ b/VersionNum
@@ -1,23 +1,23 @@
-/* (1.36)
+/* (1.37)
  *
  * This file is automatically maintained by srccommit, do not edit manually.
  * Last processed by srccommit version: 1.1.
  *
  */
-#define Module_MajorVersion_CMHG        1.36
+#define Module_MajorVersion_CMHG        1.37
 #define Module_MinorVersion_CMHG        
-#define Module_Date_CMHG                14 Nov 2010
+#define Module_Date_CMHG                22 Dec 2010
 
-#define Module_MajorVersion             "1.36"
-#define Module_Version                  136
+#define Module_MajorVersion             "1.37"
+#define Module_Version                  137
 #define Module_MinorVersion             ""
-#define Module_Date                     "14 Nov 2010"
+#define Module_Date                     "22 Dec 2010"
 
-#define Module_ApplicationDate          "14-Nov-10"
+#define Module_ApplicationDate          "22-Dec-10"
 
 #define Module_ComponentName            "SprExtend"
 #define Module_ComponentPath            "mixed/RiscOS/Sources/Video/Render/SprExtend"
 
-#define Module_FullVersion              "1.36"
-#define Module_HelpVersion              "1.36 (14 Nov 2010)"
-#define Module_LibraryVersionInfo       "1:36"
+#define Module_FullVersion              "1.37"
+#define Module_HelpVersion              "1.37 (22 Dec 2010)"
+#define Module_LibraryVersionInfo       "1:37"
diff --git a/c/rojpeg b/c/rojpeg
index 7b88625284ffefd6f45cdbf5b54e32e20a7fb844..a76e2bcb5fb039d75d44c624e5aab56b11a2868a 100644
--- a/c/rojpeg
+++ b/c/rojpeg
@@ -13,12 +13,6 @@
  * limitations under the License.
  */
 /* c.rojpeg - JPEG for use within RISC OS */
-#undef FILE_
-#define FILE_ (40000)
-
-#ifdef SOURCE_IS_rojpeg
-#  define CFSI
-#endif
 
 #include <stdlib.h>
 #include "swis.h"
@@ -26,127 +20,83 @@
 #include "jinclude.h"
 #include "rojpeg.h"
 
-/* ----------------- surrogate C library stuff, if embedded use within RISC OS ----------------- */
-/* This section is dependent on the way in which the code is embedded into
-assembler. It makes use of facilities provided by c.PutScaled for trace output,
-since the two in factshare the same namespace. */
-#ifdef EMBED
-
-
-extern void *memcpy(void *dest, const void *src, size_t n)
-{
-  char *d = (char*) dest;
-  char *s = (char*) src;
-
-  while (n-- > 0) *d++ = *s++;
-  return dest;
-}
-
-extern void *memset(void *s, int c, size_t n)
-{
-  char *d = (char*) s;
-
-  while (n-- > 0) *d++ = c;
-  return s;
-}
-
-#ifdef DEBUG
-/* Debug only has an effect if embedded. */
-
-#ifndef assert
-#define assert(x, y) do_assert(__LINE__, x, y, NULL)
-extern void do_assert(int line, BOOL arg, int error, char *describe); /* defined in c.PutScaled */
-#endif
-
-#ifndef tracef
-#define _ ,
-#define tracef(args) sprintf(0, args)
-extern void sprintf(char *d, char *format, ...); /* defined in c.PutScaled */
-#endif
-
-#ifndef newline
-#define newline() tracef("\n");
-#endif
-
-#else
-/* Non-debugging things */
-
-#ifndef tracef
-#define tracef(args) ((void) 0)
-#endif
-#ifndef newline
-#define newline() ((void) 0)
-#endif
-#ifndef assert
-#define assert(x, y) do_assert(__LINE__, x, y, NULL)
-static void do_assert(int line, BOOL arg, int error, char *describe)
-{
-  if (!arg)
-  {
-    tracef("ASSERTION FAILED (line %i): %s\n" _ line _ describe);
-    exit(error);
-  }
-}
-#endif
-
-#endif
-
-#else
-/* Non-embedded things */
-#define verbose 1
-/* In the JPEG stuff the trace macros refer to this, so that
-j2s can turn on/off the comments dynamically */
-#ifdef DEBUG
-  #define tracef(args) if (verbose) printf(args)
-  static void newline(void) {tracef("\n");}
-#else
-  #define tracef(args) ((void) 0)
-  static void newline(void) {tracef("\n");}
-#endif
+/**************************************************************************
+*                                                                         *
+*    JPEG library code.                                                   *
+*                                                                         *
+**************************************************************************/
 
+#ifdef SOURCE_IS_rojpeg
+#define CFSI
+#define tracef(args)     /* Nothing */
+#define assert(x, y)     {if (!(x)) exit(y);}
+#define newline()        /* Nothing */
+#define comment(ws,text) /* Nothing */
+#define IFDEBUG(a)       /* Nothing */
 #endif
 
-/* ----------------- 'global' facilities declared within nested c files ----------------- */
-
 #include "jrdjfif.c"
 #include "jdhuff.c"
-#ifdef STATS
-  #include "jrevdct4.c"
-#endif
 #include "jcconv.c"
 
-/* Reassert this file ID, for error exit identification */
 #undef FILE_
 #define FILE_ (40000)
 
-/* ----------------- Assembler debug assistance ----------------------------- */
+/**************************************************************************
+*                                                                         *
+*    Low-level debugging output.                                          *
+*                                                                         *
+**************************************************************************/
 
 void assembler_panic(decompress_info_ptr cinfo, int *regblock, int code)
 /* The assembler code calls us here when something goes wrong, in an attempt
-to learn what happened. On exit it then returns, usually leading to no picture
-being painted. */
+ * to learn what happened. On exit it then returns, usually leading to no picture
+ * being painted.
+ */
 {
 #ifdef DEBUG
-    int i;
+  int i;
 
-    tracef("Corrupted data in Huffman stream at byte %i\n" _ (char*)regblock[3] - cinfo->input_buffer);
-#if 1
-      tracef("cinfo=0x%x regblock=0x%x code=%i.\n" _ (int)cinfo _ (int)regblock _ code);
-      for (i = 0; i < 16; i++) tracef("R%i=0x%x\n" _ i _ regblock[i]);
-      tracef("Nearby input bytes:\n");
-      for (i = -10; i < 10; i++) tracef("%i:0x%x " _ i _ ((char*)(regblock[3]))[i]);
-      tracef("\n");
-#endif
+  tracef("Corrupted data in Huffman stream at byte %i\n" _ (char*)regblock[3] - cinfo->input_buffer);
+  tracef("cinfo=0x%x regblock=0x%x code=%i.\n" _ (int)cinfo _ (int)regblock _ code);
+  for (i = 0; i < 16; i++) tracef("R%i=0x%x\n" _ i _ regblock[i]);
+  tracef("Nearby input bytes:\n");
+  for (i = -10; i < 10; i++) tracef("%i:0x%x " _ i _ ((char*)(regblock[3]))[i]);
+  tracef("\n");
 #else
-    UNUSED(cinfo);
-    UNUSED(regblock);
-    UNUSED(code);
+  UNUSED(cinfo);
+  UNUSED(regblock);
+  UNUSED(code);
 #endif
 }
 
+/**************************************************************************
+*                                                                         *
+*    JPEG utility functions.                                              *
+*                                                                         *
+**************************************************************************/
 
-/* ----------------- JPEG entrypoints and control ----------------------------- */
-#ifndef CFSI
+#ifdef CFSI
+int do_jpeg_scan_file(char *space, int space_size, char *file_image, int image_length)
+/* Simple entry sequence for use by ChangeFSI: treats space as the decompressor state and returns jpeg_scan_file's result. */
+{
+  decompress_info_ptr cinfo = (decompress_info_ptr) space; /* the caller's buffer is used directly as the workspace */
+
+  cinfo->workspace_size = space_size; /* jpeg_scan_file is told the workspace length through this field */
+  cinfo->error_code = -1; /* force total reset */
+  return jpeg_scan_file(cinfo, file_image, image_length, 0, 0x7fffffff, -1, -1, 0); /* xmin=0, xmax=0x7fffffff, width=-1, height=-1, options=0 */
+}
+
+int do_jpeg_scan_file_16(char *space, int space_size, char *file_image, int image_length)
+/* Simple entry sequence for use by ChangeFSI - 16bpp output. Same as do_jpeg_scan_file except that jopt_OUTBPP_16 is passed as the options. */
+{
+  decompress_info_ptr cinfo = (decompress_info_ptr) space; /* the caller's buffer is used directly as the workspace */
+
+  cinfo->workspace_size = space_size; /* jpeg_scan_file is told the workspace length through this field */
+  cinfo->error_code = -1; /* force total reset */
+  return jpeg_scan_file(cinfo, file_image, image_length, 0, 0x7fffffff, -1, -1, jopt_OUTBPP_16); /* xmin=0, xmax=0x7fffffff, width=-1, height=-1 */
+}
+#else
 static int palette_is_grey(int *palette, int entries); /* In c.PutScaled */
 
 static void check_jpeg_workspace(asm_workspace *wp, int jpeg_ws_size)
@@ -170,7 +120,7 @@ static void check_jpeg_workspace(asm_workspace *wp, int jpeg_ws_size)
       tracef("Realloc requesting %x extra bytes of workspace\n" _ jpeg_ws_size);
       jpeg_ws_size = jpeg_ws_size + cinfo->workspace_size;
       cinfo = realloc(cinfo, jpeg_ws_size);
-      assert(cinfo != 0, ERROR_NO_MEMORY);
+      assert(cinfo != NULL, ERROR_NO_MEMORY);
       wp->jpeg_info_ptr = cinfo;
       cinfo->error_code = -1;                    /* mark the workspace entirely uninitialised */
       cinfo->workspace_size = jpeg_ws_size;
@@ -228,10 +178,10 @@ static int jpeg_decompressor_opts(decompress_info_ptr cinfo, asm_workspace *wp)
 #endif
       tracef("trying new shiny 8BPP plotting technique\n");
       _swix(ColourTrans_ReadPalette, _IN(0) | _IN(1) | _IN(2) | _IN(3) | _IN(4) | _OUT(3),
-                                     -1, -1, 0, 256*4, 0, &size); /*save palette into newtranstable area */
+                                     -1, -1, 0, 256*4, 0, &size); /* save palette into newtranstable area */
       tracef("need %x bytes for palette\n" _ size);
       _swix(ColourTrans_ReadPalette, _IN(0) | _IN(1) | _IN(2) | _IN(3) | _IN(4),
-                                     -1, -1, &(wp->newtranstable[0]), size, 0); /*save palette into newtranstable area */
+                                     -1, -1, &(wp->newtranstable[0]), size, 0); /* save palette into newtranstable area */
       tracef("created palette at %x\n" _ &(wp->newtranstable[0]));
 #ifdef DEBUG
       tracef("Read palette, palette entries are:-\n");
@@ -295,83 +245,79 @@ static int jpeg_decompressor_opts(decompress_info_ptr cinfo, asm_workspace *wp)
 #endif
 
 static void init_workspace(decompress_info_ptr cinfo, int size)
-/* Workspace has been allocated. Initialise it, any subsidiary structures etc. Do not
-touch the band buffer, might not be allocated yet. */
+/* Workspace has been allocated. Zero its first size bytes (keeping workspace_size and table32k) and
+ * set up the subsidiary structure pointers. Do not touch the band buffer, might not be allocated yet.
+ */
 {
-  int i;
-#if 0
-  char *free = (char*) (cinfo + 1);                           /* after the decompress_info_struct itself */
-  int size = jpeg_workspace_size();
-  char *end = (char*) cinfo + size;
-  int *bb = cinfo->band_buffer;
-  int bbs = cinfo->band_buffer_size;
-#endif
-  int wss = cinfo->workspace_size;
-  char *t32k = cinfo->table32k;
-
-  MEMZERO((void*) cinfo, size);
-
-  /* Replace workspace size */
-#if 0
-  cinfo->band_buffer = bb;
-  cinfo->band_buffer_size = bbs;
-#endif
-  cinfo->workspace_size = wss;
-  cinfo->table32k = t32k;
-
-#if 0
-  cinfo->comp_info = (jpeg_component_info *) free; /* pointer to array of components */
-  free = (char*) ((jpeg_component_info *) free + 3);
-
-  /* Set up pointers to subsidiary structures */
-  for (i = 0; i < NUM_QUANT_TBLS; i++)                  /* allocate quantisation tables */
-  {
-    cinfo->quant_tbl_ptrs[i] = (QUANT_TBL_PTR) free;
-    free = (char*) ((QUANT_TBL *) free + 1);
-  }
-  for (i = 0; i < NUM_HUFF_TBLS; i++)
+  int   i;
+  int   workspace_size;
+  char *table32k;
+
+  /* Must preserve the workspace size and 32k colour table pointer across the wipe */
+  workspace_size = cinfo->workspace_size;
+  table32k = cinfo->table32k;
+  memset(cinfo, 0, size);
+  cinfo->workspace_size = workspace_size;
+  cinfo->table32k = table32k;
+
+  cinfo->comp_info = &cinfo->s_cur_comp_info[0];
+
+  for (i = 0; i < NUM_QUANT_TBLS; i++) /* point at the quantisation tables held inside cinfo */
+    cinfo->quant_tbl_ptrs[i] = (QUANT_VAL*) &cinfo->s_quant_tbl[i];
+    
+  for (i = 0; i < NUM_HUFF_TBLS; i++) /* point at the huffman tables held inside cinfo */
   {
-    cinfo->dc_huff_tbl_ptrs[i] = (HUFF_TBL *) free;
-    free = (char*) ((HUFF_TBL *) free + 1);
-    cinfo->ac_huff_tbl_ptrs[i] = (HUFF_TBL *) free;
-    free = (char*) ((HUFF_TBL *) free + 1);
+    cinfo->dc_huff_tbl_ptrs[i] = &cinfo->s_dc_huff_tbl[i];
+    cinfo->ac_huff_tbl_ptrs[i] = &cinfo->s_ac_huff_tbl[i];
   }
+}
 
-  /* Check we got the size right */
-  assert(free == end);
-#else
-  cinfo->comp_info = &(cinfo->s_cur_comp_info[0]);
-  for (i = 0; i < NUM_QUANT_TBLS; i++)                  /* allocate quantisation tables */
-    cinfo->quant_tbl_ptrs[i] = (QUANT_VAL*) &(cinfo->s_quant_tbl[i]);
-  for (i = 0; i < NUM_HUFF_TBLS; i++)
+static void process_restart(decompress_info_ptr cinfo)
+/* Coping with restarts - whoever put restarts in this standard?
+ * We should be precisely at a restart marker. Consume it, then reset the bit buffer, restart countdown and DC values.
+ */
+{
+  char c = *cinfo->next_input_byte++; /* NOTE(review): the c == 0xff test below only succeeds if plain char is unsigned - confirm for every compiler used */
+  int ci;
+
+#if 0
+  tracef("Processing restart marker %i at %i bytes\n" _ cinfo->next_restart_num _ cinfo->next_input_byte - cinfo->input_buffer);
   {
-    cinfo->dc_huff_tbl_ptrs[i] = &(cinfo->s_dc_huff_tbl[i]);
-    cinfo->ac_huff_tbl_ptrs[i] = &(cinfo->s_ac_huff_tbl[i]);
+    int i;
+    tracef("inbuf=0x%x nbits=%i inptr=0x%x file=0x%x\n" _ cinfo->get_buffer _ cinfo->bits_left _ (int)cinfo->next_input_byte _ (int)cinfo->input_buffer);
+    for (i = -10; i < 10; i++) tracef("%i:0x%x " _ i _ cinfo->next_input_byte[i]);
+    tracef("\n");
   }
 #endif
+  assert(cinfo->bits_left <= 7, ERROR_BAD_JPEG); /* less than a whole byte may remain buffered */
+  assert(c == 0xff, ERROR_BAD_JPEG); /* every marker starts with 0xff */
+  while (*cinfo->next_input_byte == 0xff) cinfo->next_input_byte++; /* additional 0xffs allowed at this point */
+  c = *cinfo->next_input_byte++;
+  assert((c & 0xF8) == 0xD0, ERROR_BAD_JPEG); /* RST0..RST7 markers */
+  assert((c & 7) == cinfo->next_restart_num, ERROR_BAD_JPEG); /* should be precisely the correct marker */
+  /* It appears to be a correctly formed restart marker */
+  cinfo->bits_left = 0;       /* flush the remaining bits */
+  cinfo->get_buffer = 0;
+  cinfo->restarts_to_go = cinfo->restart_interval; /* reload the countdown to the next marker */
+  cinfo->next_restart_num = (cinfo->next_restart_num + 1) & 7; /* expected marker number cycles 0..7 */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) cinfo->last_dc_val[ci] = 0; /* DC values start again from zero */
 }
 
-#define BADFILE(reason) {tbad(reason,0,0); cinfo->error_code = reason; return reason;}
-#define BADFILE1(reason,arg) {tbad(reason,arg,0); cinfo->error_code = reason; cinfo->error_argument1 = arg; return reason;}
-#define BADFILE2(reason,arg1,arg2) \
-  {tbad(reason,arg1,arg2); cinfo->error_code = reason; cinfo->error_argument1 = arg1; cinfo->error_argument2 = arg2; return reason;}
-#if 0
-  static void tbad(int r, int a1, int a2) {tracef("bad file (%i,%i,%i)\n" _ r _ a1 _ a2);}
-#else
-  #define tbad(r,a1,a2) ((void*) 0)
-#endif
-
-/* --------------------------------------------------------------------------------------------- */
+/**************************************************************************
+*                                                                         *
+*    Huffman.                                                             *
+*                                                                         *
+**************************************************************************/
 
 static void save_huff_stream(decompress_info_ptr cinfo, huff_pointer *h)
-/* Save the current state of the huffman stream, so that we could restart reading at this point. */
+/* Save the current state of the huffman stream, so that we could
+ * restart reading at this point.
+ */
 {
-#if 1
   assert(cinfo->bits_left < 32, ERROR_BAD_JPEG);
   assert(cinfo->bits_left >= 0, ERROR_BAD_JPEG);
   assert(cinfo->input_buffer < cinfo->next_input_byte, ERROR_BAD_JPEG);
   assert(cinfo->next_input_byte < cinfo->buffer_end, ERROR_BAD_JPEG);
-#endif
 
   h->bit_pointer = (cinfo->next_input_byte - cinfo->input_buffer)*32 + cinfo->bits_left;
   h->get_buffer = cinfo->get_buffer;
@@ -394,55 +340,21 @@ static void restore_huff_stream(decompress_info_ptr cinfo, huff_pointer *h)
   cinfo->restarts_to_go = h->restarts_to_go;
   cinfo->next_restart_num = h->next_restart_num;
 
-#if 1
 #ifdef DEBUG
-    if (!(cinfo->input_buffer < cinfo->next_input_byte && cinfo->next_input_byte < cinfo->buffer_end))
-      tracef("oops restore_huff_stream: 0x%x 0x%x 0x%x\n" _ (int)cinfo->input_buffer _ (int)cinfo->next_input_byte _ (int)cinfo->buffer_end);
+  if (!(cinfo->input_buffer < cinfo->next_input_byte && cinfo->next_input_byte < cinfo->buffer_end))
+    tracef("oops restore_huff_stream: 0x%x 0x%x 0x%x\n" _ (int)cinfo->input_buffer _ (int)cinfo->next_input_byte _ (int)cinfo->buffer_end);
 #endif
   assert(cinfo->input_buffer < cinfo->next_input_byte, ERROR_FATAL);
   assert(cinfo->next_input_byte < cinfo->buffer_end, ERROR_FATAL);
-#endif
-}
-
-/* --------------------------------------------------------------------------------------------- */
-
-/* Coping with restarts - whoever put restarts in this standard? */
-
-static void process_restart(decompress_info_ptr cinfo)
-/* We should be precisely at a restart marker */
-{
-  char c = *cinfo->next_input_byte++;
-  int ci;
-
-#if 0
-  tracef("Processing restart marker %i at %i bytes\n" _ cinfo->next_restart_num _ cinfo->next_input_byte - cinfo->input_buffer);
-  {
-    int i;
-    tracef("inbuf=0x%x nbits=%i inptr=0x%x file=0x%x\n" _ cinfo->get_buffer _ cinfo->bits_left _ (int)cinfo->next_input_byte _ (int)cinfo->input_buffer);
-    for (i = -10; i < 10; i++) tracef("%i:0x%x " _ i _ cinfo->next_input_byte[i]);
-    tracef("\n");
-  }
-  #endif
-  assert(cinfo->bits_left <= 7, ERROR_BAD_JPEG);
-  assert(c == 0xff, ERROR_BAD_JPEG);
-  while (*cinfo->next_input_byte == 0xff) cinfo->next_input_byte++; /* additional 0xffs allowed at this point */
-  c = *cinfo->next_input_byte++;
-  assert((c & 0xF8) == 0xD0, ERROR_BAD_JPEG); /* RST0..RST7 markers */
-  assert((c & 7) == cinfo->next_restart_num, ERROR_BAD_JPEG); /* should be precisely the correct marker */
-  /* It appears to be a correctly formed restart marker */
-  cinfo->bits_left = 0;       /* flush the remaining bits */
-  cinfo->get_buffer = 0;
-  cinfo->restarts_to_go = cinfo->restart_interval;
-  cinfo->next_restart_num = (cinfo->next_restart_num + 1) & 7;
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) cinfo->last_dc_val[ci] = 0;
 }
 
 static void
 do_huff_skip_blocks(decompress_info_ptr cinfo, JBLOCK block,
                     HUFF_TBL *dctbl, HUFF_TBL *actbl, QUANT_TBL_PTR quanttbl,
                     int *last_dc_val, int nblocks, BOOL block_per_mcu)
-/* Just like asm_huff_skip_blocks, but handles restart markers */
-/* If block_per_mcu then count one restart interval per block, else just count one. */
+/* Just like asm_huff_skip_blocks, but handles restart markers. If block_per_mcu
+ * then count one restart interval per block, else just count one.
+ */
 {
   if (cinfo->restart_interval)
   {
@@ -465,13 +377,13 @@ do_huff_skip_blocks(decompress_info_ptr cinfo, JBLOCK block,
     asm_huff_skip_blocks(cinfo, block, dctbl, actbl, quanttbl, last_dc_val, nblocks);
 }
 
-
 static void
 do_huff_decode_blocks(decompress_info_ptr cinfo, JBLOCK block,
                       HUFF_TBL *dctbl, HUFF_TBL *actbl, QUANT_TBL_PTR quanttbl,
                       int *last_dc_val, int nblocks, BOOL block_per_mcu)
-/* Just like asm_huff_decode_blocks, but handles restart markers */
-/* If block_per_mcu then count one restart interval per block, else just count one. */
+/* Just like asm_huff_decode_blocks, but handles restart markers. If block_per_mcu
+ * then count one restart interval per block, else just count one.
+ */
 {
   if (cinfo->restart_interval)
   {
@@ -494,36 +406,275 @@ do_huff_decode_blocks(decompress_info_ptr cinfo, JBLOCK block,
     asm_huff_decode_blocks(cinfo, block, dctbl, actbl, quanttbl, last_dc_val, nblocks);
 }
 
+/**************************************************************************
+*                                                                         *
+*    Band expansion from entropy encoded data.                            *
+*                                                                         *
+**************************************************************************/
 
-/* --------------------------------------------------------------------------------------------- */
-#ifdef CFSI
-int do_jpeg_scan_file(char *space, int space_size, char *file_image, int image_length)
-/* Simple entry sequence for use by ChangeFSI */
+static int do_1_component_band(decompress_info_ptr cinfo, int line_offset) /* expand one band of a single-component scan into cinfo->band_buffer; returns the number of lines produced (1 or 8) */
 {
-  decompress_info_ptr cinfo = (decompress_info_ptr) space;
+  int width = 0; /* x position, in pixels, of the next block to handle */
+  int *outptr = cinfo->band_buffer;
+  int nlines_fetched;
+  QUANT_TBL_PTR quanttbl = cinfo->quant_tbl_ptrs[cinfo->cur_comp_info[0]->quant_tbl_no];
+  HUFF_TBL *dc = cinfo->dc_huff_tbl_ptrs[cinfo->cur_comp_info[0]->dc_tbl_no];
+  HUFF_TBL *ac = cinfo->ac_huff_tbl_ptrs[cinfo->cur_comp_info[0]->ac_tbl_no];
+  
+  tracef("in do_1_component_band\n");
+  
+  if (cinfo->xmin >= 8)
+  {
+    int count = (cinfo->xmin - 8) >> 3; /* how many blocks we can just skip */
+  
+    do_huff_skip_blocks(cinfo, cinfo->jblocks[0], dc, ac, quanttbl, &cinfo->last_dc_val[0], count, TRUE);
+    width += 8*count;
+    if (cinfo->options & jopt_OUTBPP_8GREY)
+      outptr += 2*count; /* 2 words per skipped block in this format, matching the decode loop below */
+    else
+      outptr += 8*count; /* otherwise 8 words per skipped block */
+  }
+  
+  if (cinfo->options & jopt_DC_ONLY) /* monochrome, only tile values */
+  {
+    /* We only want a flat value for each tile. Just create a single line but
+     * do not attempt to collapse this in the x direction, too complex to interface to.
+     * NOTE(review): always writes 8 words per block, unlike the jopt_OUTBPP_8GREY skip above - confirm never combined. */
+    while (width < cinfo->xmax) /* the data we want */
+    {
+      int pix;
+  
+      do_huff_skip_blocks(cinfo, cinfo->jblocks[1], dc, ac, quanttbl, &cinfo->last_dc_val[0], 1, TRUE);
+      pix = mono_convert_pixel(cinfo, cinfo->last_dc_val[0] * quanttbl[0]); /* DC value scaled by the first quantisation entry */
+      outptr[0] = pix; outptr[1] = pix;
+      outptr[2] = pix; outptr[3] = pix;
+      outptr[4] = pix; outptr[5] = pix;
+      outptr[6] = pix; outptr[7] = pix;
+      width += 8;
+      outptr += 8;
+    }
+    nlines_fetched = 1; /* only a single line was written */
+  }
+  else /* mono normal case, all pixels required */
+  {
+    while (width < cinfo->xmax) /* the data we want */
+    {
+      do_huff_decode_blocks(cinfo, cinfo->jblocks[1], dc, ac, quanttbl, &cinfo->last_dc_val[0], 1, TRUE);
+      asm_j_rev_dct(cinfo, cinfo->jblocks[1], 1); /* output in jblocks[0] */
+      if (cinfo->options & jopt_OUTBPP_8GREY)
+        asm_mono_convert_block_8(cinfo->jblocks[0], outptr, line_offset);
+      else
+        asm_mono_convert_block(cinfo->jblocks[0], outptr, line_offset);
 
-  cinfo->workspace_size = space_size;
-  cinfo->error_code = -1; /* force total reset */
-  return jpeg_scan_file(cinfo, file_image, image_length, 0, 0x7fffffff, -1, -1, 0);
+      width += 8;
+      if (cinfo->options & jopt_OUTBPP_8GREY)
+        outptr += 2;
+      else
+        outptr += 8;
+    }
+    nlines_fetched = 8; /* a full 8-line block row was produced */
+  }
+
+  return nlines_fetched;
 }
 
-int do_jpeg_scan_file_16(char *space, int space_size, char *file_image, int image_length)
-/* Simple entry sequence for use by ChangeFSI - 16bpp output. */
+static int do_3_component_band(decompress_info_ptr cinfo, int line_offset) /* expand one band of a three-component scan into cinfo->band_buffer; returns the number of lines produced */
 {
-  decompress_info_ptr cinfo = (decompress_info_ptr) space;
+  int width = 0; /* x position, in pixels, of the next MCU to handle */
+  int *outptr = cinfo->band_buffer;
+  int nlines_fetched;
+  HUFF_TBL *ydc = cinfo->dc_huff_tbl_ptrs[cinfo->cur_comp_info[0]->dc_tbl_no];
+  HUFF_TBL *yac = cinfo->ac_huff_tbl_ptrs[cinfo->cur_comp_info[0]->ac_tbl_no];
+  QUANT_TBL_PTR yquanttbl = cinfo->quant_tbl_ptrs[cinfo->cur_comp_info[0]->quant_tbl_no];
+  HUFF_TBL *udc = cinfo->dc_huff_tbl_ptrs[cinfo->cur_comp_info[1]->dc_tbl_no];
+  HUFF_TBL *uac = cinfo->ac_huff_tbl_ptrs[cinfo->cur_comp_info[1]->ac_tbl_no];
+  QUANT_TBL_PTR uquanttbl = cinfo->quant_tbl_ptrs[cinfo->cur_comp_info[1]->quant_tbl_no];
+  HUFF_TBL *vdc = cinfo->dc_huff_tbl_ptrs[cinfo->cur_comp_info[2]->dc_tbl_no];
+  HUFF_TBL *vac = cinfo->ac_huff_tbl_ptrs[cinfo->cur_comp_info[2]->ac_tbl_no];
+  QUANT_TBL_PTR vquanttbl = cinfo->quant_tbl_ptrs[cinfo->cur_comp_info[2]->quant_tbl_no];
+  int vsamp = cinfo->comp_info[0].v_samp_factor; /* sampling factors of the first component set the MCU size */
+  int hsamp = cinfo->comp_info[0].h_samp_factor;
+  
+  tracef("in do_3_component_band\n");
+  
+  while (width + 8*hsamp <= cinfo->xmin) /* skip over blocks we don't want */
+  {
+    do_huff_skip_blocks(cinfo, cinfo->jblocks[0], ydc, yac, yquanttbl, &cinfo->last_dc_val[0], hsamp*vsamp, FALSE);
+    asm_huff_skip_blocks(cinfo, cinfo->jblocks[0], udc, uac, uquanttbl, &cinfo->last_dc_val[1], 1);
+    asm_huff_skip_blocks(cinfo, cinfo->jblocks[0], vdc, vac, vquanttbl, &cinfo->last_dc_val[2], 1);
+    width += 8*hsamp;
+    if (cinfo->options & jopt_OUTBPP_16) outptr += 4*hsamp; /* words advanced per MCU depend on the output format */
+    else if (cinfo->options & jopt_OUTBPP_8YUV) outptr += 2*hsamp;
+    else if (cinfo->options & jopt_OUTBPP_8GREY) outptr += 2*hsamp;
+    else outptr += 8*hsamp;
+  }
+  
+  if (cinfo->options & jopt_DC_ONLY) /* colour, only want pixel tile values */
+  {
+    while (width < cinfo->xmax) /* the data we want */
+    {
+      int y[4]; /* one value per luma block; assumes hsamp*vsamp <= 4 - TODO confirm this is checked elsewhere */
+      int u;
+      int v;
+      int i;
+  
+      do_huff_skip_blocks(cinfo, cinfo->jblocks[1], ydc, yac, yquanttbl, &cinfo->last_dc_val[0], 1, FALSE);
+      y[0] = cinfo->last_dc_val[0] * yquanttbl[0];
+      for (i = 1; i < hsamp*vsamp; i++)
+      {
+        asm_huff_skip_blocks(cinfo, cinfo->jblocks[1 + i], ydc, yac, yquanttbl, &cinfo->last_dc_val[0], 1);
+        y[i] = cinfo->last_dc_val[0] * yquanttbl[0];
+      }
+      asm_huff_skip_blocks(cinfo, cinfo->jblocks[5], udc, uac, uquanttbl, &cinfo->last_dc_val[1], 1);
+      asm_huff_skip_blocks(cinfo, cinfo->jblocks[6], vdc, vac, vquanttbl, &cinfo->last_dc_val[2], 1);
+      if (cinfo->options & jopt_GREY) /* greyscale output is acceptable */
+      {
+        tracef("about to do replicate some luma\n");
+        mono_convert_pixels(y); /* results back in y[0..3] */
+        if (cinfo->options & jopt_OUTBPP_8GREY)
+        {
+          /* Native greyscale 8bpp */
+          outptr[0] = outptr[1] = YYYTOYYYY(y[0]);
+          if (hsamp == 2) outptr[2] = outptr[3] = YYYTOYYYY(y[1]);
+          if (vsamp == 2)
+          {
+            outptr[line_offset + 0] = outptr[line_offset + 1] = YYYTOYYYY(y[hsamp == 2 ? 2 : 1]); /* second block row is y[2] when there are two columns, else y[1] */
+            if (hsamp == 2) outptr[line_offset + 2] = outptr[line_offset + 3] = YYYTOYYYY(y[3]);
+          }
+        }
+        else
+        {
+          /* Output greyscale 24bpp, and dither it later if needed */
+          for (i = 0; i < 8; i++)
+          {
+            outptr[0 + i] = y[0];
+            if (hsamp == 2) outptr[8 + i] = y[1];
+            if (vsamp == 2)
+            {
+              outptr[line_offset + 0 + i] = y[hsamp == 2 ? 2 : 1];
+              if (hsamp == 2) outptr[line_offset + 8 + i] = y[3];
+            }
+          }
+        }
+      }
+      else
+      {
+        tracef("about to do YUV to greyscale\n"); /* NOTE(review): message says greyscale but this is the colour branch */
+        u = cinfo->last_dc_val[1] * uquanttbl[0];
+        v = cinfo->last_dc_val[2] * vquanttbl[0];
+        colour_convert_pixels(y, u, v); /* results back in y[0..3] */
+        for (i = 0; i < 8; i++) /* NOTE(review): writes 8 words per block whatever the jopt_OUTBPP_* bits, unlike the advance below - confirm */
+        {
+          outptr[i] = y[0];
+          if (hsamp == 2) outptr[8+i] = y[1];
+          if (vsamp == 2)
+          {
+            outptr[line_offset+i] = y[hsamp == 2 ? 2 : 1];
+            if (hsamp == 2) outptr[line_offset+8+i] = y[3];
+          }
+        }
+      }
+      width += 8*hsamp;
+      if (cinfo->options & jopt_OUTBPP_16) outptr += 4*hsamp;
+      else if (cinfo->options & jopt_OUTBPP_8YUV) outptr += 2*hsamp;
+      else if (cinfo->options & jopt_OUTBPP_8GREY) outptr += 2*hsamp;
+      else outptr += 8*hsamp;
+    }
+    nlines_fetched = vsamp; /* one line per vertical luma block */
+  }
+  else /* colour, normal case (want all pixels) */
+  {
+    while (width < cinfo->xmax) /* the data we want */
+    {
+      do_huff_decode_blocks(cinfo, cinfo->jblocks[1], ydc, yac, yquanttbl, &cinfo->last_dc_val[0], hsamp*vsamp, FALSE);
+      asm_huff_decode_blocks(cinfo, cinfo->jblocks[5], udc, uac, uquanttbl, &cinfo->last_dc_val[1], 1);
+      asm_huff_decode_blocks(cinfo, cinfo->jblocks[6], vdc, vac, vquanttbl, &cinfo->last_dc_val[2], 1);
 
-  cinfo->workspace_size = space_size;
-  cinfo->error_code = -1; /* force total reset */
-  return jpeg_scan_file(cinfo, file_image, image_length, 0, 0x7fffffff, -1, -1, jopt_OUTBPP_16);
+      if (cinfo->options & jopt_GREY) /* greyscale output is acceptable */
+      {
+        tracef("about to do some grey conversion\n");
+        if (cinfo->options & jopt_OUTBPP_8GREY)
+        {
+          asm_j_rev_dct(cinfo, cinfo->jblocks[1], hsamp*vsamp); /* output in jblocks[0..3] */
+          asm_mono_convert_block_8(cinfo->jblocks[0], outptr, line_offset);
+          if (hsamp == 2) asm_mono_convert_block_8(cinfo->jblocks[1], outptr + 2, line_offset);
+          if (vsamp == 2)
+          {
+            asm_mono_convert_block_8(cinfo->jblocks[hsamp == 1 ? 1 : 2], outptr + 8*line_offset, line_offset); /* lower block row starts 8 lines down */
+            if (hsamp == 2) asm_mono_convert_block_8(cinfo->jblocks[3], outptr + 8*line_offset + 2, line_offset);
+          }
+        }
+        else
+        {
+          asm_j_rev_dct(cinfo, cinfo->jblocks[1], hsamp*vsamp); /* output in jblocks[0..3] */
+          asm_mono_convert_block(cinfo->jblocks[0], outptr, line_offset);
+          if (hsamp == 2) asm_mono_convert_block(cinfo->jblocks[1], outptr + 8, line_offset);
+          if (vsamp == 2)
+          {
+            asm_mono_convert_block(cinfo->jblocks[hsamp == 1 ? 1 : 2], outptr + 8*line_offset, line_offset); /* lower block row starts 8 lines down */
+            if (hsamp == 2) asm_mono_convert_block(cinfo->jblocks[3], outptr + 8*line_offset + 8, line_offset);
+          }
+        }
+      }
+      else
+      {
+        tracef("about to do some colour conversion\n");
+        if (hsamp*vsamp == 4)
+        {
+          asm_j_rev_dct(cinfo, cinfo->jblocks[1], 6); /* output in jblocks[0..5] */ /* usual, speed-critical case */
+          if (cinfo->options & jopt_OUTBPP_16)
+            asm_colour_convert_block_16(cinfo->jblocks[0], (short int*) outptr, line_offset);
+          else if (cinfo->options & jopt_OUTBPP_8YUV)
+            asm_colour_convert_block_8(cinfo->jblocks[0], (char*) outptr, line_offset);
+          else
+            asm_colour_convert_block(cinfo->jblocks[0], outptr, line_offset);
+        }
+        else
+        {
+          asm_j_rev_dct(cinfo, cinfo->jblocks[1], hsamp*vsamp); /* weird aspect ratio - only do DCTs we need to do */
+          asm_j_rev_dct(cinfo, cinfo->jblocks[5], 2); /* the two chroma blocks decoded into jblocks[5] and jblocks[6] above */
+          colour_convert_unusual_block(cinfo->jblocks[0], outptr, line_offset, hsamp, vsamp);
+        }
+      }
+      width += 8*hsamp;
+      if (cinfo->options & jopt_OUTBPP_16) outptr += 4*hsamp;
+      else if (cinfo->options & jopt_OUTBPP_8YUV) outptr += 2*hsamp;
+      else if (cinfo->options & jopt_OUTBPP_8GREY) outptr += 2*hsamp;
+      else outptr += 8*hsamp;
+    }
+    nlines_fetched = 8*vsamp; /* 8 lines per vertical luma block */
+  }
+
+  return nlines_fetched;
 }
-#endif
+
+/**************************************************************************
+*                                                                         *
+*    JPEG entry points.                                                   *
+*                                                                         *
+**************************************************************************/
+
+#define BADFILE(reason) {tracef("bad file %d" _ reason); \
+                         cinfo->error_code = reason; \
+                         return reason;}
+#define BADFILE1(reason,arg) {tracef("bad file %d %d" _ reason _ arg); \
+                              cinfo->error_code = reason; \
+                              cinfo->error_argument1 = arg; \
+                              return reason;}
+#define BADFILE2(reason,arg1,arg2) {tracef("bad file %d %d %d" _ reason _ arg1 _ arg2); \
+                                    cinfo->error_code = reason; \
+                                    cinfo->error_argument1 = arg1; cinfo->error_argument2 = arg2; \
+                                    return reason;}
 
 int jpeg_scan_file(decompress_info_ptr cinfo, char *file_image, int image_length,
                           int xmin, int xmax, int width, int height, int options)
 /* Effectively the length of the workspace available is passed in as cinfo->workspace_size,
-a bit illogical. cinfo->error_code is also important. */
+ * a bit illogical. cinfo->error_code is also important.
+ */
 {
   int i;
+  int vsamp, hsamp;
+
 #ifdef EMBED
   tracef("jpeg_scan_file cinfo=0x%x file_image=0x%x image_length=%i xmin=%i xmax=%i width=%i height=%i options=%i\n"
          _ (int)cinfo _ (int)file_image _ image_length _ xmin _ xmax _ width _ height _ options);
@@ -545,13 +696,6 @@ a bit illogical. cinfo->error_code is also important. */
     xmax += 16; /* cos errors can diffuse backwards a little - clipped to image width later */
   }
 
-#if 0
-  /* Paranoid check that band buffer and workspace do not overlap. */
-  assert(cinfo->band_buffer == 0
-        || (int*)cinfo->band_buffer + cinfo->band_buffer_size <= (int*)cinfo
-        || (char*)cinfo + sizeof(struct decompress_info_struct) <= (char*) cinfo->band_buffer);
-#endif
-
 #ifdef EMBED
   tracef("err %x buf %x file %x img %x\n" _ cinfo->error_code _ cinfo->input_buffer _ file_image _ image_length);
   tracef("ck1 %s im1 %x\n" _ cinfo->check1 _ ((int*)file_image)[image_length/(2*4)]);
@@ -567,18 +711,19 @@ a bit illogical. cinfo->error_code is also important. */
      && (height == -1 || cinfo->image_height == height)                      /* height OK */
      && cinfo->check1 == ((int*)file_image)[image_length/(2*4)]                  /* Random checks on data - the /4 gets us down to char offsets */
      && cinfo->check2 == ((int*)file_image)[image_length/4 - image_length/(4*4)] /* we check a word half-way through, and two later on */
-     && cinfo->check3 == ((int*)file_image)[image_length/4 - image_length/(8*4)]
+     && cinfo->check3 == ((int*)file_image)[image_length/4 - image_length/(8*4)] /* any change will perturb the whole file - we trust! */
      && cinfo->options == options
-     /* In the compressed data, any change will perturb the whole file - we trust! */
      )
-  /* >>> We could allow the image to be shifted in store, might help considerably with some clients.
-  The other tests are surely adequate. Would have to shift all the pointers in the huff_pointer array. */
   {
     tracef("This looks like the same JPEG file as last time.\n");
     if (xmax > cinfo->image_width) xmax = cinfo->image_width;
     if (xmin < cinfo->xmin || xmax > cinfo->xmax) cinfo->current_huff_pointer = -1; /* no band sufficiently loaded */
     cinfo->xmin = xmin; /* might need more, or less, than last time */
     cinfo->xmax = xmax;
+
+#ifdef EMBED
+    tracef("Exit jpeg_scan_file OK\n\n");
+#endif
     return 0;
   }
 
@@ -591,7 +736,7 @@ a bit illogical. cinfo->error_code is also important. */
   /* Set up the band buffer pointer. */
   if (cinfo->workspace_size == 0) /* there's no workspace, they just want us to scan the header */
   {
-    cinfo->band_buffer = 0;
+    cinfo->band_buffer = NULL;
     cinfo->band_buffer_size = (64*1024 /* JPEG width limit */) *
                               (8*2 /* 8 x vsamp max */); /* prevent E_TOO_WIDE complaints later on */
   }
@@ -617,10 +762,6 @@ a bit illogical. cinfo->error_code is also important. */
   /* Read the header for the first scan - sets various cinfo fields. */
   read_scan_header(cinfo);
 
-#ifdef DEBUG
-    if (file_image[18] != 0) tracef("Packed RGB thumbnail size: %ix%i.\n" _ file_image[18] _ file_image[19]);
-#endif
-
   /* Initialise any huffman tables present. */
   for (i = 0; i < 4; i++)
   {
@@ -640,133 +781,129 @@ a bit illogical. cinfo->error_code is also important. */
   cinfo->xmin = xmin;
   cinfo->xmax = xmax;
   cinfo->options = options;
+  vsamp = cinfo->comp_info[0].v_samp_factor;
+  hsamp = cinfo->comp_info[0].h_samp_factor;
 
   /* Check various limitations of our code. */
-  if (cinfo->data_precision != 8) BADFILE1(E_PRE_NOT_8, cinfo->data_precision)
-  /* if (cinfo->restart_interval != 0) BADFILE1(E_RESTART, cinfo->restart_interval) */
-  if (cinfo->num_components != cinfo->comps_in_scan) BADFILE(E_MULTI_SCAN)
-  if (cinfo->image_width != width && width != -1) BADFILE1(E_WIDTH, cinfo->image_width)
-  if (cinfo->image_height != height && height != -1) BADFILE1(E_HEIGHT, cinfo->image_height)
+  if (cinfo->data_precision != 8) BADFILE1(E_PRE_NOT_8, cinfo->data_precision);
+  if (cinfo->num_components != cinfo->comps_in_scan) BADFILE(E_MULTI_SCAN);
+  if (cinfo->image_width != width && width != -1) BADFILE1(E_WIDTH_DISAGREES, cinfo->image_width);
+  if (cinfo->image_height != height && height != -1) BADFILE1(E_HEIGHT_DISAGREES, cinfo->image_height);
 
   /* Allocate the array of pointers into the huffman codes, at the base of where the band
-  buffer currently is. */
+   * buffer currently is.
+   */
   {
-    int mcu_height = 8 * cinfo->comp_info[0].v_samp_factor; /* 8 or 16 */
-    /*int mcu_width = 8 * cinfo->comp_info[0].h_samp_factor;*/ /* 8 or 16 */
+    int mcu_height = 8 * vsamp; /* 8 or 16 */
     int huff_array_size = sizeof(huff_pointer) * ((cinfo->image_height + mcu_height - 1)/mcu_height); /* in bytes */
 
     cinfo->huff_pointers = (huff_pointer*) cinfo->band_buffer;
     cinfo->band_buffer += huff_array_size/sizeof(int);
     cinfo->band_buffer_size -= huff_array_size/sizeof(int);
     if (cinfo->image_width > cinfo->band_buffer_size / mcu_height)
-      BADFILE2(E_TOO_WIDE, cinfo->image_width, cinfo->band_buffer_size / mcu_height)
+      BADFILE2(E_TOO_WIDE, cinfo->image_width, cinfo->band_buffer_size / mcu_height);
   }
 
   /* Now try the specific cases that we can do. */
-  if (cinfo->num_components == 1)
+  switch (cinfo->num_components)
   {
-    /* if (cinfo->image_height > 8 * HPOINTERS) BADFILE2(E_TOO_HIGH, cinfo->image_height, 8 * HPOINTERS) */
-    if (cinfo->jpeg_color_space != CS_GRAYSCALE) BADFILE1(E_COLOUR, cinfo->jpeg_color_space)
-    /* if (cinfo->image_width > cinfo->band_buffer_size / 8) BADFILE2(E_TOO_HIGH, cinfo->image_width, cinfo->band_buffer_size / 8) */
-    if (cinfo->comp_info[0].h_samp_factor != 1
-       || cinfo->comp_info[0].v_samp_factor != 1
-       ) BADFILE2(E_BAD_SAMPLE, cinfo->comp_info[0].h_samp_factor, cinfo->comp_info[0].v_samp_factor)
-    tracef("Greyscale file.\n");
-    {
-      int height = 0;
-      jpeg_component_info * compptr = cinfo->cur_comp_info[0];
-      HUFF_TBL *dc = cinfo->dc_huff_tbl_ptrs[compptr->dc_tbl_no];
-      HUFF_TBL *ac = cinfo->ac_huff_tbl_ptrs[compptr->ac_tbl_no];
-      QUANT_TBL_PTR quanttbl = cinfo->quant_tbl_ptrs[compptr->quant_tbl_no];
-      int hpointer_index = 0;
-
-      while (height < cinfo->image_height)
+    case 1:
+      tracef("num_components = 1 (Greyscale file)\n");
+
+      if (hsamp != 1 || vsamp != 1)
+        BADFILE2(E_BAD_SAMPLE, hsamp, vsamp);
+      if (cinfo->jpeg_color_space == CS_GRAYSCALE)
       {
-        /* save the state of the huff stream. */
-        save_huff_stream(cinfo, &cinfo->huff_pointers[hpointer_index]);
-        hpointer_index++;
-        do_huff_skip_blocks(cinfo, cinfo->jblocks[0], dc, ac, quanttbl, &cinfo->last_dc_val[0], (cinfo->image_width + 7) >> 3, TRUE);
-        height += 8;
-        /* tracef("Scanned a band, bytes left = %i.\n" _ cinfo->buffer_end - cinfo->next_input_byte); */
+        int height = 0;
+        int hpointer_index = 0;
+        HUFF_TBL *dc = cinfo->dc_huff_tbl_ptrs[cinfo->cur_comp_info[0]->dc_tbl_no];
+        HUFF_TBL *ac = cinfo->ac_huff_tbl_ptrs[cinfo->cur_comp_info[0]->ac_tbl_no];
+        QUANT_TBL_PTR quanttbl = cinfo->quant_tbl_ptrs[cinfo->cur_comp_info[0]->quant_tbl_no];
+
+        options &= ~(jopt_OUTBPP_16 | jopt_OUTBPP_8YUV); /* haven't got optimised colour conversion for 16bpp output */
+
+        while (height < cinfo->image_height)
+        {
+          /* save the state of the huff stream. */
+          save_huff_stream(cinfo, &cinfo->huff_pointers[hpointer_index]);
+          hpointer_index++;
+          do_huff_skip_blocks(cinfo, cinfo->jblocks[0], dc, ac, quanttbl, &cinfo->last_dc_val[0], (cinfo->image_width + 7) >> 3, TRUE);
+          height += 8;
+        }
       }
-    }
-    options &= ~(jopt_OUTBPP_16 | jopt_OUTBPP_8YUV); /* haven't got optimised colour conversion for 16bpp output */
-    tracef("Clearing 8YUV because of Greyscale flag\n");
-  }
-  else if (cinfo->num_components == 3)
-  {
+      else
+        BADFILE1(E_COLOUR, cinfo->jpeg_color_space);
+      break;
 
-    /* A usual MCU (minimum coding unit) contains 4 Y blocks, 1 U block, 1 V block.
-    We will accept MCUs of 1:1:1 or 2:1:1 as well, since there are examples of this.
-    JFIF allows an arbitrary ratio - seems uncecessary. A band is always 1 MCU high. */
-    int vsamp = cinfo->comp_info[0].v_samp_factor;
-    int hsamp = cinfo->comp_info[0].h_samp_factor;
-
-    if (cinfo->comp_info[0].h_samp_factor > 2 || cinfo->comp_info[0].v_samp_factor > 2)
-      BADFILE2(E_BAD_SAMPLE, cinfo->comp_info[0].h_samp_factor, cinfo->comp_info[0].v_samp_factor)
-    if (cinfo->comp_info[1].h_samp_factor != 1 || cinfo->comp_info[1].v_samp_factor != 1)
-      BADFILE2(E_BAD_SAMPLE, cinfo->comp_info[1].h_samp_factor, cinfo->comp_info[1].v_samp_factor)
-    if (cinfo->comp_info[2].h_samp_factor != 1 || cinfo->comp_info[2].v_samp_factor != 1)
-      BADFILE2(E_BAD_SAMPLE, cinfo->comp_info[2].h_samp_factor, cinfo->comp_info[2].v_samp_factor)
-/*    if (cinfo->image_height > 8 * vsamp * HPOINTERS) BADFILE2(E_TOO_HIGH, cinfo->image_height, 8 * vsamp * HPOINTERS) */
-    if (cinfo->jpeg_color_space != CS_YCbCr) BADFILE1(E_COLOUR, cinfo->jpeg_color_space)
-/*    if (cinfo->image_width > cinfo->band_buffer_size / (8*hsamp)) BADFILE2(E_TOO_WIDE, cinfo->image_width, cinfo->band_buffer_size /(8*hsamp)) */
-    tracef("Interleaved YUV colour file.\n");
-    /* if (cinfo->image_height & 15 != 0) tracef("Warning - height not multiple of 16.\n"); */
-    /* if (cinfo->image_width & 15 != 0) tracef("Warning - width not multiple of 16.\n"); */
-    /* >>> Check that the components are in the order we expect/assume! */
-
-    if (hsamp != 2 || vsamp != 2) options &= ~(jopt_OUTBPP_16 | jopt_OUTBPP_8YUV); /* haven't got optimised colour conversion for unusual colour blocks */
+    case 3:
+      tracef("num_components = 3 (YUV file)\n");
 
-    {
-      int width;
-      int height = 0;
-      int hpointer_index = 0;
-
-      HUFF_TBL *ydc = cinfo->dc_huff_tbl_ptrs[cinfo->cur_comp_info[0]->dc_tbl_no];
-      HUFF_TBL *yac = cinfo->ac_huff_tbl_ptrs[cinfo->cur_comp_info[0]->ac_tbl_no];
-      QUANT_TBL_PTR yquanttbl = cinfo->quant_tbl_ptrs[cinfo->cur_comp_info[0]->quant_tbl_no];
-      HUFF_TBL *udc = cinfo->dc_huff_tbl_ptrs[cinfo->cur_comp_info[1]->dc_tbl_no];
-      HUFF_TBL *uac = cinfo->ac_huff_tbl_ptrs[cinfo->cur_comp_info[1]->ac_tbl_no];
-      QUANT_TBL_PTR uquanttbl = cinfo->quant_tbl_ptrs[cinfo->cur_comp_info[1]->quant_tbl_no];
-      HUFF_TBL *vdc = cinfo->dc_huff_tbl_ptrs[cinfo->cur_comp_info[2]->dc_tbl_no];
-      HUFF_TBL *vac = cinfo->ac_huff_tbl_ptrs[cinfo->cur_comp_info[2]->ac_tbl_no];
-      QUANT_TBL_PTR vquanttbl = cinfo->quant_tbl_ptrs[cinfo->cur_comp_info[2]->quant_tbl_no];
-
-      while (height < cinfo->image_height)
+      /* We expect an MCU (minimum coding unit) of 2x2 Y blocks to each U and V block.
+       * We will accept MCUs of 1x1:1:1 or 2x1:1:1 and 1x2:1:1 as well, since there
+       * are examples of this.
+       * JFIF allows an arbitrary ratio - seems unnecessary. A band is always 1 MCU high.
+       */
+      if (hsamp > 2 || vsamp > 2)
+        BADFILE2(E_BAD_SAMPLE, hsamp, vsamp);
+      if (cinfo->comp_info[1].h_samp_factor != 1 || cinfo->comp_info[1].v_samp_factor != 1)
+        BADFILE2(E_BAD_SAMPLE, cinfo->comp_info[1].h_samp_factor, cinfo->comp_info[1].v_samp_factor);
+      if (cinfo->comp_info[2].h_samp_factor != 1 || cinfo->comp_info[2].v_samp_factor != 1)
+        BADFILE2(E_BAD_SAMPLE, cinfo->comp_info[2].h_samp_factor, cinfo->comp_info[2].v_samp_factor);
+      if (cinfo->jpeg_color_space == CS_YCbCr)
       {
-        /* save the state of the huff stream. */
-        save_huff_stream(cinfo, &cinfo->huff_pointers[hpointer_index]);
-        hpointer_index++;
-
-        width = 0;
-        while (width < cinfo->image_width)
+        int height = 0;
+        int hpointer_index = 0;
+        HUFF_TBL *ydc = cinfo->dc_huff_tbl_ptrs[cinfo->cur_comp_info[0]->dc_tbl_no];
+        HUFF_TBL *yac = cinfo->ac_huff_tbl_ptrs[cinfo->cur_comp_info[0]->ac_tbl_no];
+        QUANT_TBL_PTR yquanttbl = cinfo->quant_tbl_ptrs[cinfo->cur_comp_info[0]->quant_tbl_no];
+        HUFF_TBL *udc = cinfo->dc_huff_tbl_ptrs[cinfo->cur_comp_info[1]->dc_tbl_no];
+        HUFF_TBL *uac = cinfo->ac_huff_tbl_ptrs[cinfo->cur_comp_info[1]->ac_tbl_no];
+        QUANT_TBL_PTR uquanttbl = cinfo->quant_tbl_ptrs[cinfo->cur_comp_info[1]->quant_tbl_no];
+        HUFF_TBL *vdc = cinfo->dc_huff_tbl_ptrs[cinfo->cur_comp_info[2]->dc_tbl_no];
+        HUFF_TBL *vac = cinfo->ac_huff_tbl_ptrs[cinfo->cur_comp_info[2]->ac_tbl_no];
+        QUANT_TBL_PTR vquanttbl = cinfo->quant_tbl_ptrs[cinfo->cur_comp_info[2]->quant_tbl_no];
+
+        if (hsamp != 2 || vsamp != 2) options &= ~(jopt_OUTBPP_16 | jopt_OUTBPP_8YUV); /* haven't got optimised colour conversion for unusual colour blocks */
+  
+        while (height < cinfo->image_height)
         {
-          /* Skip over the six blocks representing this 16*16 square of pixels */
-          do_huff_skip_blocks(cinfo, cinfo->jblocks[1], ydc, yac, yquanttbl, &cinfo->last_dc_val[0], hsamp * vsamp, FALSE);
-          asm_huff_skip_blocks(cinfo, cinfo->jblocks[5], udc, uac, uquanttbl, &cinfo->last_dc_val[1], 1);
-          asm_huff_skip_blocks(cinfo, cinfo->jblocks[6], vdc, vac, vquanttbl, &cinfo->last_dc_val[2], 1);
+          int width;
 
-          /* tracef("Done a block, bytes left = %i.\n" _ cinfo->buffer_end - cinfo->next_input_byte); */
-          width += 8 * hsamp;
+          /* save the state of the huff stream. */
+          save_huff_stream(cinfo, &cinfo->huff_pointers[hpointer_index]);
+          hpointer_index++;
+  
+          width = 0;
+          while (width < cinfo->image_width)
+          {
+            /* Skip over the hsamp*vsamp Y blocks plus one U and one V block that make up this MCU */
+            do_huff_skip_blocks(cinfo, cinfo->jblocks[1], ydc, yac, yquanttbl, &cinfo->last_dc_val[0], hsamp * vsamp, FALSE);
+            asm_huff_skip_blocks(cinfo, cinfo->jblocks[5], udc, uac, uquanttbl, &cinfo->last_dc_val[1], 1);
+            asm_huff_skip_blocks(cinfo, cinfo->jblocks[6], vdc, vac, vquanttbl, &cinfo->last_dc_val[2], 1);
+            width += 8 * hsamp;
+          }
+          height += 8 * vsamp;
         }
-        height += 8 * vsamp;
-        /* tracef("Scanned a band, bytes left = %i.\n" _ cinfo->buffer_end - cinfo->next_input_byte); */
       }
-    }
-    /* tracef("Finished scan, bytes left over = %i\n" _ cinfo->buffer_end - cinfo->next_input_byte); */
+      else
+        BADFILE1(E_COLOUR, cinfo->jpeg_color_space);
+      break;
+
+    case 4:
+      tracef("num_components = 4 (CMYK file)\n");
+    default:
+      BADFILE1(E_COMPONENTS, cinfo->num_components);
+      break;
   }
-  else
-    BADFILE1(E_COMPONENTS, cinfo->num_components)
-
-/*  assert(cinfo->error_code == 0);*/
 
   if (options & jopt_INTERP_X) /* interpolation requested */
   {
-    int size_per_line = cinfo->band_buffer_size / (8 * cinfo->comp_info[0].h_samp_factor);
+    int size_per_line = cinfo->band_buffer_size / (8 * hsamp);
 
-    if (cinfo->image_width * 2 <= size_per_line) cinfo->error_argument1 |= jopt_INTERP_X; /* signal that we'll do it */
-    else cinfo->options &= ~jopt_INTERP_X; /* not enough space - remember that we won't do it */
+    if (cinfo->image_width * 2 <= size_per_line)
+      cinfo->error_argument1 |= jopt_INTERP_X; /* signal that we'll do it */
+    else
+      cinfo->options &= ~jopt_INTERP_X; /* not enough space - remember that we won't do it */
   }
   if (options & jopt_OUTBPP_8) cinfo->error_argument1 |= jopt_OUTBPP_8;
   if (options & jopt_OUTBPP_16) cinfo->error_argument1 |= jopt_OUTBPP_16;
@@ -793,394 +930,102 @@ a bit illogical. cinfo->error_code is also important. */
   return cinfo->error_code;
 }
 
-/* -------------------------------------------------------------------------------------------- */
-
 int *jpeg_find_line(decompress_info_ptr cinfo, int ycoord, int *palette_data)
-/* This gets called for every line of a rendered image. Most of the time it is fast, every 8 or 16 lines
-or so it must do some de-JPEGing of some more data. */
+/* This gets called for every line of a rendered image. Most of the time it is fast,
+ * every 8 or 16 lines or so it must do some de-JPEGing of some more data.
+ */
 {
-  int hpointer; /* huff pointer index */
-  int l2_band_height = 2 + cinfo->comp_info[0].v_samp_factor; /* log2 of band height - 3 for mono, usually 4 for colour */
-  int line_offset = cinfo->band_buffer_size >> l2_band_height; /* offset in words between lines of output */
+  int  hpointer; /* huff pointer index */
+  int  l2_band_height = 2 + cinfo->comp_info[0].v_samp_factor; /* log2 of band height - 3 for mono, usually 4 for colour */
+  int  line_offset = cinfo->band_buffer_size >> l2_band_height; /* offset in words between lines of output */
+  int *result;
 
   tracef("jpeg_find_line, palette data at %x\n" _ palette_data);
-  /* coordinates fed into this are RISC OS-style, with 0 meaning the bottom row.
-  Reverse this so that 0 means the top row. */
-  ycoord = cinfo->image_height - ycoord - 1;
+  
+  ycoord = cinfo->image_height - ycoord - 1; /* coordinates fed into this are RISC OS-style, with 0
+                                              * meaning the bottom row. Reverse this so that 0 means
+                                              * the top row.
+                                              */
+  assert(cinfo->band_buffer != NULL, ERROR_FATAL); /* someone had better have provided one! */
   assert(ycoord >= 0, ERROR_FATAL);
   assert(ycoord < cinfo->image_height, ERROR_FATAL);
   hpointer = ycoord >> l2_band_height;
 
-#if 0
-#ifdef DEBUG
-    tracef("jpeg_find_line %i, hpointer=%i current=%i.\n" _ ycoord _ hpointer _ cinfo->current_huff_pointer);
-    tracef("cinfo=0x%x band_buffer=0x%x band_buffer_size=%i\n" _ (int)cinfo _ (int)cinfo->band_buffer _ cinfo->band_buffer_size);
-    if (ycoord < 0 || ycoord >= cinfo->image_height)
-    {
-      tracef("ycoord out of bounds, [%i..%i..%i]\n" _ 0 _ ycoord _ cinfo->image_height);
-      assert(0);
-    }
-#endif
-#endif
-
   if (hpointer != cinfo->current_huff_pointer) /* Fetch a line */
   {
     int nlines_fetched = 0;
 
-    assert(cinfo->band_buffer != 0, ERROR_FATAL); /* someone had better have provided one! */
-
-    /* Restore the huffman stream */
     cinfo->current_huff_pointer = -1; /* in case of error exit - set correctly at end */
-    restore_huff_stream(cinfo, &cinfo->huff_pointers[hpointer]);
+    restore_huff_stream(cinfo, &cinfo->huff_pointers[hpointer]); /* restore the huffman stream */
 
-    if (cinfo->num_components == 1)
+    /* Get a row of blocks into the band buffer */
+    switch (cinfo->num_components)
     {
-      /* Get a row of blocks into the band buffer */
-      int width = 0;
-      int *outptr = cinfo->band_buffer;
-      jpeg_component_info * compptr = cinfo->cur_comp_info[0];
-      QUANT_TBL_PTR quanttbl = cinfo->quant_tbl_ptrs[compptr->quant_tbl_no];
-      HUFF_TBL *dc = cinfo->dc_huff_tbl_ptrs[compptr->dc_tbl_no];
-      HUFF_TBL *ac = cinfo->ac_huff_tbl_ptrs[compptr->ac_tbl_no];
-
-      tracef("in mono half of jpeg_find_line\n");
-
-      if (cinfo->xmin >= 8)
-      {
-        int count = (cinfo->xmin - 8) >> 3; /* how many blocks we can just skip */
+      case 1:
+        nlines_fetched = do_1_component_band(cinfo, line_offset); /* Greyscale */
+        break;
 
-        do_huff_skip_blocks(cinfo, cinfo->jblocks[0], dc, ac, quanttbl, &cinfo->last_dc_val[0], count, TRUE);
-        width += 8*count;
-        if (cinfo->options & jopt_OUTBPP_8GREY) outptr += 2*count;
-          else outptr += 8*count;
-      }
+      case 3:
+        nlines_fetched = do_3_component_band(cinfo, line_offset); /* YUV */
+        break;
 
-      if (cinfo->options & jopt_DC_ONLY) /* monochrome, only tile values */
-      {
-        /* We only want a flat value for each tile. Just create a single line but
-        do not attempt to collapse this in the x direction, too complex to interface to. */
-        while (width < cinfo->xmax) /* the data we want */
-        {
-          int pix;
-
-          do_huff_skip_blocks(cinfo, cinfo->jblocks[1], dc, ac, quanttbl, &cinfo->last_dc_val[0], 1, TRUE);
-          pix = mono_convert_pixel(cinfo, cinfo->last_dc_val[0] * quanttbl[0]);
-          outptr[0] = pix;
-          outptr[1] = pix;
-          outptr[2] = pix;
-          outptr[3] = pix;
-          outptr[4] = pix;
-          outptr[5] = pix;
-          outptr[6] = pix;
-          outptr[7] = pix;
-          width += 8;
-          outptr += 8;
-        }
-        nlines_fetched = 1;
-      }
-      else /* mono normal case, all pixels required */
-      {
-        while (width < cinfo->xmax) /* the data we want */
-        {
-          /* Could convert and DCT the data 6 blocks at a time? Increases cache requirement... so, we won't */
-          do_huff_decode_blocks(cinfo, cinfo->jblocks[1], dc, ac, quanttbl, &cinfo->last_dc_val[0], 1, TRUE);
-#ifdef TIMINGS
-          if (!(cinfo->options & jopt_HUFF_ONLY))
-#endif
-          {
-#ifdef STATS
-              if (stats)
-                j_rev_dct(cinfo, cinfo->jblocks[1], 1); /* output in jblocks[0] */
-              else
-#endif
-                asm_j_rev_dct(cinfo, cinfo->jblocks[1], 1); /* output in jblocks[0] */
-#ifdef TIMINGS
-              if (!(cinfo->options & jopt_DCT_ONLY))
-#endif
-              if (cinfo->options & jopt_OUTBPP_8GREY)
-                asm_mono_convert_block_8(cinfo->jblocks[0], outptr, line_offset);
-              else
-                asm_mono_convert_block(cinfo->jblocks[0], outptr, line_offset);
-          }
-          width += 8;
-          if (cinfo->options & jopt_OUTBPP_8GREY)
-            outptr += 2;
-          else
-            outptr += 8;
-        }
-        nlines_fetched = 8;
-      }
+      case 4:
+      default:
+        /* These were rejected in jpeg_scan_file() */
+        break;
     }
-    else /* colour */
-    {
-      int width = 0;
-      int *outptr = cinfo->band_buffer;
-      HUFF_TBL *ydc = cinfo->dc_huff_tbl_ptrs[cinfo->cur_comp_info[0]->dc_tbl_no];
-      HUFF_TBL *yac = cinfo->ac_huff_tbl_ptrs[cinfo->cur_comp_info[0]->ac_tbl_no];
-      QUANT_TBL_PTR yquanttbl = cinfo->quant_tbl_ptrs[cinfo->cur_comp_info[0]->quant_tbl_no];
-      HUFF_TBL *udc = cinfo->dc_huff_tbl_ptrs[cinfo->cur_comp_info[1]->dc_tbl_no];
-      HUFF_TBL *uac = cinfo->ac_huff_tbl_ptrs[cinfo->cur_comp_info[1]->ac_tbl_no];
-      QUANT_TBL_PTR uquanttbl = cinfo->quant_tbl_ptrs[cinfo->cur_comp_info[1]->quant_tbl_no];
-      HUFF_TBL *vdc = cinfo->dc_huff_tbl_ptrs[cinfo->cur_comp_info[2]->dc_tbl_no];
-      HUFF_TBL *vac = cinfo->ac_huff_tbl_ptrs[cinfo->cur_comp_info[2]->ac_tbl_no];
-      QUANT_TBL_PTR vquanttbl = cinfo->quant_tbl_ptrs[cinfo->cur_comp_info[2]->quant_tbl_no];
-      int vsamp = cinfo->comp_info[0].v_samp_factor;
-      int hsamp = cinfo->comp_info[0].h_samp_factor;
-
-      tracef("in colour half of jpeg_find_line\n");
-
-      while (width + 8*hsamp <= cinfo->xmin) /* skip over blocks we don't want */
-      {
-        do_huff_skip_blocks(cinfo, cinfo->jblocks[0], ydc, yac, yquanttbl, &cinfo->last_dc_val[0], hsamp*vsamp, FALSE);
-        asm_huff_skip_blocks(cinfo, cinfo->jblocks[0], udc, uac, uquanttbl, &cinfo->last_dc_val[1], 1);
-        asm_huff_skip_blocks(cinfo, cinfo->jblocks[0], vdc, vac, vquanttbl, &cinfo->last_dc_val[2], 1);
-        width += 8*hsamp;
-        if (cinfo->options & jopt_OUTBPP_16) outptr += 4*hsamp;
-        else if (cinfo->options & jopt_OUTBPP_8YUV) outptr += 2*hsamp;
-        else if (cinfo->options & jopt_OUTBPP_8GREY) outptr += 2*hsamp;
-        else outptr += 8*hsamp;
-      }
-
-      if (cinfo->options & jopt_DC_ONLY) /* colour, only want pixel tile values */
-      {
-        while (width < cinfo->xmax) /* the data we want */
-        {
-          int y[4];
-          int u;
-          int v;
-          int i;
-
-          do_huff_skip_blocks(cinfo, cinfo->jblocks[1], ydc, yac, yquanttbl, &cinfo->last_dc_val[0], 1, FALSE);
-          y[0] = cinfo->last_dc_val[0] * yquanttbl[0];
-          for (i = 1; i < hsamp*vsamp; i++)
-          {
-            asm_huff_skip_blocks(cinfo, cinfo->jblocks[1 + i], ydc, yac, yquanttbl, &cinfo->last_dc_val[0], 1);
-            y[i] = cinfo->last_dc_val[0] * yquanttbl[0];
-          }
-          asm_huff_skip_blocks(cinfo, cinfo->jblocks[5], udc, uac, uquanttbl, &cinfo->last_dc_val[1], 1);
-          asm_huff_skip_blocks(cinfo, cinfo->jblocks[6], vdc, vac, vquanttbl, &cinfo->last_dc_val[2], 1);
-          if (cinfo->options & jopt_GREY) /* greyscale output is acceptable */
-          {
-            tracef("about to do replicate some luma\n");
-            mono_convert_pixels(y); /* results back in y[0..3] */
-            if (cinfo->options & jopt_OUTBPP_8GREY)
-            {
-              /* Native greyscale 8bpp */
-              outptr[0] = outptr[1] = YYYTOYYYY(y[0]);
-              if (hsamp == 2) outptr[2] = outptr[3] = YYYTOYYYY(y[1]);
-              if (vsamp == 2)
-              {
-                outptr[line_offset + 0] = outptr[line_offset + 1] = YYYTOYYYY(y[hsamp == 2 ? 2 : 1]);
-                if (hsamp == 2) outptr[line_offset + 2] = outptr[line_offset + 3] = YYYTOYYYY(y[3]);
-              }
-            }
-            else
-            {
-              /* Output greyscale 24bpp, and dither it later if needed */
-              for (i = 0; i < 8; i++)
-              {
-                outptr[0 + i] = y[0];
-                if (hsamp == 2) outptr[8 + i] = y[1];
-                if (vsamp == 2)
-                {
-                  outptr[line_offset + 0 + i] = y[hsamp == 2 ? 2 : 1];
-                  if (hsamp == 2) outptr[line_offset + 8 + i] = y[3];
-                }
-              }
-            }
-          }
-          else
-          {
-            u = cinfo->last_dc_val[1] * uquanttbl[0];
-            v = cinfo->last_dc_val[2] * vquanttbl[0];
-            /* tracef("Colour values %i %i %i %i, %i %i\n" _ y[0] _ y[1] _ y[2] _ y[3] _ u _ v); */
-            colour_convert_pixels(y, u, v); /* results back in y[0..3] */
-            /* tracef("Converted colour values %i %i %i %i, %i %i\n" _ y[0] _ y[1] _ y[2] _ y[3] _ u _ v); */
-            for (i = 0; i < 8; i++)
-            {
-              outptr[i] = y[0];
-              if (hsamp == 2) outptr[8+i] = y[1];
-              if (vsamp == 2)
-              {
-                outptr[line_offset+i] = y[hsamp == 2 ? 2 : 1];
-                if (hsamp == 2) outptr[line_offset+8+i] = y[3];
-              }
-            }
-          }
-          width += 8*hsamp;
-          if (cinfo->options & jopt_OUTBPP_16) outptr += 4*hsamp;
-          else if (cinfo->options & jopt_OUTBPP_8YUV) outptr += 2*hsamp;
-          else if (cinfo->options & jopt_OUTBPP_8GREY) outptr += 2*hsamp;
-          else outptr += 8*hsamp;
-        }
-        nlines_fetched = vsamp;
-      }
-      else /* colour, normal case (want all pixels) */
-      {
-        while (width < cinfo->xmax) /* the data we want */
-        {
-          do_huff_decode_blocks(cinfo, cinfo->jblocks[1], ydc, yac, yquanttbl, &cinfo->last_dc_val[0], hsamp*vsamp, FALSE);
-          asm_huff_decode_blocks(cinfo, cinfo->jblocks[5], udc, uac, uquanttbl, &cinfo->last_dc_val[1], 1);
-          asm_huff_decode_blocks(cinfo, cinfo->jblocks[6], vdc, vac, vquanttbl, &cinfo->last_dc_val[2], 1);
-#ifdef TIMINGS
-          if (!(cinfo->options & jopt_HUFF_ONLY))
-#endif
-          {
-#ifdef TIMINGS
-            if (!(cinfo->options & jopt_DCT_ONLY))
-#endif
-            {
-              if (cinfo->options & jopt_GREY) /* greyscale output is acceptable */
-              {
-                tracef("about to do some grey conversion\n");
-                if (cinfo->options & jopt_OUTBPP_8GREY)
-                {
-                  asm_j_rev_dct(cinfo, cinfo->jblocks[1], hsamp*vsamp); /* output in jblocks[0..3] */
-                  asm_mono_convert_block_8(cinfo->jblocks[0], outptr, line_offset);
-                  if (hsamp == 2) asm_mono_convert_block_8(cinfo->jblocks[1], outptr + 2, line_offset);
-                  if (vsamp == 2)
-                  {
-                    asm_mono_convert_block_8(cinfo->jblocks[hsamp == 1 ? 1 : 2], outptr + 8*line_offset, line_offset);
-                    if (hsamp == 2) asm_mono_convert_block_8(cinfo->jblocks[3], outptr + 8*line_offset + 2, line_offset);
-                  }
-                }
-                else
-                {
-#ifdef STATS
-                  if (stats)
-                    j_rev_dct(cinfo, cinfo->jblocks[1], hsamp*vsamp); /* output in jblocks[0..3] */
-                  else
-#endif
-                    asm_j_rev_dct(cinfo, cinfo->jblocks[1], hsamp*vsamp); /* output in jblocks[0..3] */
-                  asm_mono_convert_block(cinfo->jblocks[0], outptr, line_offset);
-                  if (hsamp == 2) asm_mono_convert_block(cinfo->jblocks[1], outptr + 8, line_offset);
-                  if (vsamp == 2)
-                  {
-                    asm_mono_convert_block(cinfo->jblocks[hsamp == 1 ? 1 : 2], outptr + 8*line_offset, line_offset);
-                    if (hsamp == 2) asm_mono_convert_block(cinfo->jblocks[3], outptr + 8*line_offset + 8, line_offset);
-                  }
-                }
-              }
-              else
-              {
-                tracef("about to do some colour conversion\n");
-
-                if (hsamp*vsamp == 4)
-                {
-#ifdef STATS
-                    if (stats)
-                      j_rev_dct(cinfo, cinfo->jblocks[1], 6); /* output in jblocks[0..5] */
-                    else
-#endif
-                      asm_j_rev_dct(cinfo, cinfo->jblocks[1], 6); /* output in jblocks[0..5] */ /* usual, speed-critical case */
-                  if (cinfo->options & jopt_OUTBPP_16)
-                    asm_colour_convert_block_16(cinfo->jblocks[0], (short int*) outptr, line_offset);
-                  else if (cinfo->options & jopt_OUTBPP_8YUV)
-                    asm_colour_convert_block_8(cinfo->jblocks[0], (char*) outptr, line_offset);
-                  else
-                    asm_colour_convert_block(cinfo->jblocks[0], outptr, line_offset);
-                }
-                else
-                {
-#ifdef STATS
-                    if (stats)
-                    {
-                      j_rev_dct(cinfo, cinfo->jblocks[1], hsamp*vsamp); /* weird aspect ratio - only do DCTs we need to do */
-                      j_rev_dct(cinfo, cinfo->jblocks[5], 2);
-                    }
-                    else
-#endif
-                    {
-                      asm_j_rev_dct(cinfo, cinfo->jblocks[1], hsamp*vsamp); /* weird aspect ratio - only do DCTs we need to do */
-                      asm_j_rev_dct(cinfo, cinfo->jblocks[5], 2);
-                    }
-                  colour_convert_unusual_block(cinfo->jblocks[0], outptr, line_offset, hsamp, vsamp);
-                }
-              }
-            }
-#ifdef TIMINGS
-            else /* for timing experiments - still do the DCTs */
-                 /* we don't gather stats and timings at the same time, so no need to check STATS */
-            {
-              if (hsamp*vsamp == 4)
-                asm_j_rev_dct(cinfo, cinfo->jblocks[1], 6); /* output in jblocks[0..5] */ /* usual, speed-critical case */
-              else
-              {
-                asm_j_rev_dct(cinfo, cinfo->jblocks[1], hsamp*vsamp); /* weird aspect ratio - only do DCTs we need to do */
-                asm_j_rev_dct(cinfo, cinfo->jblocks[5], 2);
-              }
-            }
-#endif
-          }
-          width += 8*hsamp;
-          if (cinfo->options & jopt_OUTBPP_16) outptr += 4*hsamp;
-          else if (cinfo->options & jopt_OUTBPP_8YUV) outptr += 2*hsamp;
-          else if (cinfo->options & jopt_OUTBPP_8GREY) outptr += 2*hsamp;
-          else outptr += 8*hsamp;
-        } /* while */
-        nlines_fetched = 8*vsamp;
-      } /* if */
-    } /* if */
 
     if (cinfo->options & jopt_DIFFUSE)
-    /*cinfo->options & jopt_OUTBPP_8*/ /* convert to 8bpp output, for instance by dithering */
     {
-      if (/*cinfo->table32k == 0 && */!cinfo->table32k_unavailable)
+      int *line = cinfo->band_buffer + cinfo->xmin;
+      int linelen = cinfo->xmax - cinfo->xmin;
+
+      /* The error diffusion cannot handle a very thin strip at the right, eg one pixel wide. So the last
+       * unit of diffusion may be up to 31 pixels. However, the units of diffusion until then must
+       * be unaffected by exactly what xmin/xmax are, or we will get minor pixel variation depending
+       * on the clipping. xmin is already a multiple of 16.
+       */
+      linelen = (linelen + 15) & ~15; /* round up to a multiple of 16 */
+      if (linelen > cinfo->image_width - cinfo->xmin) linelen = cinfo->image_width - cinfo->xmin;
+
+      if (!cinfo->table32k_unavailable)
       {
         /* Try to get the 16bpp->8bpp lookup table from ColourTrans. If we
-        fail then never try again, probably running on old OS/ColourTrans where it isn't available. */
+         * fail then never try again, probably running on old OS/ColourTrans where it
+         * isn't available.
+         */
         cinfo->table32k = asm_get_table32k(palette_data);
         if (cinfo->table32k == 0) cinfo->table32k_unavailable = TRUE;
         tracef("Fetched 32k lookup table, at 0x%x\n" _ (int) cinfo->table32k);
       }
 
-      /* tracef("Doing dithering to 8bpp on %i lines\n" _ nlines_fetched); */
       /* Dither in lengths of 16, to allow xmin to be non-zero. xmin is a multiple of 16 */
       if (cinfo->options & jopt_OUTBPP_8)
       {
-        int *line = cinfo->band_buffer + cinfo->xmin;
-        int linelen = cinfo->xmax - cinfo->xmin;
-        char *outptr = (char*) cinfo->band_buffer + cinfo->xmin;
-
-        /* The error diffusion cannot handle a very thin strip at the right, eg one pixel wide. So the last
-        unit of diffusion may be up to 31 pixels. However, the units of diffusion until then must
-        be unaffected by exactly what xmin/xmax are, or we will get minor pixel variation depending
-        on the clipping. xmin is already a multiple of 16. */
-        linelen = (linelen + 15) & ~15; /* round up to a multiple of 16 */
-        if (linelen > cinfo->image_width - cinfo->xmin) linelen = cinfo->image_width - cinfo->xmin;
-
-//        if (cinfo->options & jopt_INTERP_X) {line += cinfo->xmin; linelen += linelen;} /* double if you have interpolated */
+        char *outptr = (char *)cinfo->band_buffer + cinfo->xmin;
+
         while (linelen > 0)
         {
           int blockwidth = linelen >= 32 ? 16 : linelen; /* avoid having very narrow blocks at r hand edge. */
 
           tracef("calling diffuse to 8bpp code. palette = %x\n" _ palette_data);
           asm_diffuse_to_8bpp(line, blockwidth, outptr, cinfo->table32k, nlines_fetched, line_offset, palette_data);
-          outptr = outptr + blockwidth;
+          outptr += blockwidth;
           line += blockwidth;
           linelen -= blockwidth;
+          tracef("done diffusion, outptr = &%x, blockwidth = &%xlinelen = &%x\n" _ outptr _ blockwidth _ linelen);
         }
       }
       else
       {
-        int *line = cinfo->band_buffer + cinfo->xmin;
-        int linelen = cinfo->xmax - cinfo->xmin;
         int *outptr = cinfo->band_buffer + cinfo->xmin;
 
-        /* The error diffusion cannot handle a very thin strip at the right, eg one pixel wide. So the last
-        unit of diffusion may be up to 31 pixels. However, the units of diffusion until then must
-        be unaffected by exactly what xmin/xmax are, or we will get minor pixel variation depending
-        on the clipping. xmin is already a multiple of 16. */
-        linelen = (linelen + 15) & ~15; /* round up to a multiple of 16 */
-        if (linelen > cinfo->image_width - cinfo->xmin) linelen = cinfo->image_width - cinfo->xmin;
-
-//        if (cinfo->options & jopt_INTERP_X) {line += cinfo->xmin; linelen += linelen;} /* double if you have interpolated */
         while (linelen > 0)
         {
           int blockwidth = linelen >= 32 ? 16 : linelen; /* avoid having very narrow blocks at r hand edge. */
 
           tracef("calling diffuse to palette entries code. palette = %x\n" _ palette_data);
-          asm_diffuse_to_24bpp(line, blockwidth, (int *)outptr, cinfo->table32k, nlines_fetched, line_offset, palette_data);
+          asm_diffuse_to_24bpp(line, blockwidth, outptr, cinfo->table32k, nlines_fetched, line_offset, palette_data);
           outptr += blockwidth;
           line += blockwidth;
           linelen -= blockwidth;
@@ -1190,27 +1035,24 @@ or so it must do some de-JPEGing of some more data. */
     }
 
     cinfo->current_huff_pointer = hpointer; /* line completed correctly - remember for next time. */
-    /* tracef("Done requested band, bytes left = %i.\n" _ cinfo->buffer_end - cinfo->next_input_byte); */
   }
 
-  /* The band buffer now contains suitable pixels */
+  result = cinfo->band_buffer; /* the band buffer now contains suitable pixels */
+  if (cinfo->options & jopt_DC_ONLY)
   {
-    int* result = cinfo->band_buffer;
-
-    if (cinfo->options & jopt_DC_ONLY)
-    {
-      /* Rather than copy the data 8 times, the DC content is spaced out by just 1 line and the
-       * address frigged here to point to one or the other
-       */
-      if (l2_band_height == 4 && (ycoord & 0xf) >= 8)
-        result += line_offset;
-    }
-    else /* normal - choose between 8 or 16 rows of pixels */
-      result += (ycoord & ((1<<l2_band_height)-1)) * line_offset;
-
-    /* tracef("result=0x%x\n" _ (int)result); */
-    return result;
+    /* Rather than copy the data 8 times, the DC content is spaced out by just 1 line
+     * and the address frigged here to point to one or the other
+     */
+    if (l2_band_height == 4 && (ycoord & 0xf) >= 8)
+      result += line_offset;
   }
+  else
+  {
+    /* normal - choose between 8 or 16 rows of pixels */
+    result += (ycoord & ((1 << l2_band_height) - 1)) * line_offset;
+  }
+  
+  return result;
 }
 
 #define M_APP0        0xE0
diff --git a/h/jinclude b/h/jinclude
index 52b413cefa72bb2d616891cea1e7764e12bfc9b2..a56efa6c5314410f8c0f1ea49ff446811a4a6020 100644
--- a/h/jinclude
+++ b/h/jinclude
@@ -97,7 +97,11 @@ asm_huff_skip_blocks(decompress_info_ptr cinfo, JBLOCK block,
                  int *last_dc_val, int nblocks);
 #endif
 
+#ifdef STATS
+#define asm_j_rev_dct(i,b,c) j_rev_dct(i,b,c) /* STATS builds: calls to asm_j_rev_dct() become j_rev_dct(); presumably c.jrevdct4 replaces Sources.jrevdct - confirm */
+#else
 extern void asm_j_rev_dct(decompress_info_ptr cinfo, JBLOCK block, int count);
+#endif
 extern void asm_mono_convert_block(JBLOCK jblock, int *outptr, int outoffset);
 extern void asm_mono_convert_block_8(JBLOCK jblock, int *outptr, int outoffset);
 extern void asm_colour_convert_block(JBLOCK jblock, int *outptr, int outoffset);
diff --git a/h/jpegdata b/h/jpegdata
index d0df573ab7ff3204749f7912fd4e6fd1005baa7f..141971c32e23054ae795bafbc2f93b6748cbc1e0 100644
--- a/h/jpegdata
+++ b/h/jpegdata
@@ -190,16 +190,14 @@ typedef struct
 } huff_pointer;
 
 /* ------------- Error codes for various forms of unacceptable JPEG file ------------- */
-#define E_PRE_NOT_8         1            /* Data precision not 8 */
-#define E_RESTART           2            /* Restart interval not 0 */
-#define E_MULTI_SCAN        3            /* Multi-scan file */
-#define E_TOO_HIGH          4            /* Image too high, max is %i pixels */
-#define E_BAD_SAMPLE        5            /* Bad sample factor */
-#define E_HEIGHT            6            /* Height is %i, not as specified */
-#define E_WIDTH             7            /* Width is %i, not as specified */
-#define E_COLOUR            8            /* Bad colour space (%i), not grey or YUV */
-#define E_COMPONENTS        9            /* Bad number (%i) of components, only 1 or 3 allowed */
-#define E_TOO_WIDE          10           /* Image too wide, max is %i pixels */
+#define E_PRE_NOT_8         1            /* Unsupported - Data precision not 8 */
+#define E_MULTI_SCAN        2            /* Unsupported - Multi scan file */
+#define E_COMPONENTS        3            /* Unsupported - Bad number of components, only 1 or 3 allowed */
+#define E_BAD_SAMPLE        4            /* Bad sample factor */
+#define E_HEIGHT_DISAGREES  5            /* Height is not as specified */
+#define E_WIDTH_DISAGREES   6            /* Width is not as specified */
+#define E_COLOUR            7            /* Bad colour space, not grey or YUV */
+#define E_TOO_WIDE          8            /* Image too wide based on number of MCUs */
 
 /* ------------- Working data for decompression ---------------------------------- */