/* Copyright 1997 Acorn Computers Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /***************************************************/ /* File : SaveText.c */ /* */ /* Purpose: Save a web page as text. */ /* */ /* Author : Merlyn Kline for Customer browser */ /* This source adapted by A.D.Hodgkinson */ /* */ /* History: 24-Nov-97: Created. */ /***************************************************/ #include <stdlib.h> #include <stdio.h> #include <string.h> #include <ctype.h> #include "swis.h" #include "flex.h" #include "HTMLLib.h" /* HTML library API, Which will include html2_ext.h, tags.h and struct.h */ #include "wimp.h" #include "wimplib.h" #include "event.h" #include "svcprint.h" #include "Global.h" #include "FromROSLib.h" #include "Utils.h" #include "Fetch.h" #include "Filetypes.h" #include "Forms.h" #include "Protocols.h" #include "Reformat.h" #include "Save.h" #include "SaveObject.h" #include "SaveText.h" /* Local statics */ static int references = 0; static char savetext_hr_text[] = "\n --------------------------------------------------------------------------\n"; /* Use [] not char *, as sizeof() is called on this entity */ static FILE * outfile = NULL; /* Static function prototypes */ static int savetext_write_bytes (const char * s, unsigned int n); static char * savetext_create_image_text (HStream * t, char * s, int size); static int savetext_write_ref (int getsize); static int savetext_summarise_refs (browser_data * b, HStream * stream, int getsize); static int 
savetext_save (browser_data * b, HStream * stream, int getsize); /*************************************************/ /* savetext_write_bytes() */ /* */ /* Outputs a given number of bytes of a given */ /* string to the FILE * set up in the global */ /* 'outfile', returning a success flag. */ /* */ /* Parameters: Pointer to a string; */ /* */ /* Number of characters from the */ /* string to save. */ /* */ /* Returns: 1 if successful, 0 if failed. */ /* */ /* Assumes: An appropriately opened file is */ /* accessible through the global */ /* FILE * 'outfile'. */ /*************************************************/ static int savetext_write_bytes(const char * s, unsigned int n) { if (!s || !outfile) return 0; while (n && *s) { if (fputc(*s, outfile) == EOF) return 0; s++, n--; } return 1; } /*************************************************/ /* savetext_create_image_text() */ /* */ /* Creates a string that can be used to */ /* represent an image, using the ALT text if */ /* present or a generic alternative from the */ /* Messages file. */ /* */ /* Parameters: Pointer to an HStream struct */ /* representing the image; */ /* */ /* Pointer to a buffer to hold the */ /* string; */ /* */ /* Size of the buffer. */ /*************************************************/ static char * savetext_create_image_text(HStream * t, char * s, int size) { /* Quick sanity check */ if (!s || size < 3) return s; /* Want to have '[', ALT text, then ']' */ s[0] = '['; s[1] = 0; /* If there's any ALT text, strncpy this in after the opening '[' */ if (t->text) strncpy(s + 1, t->text, size - 1); /* Force a terminator in the penultimate byte. This gives us room */ /* for the closing ']', even if the strncpy above filled up the */ /* buffer. */ s[size - 2] = 0; /* What if there was no ALT text? 
*/ if (s[1] == 0) { char * generic = lookup_token("SaveTextImage:Image",0,0); /* If there's room, use a generic string instead */ if (size > strlen(generic) + 2) strcat(s, generic); } /* Append the closing ']' */ strcat(s, "]"); return s; } /*************************************************/ /* savetext_write_ref() */ /* */ /* Build a reference number, returning either */ /* a size of entity or a success flag following */ /* writing the reference to the output file. */ /* */ /* The format specifier for the reference is */ /* read from the Messages file. */ /* */ /* Parameters: 1 to return the size of the */ /* constructed entity, 0 to return a */ /* success flag having tried to */ /* write the entity to disc. */ /*************************************************/ static int savetext_write_ref(int getsize) { char * format = lookup_token("SaveTextRef: [Ref %%d]",0,0); char j[64]; int nl; /* Increment the reference (anchor) count */ references++; /* Check the length */ nl = utils_number_length(references); if (nl + strlen(format) >= sizeof(j)) { /* If it won't fit, use a simple indicator that will */ sprintf(j, " [%d]", references); } else { /* If it will fit, use the format specified in the Messages file */ sprintf(j, format, references); } /* Return either the size or return through the */ /* disc output routine. */ if (getsize) return strlen(j); return savetext_write_bytes(j, strlen(j)); } /*************************************************/ /* savetext_summarise_refs() */ /* */ /* Build a summary of all of the references made */ /* through the text file so far. */ /* */ /* Parameters: Pointer to a browser_data struct */ /* relevant to the file; */ /* */ /* Pointer to the first HStream of */ /* the list from which the refs. are */ /* to be built; */ /* */ /* 1 to return the size of the */ /* constructed entity, 0 to return a */ /* success flag having tried to */ /* write the entity to disc. 
*/ /*************************************************/ static int savetext_summarise_refs(browser_data * b, HStream * stream, int getsize) { char j[512]; int size = 0; static int n = 0; HStream * curr; HStream * last; /* If there are no references, there's nothing to do. */ if (!references) return getsize ? 0 : 1; if (stream == b->stream) { n = 0; /* Output a separator and section title, if we're not in a table cell */ sprintf(j, "\n\n==============================================================================\n\n%s\n\n", lookup_token("SaveTextRefs:References in this document:",0,0)); size += strlen(j); /* If required, try to output this to the file */ if (!getsize && !savetext_write_bytes(j, strlen(j))) return 0; } last = NULL; curr = stream; while (curr) { if (ISBODY(curr)) { if (curr->tagno == TAG_TABLE) { table_stream * table = (table_stream *) curr; table_row * R; table_headdata * D; HStream * attached; /* Scan the rows and cells */ R = table->List; while (R) { D = R->List; while (D) { if (D->Tag) { switch (D->Tag) { case TagTableData: case TagTableHead: { attached = (HStream *) D->List; /* Deal with the cell contents */ if (getsize) size += savetext_summarise_refs(b, attached, getsize); else if (!savetext_summarise_refs(b, attached, getsize)) return 0; } break; } } D = D->Next; } R = R->Next; } } /* If the current item is a link, and the last item wasn't */ /* a link or wasn't the same link as this one (remember, */ /* multiple tokens can represent the same link), then */ /* output this as a reference. We're relying on the */ /* conditions under which a reference is output here being */ /* exactly the same as those which mark a reference when */ /* the body of the page is being output. */ if ( ISLINK(curr) && curr->tagno != TAG_TABLE && ( !last || !ISLINK(last) || !ISBODY(last) || last->tagno == TAG_TABLE || last->anchor != curr->anchor ) ) { /* Increment the local reference counter */ n++; sprintf(j, "%8d. 
", n); /* Output the reference number if required */ size += strlen(j); if (!getsize && !savetext_write_bytes(j, strlen(j))) return 0; /* If required, output the anchor text */ size += strlen(curr->anchor); if (!getsize && !savetext_write_bytes(curr->anchor, strlen(curr->anchor))) return 0; /* Add a line break */ size++; if (!getsize && !savetext_write_bytes("\n", 1)) return 0; } } /* Move on to the next item */ last = curr; curr = curr->next; } /* Finished; return either the total size that would have been */ /* output to a file, or flag success. */ return getsize ? size : 1; } /*************************************************/ /* savetext_save() */ /* */ /* Does the work of outputting the page as text */ /* to a file (the FILE * should already be set */ /* up in 'outfile') or works out how large the */ /* file would be. */ /* */ /* Parameters: Pointer to a browser_data struct */ /* relevant to the file; */ /* */ /* Pointer to the first HStream of */ /* the list to output; */ /* */ /* 1 to return the size of the */ /* constructed file, 0 to return a */ /* success flag having tried to */ /* write the file to disc. */ /*************************************************/ static int savetext_save(browser_data * b, HStream * stream, int getsize) { HStream * curr; HStream * last; int size = 0, s; curr = stream; last = NULL; /* Only zero references if we're working on the main document */ /* stream - table cells should all add to the reference list */ if (stream == b->stream) references = 0; /* Go through the tokens */ while (curr) { if (ISBODY(curr)) { /* If we're on a line break, then write (or account for writing) */ /* a new line, then tabs to indent to the current level of */ /* indentation (whatever that may be). 
*/ if (reformat_newline(curr, last, 0)) { if (getsize) size += 1 + curr->indent; else { int i = curr->indent; if (!savetext_write_bytes("\n", 1)) return 0; while (i) { if (!savetext_write_bytes("\t", i)) return 0; i --; } } } /* Output a line break for paragraphs */ if (curr->style & P) { if (getsize) size += 1; else if (!savetext_write_bytes("\n", 1)) return 0; } /* Hmm. A table. This'll be fun, then. */ if (curr->tagno == TAG_TABLE) { table_stream * table = (table_stream *) curr; table_row * R; table_headdata * D; HStream * attached; char j[512]; char n[64]; char * l; int cell = 0; strcpy(j, "\n["); /* Compile a description of the table */ sprintf(n, "%d", table->RowSpan); l = lookup_token("SaveTextTable:Table - %0", 0, n); strcat(j, l); sprintf(n, "%d", table->ColSpan); if (table->RowSpan == 1) l = lookup_token("SaveTextRow: row and %0", 0, n); else l = lookup_token("SaveTextRows: rows and %0", 0, n); strcat(j, l); if (table->ColSpan == 1) l = lookup_token("SaveTextCol: column", 0, 0); else l = lookup_token("SaveTextCols: columns", 0, 0); strcat(j, l); strcat(j, "]\n"); /* Output this description */ if (getsize) size += strlen(j); else if (!savetext_write_bytes(j, strlen(j))) return 0; /* Scan the rows and cells */ R = table->List; while (R) { D = R->List; while (D) { if (D->Tag) { switch (D->Tag) { case TagTableData: case TagTableHead: { cell ++; attached = (HStream *) D->List; /* Compile a description of the cell */ if (cell > 1) { if (getsize) size += 1; else if (!savetext_write_bytes("\n", 1)) return 0; } if (getsize) size += 2; else if (!savetext_write_bytes("\n[", 2)) return 0; l = lookup_token("SaveTextCell:Row %%d, column %%d", 0, 0); sprintf(j, l, D->RowOffs + 1, D->ColOffs + 1); /* Output this description */ if (getsize) size += strlen(j); else if (!savetext_write_bytes(j, strlen(j))) return 0; if (getsize) size += 3; else if (!savetext_write_bytes("]\n\n", 3)) return 0; /* Deal with the cell contents */ if (getsize) size += savetext_save(b, attached, 
getsize); else if (!savetext_save(b, attached, getsize)) return 0; } break; } } D = D->Next; } R = R->Next; } } /* Output image data */ else if ( (curr->style & IMG) || ( curr->tagno == TAG_INPUT && HtmlINPUTtype(curr) == inputtype_IMAGE ) ) { char j[128]; /* Don't really want more than about 127 characters of ALT text in one go, surely?! */ savetext_create_image_text(curr, j, sizeof(j)); if (getsize) size += strlen(j); else if (!savetext_write_bytes(j, strlen(j))) return 0; } /* For bullets, output an asterisk */ else if (ISBULLET(curr)) { if (getsize) size += 2; else if (!savetext_write_bytes("* ", 2)) return 0; } /* Horizontal rules */ else if (curr->style & HR) { if (getsize) size += sizeof(savetext_hr_text) - 1; /* ('-1' as sizeof includes the terminating null byte */ else if (!savetext_write_bytes(savetext_hr_text, sizeof(savetext_hr_text) - 1)) return 0; } /* Text areas: ' [', the text area value, then '] ' */ else if (curr->tagno == TAG_SELECT) { if (getsize) size += 4 + strlen(form_get_field(b, curr)->text); else { if (!savetext_write_bytes(" [", 2)) return 0; if (!savetext_write_bytes(form_get_field(b, curr)->text, strlen(form_get_field(b, curr)->text))) return 0; if (!savetext_write_bytes("] ", 2)) return 0; } } /* Selection lists - ' [', the current displayed item, */ /* ']', then '[>] ' to represent the popup. 
*/ else if(curr->tagno == TAG_STYLE) { if (getsize) size += 7 + strlen(form_get_field_text(b, curr)); else { if (!savetext_write_bytes(" [", 2)) return 0; if (!savetext_write_bytes(form_get_field_text(b, curr), strlen(form_get_field_text(b, curr)))) return 0; if (!savetext_write_bytes("][>] ", 5)) return 0; } } /* Various INPUT field types */ else if (curr->tagno == TAG_INPUT) { switch (HtmlINPUTtype(curr)) { /* Single line normal writable */ case inputtype_TEXT: { if (getsize) size += 4 +strlen(form_get_field(b, curr)->text); else { if (!savetext_write_bytes(" [", 2)) return 0; if (!savetext_write_bytes(form_get_field(b, curr)->text, strlen(form_get_field(b, curr)->text))) return 0; if (!savetext_write_bytes("] ", 2)) return 0; } } break; /* Single line password writable - output '*'s instead of the actual text */ case inputtype_PASSWORD: { if (getsize) size += 4 + strlen(form_get_field(b, curr)->text); else { int j ,l; if (!savetext_write_bytes(" [", 2)) return 0; l = strlen(form_get_field(b, curr)->text); for (j = 0; j < l; j++) { if (!savetext_write_bytes("*",1)) return 0; } if (!savetext_write_bytes("] ", 2)) return 0; } } break; /* Check box; '[Y]' if selected, '[_]' if not */ case inputtype_CHECKBOX: { if (getsize) size += 3; else if (!savetext_write_bytes(form_get_field(b, curr)->checked ? "[Y]" : "[_]", 3)) return 0; } break; /* Radio button; '(O)' if selected, '(_)' if not */ case inputtype_RADIO: { if (getsize) size += 3; else if (!savetext_write_bytes(form_get_field(b, curr)->checked ? 
"(O)" : "(_)", 3)) return 0; } break; /* Hidden items give no text */ case inputtype_HIDDEN: break; /* Sumbit / Reset buttons */ case inputtype_SUBMIT: /* same as TYPE_RESET: no break */ case inputtype_BUTTON: /* Again, no break */ case inputtype_RESET: { const char * p; p = form_button_text(curr); if (getsize) size += strlen(p) + 4; else { if (!savetext_write_bytes(" <", 2)) return 0; if (!savetext_write_bytes(p, strlen(p))) return 0; if (!savetext_write_bytes("> ", 2)) return 0; } } break; } } /* For anything else, try to extract some generic text */ else { char * d = curr->text; if (getsize && d) size += strlen(d); else if (d && !savetext_write_bytes(d, strlen(d))) return 0; } /* If we've got a link and the last item wasn't a link or */ /* was, but to a different URL as this one, then output a */ /* marker pointing to a reference that will be added on */ /* at the end of the document. */ /* */ /* Note that these conditions MUST be kept in step with */ /* those in savetext_summarise_refs, which actually */ /* writes the references pointed to by these markers. */ if ( ISLINK(curr) && curr->tagno != TAG_TABLE && ( !last || !ISLINK(last) || !ISBODY(last) || last->tagno == TAG_TABLE || last->anchor != curr->anchor ) ) { s = savetext_write_ref(getsize); if (getsize) size += s; else if (!s) return 0; } } last = curr; curr = curr->next; } /* Summarise the references if we're not in a table cell */ if (stream == b->stream) { s = savetext_summarise_refs(b, b->stream, getsize); if (getsize) size += s; else if (!s) return 0; } /* Return whatever was requested - size, or a success flag */ return getsize ? size : 1; } /*************************************************/ /* savetext_save_text() */ /* */ /* Save a given browser page as a text file. */ /* */ /* Parameters: Pointer to a browser_data struct */ /* relevant to the page to save as */ /* text; */ /* */ /* Pointer to the pathname to save */ /* to. 
*/
/*                                               */
/* Returns:    NULL if no pathname was given (in */
/*             which case nothing is done). If   */
/*             the file can't be opened, written */
/*             or closed, returns through the    */
/*             RetLastE macro (presumably the    */
/*             last recorded error - see where   */
/*             the macro is defined). Otherwise  */
/*             the result of setting the file's  */
/*             type with OS_File 18.             */
/*************************************************/

_kernel_oserror * savetext_save_text(browser_data * b, const char * pathname)
{
  int success = 0;

  /* If we seem to have left a file open, close it */

  if (outfile)
  {
    fclose(outfile);
    outfile = NULL;
  }

  /* Only proceed if we've got a pathname */

  if (!pathname || !*pathname) return NULL;

  save_record_path(pathname);

  outfile = fopen(pathname, "wb");

  if (!outfile) RetLastE;

  success = savetext_save(b, b->stream, 0);

  /* Closing the file flushes any buffered output, so a failure */
  /* here means the file on disc is incomplete; treat it in the */
  /* same way as a failed write.                                */

  if (fclose(outfile) == EOF) success = 0;

  outfile = NULL;

  if (!success) RetLastE;

  /* Set the filetype. OS_File 18 takes the reason code in R0,  */
  /* the pathname in R1 and the filetype in R2 only, so exactly */
  /* three input registers are declared to match the three      */
  /* values passed.                                             */

  return _swix(OS_File,
               _INR(0,2),

               18,
               pathname,
               FileType_TEXT);
}

/*************************************************/
/* savetext_text_size()                          */
/*                                               */
/* Returns the size of file that would be        */
/* written by savetext_save_text for the given   */
/* browser. Nothing is written to disc; the page */
/* is walked by savetext_save in its size        */
/* counting mode.                                */
/*                                               */
/* Parameters: Pointer to a browser_data struct  */
/*             relevant to the page to save as   */
/*             text, for which the file size is  */
/*             to be returned.                   */
/*                                               */
/* Returns:    Size of file that would be        */
/*             written by savetext_save_text for */
/*             the given browser.                */
/*************************************************/

int savetext_text_size(browser_data * b)
{
  return savetext_save(b, b->stream, 1);
}