/* Copyright 1997 Acorn Computers Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /***************************************************/ /* File : Fetch.c */ /* */ /* Purpose: Mid-level fetch functions, concerned */ /* mostly with HStreams but not low level */ /* HTMLLib interfacing. Compare with */ /* FetchPage.c, which provides a much */ /* higher level interface. */ /* */ /* Author : A.D.Hodgkinson */ /* */ /* History: 25-Nov-96: Created. */ /* 17-Aug-97: Split up to form the */ /* URLveneer.c and FetchHTML.c */ /* sources. 
*/ /***************************************************/ #include <stdlib.h> #include <stdio.h> #include <string.h> #include "swis.h" #include "flex.h" #include "HTMLLib.h" /* HTML library API, Which will include html2_ext.h, tags.h and struct.h */ #include "wimp.h" #include "wimplib.h" #include "event.h" #include "svcprint.h" #include "Global.h" #include "MiscDefs.h" #include "Utils.h" #include "Authorise.h" #include "Browser.h" #include "Filetypes.h" #include "FontManage.h" #include "Forms.h" #include "FetchHTML.h" #include "Frames.h" #include "History.h" #include "Images.h" #include "Memory.h" #include "Meta.h" #include "Redraw.h" #include "Reformat.h" #include "SaveFile.h" #include "SaveObject.h" #include "Toolbars.h" #include "URLutils.h" #include "URLveneer.h" #include "Windows.h" #include "Fetch.h" /* (Which itself includes URLstat.h) */ /* Local definitons */ #define AuthorisationStr "Authorization: Basic " /* Local compilation options */ #define FRAMES_SUPPORT /* Static function prototypes */ static HStream * fetch_find_anchor_token_r (browser_data * b, HStream * streambase, char * anchor); /*************************************************/ /* fetch_start() */ /* */ /* Initiate a fetch for some URL. */ /* */ /* Parameters: Pointer to a browser_data struct */ /* for the browser window that the */ /* fetch relates to; the URL is */ /* pointed to in that structure, as */ /* the last item in the history. */ /* */ /* Returns: A pointer to a _kernel_oserror */ /* structure if an error occured, or */ /* NULL if there was no error. 
*/ /*************************************************/ _kernel_oserror * fetch_start(browser_data * b) { int handle, method; _kernel_oserror * e; #ifdef TRACE if (tl & (1u<<6)) Printf("\nfetch_start: Called\n"); #endif /* (Order of evaluation ensures the check for the contents of */ /* the memory pointed to by browser_fetch_url only occurs if */ /* the pointer isn't null) */ if (!browser_fetch_url(b) || !*browser_fetch_url(b)) { b->fetch_status = BS_IDLE; toolbars_cancel_status(b, Toolbars_Status_Fetching); return NULL; } /* URL method is set to POST if there is forms data, or GET */ /* if not (see Fetch.c for the definitions) */ method = b->extradata ? URL_Method_http_POST : URL_Method_http_GET; /* Reset the encoding priority to default, awaiting any */ /* information in the HTTP header. Don't actually change */ /* the encoding yet as we don't want the menu to change */ /* at this point. */ if ( b->encoding_priority > priority_link && b->encoding_priority < priority_user ) b->encoding_priority = priority_default; /* Find out if this is an internal URL, and if so, */ /* set the 'displayed' field in the browser_data */ /* struct appropriately. */ /* */ /* If we're saving out data in a link, then don't */ /* do this, as the page contents aren't actually */ /* changing (so leave the flag alone). */ if (!b->save_link) urlutils_set_displayed(b, b->urlfdata); /* Reset the data size counter */ b->data_size = 0; /* Get, and start parsing the document */ e = html_get(b->urlfdata, /* Required document */ b->extradata, /* Extra bits to append for POST etc */ &handle, /* The library's handle for request */ method, /* See above - POST or GET at this point */ NULL, /* User name for Mailserv */ 1, /* Allow HTML parsing, 1 = yes, 0 = no */ !b->reloading); /* If 0, don't go through a proxy - e.g. 
for a reload */ #ifdef TRACE if (b->extradata) { flexcount -= flex_size((flex_ptr) &b->extradata); if (tl & (1u<<13)) Printf("** flexcount: %d\n",flexcount); } #endif if (b->extradata) flex_free((flex_ptr) &b->extradata); if (e) { b->fetch_status = BS_IDLE; toolbars_cancel_status(b, Toolbars_Status_Fetching); return e; } /* No error, so signal that the fetch has started. */ b->fetch_handle = handle; b->fetch_status = BS_STARTED; toolbars_update_status(b, Toolbars_Status_Connecting); /* At this point e will always be NULL but that might change, */ /* so the full trace code is being left in for now */ #ifdef TRACE if (tl & (1u<<6)) { if (e) Printf("fetch_start: Exiting with error\n"); else Printf("fetch_start: Successful\n"); } #endif return e; } /*************************************************/ /* fetch_fetching() */ /* */ /* Returns 1 if there is a fetch in progress */ /* according to the contents of the data that */ /* was pointed to (see Parameters), else 0. */ /* */ /* Parameters: Pointer to a browser_data struct */ /* relevant to the inquiry. */ /* */ /* Returns: 1 if a fetch is in progress, or */ /* 0 if a fetch is not in progress. */ /*************************************************/ int fetch_fetching(browser_data * b) { /* This is currently very simple - a fetch is considered to be in */ /* progress so long as the fetch_status doesn't indicate BS_IDLE. */ return (b->fetch_status != BS_IDLE); } /*************************************************/ /* fetch_find_name_tag() */ /* */ /* Finds the # separating an anchor name in a */ /* URL. */ /* */ /* Parameters: A pointer to the URL string. 
*/ /* */ /* Returns: A pointer to the anchor string, */ /* including the leading # */ /*************************************************/ char * fetch_find_name_tag(char * url) { char * p; p = strchr(url,'/'); /* Get past the first /, as in http:/ */ if (p) p = strchr(p + 1, '/'); /* Get past second /, as in http:// */ if (p) p = strchr(p + 1, '/'); /* Get past site specifier, as in http://www.this.that/ */ if (p) p = strchr(p + 1, '#'); /* Find # in the document path */ return p; } /*************************************************/ /* fetch_find_anchor_token() */ /* */ /* Returns the address of the first token in the */ /* token list which has the given anchor name */ /* associated with it, or NULL if none can be */ /* found. */ /* */ /* Parameters: Pointer to a browser_data struct */ /* relevant to the token list; */ /* */ /* Pointer to the anchor name. */ /* */ /* Returns: Pointer to the token associated */ /* with the given anchor name, or */ /* NULL if none is found. */ /*************************************************/ HStream * fetch_find_anchor_token(browser_data * b, char * anchor) { return fetch_find_anchor_token_r(b, b->stream, anchor); } /*************************************************/ /* fetch_find_anchor_token_r() */ /* */ /* Recursive back-end to fetch_find_anchor_token */ /* - takes an extra parameter giving the top of */ /* the HStream list to scan. */ /* */ /* Parameters: Pointer to a browser_data struct */ /* relevant to the token list; */ /* */ /* Pointer to first item in HStream */ /* list to scan; */ /* */ /* Pointer to the anchor name. */ /* */ /* Returns: As fetch_find_anchor_token. */ /*************************************************/ static HStream * fetch_find_anchor_token_r(browser_data * b, HStream * streambase, char * anchor) { HStream * tp; tp = streambase; /* Go down the token list, checking if a token represents an */ /* anchor, has a name, and that name matches the given one. 
If */ /* so, return the token address, else go onto the next token. */ while (tp && (tp->flags & HFlags_DealtWithToken)) { /* A table token? */ if ( tp->tag == TABLE && ISBODY(tp) ) { table_stream * table = (table_stream *) tp; table_row * row = NULL; table_headdata * head = NULL; HStream * tf; int cellcount = 0; int cellmax = table->ColSpan * table->RowSpan; /* Scan the table for the token, using a recursive */ /* call to this function for each cell. */ if (table->cells) { row = table->List; while (row && cellcount < cellmax) { head = row->List; while (head && cellcount < cellmax) { switch (head->Tag) { case TagTableData: case TagTableHead: { tf = fetch_find_anchor_token_r(b, (HStream *) head->List, anchor); if (tf) return tf; } break; } cellcount ++; head = head->Next; /* Closure of 'while (head && ...)' */ } row = row->Next; /* Closure of 'while (row && ...)' */ } /* Closure of 'if (table->cells)' */ } /* Closure of check to see if token represents a table */ } else if ( (tp->style & A) && tp->name && !strcmp(tp->name, anchor) ) return tp; tp = tp->next; } /* No match found - return NULL. */ return NULL; } /*************************************************/ /* fetch_preprocess_token() */ /* */ /* Takes a token for a given browser_data struct */ /* and preprocesses it - e.g. tells the image */ /* library about image tokens so fetches can */ /* start for those images. */ /* */ /* Parameters: A pointer to a browser_data */ /* structure relevant to the token; */ /* */ /* Pointer to the token. */ /*************************************************/ void fetch_preprocess_token(browser_data * b, HStream * tptr) { int reprocess_table = 0; /* final_token keeps track of the last token dealt with by this */ /* routine, in the main token stream. */ if (!tptr->parent) b->final_token = tptr; /* Are we reprocessing the contents of a table tag which has been */ /* dealt with before? */ if (tptr->tag == TABLE && ISBODY(tptr)) reprocess_table = 1; /* Deal with smart quotes etc. 
*/ // This could have side effects placed here...! Sort it out! - the function just returns at present. reformat_change_text(b, tptr); /* Deal with document body tags (not within HEAD, FRAMESET etc. containers) */ if (ISBODY(tptr)) { /* Don't reprocess the token generally - this may accidentally */ /* clear form_flag, say, if the high level table structures */ /* we are passing through on the way to the lower level ones */ /* don't have the FORM bit set. Other such consequences are */ /* avoided by only doing the table handling if this is a table */ /* tag with the HFlags_DealtWithToken bit set in its flags. */ if (!reprocess_table) { /* If the 'style' entry has the image (IMG) bit set, ask */ /* the image library to handle a new image. tptr->src */ /* will be a char * to the URL of the image. */ if (tptr->style & IMG) { if (fetch_chkerror(b, image_new_image(b, tptr->src, tptr, 0))) return; } /* Handle some form tags */ if (tptr->style & FORM) { /* If there are no forms in this fetch so far, create a new one */ if (!b->form_flag) { if (fetch_chkerror(b, form_new_form(b, tptr))) return; } /* Deal with creating a new field as appropriate */ if (tptr->tagno == TAG_INPUT) { switch(HtmlINPUTtype(tptr)) { case inputtype_TEXT: if (fetch_chkerror(b, form_new_field(b, tptr, form_text, tptr->text ))) return; break; case inputtype_PASSWORD:if (fetch_chkerror(b, form_new_field(b, tptr, form_password, tptr->text ))) return; break; case inputtype_CHECKBOX:if (fetch_chkerror(b, form_new_field(b, tptr, form_checkbox, (char *) HtmlINPUTchecked(tptr)))) return; break; case inputtype_RADIO: if (fetch_chkerror(b, form_new_field(b, tptr, form_radio, (char *) HtmlINPUTchecked(tptr)))) return; break; case inputtype_IMAGE: if (fetch_chkerror(b, form_new_field(b, tptr, form_image, NULL ))) return; break; case inputtype_HIDDEN: if (fetch_chkerror(b, form_new_field(b, tptr, form_hidden, NULL ))) return; break; case inputtype_SUBMIT: if (fetch_chkerror(b, form_new_field(b, tptr, form_submit, 
NULL ))) return; break; case inputtype_RESET: if (fetch_chkerror(b, form_new_field(b, tptr, form_reset, NULL ))) return; break; } } /* Handle text areas */ if (tptr->tagno == TAG_TEXTAREA) { if (fetch_chkerror(b, form_new_field(b, tptr, form_textarea, tptr->text))) return; } /* Handle selection buttons */ if (tptr->tagno == TAG_SELECT) { if (fetch_chkerror(b, form_new_field(b, tptr, form_select, (char *) HtmlSELECToptions(tptr)))) return; } b->form_flag = 1; } else b->form_flag = 0; } /* Tables - need to preprocess any HStreams attached as part of a table */ /* tag. Because any one token is only run through this preprocessor */ /* once, and because when this table tag is run through it all of the */ /* HStreams within may not have arrived yet (the page is only partially */ /* fetched), it is still necessary to rescan the attached HStreams at a */ /* later date (e.g. as part of the reformatting process) to ensure they */ /* are all preprocessed correctly. */ if (tptr->tag == TABLE) { table_stream * table = (table_stream *) tptr; table_row * R; table_headdata * D; HStream * attached; R = table->List; /* Scan the rows and cells */ while (R) { D = R->List; while (D) { if (D->Tag) { switch (D->Tag) { case TagTableData: case TagTableHead: { attached = (HStream *) D->List; /* Preprocess any attached HStream list - must */ /* check table tags even if they've been done */ /* before to look for new HStreams, otherwise */ /* avoid preprocessing the same thing twice. */ while (attached) { if ( ( ISBODY(attached) && attached->tag == TABLE ) || ( !(attached->flags & HFlags_DealtWithToken) ) ) fetch_preprocess_token(b, attached); attached = attached->next; } } break; } } D = D->Next; } R = R->Next; } } /* Closure of long 'if' to see if the HStream structure represented */ /* a body tag or header information - the code is run if it's a */ /* body tag. 
*/ } #ifdef FRAMES_SUPPORT else if ISFRAMESET(tptr) { browser_data * parent; browser_data * child = b; parent = b->parent; if (!parent) parent = b; if (tptr->size) { int level = tptr->size; /* If filling_frame is equal to the number of children, then */ /* they've all been filled - the frameset must be broken. */ if ( !child->nchildren || ( child->nchildren && child->filling_frame < child->nchildren ) ) { while (level > 1) { /* If in a nested frameset, find out what browser_data struct */ /* to put the frames in. This should go in the next frame that */ /* is to be filled in according to the parent. */ child = (browser_data *) child->children[child->filling_frame]; level--; } /* If stepping down a level, i.e. after a /frameset tag, */ /* will want to increment the filling_frame field for */ /* this browser to say that the child we just stepped */ /* down for has been filled with a frameset. There's the */ /* complication of a /frameset being followed by another */ /* frameset and the level therefore staying the same; */ /* this is dealt with in the frameset section below. */ if (tptr->size < parent->nesting_level) { child->filling_frame++; } /* The aforementioned frameset section... */ if (!(tptr->style & FRAME)) { /* Define a new frameset. */ if (tptr->size == parent->nesting_level && child->parent) { /* If at the same level as before on receiving a frameset */ /* tag, must be doing nested frames and just had a */ /* /frameset before this tag came along. So need to */ /* increment the filling_frame counter of the *parent* */ /* (remember, we're at the level of the frame to fill in, */ /* not in the level below as with the code above that */ /* checked the level had stepped down). Therefore, need */ /* to find out again what browser_data struct is to be */ /* given the frameset based on the new filled_frame value. 
*/ child->parent->filling_frame++; child = (browser_data *) child->parent->children[child->parent->filling_frame]; } /* Must force scrollbars off in this current view, */ /* as a frameset is about to appear over it. */ windows_check_tools(child, NULL); /* Finally, define the frameset at the required depth. */ frames_define_frameset(child, tptr); } else { /* Fill in details of a frame. */ frames_define_frame(child, tptr); } parent->nesting_level = tptr->size; } #ifdef STRICT_PARSER else { erb.errnum = Utils_Error_Custom_Message; StrNCpy0(erb.errmess, lookup_token("FramNest:Frames definition is badly nested; could not complete the frames layout.", 0,0)); show_error_ret(&erb); } #endif } } #endif else if ISHEAD(tptr) { /* Deal with header (HEAD) tags */ if (tptr->tagno == TAG_TITLE && tptr->text) { /* The tag is TITLE, and there is title text. */ char title[Limits_Title]; char * p = title; char * end; /* Can't overflow maximum length so just crop the string to fit */ StrNCpy0(title, tptr->text); /* Strip any spaces at the start */ while (*p == ' ') p++; /* Strip any spaces at the end */ end = (char *) ((int) p + strlen(p) - 1); if (end > p) while (*end == ' ') *end-- = 0; /* If there's anything left now... */ if (*p != 0) { /* Set the title */ if (!b->ancestor && fetch_chkerror(b, window_set_title(0, b->self_id, p))) return; /* Try adding this title to the history, ignoring any errors */ history_add_title(p, browser_fetch_url(b)); } } if (tptr->tag == BODY) { /* The BODY tag. All sorts of exciting stuff in here... */ if (HtmlBODYbackground(tptr)) { /* If there's a URL for the image, ask the image library for it */ /* and remember the image number in the browser_data structure */ image_new_image(b, HtmlBODYbackground(tptr), tptr, 2); } /* Get the 24-bit background colour, if any. 
*/ if (HtmlBODYbgcolour(tptr) != NULL_COLOUR) { b->background_colour = HtmlBODYbgcolour(tptr); #ifdef TRACE if (tl & (1u<<6)) Printf("fetch_preprocess_token: Background colour set to %d\n", b->background_colour); #endif /* If there's no actual background image, set the anti-alias */ /* colour to be the same as the background colour. */ if (b->background_image < 0) b->antialias_colour = b->background_colour; browser_update_bottom(b, 0); } /* Get the rest of the colour info out. */ if (HtmlBODYtext (tptr) != NULL_COLOUR) b->text_colour = HtmlBODYtext (tptr); if (HtmlBODYlink (tptr) != NULL_COLOUR) b->link_colour = HtmlBODYlink (tptr); if (HtmlBODYvlink(tptr) != NULL_COLOUR) b->used_colour = HtmlBODYvlink(tptr); if (HtmlBODYalink(tptr) != NULL_COLOUR) b->followed_colour = HtmlBODYalink(tptr); /* Also pull out the onload and onunload scripts. */ if (HtmlBODYonload (tptr)) b->onload = HtmlBODYonload (tptr); if (HtmlBODYonunload(tptr)) b->onunload = HtmlBODYonunload(tptr); } /* Deal with META... tags */ if (tptr->tag == META) { meta_process_tag(b, tptr); } /* Closure of long else to see if the HStream structure represented */ /* a body tag or header information - the code is run if it's a */ /* head tag. */ } /* If we've reached here, the token has been dealt with */ /* successfully - so mark this in its flags word. */ tptr->flags |= HFlags_DealtWithToken; return; } /*************************************************/ /* fetch_fetcher() */ /* */ /* The main part of the fetch routine. Handles */ /* the processing of data from the URL module, */ /* after fetch_start has asked it to start */ /* getting data from a server. */ /* */ /* Parameters: A pointer to a browser_data */ /* structure, to which the fetch */ /* relates. 
*/ /*************************************************/ void fetch_fetcher(browser_data * b) { HStream * tptr; int start = -1; int i, remain, sofar, waiting; /* It really helps to understand this function if you realise that */ /* it's coded more or less backwards (Merlyn was weird...). For */ /* example, the BS_DATAFETCH code below isn't executed unless the */ /* fetch state reaches BS_DATAFETCH, but it can't do that unless */ /* fetcher code much further down is run. So you really need to */ /* read this all in one go before trying to piece it together, or */ /* maybe even read it from the bottom up... */ /* */ /* BS stands for Browser Status, by the way, and nothing else ;-) */ for (i = 0; i < 10; i ++) /* Get several tokens on each null event */ { /* For BS_DATAFETCH, save the file */ if (b->fetch_status == BS_DATAFETCH) { #ifdef TRACE if (tl & (1u<<6)) Printf("fetch_fetcher: fetch_status = BS_DATAFETCH / BS_DATAWHERE.\n"); #endif /* This code gets called by the stuff further down advancing */ /* the status to BS_DATAFETCH. */ /* */ /* If the save_file field is NULL, we're still waiting for */ /* the user to pull their finger out and say where to save */ /* the object. */ if (b->save_file) /* Proceed if there's a file to save to */ { char buffer[2048]; int success, bytes = -1, done = 0; _kernel_oserror * e; /* Get a chunk of data */ e = fetch_get_raw_data(NULL, b->fetch_handle, buffer, sizeof(buffer), &done, &bytes); /* If there's an error, show it but continue */ if (e) show_error_ret(e); success = !e; /* If there was not an error, write a chunk of file */ if (success && bytes) success = fwrite(buffer, 1, bytes, b->save_file); /* If the expected number of bytes was not written, */ /* show whatever error fwrite generated */ if (success != bytes && bytes) { success = 0; erb = *_kernel_last_oserror(); show_error_ret(&erb); } /* If apparently successful and finished, read the pathname */ /* of the file so the filetype can be set. 
*/ if (success && done) { _swix(OS_Args, _INR(0,2) | _IN(5), 7, /* Read pathname of open file */ b->save_file->__file, buffer, sizeof(buffer)); } /* If finished or there was some error above, stop the fetch */ if (!success || done) fetch_stop(b, 0); /* This closes the output file, too */ /* If successful and finished, set the filetype */ if (success && done) { _swix(OS_File, _INR(0,2), 18, buffer, b->save_type); } /* Finally, ensure toolbars are up to date. */ toolbars_update_progress(b); } return; } #ifdef TRACE if (tl & (1u<<6)) Printf("fetch_fetcher: Get next token\n"); #endif /* Get the next token, with fetch_chkerror allowing us to exit */ /* relatively cleanly should an error occur. */ if ( fetch_chkerror( b, html_get_next_token( b, b->fetch_handle, &remain, &sofar, &tptr, &waiting, (flex_ptr) &b->source, browser_fetch_url(b), 0 ) ) ) return; /* Show the fetch's progress */ toolbars_update_progress(b); /* If waiting = 3 the data being fetched isn't parseable, or has */ /* been marked as not for parsing so that it may be saved. */ if (waiting == 3) { /* Full screen browsers can't save objects out */ if (choices.full_screen && b->full_screen) { erb.errnum = Utils_Error_Custom_Message; StrNCpy0(erb.errmess, lookup_token("NotInternal:Can't save objects when running in full screen mode.", 0, 0)); fetch_chkerror(b, &erb); return; } else { #ifdef TRACE if (tl & (1u<<6)) Printf("fetch_fetcher: fetch_status moved to BS_DATAFETCH\n"); #endif b->fetch_status = BS_DATAFETCH; b->save_type = remain; b->save_link = 1; if (b->save_type == FileType_DATA || b->save_type == 0x000) { /* If we've been given data or unknown, see if we can have a better guess! 
*/ b->save_type = urlutils_filetype_from_url(browser_fetch_url(b)); } /* Open a save dialogue for the object */ if (fetch_chkerror(b, saveobject_open_for(b))) return; } return; } /* If waiting = 2, a redirect has occurred */ else if (waiting == 2) { char * url; int internal = 0; #ifdef TRACE if (tl & (1u<<6)) Printf("fetch_fetcher: Redirect to %s\n",(char *) remain); #endif /* Get the new URL pointed to by 'url' */ url = (char *) remain; if (b->displayed != Display_Fetched_Page) internal = 1; /* Record the pre-redirection URL in the global history */ if (!internal) { history_record_global(browser_fetch_url(b)); /* Allocate space for new URL and copy it into that space */ #ifdef TRACE if (tl & (1u<<12)) Printf("fetch_fetcher: Chunk CK_FURL set to %d\n",strlen(url) + 1); #endif if (fetch_chkerror(b, memory_set_chunk_size(b, NULL, CK_FURL, strlen(url) + 1))) return; strcpy(b->urlfdata, url); } else { char furl[Limits_URL]; /* Allocate space for new URL plus old URL and separator, */ /* and copy them into that space */ #ifdef TRACE if (tl & (1u<<12)) Printf("fetch_fetcher: Chunk CK_FURL set to %d\n",strlen(url) + strlen(furl) + 2); #endif StrNCpy0(furl, browser_fetch_url(b)); if (fetch_chkerror(b, memory_set_chunk_size(b, NULL, CK_FURL, strlen(url) + strlen(furl) + 2))) return; strcpy(b->urlfdata, url); strcat(b->urlfdata, ":"); strcat(b->urlfdata, furl); } /* Reflect the new URL in the status and URL bars */ toolbars_update_status(b, Toolbars_Status_Redirected); toolbars_update_url(b); } /* If we're waiting for new tokens, don't sit here in a loop */ /* single tasking - break out, allowing more external polling. */ else if (waiting) { break; } /* We're not waiting for data. */ else { /* If it isn't already non-zero, set 'start' to the number of */ /* the last line in the line list (i.e. nlines - 1). */ if (start < 0) start = b->cell->nlines - 1; /* We're not waiting, have we got a token? 
*/ if (b->fetch_status == BS_STARTED) { /* Yes - this is the first token on this page. Get the window */ /* ready for the new page - this includes ditching old data. */ int l; /* Make the current display URL = current fetch URL... */ l = strlen(browser_fetch_url(b)); /* Get the fetching URL string length */ /* Allocate memory for it, and copy the string across */ #ifdef TRACE if (tl & (1u<<12)) Printf("fetch_fetcher: Chunk CK_DURL set to %d\n",l + 1); #endif if (fetch_chkerror(b, memory_set_chunk_size(b, NULL, CK_DURL, l + 1))) return; strcpy(b->urlddata, browser_fetch_url(b)); toolbars_hide_internal(b->urlddata); /* Write to the global history */ history_record_global(b->urlddata); /* Update the title bar */ if (!b->ancestor) /* Child windows don't have title bars... */ { char title[Limits_Title]; StrNCpy0(title, b->urlddata); if (fetch_chkerror(b, window_set_title(0, b->self_id, title))) return; } /* Set status to FETCHING instead of STARTED */ b->fetch_status = BS_FETCHING; /* If there was previous display data present, get rid of it */ if ((b->display_handle) && (b->display_handle != b->fetch_handle)) { html_close(b->display_handle); b->display_handle = 0; } b->save_oldstore = 0; /* Signal that the display data is coming from the fetch data, */ /* so that the fetch data doesn't get accidentally ditched */ /* until it's finished with */ b->display_handle = b->fetch_handle; /* Initialise various things inside the browser_data structure */ /* to do with colours and so-forth */ #ifdef TRACE if (tl & (1u<<18)) Printf("New fetch for %p, stream %p\n",b,tptr); #endif b->stream = tptr; /* Pointer to list of HStream structures */ b->final_token = NULL; /* Last HStream structure dealt with */ b->last_char = ' '; /* Last character dealt with */ b->background_colour = -1; /* Background colour, or -1 for default */ b->background_image = -1; /* Image no. 
of background image, 0=none */ b->text_colour = choices.text_colour; /* Body text default colour */ b->link_colour = choices.link_colour; /* Link text default colour */ b->used_colour = choices.used_colour; /* Followed link default colour */ b->antialias_colour = redraw_backcol(b); /* Colour to anti-alias to, or -1=none */ b->followed_colour = choices.followed_colour; /* Following link default colour */ b->selected_colour = choices.selected_colour; /* Selected (highlighted) link colour */ b->onload = NULL; /* <BODY onload> attribute */ b->onunload = NULL; /* <BODY onunload> attribute */ /* Ensure the nesting level and filling frame counters are reset */ b->nesting_level = 0; b->filling_frame = 0; /* Cancel any pending automatic fetches */ if (b->meta_refresh_at) deregister_null_claimant(Wimp_ENull, (WimpEventHandler *) meta_check_refresh, b); b->meta_refresh_at = 0; b->meta_refresh_url = NULL; /* Cancel pending reformats */ reformat_stop_pending(b); /* Hideously long comment alert... */ /* */ /* Although if a frame loads a document containing another frameset */ /* this is in one sense a nested frame defintion, in another the */ /* second document is independent of the first; certainly as far as */ /* incrementing the filling_frame field of the parent goes, the */ /* <frame> tag that loaded this document into the frame in the */ /* first place will already have done that. */ /* */ /* Consequently, whilst all child frames have an ancestor - the */ /* original, base browser that defined the first of possibly many */ /* framesets - only genuinely nested frameset arrays have parents. */ /* That is, a parent can only have children; it may not also be a */ /* child (i.e. have a parent), it may only have an ancestor. */ /* */ /* Genuinely nested frames consist of one document with more than */ /* one set of <frameset> tags. Here, filling_frame considerations */ /* demand the use of a nested_level count and a parent as well as */ /* an ancestor. 
For those single documents, we won't be running */ /* this code when second or further framesets come in, so the */ /* parent field will get estabilshed and remain as long as needed */ /* by the frames routines. */ b->parent = NULL; /* Yup - that whole comment for one tiny line of code. Woo... */ /* */ /* 'Course, that said, it's useful for every child to know who its */ /* parent is. That's what the real_parent field is for. */ // { // WimpGetWindowStateBlock state; // // state.window_handle = b->window_handle; // // if (!wimp_get_window_state(&state)) // { // b->display_width = b->display_extent = state.visible_area.xmax - state.visible_area.xmin; // } // } /* Don't want to set the pointer_over field, as then it may not seem to */ /* have changed from one fetch to another; the pointer can get 'stuck' */ /* in the 'link' shape. */ b->highlight = NULL; /* No tokens are highlighted */ b->selected = NULL; /* No tokens are selected */ b->selected_owner = NULL; #ifdef TRACE if (tl & (1u<<6)) Printf("\nfetch_fetcher: Document colours etc. set to default values\n"); #endif /* Clear the status bar contents block for an ancestor */ /* window beginning a new fetch. */ if (!b->ancestor && b->nstatus) { #ifdef TRACE if ( (tl & (1u<<1)) || (tl & (1u<<6)) ) Printf("fetch_fetcher: Freeing status_contents array\n"); #endif b->nstatus = 0; memory_set_chunk_size(b, NULL, CK_STAT, 0); } /* Clear allocated memory for the forms, and tell */ /* the font library that the fonts aren't needed */ // /* anymore. Images are cleared after the fetch, */ // /* so that any images common between the two can */ // /* be preserved. */ form_discard(b); fm_lose_fonts(b); /* Flag that images need to be garbage collected later */ // Um... ToDo list time... image_discard(b); // b->clear_images = 1; /* IMPORTANT, must call the reformatter here to ensure that all various */ /* line list data is invalidated, discarded, and any new stuff is valid. 
*/ /* Otherwise, could have bits of the application subsequently using old */ /* line data and things will go very wrong very quickly. */ /* */ /* DON'T put anything that might try and read line data before this call! */ b->display_extent = b->display_width; /* Ensure a new fetch starts with the horizontal extent matching the visible area */ start = -1; reformat_format_from(b, -1, 1, -1); reformat_check_extent(b); /* If there's a save dialogue open for this frame, get rid of it */ savefile_close(b->self_id, 0); /* Collapse any frames within this browser */ frames_collapse_set(b); /* Ensure window tools are up to date */ if (b->ancestor || b->full_screen) windows_set_tools(b, NULL, !b->ancestor, 0, 0, 0); /* If there's a # inside the URL (i.e. we're supposed to jump to an */ /* anchor) then set the token to display first to be DISPLAY_NAMED, */ /* a large number which acts as a flag to say 'jump to anchor'. The */ /* fetch polling routine (see FetchPage.c) should notice this and */ /* start looking for a token with the appropriate name, and if it */ /* finds it, display that token. */ if (fetch_find_name_tag(browser_current_url(b))) b->display_request = DISPLAY_NAMED; /* Ensure the pointer shape is correct */ browser_pointer_check(0, NULL, NULL, b); /* Reflect the new browser status */ toolbars_update_status(b, Toolbars_Status_Fetching); /* Since the new fetch is now official, update the current and previous */ /* page variables */ if (!b->ancestor) { /* Not speed critical, so avoid lots of nasty C-isms with malloc */ /* and so-on, by running through OS_CLI. */ _swix(OS_CLI, _IN(0), "Set Browse$PreviousPage <Browse$CurrentPage>"); _swix(OS_SetVarVal, _INR(0,4), "Browse$CurrentPage", b->urlfdata, strlen(b->urlfdata), 0, 4); } /* (Initialisation to an empty state is now complete, so we're */ /* ready to fetch a new page). 
*/ } /* We're not waiting, but if there's also no data left to fetch, */ /* then we're just chugging through the list of tokens that the */ /* library has generated, telling various bits of the code about */ /* their contents (e.g. a new image, a new form). In this case, */ /* change the fetch status so the status bar can reflect the new */ /* situation. */ if (!remain) { b->fetch_status = BS_PROCESS; toolbars_update_status(b, Toolbars_Status_Processing); toolbars_update_progress(b); } /* If tptr is null, there are no HStream structures (see the */ /* html_get_next_token call). But we're not waiting either, */ /* so must be at the end of the file - stop the fetch. */ if (!tptr) { if (b->last_token->tag == TABLE && ISBODY(b->last_token)) { /* If the last thing the reformatter dealt with was a table, */ /* then extra table structures could have been added by */ /* HTMLLib. It is important to ensure that any tokens that */ /* were added to the token stream are preprocessed before */ /* starting a reformat. */ fetch_preprocess_token(b, b->last_token); /* Make sure the page is fully reformatted */ start = -1; reformat_format_from(b, b->cell->nlines - 2, 1, -1); } #ifdef TRACE if (tl & (1u<<6)) Printf("\nfetch_fetcher: Finished, so stopping and exiting.\n"); #endif fetch_stop(b, 1); break; } else fetch_preprocess_token(b, tptr); /* Closure of series of ifs that checked the state of 'waiting' */ /* amongst other things, to handle redirections etc. The bulk */ /* of the code deals with a conventional fetch. */ } /* Closure of for loop that deals with several fetches per null */ } /* If start is >= 0, there is data that can be used for */ /* displaying the page; so start a reformat based on that */ /* data. Start from 'one line up' as the last line may have */ /* been only partially finished when it was last redrawn. 
*/ /* The reformat session can be deferred if the reformatter */ /* is not running, but if the reformatter is still going, */ /* push this request through immediately. Otherwise */ /* problems with long-delayed reformats way after the page */ /* has been more or less completely formatted can occur. */ if (start >= 0)// && !reformat_formatting(b)) { reformat_format_from(b, start - 1, /* '-1' as this parameter is the *last valid* line number */ /* we want to keep, and 'start' holds the first line to */ /* start the reformat at. */ reformat_formatting(b), -1); } } /*************************************************/ /* fetch_chkerror() */ /* */ /* Called by low level fetch routines instead of */ /* the ChkError macro, as it stops the current */ /* fetch correctly before reporting the error. */ /* */ /* Parameters: Pointer to a browser_data */ /* structure relevant to the fetch; */ /* */ /* Pointer to a _kernel_oserror */ /* structure, which contains the */ /* error to report (or NULL). */ /* */ /* Returns: 0 if there was no error, else 1. */ /*************************************************/ int fetch_chkerror(browser_data * b, _kernel_oserror * e) { if (e) { /* There is an error - cancel the fetch */ fetch_cancel(b); /* Report the error */ show_error_ret(e); /* Flag the error in the returned value */ return 1; } return 0; } /*************************************************/ /* fetch_cancel() */ /* */ /* Aborts a fetch, closing any relevant streams, */ /* freeing up any claimed memory that was only */ /* relevant to the fetch, but leaves the page */ /* fetched so far visible. */ /* */ /* Parameters: A pointer to the browser_data */ /* structure relevant to the fetch */ /* to be cancelled. */ /*************************************************/ _kernel_oserror * fetch_cancel(browser_data * b) { /* If there is a fetch, and the HTML data isn't being used by the */ /* display routines, close the fetch handle and free up any memory */ /* associated with it. 
*/ if ((b->fetch_handle) && ((b->fetch_handle) != (b->display_handle))) html_close(b->fetch_handle); b->fetch_handle = 0; /* If a META tag is about to do a reload, cancel this */ if (b->meta_refresh_at) deregister_null_claimant(Wimp_ENull, (WimpEventHandler *) meta_check_refresh, b); b->meta_refresh_at = 0; /* If not fetching, exit here */ if (!fetch_fetching(b)) return NULL; /* Stop everything else */ fetch_stop(b, 1); /* Ensure the page is correctly formatted */ if (b->cell->nlines) reformat_format_from(b, b->cell->nlines - 1, 1, -1); return(NULL); } /*************************************************/ /* fetch_stop() */ /* */ /* Stops a fetch, optionally discarding the */ /* HTML source, making sure the browser window */ /* state (buttons, status bar animation etc.) is */ /* correct, any open files are closed, and so */ /* forth. In the UI sense this is higher level */ /* than fetch_cancel, though fetch_cancel calls */ /* this as part of doing other cancel actions, */ /* and is therefore the higher level function. */ /* */ /* Parameters: A pointer to the browser_data */ /* structure relevant to the fetch */ /* to be stopped; */ /* */ /* 1 to keep the HTML source, 0 to */ /* destroy it. */ /*************************************************/ void fetch_stop(browser_data * b, int keep_source) { /* Destroy the source, provided the browser was fetching any */ if (fetch_fetching(b) && !keep_source) browser_destroy_source(b); /* Set the fetch status to idle */ b->fetch_status = BS_IDLE; /* The save_link flag tells the browser to save the next fetch as data, */ /* even if it is parsable. Want to make sure that flag is clear now to */ /* avoid complications later on. 
*/ b->save_link = 0; /* If data was being saved to a file, close that file */ if (b->save_file) { char buffer[Limits_OS_Pathname]; /* Leave the file intact, but set it to a Data filetype */ if ( !_swix(OS_Args, _INR(0,2) | _IN(5), 7, /* Read pathname of open file */ b->save_file->__file, buffer, sizeof(buffer)) ) { _swix(OS_File, _INR(0,2), 18, buffer, FileType_DATA); } fclose(b->save_file); b->save_file = NULL; } if (b->save_dbox) saveobject_close(b); /* If there is a fetch, and the associated HTML document isn't being */ /* used by the display routines, close that fetch handle and free */ /* any memory associated with it. */ if ((b->fetch_handle) && (b->fetch_handle != b->display_handle)) html_close(b->fetch_handle); b->fetch_handle = 0; /* Discard the URL being fetched */ #ifdef TRACE if (tl & (1u<<12)) Printf("fetch_stop: Chunk CK_FURL set to 0\n"); #endif memory_set_chunk_size(b, NULL, CK_FURL, 0); b->reloading = 0; /* Update the status bar */ toolbars_cancel_status(b, Toolbars_Status_Fetching); /* Check that the window extent is large enough to fit the whole page in */ reformat_check_extent(b); /* Set up the window buttons */ toolbars_set_button_states(b); } /*************************************************/ /* fetch_authorisation_proceed() */ /* */ /* Given a browser_data structure with a URL */ /* containing a host and a pointer to a realm */ /* string, proceed with an authorisation */ /* request based on the data in the global */ /* 'authorise' flex block (handled by the */ /* functions in Authorise.c). */ /* */ /* Parameters: Pointer to a browser_data struct */ /* relevant to the authorisation */ /* request; */ /* */ /* Pointer to a urlstat structure */ /* giving the fetch context, or NULL */ /* to get it from the browser_data */ /* structure's fetch_handle field */ /* (no good for images, obviously); */ /* */ /* Pointer to a string containing */ /* the realm for the request; */ /* */ /* Pointer to the request URL. 
*/
/*************************************************/

void fetch_authorisation_proceed(browser_data * b, urlstat * context, char * realm, char * url)
{
  /* Length of the header name part, excluding the terminator */

  const int         prefix_len = (int) sizeof(AuthorisationStr) - 1;

  int               ok, s, offset;
  int               enc_len, hdr_len;
  size_t            user_len, pass_len;
  char              host    [Limits_HostName];
  char              authcode[(Limits_AuthUserWrit + Limits_AuthPassWrit + 2)];

  /* Base64 turns every 3 input bytes (rounded up) into 4 output */
  /* characters and a terminator is written after them below, so */
  /* size the buffer for the worst case plus one. This assumes   */
  /* encode_base64 emits plain, unbroken Base64 - TODO confirm   */
  /* against its implementation.                                 */

  char              base64  [((Limits_AuthUserWrit + Limits_AuthPassWrit + 2) + 2) / 3 * 4 + 1];

  urlstat         * up;
  _kernel_oserror * e;

  /* Clear the 'authorising' flag */

  authorising = 0;

  /* If required, find out the session handle */

  if (context) up = context;
  else
  {
    up = urlstat_find_entry(b->fetch_handle);

    if (!up)
    {
      fetch_cancel(b);

      erb.errnum = Utils_Error_Custom_Normal; /* Nasty error but can recover from it here */

      StrNCpy0(erb.errmess,
               lookup_token("StrNotFd:Internal error: Can't find structure in %0.",
                            0,
                            "fetch_authorisation_proceed"));

      show_error_ret(&erb);

      return;
    }
  }

  /* Mark this fetch as authorised once already - if the server */
  /* resends an authorisation request the fetcher will know     */
  /* that the authorisation failed (see html_get_next_token).   */

  up->authorised = 2;

  /* Size of any extra header data already held for this fetch */

  s = up->extradata ? flex_size((flex_ptr) &up->extradata) : 0;

  /* Work out the host name */

  urlutils_host_name_from_url(url, host, sizeof(host));

  /* Build "user:password" in the authcode block. Lengths are   */
  /* checked before copying, so an over-long stored entry is    */
  /* treated as a failed authorisation rather than being copied */
  /* past the end of the buffer.                                */

  offset = authorise_find_user_name(host, realm);

  if (offset < 0)
  {
    fetch_authorisation_fail(b);
    return;
  }

  user_len = strlen(authorise + offset);

  if (user_len + 2 > sizeof(authcode)) /* User name, ':' and terminator */
  {
    fetch_authorisation_fail(b);
    return;
  }

  memcpy(authcode, authorise + offset, user_len);
  authcode[user_len] = ':';

  offset = authorise_find_password(host, realm);

  if (offset < 0)
  {
    fetch_authorisation_fail(b);
    return;
  }

  pass_len = strlen(authorise + offset);

  if (user_len + 1 + pass_len + 1 > sizeof(authcode)) /* As above, plus the password */
  {
    fetch_authorisation_fail(b);
    return;
  }

  memcpy(authcode + user_len + 1, authorise + offset, pass_len + 1); /* Copies the terminator too */

  /* Encode the block */

  enc_len          = encode_base64(authcode, user_len + 1 + pass_len, base64);
  base64[enc_len]  = 0;

  /* Allocate memory for the encoded data as a header entry; */
  /* '+2' accounts for CR + LF termination. If there is no   */
  /* existing header data, one more byte is claimed for a    */
  /* string terminator.                                      */

  hdr_len = prefix_len + enc_len + 2;

  if (s) ok = flex_extend((flex_ptr) &up->extradata, s + hdr_len);
  else   ok = flex_alloc ((flex_ptr) &up->extradata, hdr_len + 1);

  if (!ok)
  {
    fetch_cancel(b);
    show_error_ret(make_no_fetch_memory_error(12));

    return;
  }

  /* Existing header data is shuffled up so that the new */
  /* entry can go in at the start of the block.          */

  if (s) memmove(up->extradata + hdr_len, up->extradata, s);

  /* Copy the data in: header name, encoded credentials, CR + LF */

  memcpy(up->extradata,              AuthorisationStr, prefix_len);
  memcpy(up->extradata + prefix_len, base64,           enc_len);

  up->extradata[hdr_len - 2] = '\r';
  up->extradata[hdr_len - 1] = '\n';

  if (!s) up->extradata[hdr_len] = 0;

  /* Restart the fetch with authentication */

  e = url_get_url(0,
                  up->session,
                  up->method,
                  url,
                  up->extradata,
                  NULL,
                  2);

  if (e)
  {
    fetch_cancel(b);
    show_error_ret(e);
  }
}

/*************************************************/
/* fetch_authorisation_fail()                    */
/*                                               */
/* Called when authorisation for a URL fails in  */
/* some way. Clears the 'authorising' flag,      */
/* cancels the fetch and then reports the        */
/* 'BadAuthor' error.                            */
/*                                               */
/* Parameters: Pointer to a browser_data struct  */
/*             relevant to the fetch.            */
/*************************************************/

void fetch_authorisation_fail(browser_data * b)
{
  /* Cancel the fetch */

  authorising = 0;
  fetch_cancel(b);

  /* Give the error */

  erb.errnum = Utils_Error_Custom_Message;

  StrNCpy0(erb.errmess,
           lookup_token("BadAuthor:Authorisation failed; you must use a valid user name and password.",
                        0,
                        0));

  show_error_ret(&erb);
}

/*************************************************/
/* fetch_get_raw_data()                          */
/*                                               */
/* Gets a chunk of data from a stream, assuming  */
/* that it is *not* HTML.
*/
/*                                               */
/* Parameters: Pointer to a browser_data struct  */
/*             relevant to the fetch (not used   */
/*             at present);                      */
/*                                               */
/*             The fetch handle;                 */
/*                                               */
/*             Pointer to buffer into which the  */
/*             fetched data will be placed (as   */
/*             a char *);                        */
/*                                               */
/*             Size of the buffer;               */
/*                                               */
/*             Pointer to an int into which 1 is */
/*             placed if the fetch is complete,  */
/*             else 0 is returned (this pointer  */
/*             may be NULL);                     */
/*                                               */
/*             Pointer to an int into which the  */
/*             number of bytes fetched is placed */
/*             (which may also be NULL).         */
/*                                               */
/* Returns:    Pointer to a _kernel_oserror      */
/*             struct if something failed, else  */
/*             NULL.                             */
/*                                               */
/* Assumes:    That if the browser_data struct   */
/*             pointer is NULL, the fetch is not */
/*             for an internal URL.              */
/*************************************************/

_kernel_oserror * fetch_get_raw_data(browser_data * b, unsigned int handle, char * buffer, int size, int * done, int * bytes)
{
  _kernel_oserror * e;
  int               status;
  int               ignored;     /* Receives url_read_data's last output, which is not needed here */
  int               fetched = 0; /* Stands in for 'bytes' when the caller passes NULL              */

  /* This function does not know about internal URLs yet (so   */
  /* parameter 'b' is currently unused)... BEWARE when using   */
  /* this, as of course this URL fetch may not be for page     */
  /* data under an internal URL.                               */

  /* There must be a urlstat structure for the fetch handle */

  if (!urlstat_find_entry(handle))
  {
    erb.errnum = Utils_Error_Custom_Fatal;

    StrNCpy0(erb.errmess,
             lookup_token("StrNotFd:Internal error: Can't find structure in %0.",
                          0,
                          "fetch_get_raw_data"));

    return &erb;
  }

  /* Read some data. A local is substituted for a NULL 'bytes' */
  /* pointer so that url_read_data is never handed NULL for    */
  /* the count, whatever it would make of that.                */

  e = url_read_data(0,
                    handle,
                    buffer,
                    size,
                    NULL,
                    bytes ? bytes : &fetched,
                    &ignored);

  if (e) return e;

  /* Get the fetch status */

  e = url_status(0, handle, &status, NULL, NULL);

  if (e) return e;

  /* Fill in 'done' as appropriate to the fetch status and exit */

  if (done) *done = (status & URL_Status_Done) ? 1 : 0;

  return NULL;
}