/* Copyright 1997 Acorn Computers Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /***************************************************/ /* File : URLutils.c */ /* Purpose: URL manipulation for the browser. */ /* Author : Merlyn Kline for Customer browser */ /* This source adapted by A.D.Hodgkinson */ /* from various original functions, with */ /* new additions. */ /* History: 06-Feb-97: Created */ /***************************************************/ #include <stdlib.h> #include <stdio.h> #include <string.h> #include "swis.h" #include "URI.h" /* URI handler API, in URILib:h */ #include "wimp.h" #include "event.h" #include "svcprint.h" #include "Global.h" #include "Utils.h" #include "Fetch.h" #include "URLutils.h" /* Local definitions */ #define FILEMETHOD "file://" #define HTTPMETHOD "http://" #define HTTPMSHORT "http:" /* Local variables */ /* Pointer to first item in queue of URLs dispatched through */ /* the URI handler (structure defined in URLutils.h) */ static uri_queue * uri_queue_base = NULL; /*************************************************/ /* urlutils_pathname_to_url() */ /* */ /* Takes a pathname, and turns it into a File */ /* URL, if it isn't one already. The pathname */ /* that you give is altered directly, so if you */ /* want to remember the path as well as the URL, */ /* ensure there is a second copy of it in */ /* another buffer somewhere. */ /* */ /* Parameters: Pointer to the pathname; */ /* Size of the buffer the pathname */ /* is stored in. 
*/
/*                                               */
/* Returns:    Pointer to the URL (which at the  */
/*             moment is the buffer that you     */
/*             passed in). Nothing is changed if */
/*             the buffer is NULL or too small   */
/*             for the prefix and a terminator.  */
/*************************************************/

char * urlutils_pathname_to_url(char * path, int buffersize)
{
  int len = (int) strlen(FILEMETHOD);

  /* The prefix needs 'len' bytes and the terminator one more. */
  /* With less room than that the insertion below would write  */
  /* outside the caller's buffer, so give up straight away.    */

  if (!path || buffersize <= len) return path;

  /* If the first part of the string doesn't match the FILEMETHOD */
  /* specifier (see top of this file for FILEMETHOD's definition) */
  /* then insert this text and convert the rest of the path to a  */
  /* file URL.                                                    */

  if (strncmp(path, FILEMETHOD, len))
  {
    /* Shift the path up to make room for the prefix. Anything */
    /* pushed past the end of the buffer is lost, so a path    */
    /* that nearly fills the buffer ends up truncated.         */

    memmove(path + len, path, buffersize - len);
    memcpy(path, FILEMETHOD, len);

    /* Ensure the string is terminated */

    path[buffersize - 1] = 0;

    /* Now translate the pathname part of the URL to a Unix-style */
    /* path scheme.                                               */

    urlutils_translate_pathname(path + len);
  }

  return path;
}

/*************************************************/
/* urlutils_translate_pathname()                 */
/*                                               */
/* Takes a RISC OS-style pathname and turns it   */
/* into a Unix-style pathname by exchanging '.'  */
/* and '/' in everything from the first ':'      */
/* onwards. A pathname with no ':' in it is left */
/* as it is. The pathname you give is altered    */
/* directly, so keep a second copy elsewhere if  */
/* you need the untranslated form.               */
/*                                               */
/* Parameters: Pointer to the pathname.          */
/*                                               */
/* Returns:    Pointer to the translated path    */
/*             (which at the moment is the       */
/*             buffer that you passed in).       */
/*************************************************/

char * urlutils_translate_pathname(char * path)
{
  char * p = path;

  /* Skip up to the first filing system separator (e.g. the */
  /* first colon in 'ADFS::<disc>.<path>').                 */

  while (*p && *p != ':') p++;

  /* Exchange '/' and '.' in whatever remains */

  for (; *p; p++)
  {
    switch (*p)
    {
      case '/': *p = '.'; break;
      case '.': *p = '/'; break;
      default:            break;
    }
  }

  return path;
}

/*************************************************/
/* urlutils_leafname_from_url()                  */
/*                                               */
/* Returns a pointer to a string containing a    */
/* possible leafname, based upon the URL passed  */
/* into the function.
*/
/*                                               */
/* The leafname is whatever follows the last '/' */
/* in the URL, less any '#anchor' and then less  */
/* a final '.extension' (the saved file carries  */
/* a filetype, so the extension isn't needed).   */
/*                                               */
/* Parameters: Pointer to a URL string;          */
/*                                               */
/*             Pointer to a buffer into which to */
/*             place the leafname (not the same  */
/*             as the URL string);               */
/*                                               */
/*             Size of the buffer.               */
/*                                               */
/* Returns:    Pointer to the buffer, which will */
/*             always hold some leafname: if the */
/*             URL yields none, the Messages     */
/*             token 'NoURLleaf' is used, or     */
/*             'NoURLdata' for an empty URL.     */
/*             Long names are truncated to fit.  */
/*                                               */
/* Assumes:    Neither pointer may be NULL. The  */
/*             buffer must be at least 2 bytes   */
/*             in size. If either condition is   */
/*             not met, NULL is returned and the */
/*             buffer is left untouched.         */
/*************************************************/

char * urlutils_leafname_from_url(char * url, char * leaf, int size)
{
  const char * start;
  const char * end;
  const char * dot;
  int          len;

  if (!url || !leaf || size < 2) return NULL;

  /* Zero the whole buffer so that whatever is copied in */
  /* below (at most size - 1 bytes) is always terminated */

  memset(leaf, 0, size);

  /* There is no URL at all, so offer a neutral filename */
  /* of HTMLfile.                                        */

  if (!*url)
  {
    lookup_token("NoURLdata:HTMLfile",0,0); /* Will put the string in the 'tokens' global buffer */
    strncpy(leaf, tokens, size - 1);

    return leaf;
  }

  /* The leafname starts just past the last forward slash, */
  /* or at the start of the URL if it holds no slash. Note */
  /* that a slash at offset 0 still counts as a slash.     */

  start = strrchr(url, '/');
  start = start ? start + 1 : url;

  /* Drop any anchor name first, so that the extension in */
  /* something like 'page.html#section' is still found.   */

  end = strchr(start, '#');
  if (!end) end = start + strlen(start);

  /* Now look backwards for the full stop that starts the   */
  /* extension. A full stop in the very first character is  */
  /* part of the name rather than an extension, so the scan */
  /* stops short of 'start'.                                */

  dot = end;
  while (dot > start && *dot != '.') dot--;
  if (dot > start) end = dot;

  len = (int) (end - start);

  if (len < 1)
  {
    /* The URL was specified without a leafname (e.g. it ends */
    /* in a forward slash). Offer 'Index' as a likely match   */
    /* for the remote file (purely an aesthetic decision).    */

    lookup_token("NoURLleaf:Index",0,0);
    strncpy(leaf, tokens, size - 1);
  }
  else
  {
    /* Copy the leafname, truncating it if the buffer is small */

    if (len > size - 1) len = size - 1;
    memcpy(leaf, start, len);
  }

  return leaf;
}

/*************************************************/
/* urlutils_host_name_from_url()                 */
/*                                               */
/* Extracts the host name from a given URL.      */
/*                                               */
/* Parameters: Pointer to the URL string;        */
/*                                               */
/*             Pointer to a buffer to write the  */
/*             host name into;                   */
/*                                               */
/*             Size of the buffer.
*/ /*************************************************/ void urlutils_host_name_from_url(char * url, char * host, int size) { char * p; host[0] = 0; /* First look for '//', as in 'http://' */ p = strstr(url, "//"); if (p) { /* If found, copy everything after that into 'host' */ p += 2; strncpy(host, p, size - 1); host[size - 1] = 0; /* Now search for a '/', as in 'http://www.acorn/', */ /* and if found force a terminator there. */ p = strchr(host, '/'); if (p) *p = 0; } } /*************************************************/ /* urlutils_create_hotlist_url() */ /* */ /* Creates a URL though which a hotlist file may */ /* be fetched. This is done by looking at a */ /* system variable 'Browse$HotlistURL'. If that */ /* isn't set it looks at 'Browse$HotlistURIFile' */ /* which can hold the path of a URI file to */ /* load. Lastly, it looks at the Choices file */ /* token 'HotlistPath', where a RISC OS pathname */ /* pointing to the file should be placed. This */ /* will be turned into a URL for fetching, so */ /* care must be taken over the path used. */ /* */ /* Parameters: Pointer to a buffer to place the */ /* URL in (not in a flex block!); */ /* Size of the buffer. */ /*************************************************/ void urlutils_create_hotlist_url(char * buffer, int size) { _kernel_oserror * e; memset(buffer, 0, size); /* See if the variable exists */ e = _swix(OS_ReadVarVal, _INR(0,4), "Browse$HotlistURL", /* Variable name */ buffer, /* Buffer */ size, /* Buffer size, -1 to check it exists */ 0, /* Name pointer (0 for 1st call) */ 4); /* Variable type (4 = literal string) */ /* First lookup failed, so try the URI file. */ if (e) { e = _swix(OS_ReadVarVal, _INR(0,4), "Browse$HotlistURIFile", buffer, size, 0, 4); if (e) { /* If the above gives an error, the variable doesn't exist; get */ /* the HotlistPath string from the Messages file instead. 
*/ strncpy(buffer, lookup_choice("HotlistPath",1,0), size - 1); urlutils_pathname_to_url(buffer, size); } else { char path[2048]; StrNCpy0(path, buffer); urlutils_load_uri_file(buffer, size, path); } } } /*************************************************/ /* urlutils_create_home_url() */ /* */ /* Creates a URL though which a home page may be */ /* fetched. This is done by looking at a system */ /* variable 'Browse$HomeURL'. If that isn't set, */ /* it looks at 'Browse$HomeURIFile', which can� */ /* hold the path of a URI file to load. Lastly, */ /* it looks at the Choices file token */ /* 'HomePage'. */ /* */ /* Parameters: Pointer to a buffer to place the */ /* URL in (not in a flex block!); */ /* Size of the buffer. */ /*************************************************/ void urlutils_create_home_url(char * buffer, int size) { _kernel_oserror * e; memset(buffer, 0, size); /* See if the variable exists */ e = _swix(OS_ReadVarVal, _INR(0,4), "Browse$HomeURL", /* Variable name */ buffer, /* Buffer */ size, /* Buffer size, -1 to check it exists */ 0, /* Name pointer (0 for 1st call) */ 4); /* Variable type (4 = literal string) */ /* First lookup failed, so try the URI file. */ if (e) { e = _swix(OS_ReadVarVal, _INR(0,4), "Browse$HomeURIFile", buffer, size, 0, 4); if (e) { /* If the above gives an error, the variable doesn't exist; get */ /* the HotlistPath string from the Messages file instead. */ strncpy(buffer, lookup_choice("HomePage",1,0), size - 1); } else { char path[2048]; StrNCpy0(path, buffer); urlutils_load_uri_file(buffer, size, path); } } } /*************************************************/ /* urlutils_fix_url() */ /* */ /* Takes a URL and 'fixes' it, e.g. appends a */ /* '/' character to a URL which is missing one. */ /* The contents of the buffer you give with the */ /* URL inside are altered directly, so if you */ /* want to remember the old URL, ensure there is */ /* a second copy of it in another buffer */ /* somewhere. 
*/
/*                                               */
/* Two fixes are made: a URL with no ':' in it   */
/* gets 'http://' put in front of it, and an     */
/* 'http:' URL in which no '/' appears outside a */
/* '//' pair gets a '/' appended. Either fix is  */
/* skipped if the buffer has no room for it.     */
/*                                               */
/* Parameters: Pointer to the URL;               */
/*                                               */
/*             Size of the buffer the URL is     */
/*             stored in.                        */
/*                                               */
/* Returns:    Pointer to the fixed URL (which   */
/*             at the moment is the buffer that  */
/*             you passed in).                   */
/*************************************************/

char * urlutils_fix_url(char * buffer, int buffersize)
{
  int len = (int) strlen(HTTPMETHOD);
  int shl = (int) strlen(HTTPMSHORT);
  int used;

  if (!buffer || buffersize < 1) return buffer;

  /* Hold the string length as an int. Comparing the size_t   */
  /* from strlen directly with 'buffersize - len' would turn  */
  /* a negative difference into a huge unsigned value, and so */
  /* let the insertion below run off the end of a small       */
  /* buffer.                                                  */

  used = (int) strlen(buffer);

  /* If the URL names no fetch method at all (there is no ':' */
  /* in it) and there is room for it, insert the HTTPMETHOD   */
  /* specifier (see top of this file for its definition) at   */
  /* the front of the URL.                                    */

  if (!strchr(buffer, ':') && used < buffersize - len)
  {
    memmove(buffer + len, buffer, used + 1); /* '+ 1' takes the terminator along */
    memcpy(buffer, HTTPMETHOD, len);

    used += len;
  }

  /* If there are at least 2 unused bytes in the buffer, and the */
  /* front of the string matches the HTTPMSHORT specifier (again */
  /* this is defined at the top of this file) then search for a  */
  /* '/' character which isn't part of a '//' sequence. If none  */
  /* is found, append a '/'. This is why 2 bytes free are needed */
  /* - one for the '/', one for the string terminator.           */

  if (used < buffersize - 2 && !strncmp(buffer, HTTPMSHORT, shl))
  {
    int i, single = 0;

    for (i = 0; i < used && !single; i++)
    {
      if (buffer[i] != '/') continue;

      /* At the start of a '//' sequence, skip past it; */
      /* otherwise this is the lone '/' being sought.   */

      if (buffer[i + 1] == '/') i++;
      else                      single = 1;
    }

    if (!single)
    {
      buffer[used]     = '/';
      buffer[used + 1] = 0;
    }
  }

  return buffer;
}

/*************************************************/
/* urlutils_load_uri_file()                      */
/*                                               */
/* Takes the given file, opens it, and copies    */
/* its contents to the given buffer. This is     */
/* intended for URI files, where the contents    */
/* may be a URL string with no terminator.       */
/*                                               */
/* The copy terminates when the buffer is full   */
/* except for the last byte (to allow for a      */
/* forced terminator), or a control code is met  */
/* in the URI file.
Note that the buffer is */ /* initialised to hold null bytes before the URI */ /* file is opened. */ /* */ /* If there is an error opening the file or the */ /* file is empty, the first byte of the buffer */ /* will be zero. */ /* */ /* Parameters: Pointer to the buffer; */ /* Size of the buffer; */ /* Pointer to the pathname of the */ /* URI file. */ /* */ /* Assumes: The buffer and path must NOT be */ /* the same area in memory. */ /*************************************************/ void urlutils_load_uri_file(char * buffer, size_t size, char * path) { FILE * fp; int byte, counter = 0; memset(buffer, 0, size); fp = fopen(path, "rb"); if (fp) { do { byte = getc(fp); if (byte != EOF && byte >= ' ') buffer[counter++] = byte; } while (byte != EOF && byte >= ' ' && counter < size - 1); fclose(fp); } } /*************************************************/ /* urlutils_internal_extra() */ /* */ /* Returns an offset into a given string at */ /* which extra data in an internal URL may be */ /* found. */ /* */ /* Parameters: Pointer to the URL string. */ /* */ /* Returns: Offset for the extra data, or 0 */ /* if none is found. */ /*************************************************/ int urlutils_internal_extra(char * iurl) { char * extra; if (strncmp(iurl, Internal_URL, Int_URL_Len)) return 0; extra = strchr(iurl, ':'); if (!extra) return 0; else extra ++; return (int) (extra - iurl); } /*************************************************/ /* urlutils_internal_tail() */ /* */ /* Returns an offset into a given string at */ /* which tail data (typically a URL leafname) */ /* may be found. */ /* */ /* Parameters: Pointer to the URL string. */ /* */ /* Returns: Offset for the tail data, or 0 if */ /* none is found. 
*/ /*************************************************/ int urlutils_internal_tail(char * iurl) { char * tail, * extra; int exoff, found = 0; exoff = urlutils_internal_extra(iurl); if (!exoff) return 0; extra = iurl + exoff; tail = iurl + strlen(iurl); /* No '-1' here as tail is decremented early in the while loop below */ while (tail > extra && !found) { tail--; if (*tail == '/') found = 1; } if (!found) return 0; else tail ++; return (int) (tail - iurl); } /*************************************************/ /* urlutils_set_displayed() */ /* */ /* On the basis of a given internal URL, sets */ /* the 'displayed' field of a given browser_data */ /* structure. */ /* */ /* Parameters: Pointer to a browser_data struct */ /* that is to be altered; */ /* Pointer to the internal URL. */ /*************************************************/ void urlutils_set_displayed(browser_data * b, char * iurl) { if (!strncmp(iurl, Internal_URL, Int_URL_Len)) { if (!strncmp(iurl + Int_URL_Len, "PExtImage", 9)) b->displayed = Display_External_Image; else if (!strncmp(iurl + Int_URL_Len, "GoBack", 6)) b->displayed = Display_Previous_Page; else if (!strncmp(iurl + Int_URL_Len, "GoRecover", 9)) b->displayed = Display_Recovered_Page; else if (!strncmp(iurl + Int_URL_Len, "GoHome", 6)) b->displayed = Display_Home_Page; else b->displayed = Display_Fetched_Page; // Catch all for now... } else b->displayed = Display_Fetched_Page; } /*************************************************/ /* urlutils_set_displayed() */ /* */ /* Checks a given URL to see if the fetch */ /* protocol it specifies can be handled. */ /* */ /* Parameters: Pointer to the URL string. */ /* */ /* Returns: 1 if the URL can be handled (i.e. */ /* the protocol at the start of the */ /* URL matches one that the Messages */ /* file says a module which is */ /* currently running copes with), */ /* else 0. 
*/
/*************************************************/

int urlutils_check_protocols(char * url)
{
  int  protocols;
  int  i;
  char token[32]; /* Room for 'ProtocolM' / 'ProtocolU' plus any int value */

  if (!url || !*url) return 0;

  /* Find the number of possible protocols */

  protocols = atoi(lookup_token("ProtocolMax", 1, NULL));

  /* Exit if not found / not a sensible number */

  if (protocols <= 0) return 0;

  /* Loop round all protocols */

  for (i = 1; i <= protocols; i++)
  {
    /* Look up the module name by building a MessageTrans    */
    /* token of the appropriate format, and call OS_Module   */
    /* 18 (lookup module) for it; if the SWI raises an error */
    /* the module isn't there, so move on to the next one.   */

    sprintf(token, "ProtocolM%d", i);

    if (_swix(OS_Module, _INR(0,1), 18, lookup_token(token, 1, NULL))) continue;

    /* Module is present, so check the protocol. The lookup */
    /* leaves its result in the 'tokens' global buffer.     */

    sprintf(token, "ProtocolU%d", i);
    lookup_token(token, 1, NULL);

    /* A leading '!' is taken to mean that the protocol     */
    /* identifier could not be found. An empty identifier   */
    /* is skipped too, as comparing zero characters would   */
    /* otherwise 'match' every URL.                         */

    if (!tokens[0] || tokens[0] == '!') continue;

    /* Compare it to the same number of characters in */
    /* the given URL. If it matches, we can deal with */
    /* the URL.                                       */

    if (!strncmp(tokens, url, strlen(tokens))) return 1;
  }

  /* If we reach here, no protocol was found. */

  return 0;
}

/*************************************************/
/* urlutils_dispatch()                           */
/*                                               */
/* Puts a given URI into the URI queue and sends */
/* it out to the URI handler.                    */
/*                                               */
/* Parameters: Pointer to a browser_data struct  */
/*             to which the URI relates;         */
/*                                               */
/*             Pointer to null-terminated URI    */
/*             string (not in a movable block,   */
/*             so not, e.g., in a flex block);   */
/*                                               */
/*             URI queue flags (see URLutils.h). */
/*                                               */
/* Assumes:    That the caller has already made  */
/*             sure the URI handler is present.
*/ /*************************************************/ _kernel_oserror * urlutils_dispatch(browser_data * b, char * uri, unsigned int flags) { _kernel_oserror * e; unsigned int return_code; uri_queue * entry; #ifdef TRACE if (tl & (1u<<21)) Printf("urlutils_dispatch: Called for %p with '%s'\n",b,uri); #endif /* Claim memory for the new entry */ entry = malloc(sizeof(uri_queue)); /* Moan if the claim failed */ if (!entry) { #ifdef TRACE if (tl & (1u<<21)) Printf("urlutils_dispatch: Memory claim for queue entry failed\n",b,uri); #endif make_no_fetch_memory_error(15); return &erb; } #ifdef TRACE malloccount += sizeof(uri_queue); if (tl & (1u<<13)) Printf("** malloccount: %d\n",malloccount); if (tl & (1u<<21)) Printf("urlutils_dispatch: Claimed queue entry %p\n",entry); #endif /* Fill in part of the entry */ entry->flags = flags; entry->b = b; /* If there are no entries, set uri_queue_base to the */ /* address of this one. Otherwise, point this entry's */ /* 'next' to the current base item, and point that */ /* item's 'prev' back to this entry. Then replace the */ /* current base entry with this new one. */ entry->prev = NULL; if (!uri_queue_base) entry->next = NULL; else { entry->next = uri_queue_base; uri_queue_base->prev = entry; } uri_queue_base = entry; /* Now call the URI handler and get a handle to fill in */ /* the last uri_queue field. 
*/ e = uri_dispatch(URI_Dispatch_Inform, uri, task_handle, &return_code, NULL, &entry->uri_handle); if (e) { #ifdef TRACE if (tl & (1u<<21)) Printf("urlutils_dispatch: Exitting with error\n"); #endif return e; } /* If the request was refused complain */ if (return_code != URI_Dispatch_RequestAccepted) { erb.errnum = Utils_Error_Custom_Message; StrNCpy0(erb.errmess, lookup_token("Refused:Cannot fetch this address as the fetch request was refused by the internal handler.", 0,0)); #ifdef TRACE if (tl & (1u<<21)) Printf("urlutils_dispatch: Exitting with error\n"); #endif return &erb; } /* Otherwise exit successfully */ #ifdef TRACE if (tl & (1u<<21)) Printf("urlutils_dispatch: Successful\n"); #endif return NULL; } /*************************************************/ /* urlutils_remove_from_queue() */ /* */ /* Removes a specified entry from the list of */ /* uri_queue structures, freeing the memory */ /* allocated for it. */ /* */ /* Parameters: The URI handle of the entry. */ /*************************************************/ _kernel_oserror * urlutils_remove_from_queue(URI_handle_t uri_handle) { uri_queue * entry = uri_queue_base; #ifdef TRACE if (tl & (1u<<21)) Printf("urlutils_remove_from_queue: Called with handle %p\n", uri_handle); #endif /* Try to find the entry */ while (entry && entry->uri_handle != uri_handle) entry = entry->next; #ifdef TRACE /* Complain if not found */ if (!entry) { erb.errnum = Utils_Error_Custom_Normal; sprintf(erb.errmess, "Can't find URI handle %p in URI queue", uri_handle); if (tl & (1u<<21)) Printf("urlutils_remove_from_queue: Exitting with error\n"); return &erb; } #else /* Fail silently */ if (!entry) return NULL; #endif if (entry->prev) entry->prev->next = entry->next; if (entry->next) entry->next->prev = entry->prev; if (entry == uri_queue_base) uri_queue_base = entry->next; #ifdef TRACE malloccount -= sizeof(uri_queue); if (tl & (1u<<13)) Printf("** malloccount: %d\n",malloccount); if (tl & (1u<<21)) 
Printf("urlutils_remove_from_queue: Freeing entry %p\n",entry); #endif free (entry); #ifdef TRACE if (tl & (1u<<21)) Printf("urlutils_remove_from_queue: Successful\n"); #endif return NULL; } /*************************************************/ /* urlutils_find_queue_entry() */ /* */ /* Finds an entry in the list of uri_queue */ /* structures. */ /* */ /* Parameters: The URI handle of the entry. */ /* */ /* Returns: Pointer to the entry, or NULL if */ /* no entry with that handle could */ /* be found. */ /*************************************************/ uri_queue * urlutils_find_queue_entry(URI_handle_t uri_handle) { uri_queue * entry = uri_queue_base; #ifdef TRACE if (tl & (1u<<21)) Printf("urlutils_find_queue_entry: Called with handle %p\n", uri_handle); #endif while (entry && entry->uri_handle != uri_handle) entry = entry->next; #ifdef TRACE if (tl & (1u<<21)) Printf("urlutils_find_queue_entry: Returning with entry %p\n", entry); #endif return entry; } /*************************************************/