/* Copyright 1996 Acorn Computers Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* string.c: ANSI draft (X3J11 Oct 86) library code, section 4.11 */ /* Copyright (C) Codemist Ltd., 1988 */ /* Copyright (C) Advanced Risc Machines Ltd., 1991 */ /* version 3a */ /* Also the routine 'strerror' is in 'perror.c' rather than here */ /* 19-nov-86: If _copywords is #define'd then all cmp/cpy/set (but not cat) routines use word operations if operands lie on a word boundary. Enabling this assumes sizeof(int)=sizeof(void*)=4 and byte addressing. memset and strlen do even better. Beware however, memcpy and memmove also require length to be a multiple of 4 (change?). Further beware that memmove makes assumptions about memory layout. */ #include "hostsys.h" /* for BYTESEX_EVEN/BYTESEX_ODD */ #include <stddef.h> /* for size_t */ #include <string.h> #include "kernel.h" #include "territory.h" #include "swis.h" #define _chararg int /* arg spec for char when ANSI says int */ #define _copywords 1 /* do fast cpy/cmp if word aligned */ /* private export */ extern void _set_strcoll(int territory); /* The following magic check was designed by A. Mycroft. It yields a */ /* nonzero value if the argument w has a zero byte in it somewhere. The */ /* messy constants have been unfolded a bit in this code so that they */ /* get preloaded into registers before relevant loops. */ #ifdef _copywords /* for safety */ # define ONES_WORD 0x01010101 # define EIGHTS_WORD 0x80808080 # define nullbyte_prologue_() \ int ones_word = ONES_WORD; int eights_word = ones_word << 7 # define word_has_nullbyte(w) (((w) - ones_word) & ~(w) & eights_word) #endif /* copying functions */ #ifndef SEPARATE_MEMCPY void *memcpy(void *a, const void *b, size_t n) /* copy memory (upwards) - it is an errof for args to overlap. */ /* Relies on sizeof(int)=sizeof(void *) and byte addressing. */ { #ifdef _copywords /* do it fast if word aligned ... */ if ((((int)a | (int)b | (int)n) & 3) == 0) { int *wa,*wb; n >>= 2; for (wa = (int *)a, wb = (int *)b; n-- > 0;) *wa++ = *wb++; } else #endif { char *ca,*cb; for (ca = (char *)a, cb = (char *)b; n-- > 0;) *ca++ = *cb++; } return a; } void *memmove(void *a, const void *b, size_t n) /* copy memory taking care of overlap */ /* Relies on sizeof(int)=sizeof(void *) and byte addressing. Also that memory does not wrap round for direction test. */ { #ifdef _copywords /* do it fast if word aligned ... */ if ((((int)a | (int)b | (int)n) & 3) == 0) { int *wa,*wb; n >>= 2; if (a < (void *)b) for (wa = (int *)a, wb = (int *)b; n-- > 0;) *wa++ = *wb++; else for (wa = n+(int *)a, wb = n+(int *)b; n-- > 0;) *--wa = *--wb; } else #endif { char *ca,*cb; if (a < (void *)b) for (ca = (char *)a, cb = (char *)b; n-- > 0;) *ca++ = *cb++; else for (ca = n+(char *)a, cb = n+(char *)b; n-- > 0;) *--ca = *--cb; } return a; } #endif #ifdef PLUS_APRM /* Arthur library contains a more efficient coding of this in clib.s.cl_body */ void *_memcpy(void *a, const void *b, size_t n) /* Used by compiler for structure assignments */ { #ifdef PLUS_APRM int *wa = (int *)a, *wb = (int *)b; if ((int)wb & 2) { /* get source word-aligned */ *((short *)wa) = *((short *)wb); wa = (int *)((short *)wa + 1); wb = (int *)((short *)wb + 1); n-= 2; } while (n >= 4) {*wa++ = *wb++; n -= 4;} if (n) *((short *)wa) = *((short *)wb); #else /* * copy memory assuming no overlap, word aligned etc. * Relies on sizeof(int)=sizeof(void *) and byte addressing. */ int *wa, *wb; n >>= 2; for (wa = (int *)a, wb = (int *)b; n-- > 0;) *wa++ = *wb++; #endif return a; } #endif char *strcpy(char *a, const char *b) /* copy from b to a */ { char *p = a; #ifdef _copywords if ((((int)p | (int)b) & 3) == 0) { int w; nullbyte_prologue_(); while (w = *(int *)b, b += 4, !word_has_nullbyte(w)) *(int *)p = w, p += 4; /* This next piece of code relies on knowledge of the order of bytes */ /* within a word. */ #ifdef BYTESEX_EVEN for (;;) { if ((*p++ = w) == 0) return a; w >>= 8; } #else for (;;) /* I rather assume that shifts are fast operations here. */ { if ((*p++ = (w >> 24)) == 0) return a; w <<= 8; } #endif } #endif while ((*p++ = *b++) != 0); return a; } char *strncpy(char *a, const char *b, size_t n) /* as strcpy, but at most n chars */ /* NB may not be nul-terminated */ { char *p = a; #ifdef _copywords if ((((int)p | (int)b) & 3) == 0) { int w; nullbyte_prologue_(); while (n >= 4 && (w = *(int *)b, !word_has_nullbyte(w))) *(int *)p = w, p += 4, b += 4, n -= 4; } /* Although the above code has fetched the last part-filled word I will */ /* copy the last few bytes by steam in this case. The test on n and the */ /* need for padding seem to make anything else seem too messy. */ #endif while (n-- > 0) if ((*p++ = *b++) == 0) { char c = 0; while (n-- > 0) *p++ = c; /* ANSI says pad out with nul's */ return a; } return a; } /* concatenation functions */ char *strcat(char *a, const char *b) /* concatenate b on the end of a */ { char *p = a; while (*p != 0) p++; while ((*p++ = *b++) != 0); return a; } char *strncat(char *a, const char *b, size_t n) /* as strcat, but at most n chars */ { char *p = a; while (*p != 0) p++; while (n-- > 0) if ((*p++ = *b++) == 0) return a; *p = 0; return a; } /* comparison functions */ int memcmp(const void *a, const void *b, size_t n) { const unsigned char *ac = (const unsigned char *)a, *bc = (const unsigned char *)b; #ifdef _copywords if ((((int)ac | (int)bc) & 3) == 0) { while (n >= 4 && *(int *)ac == *(int *)bc) ac += 4, bc += 4, n -= 4; } #endif while (n-- > 0) { unsigned char c1 = *ac++, c2 = *bc++; /* unsigned cmp seems more intuitive */ int d = c1 - c2; if (d != 0) return d; } return 0; } int strcmp(const char *a, const char *b) /* lexical comparison on strings */ { #ifdef _copywords #ifdef BYTESEX_EVEN /* Improved little-endian ARM strcmp code by Ian Rickards, ARM Ltd. */ /* sbrodie (06/04/01): unfortunately, it breaks strcmp() semantics required by the library definition */ if ((((int)a | (int)b) & 3) == 0) { int w1, w2, res, rc; nullbyte_prologue_(); do { w1 = *(int *)a, a += 4; w2 = *(int *)b, b += 4; res = w1 - w2; if (res != 0) goto strcmp_checkbytes; } while (!word_has_nullbyte(w1)); return 0; strcmp_checkbytes: # ifdef WANT_ARMS_BROKEN_STRCMP_FOR_TOP_BIT_SET_CHARACTERS /* carry propagation in subtract means that no subtract-per-byte is needed */ rc = res << 24; if (rc != 0) return rc; if ((w1 & 0xff) == 0) return rc; rc = res << 16; if (rc != 0) return rc; if ((w1 & 0xff00) == 0) return rc; rc = res << 8; if (rc != 0) return rc; if ((w1 & 0xff0000) == 0) return rc; return res; # else /* WANT_ARMS_BROKEN_STRCMP_FOR_TOP_BIT_SET_CHARACTERS */ /* res is guaranteed non-zero, so rc will not be zero, therefore the loop * will find the bit eventually. The shifting is done to ensure that if it * is the top-byte that contains the difference, we don't lose the sign bit * on the subtraction. Right-shift on signed integers implementation defined, * but because we mask w1 and w2 with res, whether ASL or LSL is used is irrelevant. */ rc = 0xFF; for (;;) { if (((w1 | w2) & rc) == 0) return 0; if (rc & res) return (w1 & rc) - (w2 & rc); w1 >>= 1; w2 >>= 1; res >>= 1; rc <<= 7; } # endif /* WANT_ARMS_BROKEN_STRCMP_FOR_TOP_BIT_SET_CHARACTERS */ #else if ((((int)a | (int)b) & 3) == 0) { int w1, w2; nullbyte_prologue_(); do { w1 = *(int *)a, a += 4; w2 = *(int *)b, b += 4; } while (w1 == w2 && !word_has_nullbyte(w1)); /* sbrodie added note: it gets away with these implementation-defined right * shifts only because of the masking with 0xff. */ for (;;) { char c1 = (w1 >> 24) & 0xff, c2 = (w2 >> 24) & 0xff; int d = c1 - c2; if (d != 0) return d; if (c1 == 0) return 0; w1 = w1 << 8; w2 = w2 << 8; } #endif } #endif { char const *ap = a; /* in order to move ap from reg a1 */ for (;;) { char c1 = *ap++, c2 = *b++; int d = c1 - c2; if (d != 0) return d; if (c1 == 0) return d; /* no need to check c2 */ } } } int strncmp(const char *a, const char * b, size_t n) /* as strcmp, but at most n chars */ { #ifdef _copywords if ((((int)a | (int)b) & 3) == 0) { int w; nullbyte_prologue_(); while (n >= 4 && (w = *(int *)a) == *(int *)b && !word_has_nullbyte(w)) a += 4, b += 4, n -= 4; } #endif while (n-- > 0) { char c1 = *a++, c2 = *b++; int d = c1 - c2; if (d != 0) return d; if (c1 == 0) return 0; /* no need to check c2 */ } return 0; } static int strcoll_territory = 0; size_t strxfrm(char *s1, const char *s2, size_t n) { _kernel_swi_regs r; size_t l; if (strcoll_territory) { r.r[0] = strcoll_territory; r.r[1] = (int)s1; r.r[2] = (int)s2; r.r[3] = n; if (!_kernel_swi(Territory_TransformString, &r, &r)) return r.r[0]; } /* C locale or error from territory SWI */ /* In C locale this should just *resemble* a less efficient strncpy(). */ l = strlen(s2); for (; n != 0; n--) if ((*s1++ = *s2++) == 0) break; return l; } int strcoll(const char *a, const char *b) { _kernel_swi_regs r; if (strcoll_territory == 0) return strcmp(a, b); /* C locale */ r.r[0] = strcoll_territory; r.r[1] = (int) a; r.r[2] = (int) b; r.r[3] = 0; if (_kernel_swi(Territory_Collate, &r, &r)) return 0; return r.r[0]; } void _set_strcoll(int territory) { strcoll_territory = territory; } /* search functions - ordered more logically then in ANSI spec */ void *memchr(const void *s, _chararg ch, size_t n) /* first instance of ch in s */ { const unsigned char *t = (const unsigned char *)s; unsigned char c1 = (unsigned char)ch; while (n-- > 0) if (*t == c1) return (void *)t; else t++; return 0; } /* for the next two functions ANSI say you CAN search for '\0'. */ char *strchr(const char *s, _chararg ch) /* find first instance of ch in s */ { char c1 = (char)ch; for (;;) { char c = *s++; if (c == c1) return (char *)s-1; if (c == 0) return 0; } } char *strrchr(const char *s, _chararg ch) /* find last instance of ch in s */ { const char *p = s; while (*p++ != 0) /* nothing */; { char c1 = (char)ch; do { if (*--p == c1) return (char *)p; } while (p!=s); } return 0; } /* N.B. strspn(s,"")==0 & strcspn(s,"")==strlen(s) means that the next two fns are not quite symmetric. */ size_t strspn(const char *s, const char *p) /* find first char in s not in p */ { const char *ss, *pp; char ch; for (ss=s;;ss++) { if ((ch = *ss) == 0) return ss-s; for (pp=p;;) { char c1 = *pp++; if (c1 == 0) return ss-s; if (c1 == ch) break; } } } size_t strcspn(const char *s, const char *p) /* find first char in s that is in p */ { const char *ss, *pp; char ch; for (ss=s;;ss++) { char c1; if ((ch = *ss) == 0) return ss-s; for (pp=p; (c1 = *pp++) != 0; ) if (c1 == ch) return ss-s; } } char *strpbrk(const char *s, const char *p) /* ditto, except ptr/NULL result */ { const char *ss, *pp; char ch; for (ss=s;;ss++) { char c1; if ((ch = *ss) == 0) return 0; for (pp=p; (c1 = *pp++) != 0; ) if (c1 == ch) return (char *)ss; } } char *strstr(const char *a, const char *b) /* find first occurrence of b in a, or NULL */ { int i; for (;;) { for (i=0;; i++) { char ch = b[i]; if (ch == 0) return (char *)a; if (a[i] != ch) break; } if (*a++ == 0) return 0; } } char *strtok(char *s1, const char *s2) { static char *saves1 = ""; char *s0; if (s1 == 0) s1 = saves1; /* use saved pointer */ if (*(s1 += strspn(s1,s2)) == 0) s0 = 0; /* no tokens */ else { s0 = s1; if (*(s1 += strcspn(s1,s2)) != 0) *s1++ = 0; /* insert 0 if nec */ } return (saves1 = s1, s0); } /* Miscellaneous functions */ #ifndef SEPARATE_MEMSET void *memset(void *s, _chararg c, size_t n) { unsigned char *p = s; while (n > 0) { #ifdef _copywords if (n >= 4 && ((int)p & 3) == 0) { int w = 0x01010101 * (unsigned char)c; /* duplicate 4 times */ do *(int *)p = w, p += 4, n -= 4; while (n >= 4); } else #endif *p++ = (unsigned char)c, n--; } return s; } #endif char *strerror(int n) { static char v[80]; return _strerror(n, v); } size_t strlen(const char *a) /* find number of chars in a string */ { const char *x = a + 1; #ifdef _copywords int w; while ((int)a & 3) { if (*a++ == 0) return a - x; } { nullbyte_prologue_(); while (w = *(int *)a, a += 4, !word_has_nullbyte(w)) /* do nothing */; /* a now points one word after the one containing the terminating null */ } #ifdef BYTESEX_EVEN if ((w & 0xff) == 0) a -= 3; else if ((w & 0xff00) == 0) a -= 2; else if ((w & 0xff0000) == 0) a -= 1; #else if ((w & 0xff000000) == 0) a -= 3; else if ((w & 0xff0000) == 0) a -= 2; else if ((w & 0xff00) == 0) a -= 1; #endif #else while (*a++ != 0); #endif return a - x; } #include <errno.h> char *_strerror(int n, char *v) { switch (n) { case 0: return _kernel_getmessage2("No error (errno = 0)", "C35", v, 80); case EDOM: return _kernel_getmessage2("EDOM - function argument out of range", "C36", v, 80); case ERANGE: return _kernel_getmessage2("ERANGE - function result not representable", "C37", v, 80); case ESIGNUM: return _kernel_getmessage2("ESIGNUM - illegal signal number to signal() or raise()", "C66", v, 80); default: return _hostos_error_string(n, v); } } /* End string.c */