| /* ********************************************************** |
| * Copyright (c) 2011-2014 Google, Inc. All rights reserved. |
| * Copyright (c) 2002-2010 VMware, Inc. All rights reserved. |
| * **********************************************************/ |
| |
| /* |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * * Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| * |
| * * Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * * Neither the name of VMware, Inc. nor the names of its contributors may be |
| * used to endorse or promote products derived from this software without |
| * specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE |
| * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH |
| * DAMAGE. |
| */ |
| |
| /* Copyright (c) 2003-2007 Determina Corp. */ |
| /* Copyright (c) 2002-2003 Massachusetts Institute of Technology */ |
| |
| /* |
| * io.c - routines for i/o to avoid library dependencies |
| */ |
| |
| /* FIXME: failure modes should be more graceful than failing asserts in most places */ |
| |
| #include "globals.h" |
| #include <string.h> |
| #include <stdarg.h> /* for varargs */ |
| |
| #ifdef UNIX |
| # ifdef MACOS |
| /* request extern functions to avoid conflicts with our own tolower() */ |
| # define _EXTERNALIZE_CTYPE_INLINES_TOP_ |
| # define _EXTERNALIZE_CTYPE_INLINES_ |
| # endif |
| # include <wchar.h> |
| #endif |
| |
| #ifdef NOT_DYNAMORIO_CORE_PROPER |
| /* drpreinject doesn't link utils.c. We fail gracefully without the assertion, |
| * so just define it away. |
| */ |
| # undef CLIENT_ASSERT |
| # define CLIENT_ASSERT(cond, msg) |
| # undef ASSERT |
| # define ASSERT(x) |
| #endif /* NOT_DYNAMORIO_CORE_PROPER */ |
| |
/* Configuration macros.  Presumably consumed by the iox.h template that is
 * included further down in this file -- TODO confirm.
 */
#define VA_ARG_CHAR2INT
#define BUF_SIZE 64

/* Positive and negative infinity, obtained by dividing by zero. */
#ifdef UNIX
static const double pos_inf = 1.0/0.0;
static const double neg_inf = -1.0/0.0;
#else
/* The constant-expression form above is rejected on Windows with
 * "error C2099: initializer is not a constant" or
 * "error C2124: divide or mod by zero", so we divide by a variable instead.
 */
# pragma warning(disable : 4723) /* warning C4723: potential divide by 0 */
static const double zerof = 0.0;
# define pos_inf (1.0/zerof)
# define neg_inf (-1.0/zerof)
#endif
| |
/* Converts d to a long, discarding the fractional part.
 * Assumes that d > 0.  Exported to utils.c.
 */
long
double2int_trunc(double d)
{
    long whole = (long)d;
    /* When building with /QIfist the cast rounds instead of truncating
     * (i#763), so step back down if the converted value overshot d.
     */
    return ((double)whole > d) ? whole - 1 : whole;
}
| |
/* Converts d to the nearest long, with halves rounding away from the value
 * produced by the cast.  Assumes that d > 0.
 */
static long
double2int(double d)
{
    long nearest = (long)d;
    double back = (double)nearest;
    /* When building with /QIfist the cast rounds instead of truncating
     * (i#763), so the cast result may lie on either side of d: correct it in
     * whichever direction is off by at least one half.
     */
    if (back < d) {
        if (d - back >= 0.5)
            nearest++;
    } else if (back > d) {
        if (back - d >= 0.5)
            nearest--;
    }
    return nearest;
}
| |
| #ifdef WINDOWS |
| /***************************************************************************** |
| * UTF-8 <-> UTF-16 |
| * |
| * Windows-only b/c it assumes wide chars are 2 bytes (primarily just when |
| * examining input values). |
| */ |
| |
/* Converts the null-terminated UTF-8 string src into UTF-16 in dst.
 *
 * dst:       output buffer.
 * dst_sz:    capacity of dst in elements (wchar_t's).
 * src:       UTF-8 input.
 * max_chars: maximum number of unicode characters to convert; 0 means no limit.
 * written:   if non-NULL, receives the number of unicode characters converted
 *            (a surrogate pair counts as one; the terminating null is not
 *            counted).  Not set when -1 is returned.
 *
 * Returns the number of elements written to dst.  If there is room, appends a
 * null terminator (which is not included in the return value).  (This is to
 * match our snprintf semantics.)
 * Returns -1 on an error: src not being valid UTF-8 (bad continuation byte,
 * stray continuation byte or invalid lead byte, overlong 4-byte sequence), or
 * on encountering a character that cannot be encoded with UTF-16.
 * Will not write a partial multi-element character.
 * Does not use a byte-order mark.
 *
 * XXX: instead of bailing and returning -1, should we use a particular encoding
 * for each invalid sequence?  MultiByteToWideChar uses U+FFFD.
 */
static ssize_t
utf8_to_utf16(wchar_t *dst, size_t dst_sz/*elements*/, const char *src,
              size_t max_chars, size_t *written/*unicode chars*/)
{
    /* Be sure to use unsigned for proper comparisons below */
    const unsigned char *s = (const unsigned char *) src;
    wchar_t *d = dst;
    size_t chars = 0;
    while (dst_sz > 0 && *s != '\0' && (max_chars == 0 || chars < max_chars)) {
        if (*s <= 0x7f) {
            /* through U+007F: 7 bits: bottom 7 of 1st byte */
            *d = (wchar_t) *s;
            chars++;
        } else if (*s >> 5 == 0x6) {
            /* through U+07FF: 11 bits: bottom 5 of 1st and bottom 6 of 2nd */
            wchar_t first = (((wchar_t)*s) & 0x1f) << 6;
            s++;
            if (*s >> 6 != 0x2)
                return -1; /* malformed UTF-8 */
            *d = first | (*s & 0x3f);
            chars++;
        } else if (*s >> 4 == 0xe) {
            /* through U+FFFF: 16 bits: bottom 4 of 1st, bottom 6 of 2nd + 3rd */
            wchar_t first = (((wchar_t)*s) & 0xf) << 12;
            s++;
            if (*s >> 6 != 0x2)
                return -1; /* malformed UTF-8 */
            first |= ((((wchar_t)*s) & 0x3f) << 6);
            s++;
            if (*s >> 6 != 0x2)
                return -1; /* malformed UTF-8 */
            *d = first | (*s & 0x3f);
            chars++;
        } else if (*s >> 3 == 0x1e) {
            /* through U+1FFFFF: 21 bits: bottom 3 of 1st, bottom 6 of 2nd-4th */
            unsigned int cp = (((unsigned int)*s) & 0x7) << 18;
            s++;
            if (*s >> 6 != 0x2)
                return -1; /* malformed UTF-8 */
            cp |= ((((unsigned int)*s) & 0x3f) << 12);
            s++;
            if (*s >> 6 != 0x2)
                return -1; /* malformed UTF-8 */
            cp |= ((((unsigned int)*s) & 0x3f) << 6);
            s++;
            if (*s >> 6 != 0x2)
                return -1; /* malformed UTF-8 */
            cp |= (((unsigned int)*s) & 0x3f);
            /* An overlong encoding of a value below U+10000 is malformed, and
             * would also underflow the surrogate computation below.
             */
            if (cp < 0x10000)
                return -1; /* malformed UTF-8 */
            /* check limit */
            if (cp > 0x10ffff)
                return -1; /* not encodable with UTF-16 */
            /* encode using surrogate pairs */
            if ((size_t)(d + 1 - dst) >= dst_sz) /* no partial chars */
                break;
            *d = (wchar_t) (((cp - 0x10000) >> 10) + 0xd800);
            d++;
            *d = (wchar_t) (((cp - 0x10000) & 0x3ff) + 0xdc00);
            chars++;
        } else {
            /* Everything else is an error:
             * - 0b111110xx / 0b1111110x lead bytes (5- and 6-byte forms,
             *   through U+7FFFFFFF) are not encodable with UTF-16;
             * - 0b10xxxxxx is a continuation byte with no lead byte;
             * - 0xfe and 0xff never appear in UTF-8.
             * Bail rather than emit an element we never initialized.
             */
            return -1;
        }
        d++;
        if ((size_t)(d - dst) >= dst_sz)
            break;
        s++;
    }
    if ((size_t)(d - dst) < dst_sz)
        *d = L'\0';
    if (written != NULL)
        *written = chars;
    return d - dst;
}
| |
/* Converts the null-terminated UTF-16 string src into UTF-8 in dst.
 *
 * dst:       output buffer, or NULL to only compute the required size.
 * dst_sz:    capacity of dst in elements (bytes); ignored if dst is NULL.
 * src:       UTF-16 input.
 * max_chars: maximum number of unicode characters to convert; 0 means no limit.
 * written:   if non-NULL, receives the number of unicode characters converted
 *            (a surrogate pair counts as one; the terminating null is not
 *            counted).  Not set when -1 is returned.
 *
 * Returns the number of elements written to dst.  If there is room, appends a
 * null terminator (which is not included in the return value).  (This is to
 * match our snprintf semantics.)
 * If dst==NULL, returns the number of elements required to encode max_chars
 * (or all if max_chars==0) from src, not counting a null terminator.
 * Returns -1 on an error, such as src not being valid UTF-16 (a high surrogate
 * not followed by a low surrogate, or a low surrogate with no preceding high
 * surrogate).  The input is validated identically in both modes.
 * Will not write a partial multi-byte character.
 * Does not handle a byte-order mark.
 */
static ssize_t
utf16_to_utf8(char *dst, size_t dst_sz/*elements*/, const wchar_t *src,
              size_t max_chars, size_t *written/*unicode chars*/)
{
    const wchar_t *s = src;
    char *d = dst;
    ssize_t bytes = 0;
    size_t chars = 0;
    while ((dst == NULL || dst_sz > 0) && *s != L'\0' &&
           (max_chars == 0 || chars < max_chars)) {
        if (*s <= 0x7f) {
            if (dst != NULL)
                *d = (char) *s;
            else
                bytes++;
            chars++;
        } else if (*s <= 0x7ff) {
            /* 2-byte encoding: 0b110xxxxx 0b10xxxxxx */
            if (dst != NULL) {
                if ((size_t)(d + 1 - dst) >= dst_sz) /* no partial chars */
                    break;
                *d = (char) (0xc0 | (*s >> 6));
                d++;
                *d = (char) (0x80 | (*s & 0x3f));
            } else
                bytes += 2;
            chars++;
        } else if (*s >= 0xd800 && *s <= 0xdfff) {
            /* surrogate pairs */
            unsigned int cp;
            /* The first element must be a high surrogate (0xd800-0xdbff) and
             * the next a low surrogate (0xdc00-0xdfff).  *s is not the
             * terminator, so s[1] is always readable.  We check this even when
             * only sizing so that we never step past the terminator after a
             * trailing lone high surrogate.
             */
            if (*s > 0xdbff || s[1] < 0xdc00 || s[1] > 0xdfff)
                return -1; /* malformed UTF-16 */
            cp = ((unsigned int)(*s - 0xd800)) << 10;
            s++;
            cp |= (unsigned int)(*s - 0xdc00);
            cp += 0x10000;
            if (dst != NULL) {
                /* 4-byte encoding: 0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx */
                if ((size_t)(d + 3 - dst) >= dst_sz) /* no partial chars */
                    break;
                *d = (char) (0xf0 | (cp >> 18));
                d++;
                *d = (char) (0x80 | ((cp >> 12) & 0x3f));
                d++;
                *d = (char) (0x80 | ((cp >> 6) & 0x3f));
                d++;
                *d = (char) (0x80 | (cp & 0x3f));
            } else
                bytes += 4;
            chars++;
        } else {
            /* 3-byte encoding: 0b1110xxxx 0b10xxxxxx 0b10xxxxxx */
            if (dst != NULL) {
                if ((size_t)(d + 2 - dst) >= dst_sz) /* no partial chars */
                    break;
                *d = (char) (0xe0 | (*s >> 12));
                d++;
                *d = (char) (0x80 | ((*s >> 6) & 0x3f));
                d++;
                *d = (char) (0x80 | (*s & 0x3f));
            } else
                bytes += 3;
            chars++;
        }
        if (dst != NULL) {
            d++;
            if ((size_t)(d - dst) >= dst_sz)
                break;
        }
        s++;
    }
    if (dst != NULL) {
        if ((size_t)(d - dst) < dst_sz)
            *d = '\0';
    }
    if (written != NULL)
        *written = chars;
    if (dst != NULL)
        return d - dst;
    else
        return bytes;
}
| |
/* Returns the number of UTF-8 elements (bytes) needed to encode max_chars
 * unicode characters (or all of them if max_chars==0) from the UTF-16 string
 * src, not counting a null terminator.  Returns -1 if src is not valid UTF-16.
 * If written is non-NULL, it receives the number of unicode characters that
 * were measured.
 */
ssize_t
utf16_to_utf8_size(const wchar_t *src, size_t max_chars, size_t *written/*unicode chars*/)
{
    /* A NULL dst puts utf16_to_utf8() into size-only mode.  Forward written
     * so that the out-parameter is filled in rather than silently ignored.
     */
    return utf16_to_utf8(NULL, 0, src, max_chars, written);
}
| #endif |
| |
| /***************************************************************************** |
| * snprintf |
| */ |
| |
| /* we generate from a template to get wide and narrow versions */ |
| #undef IOX_WIDE_CHAR |
| #include "iox.h" |
| |
| #define IOX_WIDE_CHAR |
| #include "iox.h" |
| |
| /***************************************************************************** |
| * Stand alone sscanf implementation. |
| */ |
| |
/* Kind of conversion requested by one format directive in our_vsscanf(). */
typedef enum _specifier_t {
    SPEC_INT    = 0, /* integer conversions: %d, %u, %x, %p */
    SPEC_CHAR   = 1, /* %c */
    SPEC_STRING = 2  /* %s */
} specifer_t;
| |
/* Size of the integer that an integer directive in our_vsscanf() stores
 * through its pointer argument.
 */
typedef enum _int_sz_t {
    SZ_SHORT    = 0, /* short */
    SZ_INT      = 1, /* int */
    SZ_LONG     = 2, /* long */
    SZ_LONGLONG = 3, /* long long */
    /* Pointer-sized: an alias for long long on 64-bit Windows, and for long
     * everywhere else.
     */
#if defined(X64) && defined(WINDOWS)
    SZ_PTR = SZ_LONGLONG
#else
    SZ_PTR = SZ_LONG
#endif
} int_sz_t;
| |
/* Returns whether c is one of the six whitespace characters: space, form
 * feed, newline, carriage return, horizontal tab, or vertical tab.
 *
 * We roll our own because the isspace() from ctype.h is actually a macro that
 * calls __ctype_b_loc(), which tries to look something up in the library TLS,
 * and that doesn't work without the private loader.
 */
static inline bool
our_isspace(int c)
{
    switch (c) {
    case ' ':
    case '\f':
    case '\n':
    case '\r':
    case '\t':
    case '\v':
        return true;
    default:
        return false;
    }
}
| |
| const char * |
| parse_int(const char *sp, uint64 *res_out, uint base, uint width, bool is_signed) |
| { |
| bool negative = false; |
| uint64 res = 0; |
| uint i; /* Use an index rather than pointer to compare with width. */ |
| |
| /* Check for invalid base. */ |
| if (base > 36 || base == 1) { |
| *res_out = (uint64) -1LL; |
| return NULL; |
| } |
| |
| /* Check for negative sign if signed. */ |
| if (is_signed) { |
| if (*sp == '-') { |
| negative = true; |
| sp++; |
| } |
| } |
| |
| /* Ignore leading +. */ |
| if (!negative && *sp == '+') |
| sp++; |
| |
| /* 0x prefix for hex is optional. */ |
| if ((base == 0 || base == 16) && sp[0] == '0' && sp[1] == 'x') { |
| sp += 2; |
| if (base == 0) |
| base = 16; |
| } |
| |
| /* Leading '0' with 0 base means octal. */ |
| if (base == 0 && *sp == '0') { |
| base = 8; |
| sp++; |
| } |
| |
| /* If we didn't find leading '0' or "0x", base is 10. */ |
| if (base == 0) |
| base = 10; |
| |
| /* XXX: For efficiency we could do a couple things: |
| * - Specialize the loop on base |
| * - Use a lookup table |
| */ |
| for (i = 0; width == 0 || i < width; i++) { |
| uint d = sp[i]; |
| if (d >= '0' && d <= '9') { |
| d -= '0'; |
| } else if (d >= 'a' && d <= 'z') { |
| d = d - 'a' + 10; |
| } else if (d >= 'A' && d <= 'Z') { |
| d = d - 'A' + 10; |
| } else { |
| break; /* Non-digit character. Could be \0. */ |
| } |
| /* Stop the parse here if this digit was not valid for the current base, |
| * (e.g. 9 for octal of g for hex). |
| */ |
| if (d >= base) |
| break; |
| /* FIXME: Check for overflow. */ |
| /* XXX: int64 multiply is inefficient on 32-bit. */ |
| res = res * base + d; |
| } |
| |
| /* No digits found, return failure. */ |
| if (i == 0) |
| return NULL; |
| |
| if (negative) |
| res = -(int64)res; |
| |
| *res_out = res; |
| return sp + i; |
| } |
| |
| /* Stand alone implementation of sscanf. We used to call libc's vsscanf while |
| * trying to isolate errno (i#238), but these days sscanf calls malloc (i#762). |
| * Therefore, we roll our own. |
| */ |
| int |
| our_vsscanf(const char *str, const char *fmt, va_list ap) |
| { |
| int num_parsed = 0; |
| const char *fp = fmt; |
| const char *sp = str; |
| int c; |
| |
| while (*fp != '\0' && *sp != '\0') { |
| specifer_t spec = SPEC_INT; |
| int_sz_t int_size = SZ_INT; |
| uint base = 10; |
| bool is_signed = false; |
| bool is_ignored = false; |
| uint width = 0; |
| |
| /* Handle literal characters and spaces up front. */ |
| c = *fp++; |
| if (our_isspace(c)) { |
| /* Space means consume any number of spaces. */ |
| while (our_isspace(*sp)) { |
| sp++; |
| } |
| continue; |
| } else if (c != '%') { |
| /* Literal, so check mismatch. */ |
| if (c != *sp) |
| return num_parsed; |
| sp++; |
| continue; |
| } |
| |
| /* Parse the format specifier. */ |
| ASSERT(c == '%'); |
| while (true) { |
| c = *fp++; |
| switch (c) { |
| /* Modifiers, should all continue the loop. */ |
| case 'l': |
| if (int_size == SZ_INT) { |
| int_size = SZ_LONG; |
| } else if (int_size == SZ_LONG) { |
| int_size = SZ_LONGLONG; |
| } else { |
| CLIENT_ASSERT(int_size != SZ_SHORT, |
| "dr_sscanf: can't use %hl modifier"); |
| CLIENT_ASSERT(int_size != SZ_LONGLONG, |
| "dr_sscanf: too many longs (%lll)"); |
| return num_parsed; /* error */ |
| } |
| break; |
| case 'h': |
| CLIENT_ASSERT(int_size == SZ_INT, |
| "dr_sscanf: can't use %lh modifier"); |
| int_size = SZ_SHORT; |
| continue; |
| case '*': |
| is_ignored = true; |
| continue; |
| case '0': case '1': case '2': case '3': case '4': |
| case '5': case '6': case '7': case '8': case '9': |
| /* We honor the specified width for strings to prevent buffer |
| * overruns, but we don't honor it for integers. Honoring the |
| * width for integers would require our own integer parser. |
| */ |
| width = width * 10 + c - '0'; |
| continue; |
| case 'I': |
| /* We support I32 and I64 from Windows sscanf because DR exports |
| * macros that use them. |
| */ |
| if (strncmp("32", fp, 2) == 0) { |
| int_size = SZ_INT; |
| } else if (strncmp("64", fp, 2) == 0) { |
| int_size = SZ_LONGLONG; |
| } else { |
| CLIENT_ASSERT(false, |
| "dr_sscanf: unsupported I<width> modifier"); |
| return num_parsed; |
| } |
| break; |
| /* XXX: Modifiers we could add support for: |
| * - j, z, t: C99 modifiers for intmax_t, size_t, and ptrdiff_t. |
| * - [] scan sets: These are complicated and better to avoid. |
| * - .*: For dynamically sized strings. Not part of C scanf. |
| * - n$: Store the result into the nth pointer arg after fmt. |
| */ |
| |
| /* Specifiers, should all break the loop. */ |
| case 'u': |
| spec = SPEC_INT; |
| is_signed = false; |
| goto spec_done; |
| case 'd': |
| spec = SPEC_INT; |
| is_signed = true; |
| goto spec_done; |
| case 'x': |
| spec = SPEC_INT; |
| is_signed = false; |
| base = 16; |
| goto spec_done; |
| case 'p': |
| int_size = SZ_PTR; |
| spec = SPEC_INT; |
| is_signed = false; |
| base = 16; |
| goto spec_done; |
| case 'c': |
| spec = SPEC_CHAR; |
| goto spec_done; |
| case 's': |
| spec = SPEC_STRING; |
| goto spec_done; |
| /* XXX: Specifiers we could add support for: |
| * - o: octal integer |
| * - g, e, f: floating point |
| * - n: characters consumed so far |
| */ |
| default: |
| CLIENT_ASSERT(false, "dr_sscanf: unknown specifier"); |
| return num_parsed; /* error */ |
| } |
| } |
| spec_done: |
| |
| /* Parse the string. */ |
| switch (spec) { |
| case SPEC_CHAR: |
| /* XXX: We don't support width with %c. */ |
| if (!is_ignored) { |
| *va_arg(ap, char*) = *sp; |
| } |
| sp++; |
| break; |
| case SPEC_STRING: |
| if (is_ignored) { |
| while (*sp != '\0' && !our_isspace(*sp)) { |
| sp++; |
| } |
| } else { |
| char *str_out = va_arg(ap, char*); |
| if (width > 0) { |
| uint i = 0; |
| while (i < width && *sp != '\0' && !our_isspace(*sp)) { |
| *str_out++ = *sp++; |
| i++; |
| } |
| /* Spec says only null terminate if we hit width. */ |
| if (i < width) |
| *str_out = '\0'; |
| } else { |
| while (*sp != '\0' && !our_isspace(*sp)) { |
| *str_out++ = *sp++; |
| } |
| *str_out = '\0'; |
| } |
| } |
| break; |
| case SPEC_INT: { |
| uint64 res; |
| /* C sscanf skips leading whitespace before parsing integers. */ |
| while (*sp != '\0' && our_isspace(*sp)) |
| sp++; |
| sp = parse_int(sp, &res, base, width, is_signed); |
| if (sp == NULL) |
| return num_parsed; |
| |
| if (!is_ignored) { |
| if (int_size == SZ_SHORT) |
| *va_arg(ap, short *) = (short)res; |
| else if (int_size == SZ_INT) |
| *va_arg(ap, int *) = (int)res; |
| else if (int_size == SZ_LONG) |
| *va_arg(ap, long *) = (long)res; |
| else if (int_size == SZ_LONGLONG) |
| *va_arg(ap, long long *) = (long long)res; |
| else { |
| ASSERT_NOT_REACHED(); |
| return num_parsed; |
| } |
| } |
| break; |
| } |
| default: |
| /* Format parsing code above should return an error earlier. */ |
| ASSERT_NOT_REACHED(); |
| return num_parsed; |
| } |
| |
| if (!is_ignored) |
| num_parsed++; |
| } |
| return num_parsed; |
| } |
| |
/* Variadic front end for our_vsscanf(): parses str according to fmt, storing
 * the results through the trailing pointer arguments, and returns whatever
 * our_vsscanf() returns.
 */
int
our_sscanf(const char *str, const char *fmt, ...)
{
    /* No need to save errno, we don't call libc anymore. */
    va_list args;
    int num_converted;

    va_start(args, fmt);
    num_converted = our_vsscanf(str, fmt, args);
    va_end(args);
    return num_converted;
}
| |
| #ifdef STANDALONE_UNIT_TEST |
| /***************************************************************************** |
| * sscanf() tests |
| */ |
| |
| /* Copied from core/unix/os.c and modified so that they work when run |
| * cross-arch. We need %ll to parse 64-bit ints on 32-bit and drop the %l to |
| * parse 32-bit ints on x64. |
| */ |
| # define MAPS_LINE_FORMAT4 "%08x-%08x %s %08x %*s %llu %4096s" |
| # define MAPS_LINE_FORMAT8 "%016llx-%016llx %s %016llx %*s %llu %4096s" |
| |
| static void |
| test_sscanf_maps_x86(void) |
| { |
| char line_copy[1024]; |
| uint start, end; |
| uint offset; |
| uint64 inode; |
| char perm[16]; |
| char comment[4096]; |
| int len; |
| const char *maps_line = |
| "f75c3000-f75c4000 rw-p 00155000 fc:00 1840387" |
| " /lib32/libc-2.11.1.so"; |
| |
| strcpy(line_copy, maps_line); |
| len = our_sscanf(line_copy, MAPS_LINE_FORMAT4, |
| &start, &end, perm, &offset, &inode, comment); |
| EXPECT(len, 6); |
| /* Do int64 comparisons directly. EXPECT casts to ptr_uint_t. */ |
| EXPECT(start, 0xf75c3000UL); |
| EXPECT(end, 0xf75c4000UL); |
| EXPECT(offset, 0x00155000UL); |
| EXPECT((inode == 1840387ULL), 1); |
| EXPECT(strcmp(perm, "rw-p"), 0); |
| EXPECT(strcmp(comment, "/lib32/libc-2.11.1.so"), 0); |
| /* sscanf should not modify it's input. */ |
| EXPECT(strcmp(line_copy, maps_line), 0); |
| } |
| |
| static void |
| test_sscanf_maps_x64(void) |
| { |
| char line_copy[1024]; |
| uint64 start, end; |
| uint64 offset; |
| uint64 inode; |
| char perm[16]; |
| char comment[4096]; |
| int len; |
| const char *maps_line = |
| "7f94a6757000-7f94a6758000 rw-p 0017d000 fc:00 " |
| "1839331 /lib/libc-2.11.1.so"; |
| |
| strcpy(line_copy, maps_line); |
| len = our_sscanf(line_copy, MAPS_LINE_FORMAT8, |
| &start, &end, perm, &offset, &inode, comment); |
| EXPECT(len, 6); |
| /* Do int64 comparisons directly. EXPECT casts to ptr_uint_t. */ |
| EXPECT((start == 0x7f94a6757000ULL), 1); |
| EXPECT((end == 0x7f94a6758000ULL), 1); |
| EXPECT((offset == 0x00017d000ULL), 1); |
| EXPECT((inode == 1839331ULL), 1); |
| EXPECT(strcmp(perm, "rw-p"), 0); |
| EXPECT(strcmp(comment, "/lib/libc-2.11.1.so"), 0); |
| /* sscanf should not modify it's input. */ |
| EXPECT(strcmp(line_copy, maps_line), 0); |
| } |
| |
| static void |
| test_sscanf_all_specs(void) |
| { |
| int res; |
| char ch; |
| char str[16]; |
| int signed_int; |
| int signed_int_2; |
| uint unsigned_int; |
| uint hex_num; |
| unsigned long long ull_num; |
| |
| /* ULLONG_MAX is a corner case. */ |
| res = our_sscanf("c str -123 +456 0x789 0xffffffffffffffff", |
| "%c %s %d %u %x %llx", &ch, str, &signed_int, |
| &unsigned_int, &hex_num, &ull_num); |
| EXPECT(res, 6); |
| EXPECT(ch, 'c'); |
| EXPECT(strcmp(str, "str"), 0); |
| EXPECT(signed_int, -123); |
| EXPECT(unsigned_int, 456); |
| EXPECT(hex_num, 0x789); |
| EXPECT((ull_num == ULLONG_MAX), true); |
| |
| /* A variety of ways to say negative one. */ |
| res = our_sscanf("-1-1", "%d%d", &signed_int, &signed_int_2); |
| EXPECT(res, 2); |
| EXPECT(signed_int, -1); |
| EXPECT(signed_int_2, -1); |
| |
| /* Test ignores. */ |
| res = our_sscanf("c str -123 +456 0x789 0xffffffffffffffff 1", |
| "%*c %*s %*d %*u %*x %*llx %d", &signed_int); |
| EXPECT(res, 1); |
| EXPECT(signed_int, 1); |
| |
| /* Test width specifications on strings. */ |
| memset(str, '*', sizeof(str)); /* Fill string with garbage. */ |
| res = our_sscanf("abcdefghijklmnopqrstuvwxyz", |
| "%13s", str); |
| EXPECT(res, 1); |
| /* our_sscanf should read 13 chars without null termination. */ |
| EXPECT(memcmp(str, "abcdefghijklm", 13), 0); |
| EXPECT(str[13], '*'); /* Asterisk should still be there. */ |
| |
| /* Test width specifications for integers. */ |
| res = our_sscanf("123456 0x9abc", "%03d%03d %03xc", |
| &signed_int, &signed_int_2, &unsigned_int); |
| EXPECT(res, 3); |
| EXPECT(signed_int, 123); |
| EXPECT(signed_int_2, 456); |
| EXPECT(unsigned_int, 0x9ab); |
| |
| /* Test skipping leading whitespace for integer conversions. */ |
| res = our_sscanf(" \t123456\t\n 0x9abc", "%d%x", |
| &signed_int, &unsigned_int); |
| EXPECT(res, 2); |
| EXPECT(signed_int, 123456); |
| EXPECT(unsigned_int, 0x9abc); |
| |
| /* Test Windows-style integer width specifiers using decimal ULLONG_MAX. */ |
| res = our_sscanf("1234 18446744073709551615", "%I32d %I64d", |
| &signed_int, &ull_num); |
| EXPECT(res, 2); |
| EXPECT(signed_int, 1234); |
| EXPECT((ull_num == ULLONG_MAX), true); |
| |
| /* FIXME: When parse_int has range checking, we should add tests for parsing |
| * integers that overflow their requested integer sizes. |
| */ |
| } |
| |
| /***************************************************************************** |
| * memcpy() and memset() tests |
| */ |
| |
| # ifdef UNIX |
| # include <errno.h> |
| # include <dlfcn.h> /* for dlsym for libc routines */ |
| |
| /* From dlfcn.h, but we'd have to define _GNU_SOURCE 1 before globals.h. */ |
| # define RTLD_NEXT ((void *) -1l) |
| |
/* Type of libc's memcpy, which we look up via dlsym.  memcpy returns dst, so
 * the pointer type must return void * to be compatible with the function we
 * call through it.
 */
typedef void *(*memcpy_t)(void *dst, const void *src, size_t n);
| |
| static void |
| test_memcpy_offset_size(size_t src_offset, size_t dst_offset, size_t size) |
| { |
| /* These can be aligned to whatever, we'll try a few different offsets. */ |
| byte src[1024]; |
| byte dst[1024]; |
| int i; |
| for (i = 0; i < sizeof(src); i++) { |
| src[i] = 0xcc; |
| dst[i] = 0; |
| } |
| EXPECT(src_offset + size <= sizeof(src), 1); |
| EXPECT(dst_offset + size <= sizeof(dst), 1); |
| memcpy(dst + dst_offset, src + src_offset, size); |
| EXPECT(memcmp(dst + dst_offset, src + src_offset, size), 0); |
| /* Check the bytes just out of bounds, which should still be zero. */ |
| if (dst_offset > 0) |
| EXPECT(dst[dst_offset-1], 0); |
| if (dst_offset+size < sizeof(dst)) |
| EXPECT(dst[dst_offset+size], 0); |
| } |
| |
/* Runs memcpy over a range of sizes and source/destination offsets, and
 * checks its return value.
 */
static void
test_our_memcpy(void)
{
    int src_off, dst_off;
    void *ret;

    /* Basic, copy the whole buffer. */
    test_memcpy_offset_size(0, 0, 1024);
    /* Test misalignment less than copy size. */
    test_memcpy_offset_size(1, 1, 2);
    test_memcpy_offset_size(2, 2, 2);
    test_memcpy_offset_size(1, 1, 3);
    test_memcpy_offset_size(2, 2, 3);
    /* Test a variety of offsets: every combination of 0..15 for each side. */
    for (src_off = 0; src_off < 16; src_off++) {
        for (dst_off = 0; dst_off < 16; dst_off++) {
            test_memcpy_offset_size(src_off, dst_off, 512);
        }
    }
    /* Check that memcpy returns dst. */
    ret = memcpy(&src_off, &dst_off, sizeof(src_off));
    EXPECT(ret == &src_off, 1);
}
| |
| static void |
| test_memset_offset_size(int val, int start_offs, int end_offs) |
| { |
| byte buf[512]; |
| int i; |
| int end = sizeof(buf) - start_offs - end_offs; |
| /* Zero without memset. */ |
| for (i = 0; i < sizeof(buf); i++) |
| buf[i] = 0; |
| memset(buf + start_offs, val, end); |
| EXPECT(is_region_memset_to_char(buf + start_offs, end, val), 1); |
| if (start_offs > 0) |
| EXPECT(buf[start_offs-1], 0); |
| if (end_offs > 0) |
| EXPECT(buf[sizeof(buf)-end_offs], 0); |
| } |
| |
/* Runs memset over a range of fill values and start/end offsets, and checks
 * its return value.
 */
static void
test_our_memset(void)
{
    int fill;
    int head, tail;
    void *ret;

    /* Test a variety of values (0 through 0xfe), each with every combination
     * of 0..15 bytes left untouched at either end of the buffer.
     */
    for (fill = 0; fill < 0xff; fill++) {
        for (head = 0; head < 16; head++) {
            for (tail = 0; tail < 16; tail++) {
                test_memset_offset_size(fill, head, tail);
            }
        }
    }
    /* Check that memset returns dst. */
    ret = memset(&head, -1, sizeof(head));
    EXPECT(ret == &head, 1);
}
| |
| static void |
| our_memcpy_vs_libc(void) |
| { |
| /* Compare our memcpy with libc memcpy. |
| * XXX: Should compare on more sizes, especially small ones. |
| */ |
| size_t alloc_size = 20 * 1024; |
| int loop_count = 100 * 1000; |
| void *src = global_heap_alloc(alloc_size HEAPACCT(ACCT_OTHER)); |
| void *dst = global_heap_alloc(alloc_size HEAPACCT(ACCT_OTHER)); |
| int i; |
| memcpy_t glibc_memcpy = (memcpy_t) dlsym(RTLD_NEXT, "memcpy"); |
| uint64 our_memcpy_start, our_memcpy_end, our_memcpy_time; |
| uint64 libc_memcpy_start, libc_memcpy_end, libc_memcpy_time; |
| memset(src, -1, alloc_size); |
| memset(dst, 0, alloc_size); |
| |
| our_memcpy_start = query_time_millis(); |
| for (i = 0; i < loop_count; i++) { |
| memcpy(src, dst, alloc_size); |
| } |
| our_memcpy_end = query_time_millis(); |
| |
| libc_memcpy_start = query_time_millis(); |
| for (i = 0; i < loop_count; i++) { |
| glibc_memcpy(src, dst, alloc_size); |
| } |
| libc_memcpy_end = query_time_millis(); |
| |
| global_heap_free(src, alloc_size HEAPACCT(ACCT_OTHER)); |
| global_heap_free(dst, alloc_size HEAPACCT(ACCT_OTHER)); |
| our_memcpy_time = our_memcpy_end - our_memcpy_start; |
| libc_memcpy_time = libc_memcpy_end - libc_memcpy_start; |
| print_file(STDERR, "our_memcpy_time: "UINT64_FORMAT_STRING"\n", |
| our_memcpy_time); |
| print_file(STDERR, "libc_memcpy_time: "UINT64_FORMAT_STRING"\n", |
| libc_memcpy_time); |
| /* We could assert that we're not too much slower, but that's a recipe for |
| * flaky failures when the suite is run on shared VMs or in parallel. |
| */ |
| } |
| # endif /* UNIX */ |
| |
| static void |
| test_integer(void) |
| { |
| char buf[512]; |
| ssize_t res; |
| |
| /* test integer codes */ |
| res = our_snprintf(buf, BUFFER_SIZE_ELEMENTS(buf), "%lld", 0x12345678abcdef01LL); |
| EXPECT(res == (ssize_t) strlen("1311768467750121217"), true); |
| EXPECT(strcmp(buf, "1311768467750121217"), 0); |
| |
| res = our_snprintf(buf, BUFFER_SIZE_ELEMENTS(buf), "%lld", 0x82345678abcdef01LL); |
| EXPECT(res == (ssize_t) strlen("-9064525073711501567"), true); |
| EXPECT(strcmp(buf, "-9064525073711501567"), 0); |
| |
| res = our_snprintf(buf, BUFFER_SIZE_ELEMENTS(buf), "%llu", 0x82345678abcdef01LL); |
| EXPECT(res == (ssize_t) strlen("9382218999998050049"), true); |
| EXPECT(strcmp(buf, "9382218999998050049"), 0); |
| |
| /* XXX: add more tests */ |
| } |
| |
| void |
| unit_test_io(void) |
| { |
| char buf[512]; |
| wchar_t wbuf[512]; |
| ssize_t res; |
| |
| /* test wide char conversion */ |
| res = our_snprintf(buf, BUFFER_SIZE_ELEMENTS(buf), "%S", L"wide string"); |
| EXPECT(res == (ssize_t) strlen("wide string"), true); |
| EXPECT(strcmp(buf, "wide string"), 0); |
| res = our_snprintf(buf, BUFFER_SIZE_ELEMENTS(buf), "%ls", L"wide string"); |
| EXPECT(res == (ssize_t) strlen("wide string"), true); |
| EXPECT(strcmp(buf, "wide string"), 0); |
| res = our_snprintf(buf, BUFFER_SIZE_ELEMENTS(buf), "%.3S", L"wide string"); |
| EXPECT(res == (ssize_t) strlen("wid"), true); |
| EXPECT(strcmp(buf, "wid"), 0); |
| res = our_snprintf(buf, 4, "%S", L"wide string"); |
| EXPECT(res == -1, true); |
| EXPECT(buf[4], ' '); /* ' ' from prior calls: no NULL written since hit max */ |
| buf[4] = '\0'; |
| EXPECT(strcmp(buf, "wide"), 0); |
| |
| /* test float */ |
| res = our_snprintf(buf, BUFFER_SIZE_ELEMENTS(buf), "%3.1f", 42.9f); |
| EXPECT(res == (ssize_t) strlen("42.9"), true); |
| EXPECT(strcmp(buf, "42.9"), 0); |
| /* XXX: add more */ |
| |
| /* test all-wide */ |
| res = our_snprintf_wide(wbuf, BUFFER_SIZE_ELEMENTS(wbuf), L"%d%s%3.1f", |
| -42, L"wide string", 42.9f); |
| EXPECT(res == (ssize_t) wcslen(L"-42wide string42.9"), true); |
| EXPECT(wcscmp(wbuf, L"-42wide string42.9"), 0); |
| |
| /* test all-wide conversion */ |
| res = our_snprintf_wide(wbuf, BUFFER_SIZE_ELEMENTS(wbuf), L"%S", "narrow string"); |
| EXPECT(res == (ssize_t) wcslen(L"narrow string"), true); |
| EXPECT(wcscmp(wbuf, L"narrow string"), 0); |
| res = our_snprintf_wide(wbuf, BUFFER_SIZE_ELEMENTS(wbuf), L"%hs", "narrow string"); |
| EXPECT(res == (ssize_t) wcslen(L"narrow string"), true); |
| EXPECT(wcscmp(wbuf, L"narrow string"), 0); |
| res = our_snprintf_wide(wbuf, BUFFER_SIZE_ELEMENTS(wbuf), L"%.3S", "narrow string"); |
| EXPECT(res == (ssize_t) wcslen(L"nar"), true); |
| EXPECT(wcscmp(wbuf, L"nar"), 0); |
| res = our_snprintf_wide(wbuf, 6, L"%S", "narrow string"); |
| EXPECT(res == -1, true); |
| EXPECT(wbuf[6], L' '); /* ' ' from prior calls: no NULL written since hit max */ |
| wbuf[6] = L'\0'; |
| EXPECT(wcscmp(wbuf, L"narrow"), 0); |
| |
| #ifdef WINDOWS |
| /* test UTF-16 to UTF-8 */ |
| res = our_snprintf(buf, BUFFER_SIZE_ELEMENTS(buf), "%S", |
| L"\x0391\x03A9\x20Ac"); /* alpha, omega, euro sign */ |
| EXPECT(res == 7, true); /* 2x 2-char + 1 3-char encodings */ |
| EXPECT((byte)buf[0] == 0xce && (byte)buf[1] == 0x91, true); /* UTF-8 U-0391 */ |
| EXPECT((byte)buf[2] == 0xce && (byte)buf[3] == 0xa9, true); /* UTF-8 U-03A9 */ |
| EXPECT((byte)buf[4] == 0xe2 && (byte)buf[5] == 0x82 && (byte)buf[6] == 0xac, |
| true); /* UTF-8 U-20Ac */ |
| EXPECT((byte)buf[7] == '\0', true); |
| res = our_snprintf(buf, BUFFER_SIZE_ELEMENTS(buf), "%S", |
| L"\xd800"); /* no low surrogate */ |
| EXPECT(res == -1, true); |
| res = our_snprintf(buf, BUFFER_SIZE_ELEMENTS(buf), "%S", |
| L"\xd800\xdc00"); /* surrogate pair */ |
| EXPECT(res == 4, true); /* 4-char encoding */ |
| EXPECT((byte)buf[0] == 0xf0 && (byte)buf[1] == 0x90 && |
| (byte)buf[2] == 0x80 && (byte)buf[3] == 0x80, true); /* UTF-8 U-10000 */ |
| EXPECT((byte)buf[4] == '\0', true); |
| res = our_snprintf(buf, BUFFER_SIZE_ELEMENTS(buf), "%.6S", |
| L"\x0391\x03A9\x20Ac"); /* alpha, omega, euro sign */ |
| EXPECT(res == 4, true); /* 2x 2-char + aborted the 3-char encoding */ |
| EXPECT((byte)buf[0] == 0xce && (byte)buf[1] == 0x91, true); /* UTF-8 U-0391 */ |
| EXPECT((byte)buf[2] == 0xce && (byte)buf[3] == 0xa9, true); /* UTF-8 U-03A9 */ |
| EXPECT((byte)buf[4] == '\0', true); |
| |
| /* test UTF-8 to UTF-16 */ |
| res = our_snprintf_wide(wbuf, BUFFER_SIZE_ELEMENTS(wbuf), L"%S", |
| "\xce\x91\xce\xa9\xe2\x82\xac"); /* alpha, omega, euro sign */ |
| EXPECT(res == 3, true); |
| EXPECT(wbuf[0] == 0x0391 && wbuf[1] == 0x03a9 && wbuf[2] == 0x20ac, true); |
| EXPECT(wbuf[3] == L'\0', true); |
| res = our_snprintf_wide(wbuf, BUFFER_SIZE_ELEMENTS(wbuf), L"%S", |
| "\xff\x91\xce\xa9\xe2\x82"); |
| EXPECT(res == -1, true); /* not encodable in UTF-16 */ |
| res = our_snprintf_wide(wbuf, BUFFER_SIZE_ELEMENTS(wbuf), L"%S", |
| "\xf0\x90\x80\x80"); /* U-1000 */ |
| EXPECT(res == 2, true); |
| EXPECT(wbuf[0] == 0xd800 && wbuf[1] == 0xdc00 && wbuf[2] == L'\0', true); |
| res = our_snprintf_wide(wbuf, BUFFER_SIZE_ELEMENTS(wbuf), L"%.2S", |
| "\xce\x91\xce\xa9\xe2\x82\xac"); /* alpha, omega, euro sign */ |
| EXPECT(res == 2, true); |
| EXPECT(wbuf[0] == 0x0391 && wbuf[1] == 0x03a9 && wbuf[2] == L'\0', true); |
| #endif |
| |
| test_integer(); |
| |
| /* sscanf tests */ |
| test_sscanf_maps_x86(); |
| test_sscanf_maps_x64(); |
| test_sscanf_all_specs(); |
| |
| #ifdef UNIX |
| /* memcpy tests */ |
| test_our_memcpy(); |
| our_memcpy_vs_libc(); |
| |
| /* memset tests */ |
| test_our_memset(); |
| #endif /* UNIX */ |
| |
| /* XXX: add more tests */ |
| |
| print_file(STDERR, "io all done\n"); |
| } |
| |
| #endif /* STANDALONE_UNIT_TEST */ |