// $Id: code.h,v 1.22 2004/02/17 13:29:11 ericb Exp $ -*- c++ -*- // DO NOT MODIFY THIS FILE - it is generated using gencode.java. // // This software is subject to the terms of the IBM Jikes Compiler // License Agreement available at the following URL: // http://www.ibm.com/research/jikes. // Copyright (C) 1999, 2004 IBM Corporation and others. All Rights Reserved. // You must accept the terms of that agreement to use this software. // #include "platform.h" #ifdef HAVE_JIKES_NAMESPACE namespace Jikes { // Open namespace Jikes block #endif #ifndef code_INCLUDED #define code_INCLUDED class Code { // // To facilitate the scanning, the character set is partitioned into // categories using the array CODE. These are described below together // with some self-explanatory functions defined on CODE. // enum { SHIFT = 9, SPACE_CODE = 0, BAD_CODE = 1, DIGIT_CODE = 2, ID_PART_CODE = 3, LOWER_CODE = 4, UPPER_CODE = 5, ID_START_CODE = 6 }; static char codes[13558]; static u2 blocks[2176]; public: #ifdef JIKES_DEBUG static inline void CodeCheck(u4 c) { assert((u2) (blocks[c >> SHIFT] + c) < 13558); } static inline bool CodeCheck(void) { for (u4 c = 0; c <= 1114111; c++) CodeCheck(c); return true; } #endif // JIKES_DEBUG // // These methods test for Unicode surrogate pairs. // static inline bool IsHighSurrogate(wchar_t c) { return c >= 0xd800 && c <= 0xdbff; } static inline bool IsLowSurrogate(wchar_t c) { return c >= 0xdc00 && c <= 0xdfff; } static inline u4 Codepoint(wchar_t hi, wchar_t lo) { assert(IsHighSurrogate(hi) && IsLowSurrogate(lo)); return (hi << 10) + lo + (0x10000 - (0xd800 << 10) - 0xdc00); } static inline u4 Codepoint(const wchar_t* p) { u4 result = (u4) *p; if (IsHighSurrogate(result) && IsLowSurrogate(p[1])) result = Codepoint(result, p[1]); return result; } static inline int Codelength(const wchar_t* p) { return (IsHighSurrogate(*p) && IsLowSurrogate(p[1])) ? 2 : 1; } // // These methods test for ASCII characteristics. Since it is strictly ASCII, // there is no need to check for Unicode surrogate pairs. // static inline bool IsNewline(wchar_t c) { return c == U_LF || c == U_CR; } static inline bool IsSpaceButNotNewline(wchar_t c) { return c == U_SP || c == U_FF || c == U_HT; } static inline bool IsSpace(wchar_t c) { return c == U_SP || c == U_CR || c == U_LF || c == U_HT || c == U_FF; } static inline bool IsDecimalDigit(wchar_t c) { return c <= U_9 && c >= U_0; } static inline bool IsOctalDigit(wchar_t c) { return c <= U_7 && c >= U_0; } static inline bool IsHexDigit(wchar_t c) { return c <= U_f && (c >= U_a || (c >= U_A && c <= U_F) || (c >= U_0 && c <= U_9)); } static inline int Value(wchar_t c) { assert(IsHexDigit(c)); return c - (c <= U_9 ? U_0 : c < U_a ? U_A - 10 : U_a - 10); } static inline bool IsSign(wchar_t c) { return c == U_MINUS || c == U_PLUS; } static inline bool IsAsciiUpper(wchar_t c) { return c <= U_Z && c >= U_A; } static inline bool IsAsciiLower(wchar_t c) { return c <= U_z && c >= U_a; } // // The following methods recognize Unicode surrogate pairs, hence the need to // pass a pointer. Use Codelength() to determine if one or two characters // were used in the formation of a character. // static inline bool IsWhitespace(const wchar_t* p) { u4 c = Codepoint(p); return codes[(u2) (blocks[c >> SHIFT] + c)] == SPACE_CODE; } static inline bool IsDigit(const wchar_t* p) { u4 c = Codepoint(p); return codes[(u2) (blocks[c >> SHIFT] + c)] == DIGIT_CODE; } static inline bool IsUpper(const wchar_t* p) { u4 c = Codepoint(p); return codes[(u2) (blocks[c >> SHIFT] + c)] == UPPER_CODE; } static inline bool IsLower(const wchar_t* p) { u4 c = Codepoint(p); return codes[(u2) (blocks[c >> SHIFT] + c)] == LOWER_CODE; } static inline bool IsAlpha(const wchar_t* p) { u4 c = Codepoint(p); return codes[(u2) (blocks[c >> SHIFT] + c)] >= LOWER_CODE; } static inline bool IsAlnum(const wchar_t* p) { u4 c = Codepoint(p); return codes[(u2) (blocks[c >> SHIFT] + c)] >= DIGIT_CODE; } }; #endif // code_INCLUDED #ifdef HAVE_JIKES_NAMESPACE } // Close namespace Jikes block #endif