1 | // Copyright 2011 the V8 project authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. |
4 | |
5 | #ifndef V8_CHAR_PREDICATES_H_ |
6 | #define V8_CHAR_PREDICATES_H_ |
7 | |
8 | #include "src/globals.h" |
9 | #include "src/unicode.h" |
10 | |
11 | namespace v8 { |
12 | namespace internal { |
13 | |
// Unicode character predicates as defined by ECMA-262, 3rd edition,
// used for lexical analysis.
16 | |
17 | inline constexpr int AsciiAlphaToLower(uc32 c); |
18 | inline constexpr bool IsCarriageReturn(uc32 c); |
19 | inline constexpr bool IsLineFeed(uc32 c); |
20 | inline constexpr bool IsAsciiIdentifier(uc32 c); |
21 | inline constexpr bool IsAlphaNumeric(uc32 c); |
22 | inline constexpr bool IsDecimalDigit(uc32 c); |
23 | inline constexpr bool IsHexDigit(uc32 c); |
24 | inline constexpr bool IsOctalDigit(uc32 c); |
25 | inline constexpr bool IsBinaryDigit(uc32 c); |
26 | inline constexpr bool IsRegExpWord(uc32 c); |
27 | inline constexpr bool IsRegExpNewline(uc32 c); |
28 | |
29 | // ES#sec-names-and-keywords |
30 | // This includes '_', '$' and '\', and ID_Start according to |
31 | // http://www.unicode.org/reports/tr31/, which consists of categories |
32 | // 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', but excluding properties |
33 | // 'Pattern_Syntax' or 'Pattern_White_Space'. |
34 | inline bool IsIdentifierStart(uc32 c); |
35 | #ifdef V8_INTL_SUPPORT |
36 | V8_EXPORT_PRIVATE bool IsIdentifierStartSlow(uc32 c); |
37 | #else |
38 | inline bool IsIdentifierStartSlow(uc32 c) { |
39 | // Non-BMP characters are not supported without I18N. |
40 | return (c <= 0xFFFF) ? unibrow::ID_Start::Is(c) : false; |
41 | } |
42 | #endif |
43 | |
44 | // ES#sec-names-and-keywords |
45 | // This includes \u200c and \u200d, and ID_Continue according to |
46 | // http://www.unicode.org/reports/tr31/, which consists of ID_Start, |
47 | // the categories 'Mn', 'Mc', 'Nd', 'Pc', but excluding properties |
48 | // 'Pattern_Syntax' or 'Pattern_White_Space'. |
49 | inline bool IsIdentifierPart(uc32 c); |
50 | #ifdef V8_INTL_SUPPORT |
51 | V8_EXPORT_PRIVATE bool IsIdentifierPartSlow(uc32 c); |
52 | #else |
53 | inline bool IsIdentifierPartSlow(uc32 c) { |
54 | // Non-BMP charaacters are not supported without I18N. |
55 | if (c <= 0xFFFF) { |
56 | return unibrow::ID_Start::Is(c) || unibrow::ID_Continue::Is(c); |
57 | } |
58 | return false; |
59 | } |
60 | #endif |
61 | |
62 | // ES6 draft section 11.2 |
63 | // This includes all code points of Unicode category 'Zs'. |
64 | // Further included are \u0009, \u000b, \u000c, and \ufeff. |
65 | inline bool IsWhiteSpace(uc32 c); |
66 | #ifdef V8_INTL_SUPPORT |
67 | V8_EXPORT_PRIVATE bool IsWhiteSpaceSlow(uc32 c); |
68 | #else |
69 | inline bool IsWhiteSpaceSlow(uc32 c) { return unibrow::WhiteSpace::Is(c); } |
70 | #endif |
71 | |
72 | // WhiteSpace and LineTerminator according to ES6 draft section 11.2 and 11.3 |
73 | // This includes all the characters with Unicode category 'Z' (= Zs+Zl+Zp) |
74 | // as well as \u0009 - \u000d and \ufeff. |
75 | inline bool IsWhiteSpaceOrLineTerminator(uc32 c); |
76 | inline bool IsWhiteSpaceOrLineTerminatorSlow(uc32 c) { |
77 | return IsWhiteSpaceSlow(c) || unibrow::IsLineTerminator(c); |
78 | } |
79 | |
80 | inline bool IsLineTerminatorSequence(uc32 c, uc32 next); |
81 | |
82 | } // namespace internal |
83 | } // namespace v8 |
84 | |
85 | #endif // V8_CHAR_PREDICATES_H_ |
86 | |