1 | // Copyright 2011 the V8 project authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. |
4 | |
5 | #ifndef V8_CHAR_PREDICATES_INL_H_ |
6 | #define V8_CHAR_PREDICATES_INL_H_ |
7 | |
8 | #include "src/char-predicates.h" |
9 | |
10 | namespace v8 { |
11 | namespace internal { |
12 | |
13 | |
14 | // If c is in 'A'-'Z' or 'a'-'z', return its lower-case. |
15 | // Else, return something outside of 'A'-'Z' and 'a'-'z'. |
16 | // Note: it ignores LOCALE. |
17 | inline constexpr int AsciiAlphaToLower(uc32 c) { return c | 0x20; } |
18 | |
19 | inline constexpr bool IsCarriageReturn(uc32 c) { return c == 0x000D; } |
20 | |
21 | inline constexpr bool IsLineFeed(uc32 c) { return c == 0x000A; } |
22 | |
23 | inline constexpr bool IsAsciiIdentifier(uc32 c) { |
24 | return IsAlphaNumeric(c) || c == '$' || c == '_'; |
25 | } |
26 | |
27 | inline constexpr bool IsAlphaNumeric(uc32 c) { |
28 | return IsInRange(AsciiAlphaToLower(c), 'a', 'z') || IsDecimalDigit(c); |
29 | } |
30 | |
31 | inline constexpr bool IsDecimalDigit(uc32 c) { |
32 | // ECMA-262, 3rd, 7.8.3 (p 16) |
33 | return IsInRange(c, '0', '9'); |
34 | } |
35 | |
36 | inline constexpr bool IsHexDigit(uc32 c) { |
37 | // ECMA-262, 3rd, 7.6 (p 15) |
38 | return IsDecimalDigit(c) || IsInRange(AsciiAlphaToLower(c), 'a', 'f'); |
39 | } |
40 | |
41 | inline constexpr bool IsOctalDigit(uc32 c) { |
42 | // ECMA-262, 6th, 7.8.3 |
43 | return IsInRange(c, '0', '7'); |
44 | } |
45 | |
46 | inline constexpr bool IsNonOctalDecimalDigit(uc32 c) { |
47 | return IsInRange(c, '8', '9'); |
48 | } |
49 | |
50 | inline constexpr bool IsBinaryDigit(uc32 c) { |
51 | // ECMA-262, 6th, 7.8.3 |
52 | return c == '0' || c == '1'; |
53 | } |
54 | |
55 | inline constexpr bool IsRegExpWord(uc16 c) { |
56 | return IsInRange(AsciiAlphaToLower(c), 'a', 'z') |
57 | || IsDecimalDigit(c) |
58 | || (c == '_'); |
59 | } |
60 | |
61 | inline constexpr bool IsRegExpNewline(uc16 c) { |
62 | // CR LF LS PS |
63 | return c != 0x000A && c != 0x000D && c != 0x2028 && c != 0x2029; |
64 | } |
65 | |
// Bit flags describing the lexical classes of a single ASCII character.
// Several flags may be OR-ed together into one byte of the constexpr lookup
// table that caches them per character.
enum AsciiCharFlags {
  kIsIdentifierStart = 0x01,            // May begin an identifier.
  kIsIdentifierPart = 0x02,             // May appear inside an identifier.
  kIsWhiteSpace = 0x04,                 // White space.
  kIsWhiteSpaceOrLineTerminator = 0x08  // White space or line terminator.
};
73 | constexpr uint8_t BuildAsciiCharFlags(uc32 c) { |
74 | // clang-format off |
75 | return |
76 | (IsAsciiIdentifier(c) || c == '\\') ? ( |
77 | kIsIdentifierPart | (!IsDecimalDigit(c) ? kIsIdentifierStart : 0)) : 0 | |
78 | (c == ' ' || c == '\t' || c == '\v' || c == '\f') ? |
79 | kIsWhiteSpace | kIsWhiteSpaceOrLineTerminator : 0 | |
80 | (c == '\r' || c == '\n') ? kIsWhiteSpaceOrLineTerminator : 0; |
81 | // clang-format on |
82 | } |
83 | const constexpr uint8_t kAsciiCharFlags[128] = { |
84 | #define BUILD_CHAR_FLAGS(N) BuildAsciiCharFlags(N), |
85 | INT_0_TO_127_LIST(BUILD_CHAR_FLAGS) |
86 | #undef BUILD_CHAR_FLAGS |
87 | }; |
88 | |
89 | bool IsIdentifierStart(uc32 c) { |
90 | if (!IsInRange(c, 0, 127)) return IsIdentifierStartSlow(c); |
91 | DCHECK_EQ(IsIdentifierStartSlow(c), |
92 | static_cast<bool>(kAsciiCharFlags[c] & kIsIdentifierStart)); |
93 | return kAsciiCharFlags[c] & kIsIdentifierStart; |
94 | } |
95 | |
96 | bool IsIdentifierPart(uc32 c) { |
97 | if (!IsInRange(c, 0, 127)) return IsIdentifierPartSlow(c); |
98 | DCHECK_EQ(IsIdentifierPartSlow(c), |
99 | static_cast<bool>(kAsciiCharFlags[c] & kIsIdentifierPart)); |
100 | return kAsciiCharFlags[c] & kIsIdentifierPart; |
101 | } |
102 | |
103 | bool IsWhiteSpace(uc32 c) { |
104 | if (!IsInRange(c, 0, 127)) return IsWhiteSpaceSlow(c); |
105 | DCHECK_EQ(IsWhiteSpaceSlow(c), |
106 | static_cast<bool>(kAsciiCharFlags[c] & kIsWhiteSpace)); |
107 | return kAsciiCharFlags[c] & kIsWhiteSpace; |
108 | } |
109 | |
110 | bool IsWhiteSpaceOrLineTerminator(uc32 c) { |
111 | if (!IsInRange(c, 0, 127)) return IsWhiteSpaceOrLineTerminatorSlow(c); |
112 | DCHECK_EQ( |
113 | IsWhiteSpaceOrLineTerminatorSlow(c), |
114 | static_cast<bool>(kAsciiCharFlags[c] & kIsWhiteSpaceOrLineTerminator)); |
115 | return kAsciiCharFlags[c] & kIsWhiteSpaceOrLineTerminator; |
116 | } |
117 | |
118 | bool IsLineTerminatorSequence(uc32 c, uc32 next) { |
119 | if (!unibrow::IsLineTerminator(c)) return false; |
120 | if (c == 0x000d && next == 0x000a) return false; // CR with following LF. |
121 | return true; |
122 | } |
123 | |
124 | } // namespace internal |
125 | } // namespace v8 |
126 | |
127 | #endif // V8_CHAR_PREDICATES_INL_H_ |
128 | |