1 | // Copyright 2011 the V8 project authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. |
4 | |
5 | #ifndef V8_DATEPARSER_H_ |
6 | #define V8_DATEPARSER_H_ |
7 | |
8 | #include "src/allocation.h" |
9 | #include "src/char-predicates.h" |
10 | |
11 | namespace v8 { |
12 | namespace internal { |
13 | |
14 | class DateParser : public AllStatic { |
15 | public: |
// Parse the string |str| as a date.
//
// If parsing succeeds, return true after filling out the output array as
// follows (all integers are Smis):
//   [0]: year
//   [1]: month (0 = Jan, 1 = Feb, ...)
//   [2]: day
//   [3]: hour
//   [4]: minute
//   [5]: second
//   [6]: millisecond
//   [7]: UTC offset in seconds, or null value if no timezone specified
//
// If parsing fails, return false. In that case the content of the output
// array is not defined and must not be relied upon by the caller.
template <typename Char>
static bool Parse(Isolate* isolate, Vector<Char> str, FixedArray output);
29 | |
// Symbolic names for the slots of the output array, listed in the same
// order as the slots documented for Parse(). OUTPUT_SIZE comes last and
// therefore equals the number of slots.
enum {
  YEAR,
  MONTH,
  DAY,
  HOUR,
  MINUTE,
  SECOND,
  MILLISECOND,
  UTC_OFFSET,
  OUTPUT_SIZE
};
33 | |
34 | private: |
// Range testing: returns true iff lo <= x <= hi. Requires lo <= hi.
//
// Both bounds are checked with a single unsigned comparison: if x < lo the
// difference x - lo wraps around to a very large unsigned value, which is
// necessarily greater than hi - lo.
//
// The operands are converted to unsigned *before* subtracting. Unsigned
// arithmetic is modular, so the result is identical to the signed
// subtraction wherever that was defined, while extreme inputs (e.g. a very
// negative x with a positive lo) can no longer trigger signed-overflow
// undefined behavior.
static inline bool Between(int x, int lo, int hi) {
  const unsigned offset = static_cast<unsigned>(x) - static_cast<unsigned>(lo);
  const unsigned width = static_cast<unsigned>(hi) - static_cast<unsigned>(lo);
  return offset <= width;
}
39 | |
// Indicates a missing value. The composer classes in this file initialize
// their not-yet-set fields to this sentinel and compare against it to
// detect whether a component has been provided.
static const int kNone = kMaxInt;

// Maximal number of digits used to build the value of a numeral.
// Remaining digits are ignored. Nine decimal digits yield at most
// 999999999, which fits in an int (assuming int is at least 32 bits wide).
static const int kMaxSignificantDigits = 9;
46 | |
47 | // InputReader provides basic string parsing and character classification. |
48 | template <typename Char> |
49 | class InputReader { |
50 | public: |
51 | explicit InputReader(Vector<Char> s) : index_(0), buffer_(s) { Next(); } |
52 | |
53 | int position() { return index_; } |
54 | |
55 | // Advance to the next character of the string. |
56 | void Next() { |
57 | ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0; |
58 | index_++; |
59 | } |
60 | |
61 | // Read a string of digits as an unsigned number. Cap value at |
62 | // kMaxSignificantDigits, but skip remaining digits if the numeral |
63 | // is longer. |
64 | int ReadUnsignedNumeral() { |
65 | int n = 0; |
66 | int i = 0; |
67 | while (IsAsciiDigit()) { |
68 | if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0'; |
69 | i++; |
70 | Next(); |
71 | } |
72 | return n; |
73 | } |
74 | |
75 | // Read a word (sequence of chars. >= 'A'), fill the given buffer with a |
76 | // lower-case prefix, and pad any remainder of the buffer with zeroes. |
77 | // Return word length. |
78 | int ReadWord(uint32_t* prefix, int prefix_size) { |
79 | int len; |
80 | for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) { |
81 | if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_); |
82 | } |
83 | for (int i = len; i < prefix_size; i++) prefix[i] = 0; |
84 | return len; |
85 | } |
86 | |
87 | // The skip methods return whether they actually skipped something. |
88 | bool Skip(uint32_t c) { |
89 | if (ch_ == c) { |
90 | Next(); |
91 | return true; |
92 | } |
93 | return false; |
94 | } |
95 | |
96 | inline bool SkipWhiteSpace(); |
97 | inline bool SkipParentheses(); |
98 | |
99 | // Character testing/classification. Non-ASCII digits are not supported. |
100 | bool Is(uint32_t c) const { return ch_ == c; } |
101 | bool IsEnd() const { return ch_ == 0; } |
102 | bool IsAsciiDigit() const { return IsDecimalDigit(ch_); } |
103 | bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; } |
104 | bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; } |
105 | |
106 | // Return 1 for '+' and -1 for '-'. |
107 | int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); } |
108 | |
109 | private: |
110 | int index_; |
111 | Vector<Char> buffer_; |
112 | uint32_t ch_; |
113 | }; |
114 | |
// Kinds of keyword. KeywordTable::GetType() yields one of these values, and
// keyword tokens carry the value as their tag.
enum KeywordType {
  INVALID,
  MONTH_NAME,
  TIME_ZONE_NAME,
  TIME_SEPARATOR,
  AM_PM
};
118 | |
119 | struct DateToken { |
120 | public: |
121 | bool IsInvalid() { return tag_ == kInvalidTokenTag; } |
122 | bool IsUnknown() { return tag_ == kUnknownTokenTag; } |
123 | bool IsNumber() { return tag_ == kNumberTag; } |
124 | bool IsSymbol() { return tag_ == kSymbolTag; } |
125 | bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; } |
126 | bool IsEndOfInput() { return tag_ == kEndOfInputTag; } |
127 | bool IsKeyword() { return tag_ >= kKeywordTagStart; } |
128 | |
129 | int length() { return length_; } |
130 | |
131 | int number() { |
132 | DCHECK(IsNumber()); |
133 | return value_; |
134 | } |
135 | KeywordType keyword_type() { |
136 | DCHECK(IsKeyword()); |
137 | return static_cast<KeywordType>(tag_); |
138 | } |
139 | int keyword_value() { |
140 | DCHECK(IsKeyword()); |
141 | return value_; |
142 | } |
143 | char symbol() { |
144 | DCHECK(IsSymbol()); |
145 | return static_cast<char>(value_); |
146 | } |
147 | bool IsSymbol(char symbol) { |
148 | return IsSymbol() && this->symbol() == symbol; |
149 | } |
150 | bool IsKeywordType(KeywordType tag) { |
151 | return tag_ == tag; |
152 | } |
153 | bool IsFixedLengthNumber(int length) { |
154 | return IsNumber() && length_ == length; |
155 | } |
156 | bool IsAsciiSign() { |
157 | return tag_ == kSymbolTag && (value_ == '-' || value_ == '+'); |
158 | } |
159 | int ascii_sign() { |
160 | DCHECK(IsAsciiSign()); |
161 | return 44 - value_; |
162 | } |
163 | bool IsKeywordZ() { |
164 | return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0; |
165 | } |
166 | bool IsUnknown(int character) { |
167 | return IsUnknown() && value_ == character; |
168 | } |
169 | // Factory functions. |
170 | static DateToken Keyword(KeywordType tag, int value, int length) { |
171 | return DateToken(tag, length, value); |
172 | } |
173 | static DateToken Number(int value, int length) { |
174 | return DateToken(kNumberTag, length, value); |
175 | } |
176 | static DateToken Symbol(char symbol) { |
177 | return DateToken(kSymbolTag, 1, symbol); |
178 | } |
179 | static DateToken EndOfInput() { |
180 | return DateToken(kEndOfInputTag, 0, -1); |
181 | } |
182 | static DateToken WhiteSpace(int length) { |
183 | return DateToken(kWhiteSpaceTag, length, -1); |
184 | } |
185 | static DateToken Unknown() { |
186 | return DateToken(kUnknownTokenTag, 1, -1); |
187 | } |
188 | static DateToken Invalid() { |
189 | return DateToken(kInvalidTokenTag, 0, -1); |
190 | } |
191 | |
192 | private: |
193 | enum TagType { |
194 | kInvalidTokenTag = -6, |
195 | kUnknownTokenTag = -5, |
196 | kWhiteSpaceTag = -4, |
197 | kNumberTag = -3, |
198 | kSymbolTag = -2, |
199 | kEndOfInputTag = -1, |
200 | kKeywordTagStart = 0 |
201 | }; |
202 | DateToken(int tag, int length, int value) |
203 | : tag_(tag), |
204 | length_(length), |
205 | value_(value) { } |
206 | |
207 | int tag_; |
208 | int length_; // Number of characters. |
209 | int value_; |
210 | }; |
211 | |
212 | template <typename Char> |
213 | class DateStringTokenizer { |
214 | public: |
215 | explicit DateStringTokenizer(InputReader<Char>* in) |
216 | : in_(in), next_(Scan()) { } |
217 | DateToken Next() { |
218 | DateToken result = next_; |
219 | next_ = Scan(); |
220 | return result; |
221 | } |
222 | |
223 | DateToken Peek() { |
224 | return next_; |
225 | } |
226 | bool SkipSymbol(char symbol) { |
227 | if (next_.IsSymbol(symbol)) { |
228 | next_ = Scan(); |
229 | return true; |
230 | } |
231 | return false; |
232 | } |
233 | |
234 | private: |
235 | DateToken Scan(); |
236 | |
237 | InputReader<Char>* in_; |
238 | DateToken next_; |
239 | }; |
240 | |
// Derives a millisecond value from the token |number|. Defined out of line.
// NOTE(review): presumably |number| is the digit run following a decimal
// point and is scaled to three digits -- confirm against the definition,
// which is not visible in this header.
static int ReadMilliseconds(DateToken number);
242 | |
243 | // KeywordTable maps names of months, time zones, am/pm to numbers. |
244 | class KeywordTable : public AllStatic { |
245 | public: |
246 | // Look up a word in the keyword table and return an index. |
247 | // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength |
248 | // and 'len' is the word length. |
249 | static int Lookup(const uint32_t* pre, int len); |
250 | // Get the type of the keyword at index i. |
251 | static KeywordType GetType(int i) { |
252 | return static_cast<KeywordType>(array[i][kTypeOffset]); |
253 | } |
254 | // Get the value of the keyword at index i. |
255 | static int GetValue(int i) { return array[i][kValueOffset]; } |
256 | |
257 | static const int kPrefixLength = 3; |
258 | static const int kTypeOffset = kPrefixLength; |
259 | static const int kValueOffset = kTypeOffset + 1; |
260 | static const int kEntrySize = kValueOffset + 1; |
261 | static const int8_t array[][kEntrySize]; |
262 | }; |
263 | |
264 | class TimeZoneComposer { |
265 | public: |
266 | TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {} |
267 | void Set(int offset_in_hours) { |
268 | sign_ = offset_in_hours < 0 ? -1 : 1; |
269 | hour_ = offset_in_hours * sign_; |
270 | minute_ = 0; |
271 | } |
272 | void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; } |
273 | void SetAbsoluteHour(int hour) { hour_ = hour; } |
274 | void SetAbsoluteMinute(int minute) { minute_ = minute; } |
275 | bool IsExpecting(int n) const { |
276 | return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n); |
277 | } |
278 | bool IsUTC() const { return hour_ == 0 && minute_ == 0; } |
279 | bool Write(FixedArray output); |
280 | bool IsEmpty() { return hour_ == kNone; } |
281 | private: |
282 | int sign_; |
283 | int hour_; |
284 | int minute_; |
285 | }; |
286 | |
287 | class TimeComposer { |
288 | public: |
289 | TimeComposer() : index_(0), hour_offset_(kNone) {} |
290 | bool IsEmpty() const { return index_ == 0; } |
291 | bool IsExpecting(int n) const { |
292 | return (index_ == 1 && IsMinute(n)) || |
293 | (index_ == 2 && IsSecond(n)) || |
294 | (index_ == 3 && IsMillisecond(n)); |
295 | } |
296 | bool Add(int n) { |
297 | return index_ < kSize ? (comp_[index_++] = n, true) : false; |
298 | } |
299 | bool AddFinal(int n) { |
300 | if (!Add(n)) return false; |
301 | while (index_ < kSize) comp_[index_++] = 0; |
302 | return true; |
303 | } |
304 | void SetHourOffset(int n) { hour_offset_ = n; } |
305 | bool Write(FixedArray output); |
306 | |
307 | static bool IsMinute(int x) { return Between(x, 0, 59); } |
308 | static bool IsHour(int x) { return Between(x, 0, 23); } |
309 | static bool IsSecond(int x) { return Between(x, 0, 59); } |
310 | |
311 | private: |
312 | static bool IsHour12(int x) { return Between(x, 0, 12); } |
313 | static bool IsMillisecond(int x) { return Between(x, 0, 999); } |
314 | |
315 | static const int kSize = 4; |
316 | int comp_[kSize]; |
317 | int index_; |
318 | int hour_offset_; |
319 | }; |
320 | |
321 | class DayComposer { |
322 | public: |
323 | DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {} |
324 | bool IsEmpty() const { return index_ == 0; } |
325 | bool Add(int n) { |
326 | if (index_ < kSize) { |
327 | comp_[index_] = n; |
328 | index_++; |
329 | return true; |
330 | } |
331 | return false; |
332 | } |
333 | void SetNamedMonth(int n) { named_month_ = n; } |
334 | bool Write(FixedArray output); |
335 | void set_iso_date() { is_iso_date_ = true; } |
336 | static bool IsMonth(int x) { return Between(x, 1, 12); } |
337 | static bool IsDay(int x) { return Between(x, 1, 31); } |
338 | |
339 | private: |
340 | static const int kSize = 3; |
341 | int comp_[kSize]; |
342 | int index_; |
343 | int named_month_; |
344 | // If set, ensures that data is always parsed in year-month-date order. |
345 | bool is_iso_date_; |
346 | }; |
347 | |
// Tries to parse an ES5 Date Time String, reading tokens from |scanner|.
//
// The return value tells the caller how to proceed:
//   - DateToken::EndOfInput(): parsing is complete.
//   - DateToken::Invalid(): parsing was terminally unsuccessful.
//   - any other token: the next token to continue with in the legacy date
//     string parser.
//
// NOTE(review): |day|, |time| and |tz| are presumably filled with the
// components recognized so far -- confirm against the out-of-line
// definition, which is not visible in this header.
template <typename Char>
static DateParser::DateToken ParseES5DateTime(
    DateStringTokenizer<Char>* scanner, DayComposer* day, TimeComposer* time,
    TimeZoneComposer* tz);
357 | }; |
358 | |
359 | |
360 | } // namespace internal |
361 | } // namespace v8 |
362 | |
363 | #endif // V8_DATEPARSER_H_ |
364 | |