1 | // Copyright 2011 the V8 project authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. |
4 | |
5 | #ifndef V8_DATEPARSER_INL_H_ |
6 | #define V8_DATEPARSER_INL_H_ |
7 | |
8 | #include "src/char-predicates-inl.h" |
9 | #include "src/dateparser.h" |
10 | #include "src/isolate.h" |
11 | |
12 | namespace v8 { |
13 | namespace internal { |
14 | |
15 | template <typename Char> |
16 | bool DateParser::Parse(Isolate* isolate, Vector<Char> str, FixedArray out) { |
17 | DCHECK(out->length() >= OUTPUT_SIZE); |
18 | InputReader<Char> in(str); |
19 | DateStringTokenizer<Char> scanner(&in); |
20 | TimeZoneComposer tz; |
21 | TimeComposer time; |
22 | DayComposer day; |
23 | |
24 | // Specification: |
25 | // Accept ES5 ISO 8601 date-time-strings or legacy dates compatible |
26 | // with Safari. |
27 | // ES5 ISO 8601 dates: |
28 | // [('-'|'+')yy]yyyy[-MM[-DD]][THH:mm[:ss[.sss]][Z|(+|-)hh:mm]] |
29 | // where yyyy is in the range 0000..9999 and |
30 | // +/-yyyyyy is in the range -999999..+999999 - |
31 | // but -000000 is invalid (year zero must be positive), |
32 | // MM is in the range 01..12, |
33 | // DD is in the range 01..31, |
34 | // MM and DD defaults to 01 if missing,, |
35 | // HH is generally in the range 00..23, but can be 24 if mm, ss |
36 | // and sss are zero (or missing), representing midnight at the |
37 | // end of a day, |
38 | // mm and ss are in the range 00..59, |
39 | // sss is in the range 000..999, |
40 | // hh is in the range 00..23, |
41 | // mm, ss, and sss default to 00 if missing, and |
42 | // timezone defaults to Z if missing |
43 | // (following Safari, ISO actually demands local time). |
44 | // Extensions: |
45 | // We also allow sss to have more or less than three digits (but at |
46 | // least one). |
47 | // We allow hh:mm to be specified as hhmm. |
48 | // Legacy dates: |
49 | // Any unrecognized word before the first number is ignored. |
50 | // Parenthesized text is ignored. |
51 | // An unsigned number followed by ':' is a time value, and is |
52 | // added to the TimeComposer. A number followed by '::' adds a second |
53 | // zero as well. A number followed by '.' is also a time and must be |
54 | // followed by milliseconds. |
55 | // Any other number is a date component and is added to DayComposer. |
56 | // A month name (or really: any word having the same first three letters |
57 | // as a month name) is recorded as a named month in the Day composer. |
58 | // A word recognizable as a time-zone is recorded as such, as is |
59 | // '(+|-)(hhmm|hh:)'. |
60 | // Legacy dates don't allow extra signs ('+' or '-') or umatched ')' |
61 | // after a number has been read (before the first number, any garbage |
62 | // is allowed). |
63 | // Intersection of the two: |
64 | // A string that matches both formats (e.g. 1970-01-01) will be |
65 | // parsed as an ES5 date-time string - which means it will default |
66 | // to UTC time-zone. That's unavoidable if following the ES5 |
67 | // specification. |
68 | // After a valid "T" has been read while scanning an ES5 datetime string, |
69 | // the input can no longer be a valid legacy date, since the "T" is a |
70 | // garbage string after a number has been read. |
71 | |
72 | // First try getting as far as possible with as ES5 Date Time String. |
73 | DateToken next_unhandled_token = ParseES5DateTime(&scanner, &day, &time, &tz); |
74 | if (next_unhandled_token.IsInvalid()) return false; |
75 | bool has_read_number = !day.IsEmpty(); |
76 | // If there's anything left, continue with the legacy parser. |
77 | bool legacy_parser = false; |
78 | for (DateToken token = next_unhandled_token; |
79 | !token.IsEndOfInput(); |
80 | token = scanner.Next()) { |
81 | if (token.IsNumber()) { |
82 | legacy_parser = true; |
83 | has_read_number = true; |
84 | int n = token.number(); |
85 | if (scanner.SkipSymbol(':')) { |
86 | if (scanner.SkipSymbol(':')) { |
87 | // n + "::" |
88 | if (!time.IsEmpty()) return false; |
89 | time.Add(n); |
90 | time.Add(0); |
91 | } else { |
92 | // n + ":" |
93 | if (!time.Add(n)) return false; |
94 | if (scanner.Peek().IsSymbol('.')) scanner.Next(); |
95 | } |
96 | } else if (scanner.SkipSymbol('.') && time.IsExpecting(n)) { |
97 | time.Add(n); |
98 | if (!scanner.Peek().IsNumber()) return false; |
99 | int n = ReadMilliseconds(scanner.Next()); |
100 | if (n < 0) return false; |
101 | time.AddFinal(n); |
102 | } else if (tz.IsExpecting(n)) { |
103 | tz.SetAbsoluteMinute(n); |
104 | } else if (time.IsExpecting(n)) { |
105 | time.AddFinal(n); |
106 | // Require end, white space, "Z", "+" or "-" immediately after |
107 | // finalizing time. |
108 | DateToken peek = scanner.Peek(); |
109 | if (!peek.IsEndOfInput() && |
110 | !peek.IsWhiteSpace() && |
111 | !peek.IsKeywordZ() && |
112 | !peek.IsAsciiSign()) return false; |
113 | } else { |
114 | if (!day.Add(n)) return false; |
115 | scanner.SkipSymbol('-'); |
116 | } |
117 | } else if (token.IsKeyword()) { |
118 | legacy_parser = true; |
119 | // Parse a "word" (sequence of chars. >= 'A'). |
120 | KeywordType type = token.keyword_type(); |
121 | int value = token.keyword_value(); |
122 | if (type == AM_PM && !time.IsEmpty()) { |
123 | time.SetHourOffset(value); |
124 | } else if (type == MONTH_NAME) { |
125 | day.SetNamedMonth(value); |
126 | scanner.SkipSymbol('-'); |
127 | } else if (type == TIME_ZONE_NAME && has_read_number) { |
128 | tz.Set(value); |
129 | } else { |
130 | // Garbage words are illegal if a number has been read. |
131 | if (has_read_number) return false; |
132 | // The first number has to be separated from garbage words by |
133 | // whitespace or other separators. |
134 | if (scanner.Peek().IsNumber()) return false; |
135 | } |
136 | } else if (token.IsAsciiSign() && (tz.IsUTC() || !time.IsEmpty())) { |
137 | legacy_parser = true; |
138 | // Parse UTC offset (only after UTC or time). |
139 | tz.SetSign(token.ascii_sign()); |
140 | // The following number may be empty. |
141 | int n = 0; |
142 | int length = 0; |
143 | if (scanner.Peek().IsNumber()) { |
144 | DateToken token = scanner.Next(); |
145 | length = token.length(); |
146 | n = token.number(); |
147 | } |
148 | has_read_number = true; |
149 | |
150 | if (scanner.Peek().IsSymbol(':')) { |
151 | tz.SetAbsoluteHour(n); |
152 | // TODO(littledan): Use minutes as part of timezone? |
153 | tz.SetAbsoluteMinute(kNone); |
154 | } else if (length == 2 || length == 1) { |
155 | // Handle time zones like GMT-8 |
156 | tz.SetAbsoluteHour(n); |
157 | tz.SetAbsoluteMinute(0); |
158 | } else if (length == 4 || length == 3) { |
159 | // Looks like the hhmm format |
160 | tz.SetAbsoluteHour(n / 100); |
161 | tz.SetAbsoluteMinute(n % 100); |
162 | } else { |
163 | // No need to accept time zones like GMT-12345 |
164 | return false; |
165 | } |
166 | } else if ((token.IsAsciiSign() || token.IsSymbol(')')) && |
167 | has_read_number) { |
168 | // Extra sign or ')' is illegal if a number has been read. |
169 | return false; |
170 | } else { |
171 | // Ignore other characters and whitespace. |
172 | } |
173 | } |
174 | |
175 | bool success = day.Write(out) && time.Write(out) && tz.Write(out); |
176 | |
177 | if (legacy_parser && success) { |
178 | isolate->CountUsage(v8::Isolate::kLegacyDateParser); |
179 | } |
180 | |
181 | return success; |
182 | } |
183 | |
184 | template<typename CharType> |
185 | DateParser::DateToken DateParser::DateStringTokenizer<CharType>::Scan() { |
186 | int pre_pos = in_->position(); |
187 | if (in_->IsEnd()) return DateToken::EndOfInput(); |
188 | if (in_->IsAsciiDigit()) { |
189 | int n = in_->ReadUnsignedNumeral(); |
190 | int length = in_->position() - pre_pos; |
191 | return DateToken::Number(n, length); |
192 | } |
193 | if (in_->Skip(':')) return DateToken::Symbol(':'); |
194 | if (in_->Skip('-')) return DateToken::Symbol('-'); |
195 | if (in_->Skip('+')) return DateToken::Symbol('+'); |
196 | if (in_->Skip('.')) return DateToken::Symbol('.'); |
197 | if (in_->Skip(')')) return DateToken::Symbol(')'); |
198 | if (in_->IsAsciiAlphaOrAbove()) { |
199 | DCHECK_EQ(KeywordTable::kPrefixLength, 3); |
200 | uint32_t buffer[3] = {0, 0, 0}; |
201 | int length = in_->ReadWord(buffer, 3); |
202 | int index = KeywordTable::Lookup(buffer, length); |
203 | return DateToken::Keyword(KeywordTable::GetType(index), |
204 | KeywordTable::GetValue(index), |
205 | length); |
206 | } |
207 | if (in_->SkipWhiteSpace()) { |
208 | return DateToken::WhiteSpace(in_->position() - pre_pos); |
209 | } |
210 | if (in_->SkipParentheses()) { |
211 | return DateToken::Unknown(); |
212 | } |
213 | in_->Next(); |
214 | return DateToken::Unknown(); |
215 | } |
216 | |
217 | |
218 | template <typename Char> |
219 | bool DateParser::InputReader<Char>::SkipWhiteSpace() { |
220 | if (IsWhiteSpaceOrLineTerminator(ch_)) { |
221 | Next(); |
222 | return true; |
223 | } |
224 | return false; |
225 | } |
226 | |
227 | |
228 | template <typename Char> |
229 | bool DateParser::InputReader<Char>::SkipParentheses() { |
230 | if (ch_ != '(') return false; |
231 | int balance = 0; |
232 | do { |
233 | if (ch_ == ')') --balance; |
234 | else if (ch_ == '(') ++balance; |
235 | Next(); |
236 | } while (balance > 0 && ch_); |
237 | return true; |
238 | } |
239 | |
240 | |
241 | template <typename Char> |
242 | DateParser::DateToken DateParser::ParseES5DateTime( |
243 | DateStringTokenizer<Char>* scanner, DayComposer* day, TimeComposer* time, |
244 | TimeZoneComposer* tz) { |
245 | DCHECK(day->IsEmpty()); |
246 | DCHECK(time->IsEmpty()); |
247 | DCHECK(tz->IsEmpty()); |
248 | |
249 | // Parse mandatory date string: [('-'|'+')yy]yyyy[':'MM[':'DD]] |
250 | if (scanner->Peek().IsAsciiSign()) { |
251 | // Keep the sign token, so we can pass it back to the legacy |
252 | // parser if we don't use it. |
253 | DateToken sign_token = scanner->Next(); |
254 | if (!scanner->Peek().IsFixedLengthNumber(6)) return sign_token; |
255 | int sign = sign_token.ascii_sign(); |
256 | int year = scanner->Next().number(); |
257 | if (sign < 0 && year == 0) return sign_token; |
258 | day->Add(sign * year); |
259 | } else if (scanner->Peek().IsFixedLengthNumber(4)) { |
260 | day->Add(scanner->Next().number()); |
261 | } else { |
262 | return scanner->Next(); |
263 | } |
264 | if (scanner->SkipSymbol('-')) { |
265 | if (!scanner->Peek().IsFixedLengthNumber(2) || |
266 | !DayComposer::IsMonth(scanner->Peek().number())) return scanner->Next(); |
267 | day->Add(scanner->Next().number()); |
268 | if (scanner->SkipSymbol('-')) { |
269 | if (!scanner->Peek().IsFixedLengthNumber(2) || |
270 | !DayComposer::IsDay(scanner->Peek().number())) return scanner->Next(); |
271 | day->Add(scanner->Next().number()); |
272 | } |
273 | } |
274 | // Check for optional time string: 'T'HH':'mm[':'ss['.'sss]]Z |
275 | if (!scanner->Peek().IsKeywordType(TIME_SEPARATOR)) { |
276 | if (!scanner->Peek().IsEndOfInput()) return scanner->Next(); |
277 | } else { |
278 | // ES5 Date Time String time part is present. |
279 | scanner->Next(); |
280 | if (!scanner->Peek().IsFixedLengthNumber(2) || |
281 | !Between(scanner->Peek().number(), 0, 24)) { |
282 | return DateToken::Invalid(); |
283 | } |
284 | // Allow 24:00[:00[.000]], but no other time starting with 24. |
285 | bool hour_is_24 = (scanner->Peek().number() == 24); |
286 | time->Add(scanner->Next().number()); |
287 | if (!scanner->SkipSymbol(':')) return DateToken::Invalid(); |
288 | if (!scanner->Peek().IsFixedLengthNumber(2) || |
289 | !TimeComposer::IsMinute(scanner->Peek().number()) || |
290 | (hour_is_24 && scanner->Peek().number() > 0)) { |
291 | return DateToken::Invalid(); |
292 | } |
293 | time->Add(scanner->Next().number()); |
294 | if (scanner->SkipSymbol(':')) { |
295 | if (!scanner->Peek().IsFixedLengthNumber(2) || |
296 | !TimeComposer::IsSecond(scanner->Peek().number()) || |
297 | (hour_is_24 && scanner->Peek().number() > 0)) { |
298 | return DateToken::Invalid(); |
299 | } |
300 | time->Add(scanner->Next().number()); |
301 | if (scanner->SkipSymbol('.')) { |
302 | if (!scanner->Peek().IsNumber() || |
303 | (hour_is_24 && scanner->Peek().number() > 0)) { |
304 | return DateToken::Invalid(); |
305 | } |
306 | // Allow more or less than the mandated three digits. |
307 | time->Add(ReadMilliseconds(scanner->Next())); |
308 | } |
309 | } |
310 | // Check for optional timezone designation: 'Z' | ('+'|'-')hh':'mm |
311 | if (scanner->Peek().IsKeywordZ()) { |
312 | scanner->Next(); |
313 | tz->Set(0); |
314 | } else if (scanner->Peek().IsSymbol('+') || |
315 | scanner->Peek().IsSymbol('-')) { |
316 | tz->SetSign(scanner->Next().symbol() == '+' ? 1 : -1); |
317 | if (scanner->Peek().IsFixedLengthNumber(4)) { |
318 | // hhmm extension syntax. |
319 | int hourmin = scanner->Next().number(); |
320 | int hour = hourmin / 100; |
321 | int min = hourmin % 100; |
322 | if (!TimeComposer::IsHour(hour) || !TimeComposer::IsMinute(min)) { |
323 | return DateToken::Invalid(); |
324 | } |
325 | tz->SetAbsoluteHour(hour); |
326 | tz->SetAbsoluteMinute(min); |
327 | } else { |
328 | // hh:mm standard syntax. |
329 | if (!scanner->Peek().IsFixedLengthNumber(2) || |
330 | !TimeComposer::IsHour(scanner->Peek().number())) { |
331 | return DateToken::Invalid(); |
332 | } |
333 | tz->SetAbsoluteHour(scanner->Next().number()); |
334 | if (!scanner->SkipSymbol(':')) return DateToken::Invalid(); |
335 | if (!scanner->Peek().IsFixedLengthNumber(2) || |
336 | !TimeComposer::IsMinute(scanner->Peek().number())) { |
337 | return DateToken::Invalid(); |
338 | } |
339 | tz->SetAbsoluteMinute(scanner->Next().number()); |
340 | } |
341 | } |
342 | if (!scanner->Peek().IsEndOfInput()) return DateToken::Invalid(); |
343 | } |
344 | // Successfully parsed ES5 Date Time String. |
345 | // ES#sec-date-time-string-format Date Time String Format |
346 | // "When the time zone offset is absent, date-only forms are interpreted |
347 | // as a UTC time and date-time forms are interpreted as a local time." |
348 | if (tz->IsEmpty() && time->IsEmpty()) { |
349 | tz->Set(0); |
350 | } |
351 | day->set_iso_date(); |
352 | return DateToken::EndOfInput(); |
353 | } |
354 | |
355 | |
356 | } // namespace internal |
357 | } // namespace v8 |
358 | |
359 | #endif // V8_DATEPARSER_INL_H_ |
360 | |