// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#ifndef V8_DATEPARSER_H_
6#define V8_DATEPARSER_H_
7
8#include "src/allocation.h"
9#include "src/char-predicates.h"
10
11namespace v8 {
12namespace internal {
13
14class DateParser : public AllStatic {
15 public:
// Parse the string as a date. If parsing succeeds, return true after
// filling out the output array as follows (all integers are Smis):
//   [0]: year
//   [1]: month (0 = Jan, 1 = Feb, ...)
//   [2]: day
//   [3]: hour
//   [4]: minute
//   [5]: second
//   [6]: millisecond
//   [7]: UTC offset in seconds, or null value if no timezone specified
// The slot order is the same as the YEAR..UTC_OFFSET enumerators declared
// in this class, and OUTPUT_SIZE is the number of slots listed here.
// If parsing fails, return false (content of output array is not defined).
//
// Char is the character type of the input vector |str|.
// NOTE(review): the role of |isolate| is not visible from this header;
// consult the definition before relying on it.
template <typename Char>
static bool Parse(Isolate* isolate, Vector<Char> str, FixedArray output);
29
// Slot indices, in the same order as the output array layout documented
// for Parse(). OUTPUT_SIZE comes last and therefore equals the number of
// slots.
enum {
  YEAR,
  MONTH,
  DAY,
  HOUR,
  MINUTE,
  SECOND,
  MILLISECOND,
  UTC_OFFSET,
  OUTPUT_SIZE
};
33
34 private:
// Range testing: returns true iff lo <= x <= hi. Requires lo <= hi.
//
// Uses the single-comparison trick: after shifting the range to start at
// zero, any x below lo wraps around to a huge unsigned value and fails the
// comparison. Both subtractions are done in unsigned arithmetic, which wraps
// modulo 2^N, so extreme arguments (e.g. x == INT_MIN) cannot trigger signed
// overflow.
static inline bool Between(int x, int lo, int hi) {
  const unsigned offset = static_cast<unsigned>(x) - static_cast<unsigned>(lo);
  const unsigned width = static_cast<unsigned>(hi) - static_cast<unsigned>(lo);
  return offset <= width;
}
39
// Indicates a missing value. The composer classes in this header use it as
// the initial value of their optional fields (time zone sign/hour/minute,
// hour offset, named month).
static const int kNone = kMaxInt;

// Maximal number of digits used to build the value of a numeral.
// Remaining digits are ignored: InputReader::ReadUnsignedNumeral() still
// consumes them, but they do not contribute to the returned value.
// Nine decimal digits always fit in a 32-bit signed integer.
static const int kMaxSignificantDigits = 9;
46
47 // InputReader provides basic string parsing and character classification.
48 template <typename Char>
49 class InputReader {
50 public:
51 explicit InputReader(Vector<Char> s) : index_(0), buffer_(s) { Next(); }
52
53 int position() { return index_; }
54
55 // Advance to the next character of the string.
56 void Next() {
57 ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
58 index_++;
59 }
60
61 // Read a string of digits as an unsigned number. Cap value at
62 // kMaxSignificantDigits, but skip remaining digits if the numeral
63 // is longer.
64 int ReadUnsignedNumeral() {
65 int n = 0;
66 int i = 0;
67 while (IsAsciiDigit()) {
68 if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
69 i++;
70 Next();
71 }
72 return n;
73 }
74
75 // Read a word (sequence of chars. >= 'A'), fill the given buffer with a
76 // lower-case prefix, and pad any remainder of the buffer with zeroes.
77 // Return word length.
78 int ReadWord(uint32_t* prefix, int prefix_size) {
79 int len;
80 for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) {
81 if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_);
82 }
83 for (int i = len; i < prefix_size; i++) prefix[i] = 0;
84 return len;
85 }
86
87 // The skip methods return whether they actually skipped something.
88 bool Skip(uint32_t c) {
89 if (ch_ == c) {
90 Next();
91 return true;
92 }
93 return false;
94 }
95
96 inline bool SkipWhiteSpace();
97 inline bool SkipParentheses();
98
99 // Character testing/classification. Non-ASCII digits are not supported.
100 bool Is(uint32_t c) const { return ch_ == c; }
101 bool IsEnd() const { return ch_ == 0; }
102 bool IsAsciiDigit() const { return IsDecimalDigit(ch_); }
103 bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; }
104 bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; }
105
106 // Return 1 for '+' and -1 for '-'.
107 int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }
108
109 private:
110 int index_;
111 Vector<Char> buffer_;
112 uint32_t ch_;
113 };
114
// Categories of keywords. DateToken stores these values directly as the tag
// of keyword tokens.
enum KeywordType {
  INVALID,
  MONTH_NAME,
  TIME_ZONE_NAME,
  TIME_SEPARATOR,
  AM_PM
};
118
119 struct DateToken {
120 public:
121 bool IsInvalid() { return tag_ == kInvalidTokenTag; }
122 bool IsUnknown() { return tag_ == kUnknownTokenTag; }
123 bool IsNumber() { return tag_ == kNumberTag; }
124 bool IsSymbol() { return tag_ == kSymbolTag; }
125 bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; }
126 bool IsEndOfInput() { return tag_ == kEndOfInputTag; }
127 bool IsKeyword() { return tag_ >= kKeywordTagStart; }
128
129 int length() { return length_; }
130
131 int number() {
132 DCHECK(IsNumber());
133 return value_;
134 }
135 KeywordType keyword_type() {
136 DCHECK(IsKeyword());
137 return static_cast<KeywordType>(tag_);
138 }
139 int keyword_value() {
140 DCHECK(IsKeyword());
141 return value_;
142 }
143 char symbol() {
144 DCHECK(IsSymbol());
145 return static_cast<char>(value_);
146 }
147 bool IsSymbol(char symbol) {
148 return IsSymbol() && this->symbol() == symbol;
149 }
150 bool IsKeywordType(KeywordType tag) {
151 return tag_ == tag;
152 }
153 bool IsFixedLengthNumber(int length) {
154 return IsNumber() && length_ == length;
155 }
156 bool IsAsciiSign() {
157 return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
158 }
159 int ascii_sign() {
160 DCHECK(IsAsciiSign());
161 return 44 - value_;
162 }
163 bool IsKeywordZ() {
164 return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
165 }
166 bool IsUnknown(int character) {
167 return IsUnknown() && value_ == character;
168 }
169 // Factory functions.
170 static DateToken Keyword(KeywordType tag, int value, int length) {
171 return DateToken(tag, length, value);
172 }
173 static DateToken Number(int value, int length) {
174 return DateToken(kNumberTag, length, value);
175 }
176 static DateToken Symbol(char symbol) {
177 return DateToken(kSymbolTag, 1, symbol);
178 }
179 static DateToken EndOfInput() {
180 return DateToken(kEndOfInputTag, 0, -1);
181 }
182 static DateToken WhiteSpace(int length) {
183 return DateToken(kWhiteSpaceTag, length, -1);
184 }
185 static DateToken Unknown() {
186 return DateToken(kUnknownTokenTag, 1, -1);
187 }
188 static DateToken Invalid() {
189 return DateToken(kInvalidTokenTag, 0, -1);
190 }
191
192 private:
193 enum TagType {
194 kInvalidTokenTag = -6,
195 kUnknownTokenTag = -5,
196 kWhiteSpaceTag = -4,
197 kNumberTag = -3,
198 kSymbolTag = -2,
199 kEndOfInputTag = -1,
200 kKeywordTagStart = 0
201 };
202 DateToken(int tag, int length, int value)
203 : tag_(tag),
204 length_(length),
205 value_(value) { }
206
207 int tag_;
208 int length_; // Number of characters.
209 int value_;
210 };
211
212 template <typename Char>
213 class DateStringTokenizer {
214 public:
215 explicit DateStringTokenizer(InputReader<Char>* in)
216 : in_(in), next_(Scan()) { }
217 DateToken Next() {
218 DateToken result = next_;
219 next_ = Scan();
220 return result;
221 }
222
223 DateToken Peek() {
224 return next_;
225 }
226 bool SkipSymbol(char symbol) {
227 if (next_.IsSymbol(symbol)) {
228 next_ = Scan();
229 return true;
230 }
231 return false;
232 }
233
234 private:
235 DateToken Scan();
236
237 InputReader<Char>* in_;
238 DateToken next_;
239 };
240
// Takes a token by value and returns an int; defined outside this header.
// NOTE(review): judging by the name, this presumably turns a number token
// into a millisecond count -- confirm against the definition.
static int ReadMilliseconds(DateToken number);
242
243 // KeywordTable maps names of months, time zones, am/pm to numbers.
244 class KeywordTable : public AllStatic {
245 public:
246 // Look up a word in the keyword table and return an index.
247 // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength
248 // and 'len' is the word length.
249 static int Lookup(const uint32_t* pre, int len);
250 // Get the type of the keyword at index i.
251 static KeywordType GetType(int i) {
252 return static_cast<KeywordType>(array[i][kTypeOffset]);
253 }
254 // Get the value of the keyword at index i.
255 static int GetValue(int i) { return array[i][kValueOffset]; }
256
257 static const int kPrefixLength = 3;
258 static const int kTypeOffset = kPrefixLength;
259 static const int kValueOffset = kTypeOffset + 1;
260 static const int kEntrySize = kValueOffset + 1;
261 static const int8_t array[][kEntrySize];
262 };
263
264 class TimeZoneComposer {
265 public:
266 TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {}
267 void Set(int offset_in_hours) {
268 sign_ = offset_in_hours < 0 ? -1 : 1;
269 hour_ = offset_in_hours * sign_;
270 minute_ = 0;
271 }
272 void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; }
273 void SetAbsoluteHour(int hour) { hour_ = hour; }
274 void SetAbsoluteMinute(int minute) { minute_ = minute; }
275 bool IsExpecting(int n) const {
276 return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n);
277 }
278 bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
279 bool Write(FixedArray output);
280 bool IsEmpty() { return hour_ == kNone; }
281 private:
282 int sign_;
283 int hour_;
284 int minute_;
285 };
286
287 class TimeComposer {
288 public:
289 TimeComposer() : index_(0), hour_offset_(kNone) {}
290 bool IsEmpty() const { return index_ == 0; }
291 bool IsExpecting(int n) const {
292 return (index_ == 1 && IsMinute(n)) ||
293 (index_ == 2 && IsSecond(n)) ||
294 (index_ == 3 && IsMillisecond(n));
295 }
296 bool Add(int n) {
297 return index_ < kSize ? (comp_[index_++] = n, true) : false;
298 }
299 bool AddFinal(int n) {
300 if (!Add(n)) return false;
301 while (index_ < kSize) comp_[index_++] = 0;
302 return true;
303 }
304 void SetHourOffset(int n) { hour_offset_ = n; }
305 bool Write(FixedArray output);
306
307 static bool IsMinute(int x) { return Between(x, 0, 59); }
308 static bool IsHour(int x) { return Between(x, 0, 23); }
309 static bool IsSecond(int x) { return Between(x, 0, 59); }
310
311 private:
312 static bool IsHour12(int x) { return Between(x, 0, 12); }
313 static bool IsMillisecond(int x) { return Between(x, 0, 999); }
314
315 static const int kSize = 4;
316 int comp_[kSize];
317 int index_;
318 int hour_offset_;
319 };
320
321 class DayComposer {
322 public:
323 DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
324 bool IsEmpty() const { return index_ == 0; }
325 bool Add(int n) {
326 if (index_ < kSize) {
327 comp_[index_] = n;
328 index_++;
329 return true;
330 }
331 return false;
332 }
333 void SetNamedMonth(int n) { named_month_ = n; }
334 bool Write(FixedArray output);
335 void set_iso_date() { is_iso_date_ = true; }
336 static bool IsMonth(int x) { return Between(x, 1, 12); }
337 static bool IsDay(int x) { return Between(x, 1, 31); }
338
339 private:
340 static const int kSize = 3;
341 int comp_[kSize];
342 int index_;
343 int named_month_;
344 // If set, ensures that data is always parsed in year-month-date order.
345 bool is_iso_date_;
346 };
347
// Tries to parse an ES5 Date Time String. Returns the next token
// to continue with in the legacy date string parser. If parsing is
// complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
// returns DateToken::Invalid(). Otherwise parsing continues in the
// legacy parser.
//
// |scanner| is the token source. NOTE(review): |day|, |time| and |tz| are
// presumably filled in with the components recognized so far -- confirm
// against the definition, which is not part of this header.
template <typename Char>
static DateParser::DateToken ParseES5DateTime(
    DateStringTokenizer<Char>* scanner, DayComposer* day, TimeComposer* time,
    TimeZoneComposer* tz);
357};
358
359
360} // namespace internal
361} // namespace v8
362
363#endif // V8_DATEPARSER_H_
364