1 | // Copyright 2016 the V8 project authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. |
4 | |
5 | #include "src/builtins/builtins-utils-inl.h" |
6 | #include "src/builtins/builtins.h" |
7 | #include "src/conversions.h" |
8 | #include "src/counters.h" |
9 | #include "src/heap/heap-inl.h" // For ToBoolean. TODO(jkummerow): Drop. |
10 | #include "src/objects-inl.h" |
11 | #ifdef V8_INTL_SUPPORT |
12 | #include "src/objects/intl-objects.h" |
13 | #endif |
14 | #include "src/regexp/regexp-utils.h" |
15 | #include "src/string-builder-inl.h" |
16 | #include "src/string-case.h" |
17 | #include "src/unicode-inl.h" |
18 | #include "src/unicode.h" |
19 | |
20 | namespace v8 { |
21 | namespace internal { |
22 | |
23 | namespace { // for String.fromCodePoint |
24 | |
25 | bool IsValidCodePoint(Isolate* isolate, Handle<Object> value) { |
26 | if (!value->IsNumber() && |
27 | !Object::ToNumber(isolate, value).ToHandle(&value)) { |
28 | return false; |
29 | } |
30 | |
31 | if (Object::ToInteger(isolate, value).ToHandleChecked()->Number() != |
32 | value->Number()) { |
33 | return false; |
34 | } |
35 | |
36 | if (value->Number() < 0 || value->Number() > 0x10FFFF) { |
37 | return false; |
38 | } |
39 | |
40 | return true; |
41 | } |
42 | |
43 | uc32 NextCodePoint(Isolate* isolate, BuiltinArguments args, int index) { |
44 | Handle<Object> value = args.at(1 + index); |
45 | ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, value, |
46 | Object::ToNumber(isolate, value), -1); |
47 | if (!IsValidCodePoint(isolate, value)) { |
48 | isolate->Throw(*isolate->factory()->NewRangeError( |
49 | MessageTemplate::kInvalidCodePoint, value)); |
50 | return -1; |
51 | } |
52 | return DoubleToUint32(value->Number()); |
53 | } |
54 | |
55 | } // namespace |
56 | |
57 | // ES6 section 21.1.2.2 String.fromCodePoint ( ...codePoints ) |
58 | BUILTIN(StringFromCodePoint) { |
59 | HandleScope scope(isolate); |
60 | int const length = args.length() - 1; |
61 | if (length == 0) return ReadOnlyRoots(isolate).empty_string(); |
62 | DCHECK_LT(0, length); |
63 | |
64 | // Optimistically assume that the resulting String contains only one byte |
65 | // characters. |
66 | std::vector<uint8_t> one_byte_buffer; |
67 | one_byte_buffer.reserve(length); |
68 | uc32 code = 0; |
69 | int index; |
70 | for (index = 0; index < length; index++) { |
71 | code = NextCodePoint(isolate, args, index); |
72 | if (code < 0) { |
73 | return ReadOnlyRoots(isolate).exception(); |
74 | } |
75 | if (code > String::kMaxOneByteCharCode) { |
76 | break; |
77 | } |
78 | one_byte_buffer.push_back(code); |
79 | } |
80 | |
81 | if (index == length) { |
82 | RETURN_RESULT_OR_FAILURE( |
83 | isolate, isolate->factory()->NewStringFromOneByte(Vector<uint8_t>( |
84 | one_byte_buffer.data(), one_byte_buffer.size()))); |
85 | } |
86 | |
87 | std::vector<uc16> two_byte_buffer; |
88 | two_byte_buffer.reserve(length - index); |
89 | |
90 | while (true) { |
91 | if (code <= static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { |
92 | two_byte_buffer.push_back(code); |
93 | } else { |
94 | two_byte_buffer.push_back(unibrow::Utf16::LeadSurrogate(code)); |
95 | two_byte_buffer.push_back(unibrow::Utf16::TrailSurrogate(code)); |
96 | } |
97 | |
98 | if (++index == length) { |
99 | break; |
100 | } |
101 | code = NextCodePoint(isolate, args, index); |
102 | if (code < 0) { |
103 | return ReadOnlyRoots(isolate).exception(); |
104 | } |
105 | } |
106 | |
107 | Handle<SeqTwoByteString> result; |
108 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
109 | isolate, result, |
110 | isolate->factory()->NewRawTwoByteString( |
111 | static_cast<int>(one_byte_buffer.size() + two_byte_buffer.size()))); |
112 | |
113 | DisallowHeapAllocation no_gc; |
114 | CopyChars(result->GetChars(no_gc), one_byte_buffer.data(), |
115 | one_byte_buffer.size()); |
116 | CopyChars(result->GetChars(no_gc) + one_byte_buffer.size(), |
117 | two_byte_buffer.data(), two_byte_buffer.size()); |
118 | |
119 | return *result; |
120 | } |
121 | |
122 | // ES6 section 21.1.3.9 |
123 | // String.prototype.lastIndexOf ( searchString [ , position ] ) |
124 | BUILTIN(StringPrototypeLastIndexOf) { |
125 | HandleScope handle_scope(isolate); |
126 | return String::LastIndexOf(isolate, args.receiver(), |
127 | args.atOrUndefined(isolate, 1), |
128 | args.atOrUndefined(isolate, 2)); |
129 | } |
130 | |
131 | // ES6 section 21.1.3.10 String.prototype.localeCompare ( that ) |
132 | // |
133 | // This function is implementation specific. For now, we do not |
134 | // do anything locale specific. |
135 | BUILTIN(StringPrototypeLocaleCompare) { |
136 | HandleScope handle_scope(isolate); |
137 | |
138 | isolate->CountUsage(v8::Isolate::UseCounterFeature::kStringLocaleCompare); |
139 | |
140 | #ifdef V8_INTL_SUPPORT |
141 | TO_THIS_STRING(str1, "String.prototype.localeCompare" ); |
142 | Handle<String> str2; |
143 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
144 | isolate, str2, Object::ToString(isolate, args.atOrUndefined(isolate, 1))); |
145 | RETURN_RESULT_OR_FAILURE( |
146 | isolate, Intl::StringLocaleCompare(isolate, str1, str2, |
147 | args.atOrUndefined(isolate, 2), |
148 | args.atOrUndefined(isolate, 3))); |
149 | #else |
150 | DCHECK_EQ(2, args.length()); |
151 | |
152 | TO_THIS_STRING(str1, "String.prototype.localeCompare" ); |
153 | Handle<String> str2; |
154 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, str2, |
155 | Object::ToString(isolate, args.at(1))); |
156 | |
157 | if (str1.is_identical_to(str2)) return Smi::kZero; // Equal. |
158 | int str1_length = str1->length(); |
159 | int str2_length = str2->length(); |
160 | |
161 | // Decide trivial cases without flattening. |
162 | if (str1_length == 0) { |
163 | if (str2_length == 0) return Smi::kZero; // Equal. |
164 | return Smi::FromInt(-str2_length); |
165 | } else { |
166 | if (str2_length == 0) return Smi::FromInt(str1_length); |
167 | } |
168 | |
169 | int end = str1_length < str2_length ? str1_length : str2_length; |
170 | |
171 | // No need to flatten if we are going to find the answer on the first |
172 | // character. At this point we know there is at least one character |
173 | // in each string, due to the trivial case handling above. |
174 | int d = str1->Get(0) - str2->Get(0); |
175 | if (d != 0) return Smi::FromInt(d); |
176 | |
177 | str1 = String::Flatten(isolate, str1); |
178 | str2 = String::Flatten(isolate, str2); |
179 | |
180 | DisallowHeapAllocation no_gc; |
181 | String::FlatContent flat1 = str1->GetFlatContent(no_gc); |
182 | String::FlatContent flat2 = str2->GetFlatContent(no_gc); |
183 | |
184 | for (int i = 0; i < end; i++) { |
185 | if (flat1.Get(i) != flat2.Get(i)) { |
186 | return Smi::FromInt(flat1.Get(i) - flat2.Get(i)); |
187 | } |
188 | } |
189 | |
190 | return Smi::FromInt(str1_length - str2_length); |
191 | #endif // !V8_INTL_SUPPORT |
192 | } |
193 | |
194 | #ifndef V8_INTL_SUPPORT |
195 | // ES6 section 21.1.3.12 String.prototype.normalize ( [form] ) |
196 | // |
197 | // Simply checks the argument is valid and returns the string itself. |
198 | // If internationalization is enabled, then intl.js will override this function |
199 | // and provide the proper functionality, so this is just a fallback. |
200 | BUILTIN(StringPrototypeNormalize) { |
201 | HandleScope handle_scope(isolate); |
202 | TO_THIS_STRING(string, "String.prototype.normalize" ); |
203 | |
204 | Handle<Object> form_input = args.atOrUndefined(isolate, 1); |
205 | if (form_input->IsUndefined(isolate)) return *string; |
206 | |
207 | Handle<String> form; |
208 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, form, |
209 | Object::ToString(isolate, form_input)); |
210 | |
211 | if (!(String::Equals(isolate, form, |
212 | isolate->factory()->NewStringFromStaticChars("NFC" )) || |
213 | String::Equals(isolate, form, |
214 | isolate->factory()->NewStringFromStaticChars("NFD" )) || |
215 | String::Equals(isolate, form, |
216 | isolate->factory()->NewStringFromStaticChars("NFKC" )) || |
217 | String::Equals(isolate, form, |
218 | isolate->factory()->NewStringFromStaticChars("NFKD" )))) { |
219 | Handle<String> valid_forms = |
220 | isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD" ); |
221 | THROW_NEW_ERROR_RETURN_FAILURE( |
222 | isolate, |
223 | NewRangeError(MessageTemplate::kNormalizationForm, valid_forms)); |
224 | } |
225 | |
226 | return *string; |
227 | } |
228 | #endif // !V8_INTL_SUPPORT |
229 | |
230 | |
231 | #ifndef V8_INTL_SUPPORT |
232 | namespace { |
233 | |
234 | inline bool ToUpperOverflows(uc32 character) { |
235 | // y with umlauts and the micro sign are the only characters that stop |
236 | // fitting into one-byte when converting to uppercase. |
237 | static const uc32 yuml_code = 0xFF; |
238 | static const uc32 micro_code = 0xB5; |
239 | return (character == yuml_code || character == micro_code); |
240 | } |
241 | |
242 | template <class Converter> |
243 | V8_WARN_UNUSED_RESULT static Object ConvertCaseHelper( |
244 | Isolate* isolate, String string, SeqString result, int result_length, |
245 | unibrow::Mapping<Converter, 128>* mapping) { |
246 | DisallowHeapAllocation no_gc; |
247 | // We try this twice, once with the assumption that the result is no longer |
248 | // than the input and, if that assumption breaks, again with the exact |
249 | // length. This may not be pretty, but it is nicer than what was here before |
250 | // and I hereby claim my vaffel-is. |
251 | // |
252 | // NOTE: This assumes that the upper/lower case of an ASCII |
253 | // character is also ASCII. This is currently the case, but it |
254 | // might break in the future if we implement more context and locale |
255 | // dependent upper/lower conversions. |
256 | bool has_changed_character = false; |
257 | |
258 | // Convert all characters to upper case, assuming that they will fit |
259 | // in the buffer |
260 | StringCharacterStream stream(string); |
261 | unibrow::uchar chars[Converter::kMaxWidth]; |
262 | // We can assume that the string is not empty |
263 | uc32 current = stream.GetNext(); |
264 | bool ignore_overflow = Converter::kIsToLower || result->IsSeqTwoByteString(); |
265 | for (int i = 0; i < result_length;) { |
266 | bool has_next = stream.HasMore(); |
267 | uc32 next = has_next ? stream.GetNext() : 0; |
268 | int char_length = mapping->get(current, next, chars); |
269 | if (char_length == 0) { |
270 | // The case conversion of this character is the character itself. |
271 | result->Set(i, current); |
272 | i++; |
273 | } else if (char_length == 1 && |
274 | (ignore_overflow || !ToUpperOverflows(current))) { |
275 | // Common case: converting the letter resulted in one character. |
276 | DCHECK(static_cast<uc32>(chars[0]) != current); |
277 | result->Set(i, chars[0]); |
278 | has_changed_character = true; |
279 | i++; |
280 | } else if (result_length == string->length()) { |
281 | bool overflows = ToUpperOverflows(current); |
282 | // We've assumed that the result would be as long as the |
283 | // input but here is a character that converts to several |
284 | // characters. No matter, we calculate the exact length |
285 | // of the result and try the whole thing again. |
286 | // |
287 | // Note that this leaves room for optimization. We could just |
288 | // memcpy what we already have to the result string. Also, |
289 | // the result string is the last object allocated we could |
290 | // "realloc" it and probably, in the vast majority of cases, |
291 | // extend the existing string to be able to hold the full |
292 | // result. |
293 | int next_length = 0; |
294 | if (has_next) { |
295 | next_length = mapping->get(next, 0, chars); |
296 | if (next_length == 0) next_length = 1; |
297 | } |
298 | int current_length = i + char_length + next_length; |
299 | while (stream.HasMore()) { |
300 | current = stream.GetNext(); |
301 | overflows |= ToUpperOverflows(current); |
302 | // NOTE: we use 0 as the next character here because, while |
303 | // the next character may affect what a character converts to, |
304 | // it does not in any case affect the length of what it convert |
305 | // to. |
306 | int char_length = mapping->get(current, 0, chars); |
307 | if (char_length == 0) char_length = 1; |
308 | current_length += char_length; |
309 | if (current_length > String::kMaxLength) { |
310 | AllowHeapAllocation allocate_error_and_return; |
311 | THROW_NEW_ERROR_RETURN_FAILURE(isolate, |
312 | NewInvalidStringLengthError()); |
313 | } |
314 | } |
315 | // Try again with the real length. Return signed if we need |
316 | // to allocate a two-byte string for to uppercase. |
317 | return (overflows && !ignore_overflow) ? Smi::FromInt(-current_length) |
318 | : Smi::FromInt(current_length); |
319 | } else { |
320 | for (int j = 0; j < char_length; j++) { |
321 | result->Set(i, chars[j]); |
322 | i++; |
323 | } |
324 | has_changed_character = true; |
325 | } |
326 | current = next; |
327 | } |
328 | if (has_changed_character) { |
329 | return result; |
330 | } else { |
331 | // If we didn't actually change anything in doing the conversion |
332 | // we simple return the result and let the converted string |
333 | // become garbage; there is no reason to keep two identical strings |
334 | // alive. |
335 | return string; |
336 | } |
337 | } |
338 | |
339 | template <class Converter> |
340 | V8_WARN_UNUSED_RESULT static Object ConvertCase( |
341 | Handle<String> s, Isolate* isolate, |
342 | unibrow::Mapping<Converter, 128>* mapping) { |
343 | s = String::Flatten(isolate, s); |
344 | int length = s->length(); |
345 | // Assume that the string is not empty; we need this assumption later |
346 | if (length == 0) return *s; |
347 | |
348 | // Simpler handling of ASCII strings. |
349 | // |
350 | // NOTE: This assumes that the upper/lower case of an ASCII |
351 | // character is also ASCII. This is currently the case, but it |
352 | // might break in the future if we implement more context and locale |
353 | // dependent upper/lower conversions. |
354 | if (String::IsOneByteRepresentationUnderneath(*s)) { |
355 | // Same length as input. |
356 | Handle<SeqOneByteString> result = |
357 | isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); |
358 | DisallowHeapAllocation no_gc; |
359 | String::FlatContent flat_content = s->GetFlatContent(no_gc); |
360 | DCHECK(flat_content.IsFlat()); |
361 | bool has_changed_character = false; |
362 | int index_to_first_unprocessed = FastAsciiConvert<Converter::kIsToLower>( |
363 | reinterpret_cast<char*>(result->GetChars(no_gc)), |
364 | reinterpret_cast<const char*>(flat_content.ToOneByteVector().start()), |
365 | length, &has_changed_character); |
366 | // If not ASCII, we discard the result and take the 2 byte path. |
367 | if (index_to_first_unprocessed == length) |
368 | return has_changed_character ? *result : *s; |
369 | } |
370 | |
371 | Handle<SeqString> result; // Same length as input. |
372 | if (s->IsOneByteRepresentation()) { |
373 | result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); |
374 | } else { |
375 | result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked(); |
376 | } |
377 | |
378 | Object answer = ConvertCaseHelper(isolate, *s, *result, length, mapping); |
379 | if (answer->IsException(isolate) || answer->IsString()) return answer; |
380 | |
381 | DCHECK(answer->IsSmi()); |
382 | length = Smi::ToInt(answer); |
383 | if (s->IsOneByteRepresentation() && length > 0) { |
384 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
385 | isolate, result, isolate->factory()->NewRawOneByteString(length)); |
386 | } else { |
387 | if (length < 0) length = -length; |
388 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
389 | isolate, result, isolate->factory()->NewRawTwoByteString(length)); |
390 | } |
391 | return ConvertCaseHelper(isolate, *s, *result, length, mapping); |
392 | } |
393 | |
394 | } // namespace |
395 | |
396 | BUILTIN(StringPrototypeToLocaleLowerCase) { |
397 | HandleScope scope(isolate); |
398 | TO_THIS_STRING(string, "String.prototype.toLocaleLowerCase" ); |
399 | return ConvertCase(string, isolate, |
400 | isolate->runtime_state()->to_lower_mapping()); |
401 | } |
402 | |
403 | BUILTIN(StringPrototypeToLocaleUpperCase) { |
404 | HandleScope scope(isolate); |
405 | TO_THIS_STRING(string, "String.prototype.toLocaleUpperCase" ); |
406 | return ConvertCase(string, isolate, |
407 | isolate->runtime_state()->to_upper_mapping()); |
408 | } |
409 | |
410 | BUILTIN(StringPrototypeToLowerCase) { |
411 | HandleScope scope(isolate); |
412 | TO_THIS_STRING(string, "String.prototype.toLowerCase" ); |
413 | return ConvertCase(string, isolate, |
414 | isolate->runtime_state()->to_lower_mapping()); |
415 | } |
416 | |
417 | BUILTIN(StringPrototypeToUpperCase) { |
418 | HandleScope scope(isolate); |
419 | TO_THIS_STRING(string, "String.prototype.toUpperCase" ); |
420 | return ConvertCase(string, isolate, |
421 | isolate->runtime_state()->to_upper_mapping()); |
422 | } |
423 | #endif // !V8_INTL_SUPPORT |
424 | |
425 | // ES6 #sec-string.prototype.raw |
426 | BUILTIN(StringRaw) { |
427 | HandleScope scope(isolate); |
428 | Handle<Object> templ = args.atOrUndefined(isolate, 1); |
429 | const uint32_t argc = args.length(); |
430 | Handle<String> raw_string = |
431 | isolate->factory()->NewStringFromAsciiChecked("raw" ); |
432 | |
433 | Handle<Object> cooked; |
434 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, cooked, |
435 | Object::ToObject(isolate, templ)); |
436 | |
437 | Handle<Object> raw; |
438 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
439 | isolate, raw, Object::GetProperty(isolate, cooked, raw_string)); |
440 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw, |
441 | Object::ToObject(isolate, raw)); |
442 | Handle<Object> raw_len; |
443 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
444 | isolate, raw_len, |
445 | Object::GetProperty(isolate, raw, isolate->factory()->length_string())); |
446 | |
447 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw_len, |
448 | Object::ToLength(isolate, raw_len)); |
449 | |
450 | IncrementalStringBuilder result_builder(isolate); |
451 | // Intentional spec violation: we ignore {length} values >= 2^32, because |
452 | // assuming non-empty chunks they would generate too-long strings anyway. |
453 | const double raw_len_number = raw_len->Number(); |
454 | const uint32_t length = raw_len_number > std::numeric_limits<uint32_t>::max() |
455 | ? std::numeric_limits<uint32_t>::max() |
456 | : static_cast<uint32_t>(raw_len_number); |
457 | if (length > 0) { |
458 | Handle<Object> first_element; |
459 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, first_element, |
460 | Object::GetElement(isolate, raw, 0)); |
461 | |
462 | Handle<String> first_string; |
463 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
464 | isolate, first_string, Object::ToString(isolate, first_element)); |
465 | result_builder.AppendString(first_string); |
466 | |
467 | for (uint32_t i = 1, arg_i = 2; i < length; i++, arg_i++) { |
468 | if (arg_i < argc) { |
469 | Handle<String> argument_string; |
470 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
471 | isolate, argument_string, |
472 | Object::ToString(isolate, args.at(arg_i))); |
473 | result_builder.AppendString(argument_string); |
474 | } |
475 | |
476 | Handle<Object> element; |
477 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element, |
478 | Object::GetElement(isolate, raw, i)); |
479 | |
480 | Handle<String> element_string; |
481 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element_string, |
482 | Object::ToString(isolate, element)); |
483 | result_builder.AppendString(element_string); |
484 | } |
485 | } |
486 | |
487 | RETURN_RESULT_OR_FAILURE(isolate, result_builder.Finish()); |
488 | } |
489 | |
490 | } // namespace internal |
491 | } // namespace v8 |
492 | |