1// Copyright 2016 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "src/builtins/builtins-utils-inl.h"
6#include "src/builtins/builtins.h"
7#include "src/conversions.h"
8#include "src/counters.h"
9#include "src/heap/heap-inl.h" // For ToBoolean. TODO(jkummerow): Drop.
10#include "src/objects-inl.h"
11#ifdef V8_INTL_SUPPORT
12#include "src/objects/intl-objects.h"
13#endif
14#include "src/regexp/regexp-utils.h"
15#include "src/string-builder-inl.h"
16#include "src/string-case.h"
17#include "src/unicode-inl.h"
18#include "src/unicode.h"
19
20namespace v8 {
21namespace internal {
22
23namespace { // for String.fromCodePoint
24
25bool IsValidCodePoint(Isolate* isolate, Handle<Object> value) {
26 if (!value->IsNumber() &&
27 !Object::ToNumber(isolate, value).ToHandle(&value)) {
28 return false;
29 }
30
31 if (Object::ToInteger(isolate, value).ToHandleChecked()->Number() !=
32 value->Number()) {
33 return false;
34 }
35
36 if (value->Number() < 0 || value->Number() > 0x10FFFF) {
37 return false;
38 }
39
40 return true;
41}
42
43uc32 NextCodePoint(Isolate* isolate, BuiltinArguments args, int index) {
44 Handle<Object> value = args.at(1 + index);
45 ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, value,
46 Object::ToNumber(isolate, value), -1);
47 if (!IsValidCodePoint(isolate, value)) {
48 isolate->Throw(*isolate->factory()->NewRangeError(
49 MessageTemplate::kInvalidCodePoint, value));
50 return -1;
51 }
52 return DoubleToUint32(value->Number());
53}
54
55} // namespace
56
57// ES6 section 21.1.2.2 String.fromCodePoint ( ...codePoints )
58BUILTIN(StringFromCodePoint) {
59 HandleScope scope(isolate);
60 int const length = args.length() - 1;
61 if (length == 0) return ReadOnlyRoots(isolate).empty_string();
62 DCHECK_LT(0, length);
63
64 // Optimistically assume that the resulting String contains only one byte
65 // characters.
66 std::vector<uint8_t> one_byte_buffer;
67 one_byte_buffer.reserve(length);
68 uc32 code = 0;
69 int index;
70 for (index = 0; index < length; index++) {
71 code = NextCodePoint(isolate, args, index);
72 if (code < 0) {
73 return ReadOnlyRoots(isolate).exception();
74 }
75 if (code > String::kMaxOneByteCharCode) {
76 break;
77 }
78 one_byte_buffer.push_back(code);
79 }
80
81 if (index == length) {
82 RETURN_RESULT_OR_FAILURE(
83 isolate, isolate->factory()->NewStringFromOneByte(Vector<uint8_t>(
84 one_byte_buffer.data(), one_byte_buffer.size())));
85 }
86
87 std::vector<uc16> two_byte_buffer;
88 two_byte_buffer.reserve(length - index);
89
90 while (true) {
91 if (code <= static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
92 two_byte_buffer.push_back(code);
93 } else {
94 two_byte_buffer.push_back(unibrow::Utf16::LeadSurrogate(code));
95 two_byte_buffer.push_back(unibrow::Utf16::TrailSurrogate(code));
96 }
97
98 if (++index == length) {
99 break;
100 }
101 code = NextCodePoint(isolate, args, index);
102 if (code < 0) {
103 return ReadOnlyRoots(isolate).exception();
104 }
105 }
106
107 Handle<SeqTwoByteString> result;
108 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
109 isolate, result,
110 isolate->factory()->NewRawTwoByteString(
111 static_cast<int>(one_byte_buffer.size() + two_byte_buffer.size())));
112
113 DisallowHeapAllocation no_gc;
114 CopyChars(result->GetChars(no_gc), one_byte_buffer.data(),
115 one_byte_buffer.size());
116 CopyChars(result->GetChars(no_gc) + one_byte_buffer.size(),
117 two_byte_buffer.data(), two_byte_buffer.size());
118
119 return *result;
120}
121
122// ES6 section 21.1.3.9
123// String.prototype.lastIndexOf ( searchString [ , position ] )
124BUILTIN(StringPrototypeLastIndexOf) {
125 HandleScope handle_scope(isolate);
126 return String::LastIndexOf(isolate, args.receiver(),
127 args.atOrUndefined(isolate, 1),
128 args.atOrUndefined(isolate, 2));
129}
130
131// ES6 section 21.1.3.10 String.prototype.localeCompare ( that )
132//
// This function is implementation specific. With V8_INTL_SUPPORT the
// comparison is delegated to Intl::StringLocaleCompare; without it, we do not
// do anything locale specific.
135BUILTIN(StringPrototypeLocaleCompare) {
136 HandleScope handle_scope(isolate);
137
138 isolate->CountUsage(v8::Isolate::UseCounterFeature::kStringLocaleCompare);
139
140#ifdef V8_INTL_SUPPORT
141 TO_THIS_STRING(str1, "String.prototype.localeCompare");
142 Handle<String> str2;
143 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
144 isolate, str2, Object::ToString(isolate, args.atOrUndefined(isolate, 1)));
145 RETURN_RESULT_OR_FAILURE(
146 isolate, Intl::StringLocaleCompare(isolate, str1, str2,
147 args.atOrUndefined(isolate, 2),
148 args.atOrUndefined(isolate, 3)));
149#else
150 DCHECK_EQ(2, args.length());
151
152 TO_THIS_STRING(str1, "String.prototype.localeCompare");
153 Handle<String> str2;
154 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, str2,
155 Object::ToString(isolate, args.at(1)));
156
157 if (str1.is_identical_to(str2)) return Smi::kZero; // Equal.
158 int str1_length = str1->length();
159 int str2_length = str2->length();
160
161 // Decide trivial cases without flattening.
162 if (str1_length == 0) {
163 if (str2_length == 0) return Smi::kZero; // Equal.
164 return Smi::FromInt(-str2_length);
165 } else {
166 if (str2_length == 0) return Smi::FromInt(str1_length);
167 }
168
169 int end = str1_length < str2_length ? str1_length : str2_length;
170
171 // No need to flatten if we are going to find the answer on the first
172 // character. At this point we know there is at least one character
173 // in each string, due to the trivial case handling above.
174 int d = str1->Get(0) - str2->Get(0);
175 if (d != 0) return Smi::FromInt(d);
176
177 str1 = String::Flatten(isolate, str1);
178 str2 = String::Flatten(isolate, str2);
179
180 DisallowHeapAllocation no_gc;
181 String::FlatContent flat1 = str1->GetFlatContent(no_gc);
182 String::FlatContent flat2 = str2->GetFlatContent(no_gc);
183
184 for (int i = 0; i < end; i++) {
185 if (flat1.Get(i) != flat2.Get(i)) {
186 return Smi::FromInt(flat1.Get(i) - flat2.Get(i));
187 }
188 }
189
190 return Smi::FromInt(str1_length - str2_length);
191#endif // !V8_INTL_SUPPORT
192}
193
194#ifndef V8_INTL_SUPPORT
195// ES6 section 21.1.3.12 String.prototype.normalize ( [form] )
196//
197// Simply checks the argument is valid and returns the string itself.
198// If internationalization is enabled, then intl.js will override this function
199// and provide the proper functionality, so this is just a fallback.
200BUILTIN(StringPrototypeNormalize) {
201 HandleScope handle_scope(isolate);
202 TO_THIS_STRING(string, "String.prototype.normalize");
203
204 Handle<Object> form_input = args.atOrUndefined(isolate, 1);
205 if (form_input->IsUndefined(isolate)) return *string;
206
207 Handle<String> form;
208 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, form,
209 Object::ToString(isolate, form_input));
210
211 if (!(String::Equals(isolate, form,
212 isolate->factory()->NewStringFromStaticChars("NFC")) ||
213 String::Equals(isolate, form,
214 isolate->factory()->NewStringFromStaticChars("NFD")) ||
215 String::Equals(isolate, form,
216 isolate->factory()->NewStringFromStaticChars("NFKC")) ||
217 String::Equals(isolate, form,
218 isolate->factory()->NewStringFromStaticChars("NFKD")))) {
219 Handle<String> valid_forms =
220 isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
221 THROW_NEW_ERROR_RETURN_FAILURE(
222 isolate,
223 NewRangeError(MessageTemplate::kNormalizationForm, valid_forms));
224 }
225
226 return *string;
227}
228#endif // !V8_INTL_SUPPORT
229
230
231#ifndef V8_INTL_SUPPORT
232namespace {
233
234inline bool ToUpperOverflows(uc32 character) {
235 // y with umlauts and the micro sign are the only characters that stop
236 // fitting into one-byte when converting to uppercase.
237 static const uc32 yuml_code = 0xFF;
238 static const uc32 micro_code = 0xB5;
239 return (character == yuml_code || character == micro_code);
240}
241
242template <class Converter>
243V8_WARN_UNUSED_RESULT static Object ConvertCaseHelper(
244 Isolate* isolate, String string, SeqString result, int result_length,
245 unibrow::Mapping<Converter, 128>* mapping) {
246 DisallowHeapAllocation no_gc;
247 // We try this twice, once with the assumption that the result is no longer
248 // than the input and, if that assumption breaks, again with the exact
249 // length. This may not be pretty, but it is nicer than what was here before
250 // and I hereby claim my vaffel-is.
251 //
252 // NOTE: This assumes that the upper/lower case of an ASCII
253 // character is also ASCII. This is currently the case, but it
254 // might break in the future if we implement more context and locale
255 // dependent upper/lower conversions.
256 bool has_changed_character = false;
257
258 // Convert all characters to upper case, assuming that they will fit
259 // in the buffer
260 StringCharacterStream stream(string);
261 unibrow::uchar chars[Converter::kMaxWidth];
262 // We can assume that the string is not empty
263 uc32 current = stream.GetNext();
264 bool ignore_overflow = Converter::kIsToLower || result->IsSeqTwoByteString();
265 for (int i = 0; i < result_length;) {
266 bool has_next = stream.HasMore();
267 uc32 next = has_next ? stream.GetNext() : 0;
268 int char_length = mapping->get(current, next, chars);
269 if (char_length == 0) {
270 // The case conversion of this character is the character itself.
271 result->Set(i, current);
272 i++;
273 } else if (char_length == 1 &&
274 (ignore_overflow || !ToUpperOverflows(current))) {
275 // Common case: converting the letter resulted in one character.
276 DCHECK(static_cast<uc32>(chars[0]) != current);
277 result->Set(i, chars[0]);
278 has_changed_character = true;
279 i++;
280 } else if (result_length == string->length()) {
281 bool overflows = ToUpperOverflows(current);
282 // We've assumed that the result would be as long as the
283 // input but here is a character that converts to several
284 // characters. No matter, we calculate the exact length
285 // of the result and try the whole thing again.
286 //
287 // Note that this leaves room for optimization. We could just
288 // memcpy what we already have to the result string. Also,
289 // the result string is the last object allocated we could
290 // "realloc" it and probably, in the vast majority of cases,
291 // extend the existing string to be able to hold the full
292 // result.
293 int next_length = 0;
294 if (has_next) {
295 next_length = mapping->get(next, 0, chars);
296 if (next_length == 0) next_length = 1;
297 }
298 int current_length = i + char_length + next_length;
299 while (stream.HasMore()) {
300 current = stream.GetNext();
301 overflows |= ToUpperOverflows(current);
302 // NOTE: we use 0 as the next character here because, while
303 // the next character may affect what a character converts to,
304 // it does not in any case affect the length of what it convert
305 // to.
306 int char_length = mapping->get(current, 0, chars);
307 if (char_length == 0) char_length = 1;
308 current_length += char_length;
309 if (current_length > String::kMaxLength) {
310 AllowHeapAllocation allocate_error_and_return;
311 THROW_NEW_ERROR_RETURN_FAILURE(isolate,
312 NewInvalidStringLengthError());
313 }
314 }
315 // Try again with the real length. Return signed if we need
316 // to allocate a two-byte string for to uppercase.
317 return (overflows && !ignore_overflow) ? Smi::FromInt(-current_length)
318 : Smi::FromInt(current_length);
319 } else {
320 for (int j = 0; j < char_length; j++) {
321 result->Set(i, chars[j]);
322 i++;
323 }
324 has_changed_character = true;
325 }
326 current = next;
327 }
328 if (has_changed_character) {
329 return result;
330 } else {
331 // If we didn't actually change anything in doing the conversion
332 // we simple return the result and let the converted string
333 // become garbage; there is no reason to keep two identical strings
334 // alive.
335 return string;
336 }
337}
338
339template <class Converter>
340V8_WARN_UNUSED_RESULT static Object ConvertCase(
341 Handle<String> s, Isolate* isolate,
342 unibrow::Mapping<Converter, 128>* mapping) {
343 s = String::Flatten(isolate, s);
344 int length = s->length();
345 // Assume that the string is not empty; we need this assumption later
346 if (length == 0) return *s;
347
348 // Simpler handling of ASCII strings.
349 //
350 // NOTE: This assumes that the upper/lower case of an ASCII
351 // character is also ASCII. This is currently the case, but it
352 // might break in the future if we implement more context and locale
353 // dependent upper/lower conversions.
354 if (String::IsOneByteRepresentationUnderneath(*s)) {
355 // Same length as input.
356 Handle<SeqOneByteString> result =
357 isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
358 DisallowHeapAllocation no_gc;
359 String::FlatContent flat_content = s->GetFlatContent(no_gc);
360 DCHECK(flat_content.IsFlat());
361 bool has_changed_character = false;
362 int index_to_first_unprocessed = FastAsciiConvert<Converter::kIsToLower>(
363 reinterpret_cast<char*>(result->GetChars(no_gc)),
364 reinterpret_cast<const char*>(flat_content.ToOneByteVector().start()),
365 length, &has_changed_character);
366 // If not ASCII, we discard the result and take the 2 byte path.
367 if (index_to_first_unprocessed == length)
368 return has_changed_character ? *result : *s;
369 }
370
371 Handle<SeqString> result; // Same length as input.
372 if (s->IsOneByteRepresentation()) {
373 result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
374 } else {
375 result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked();
376 }
377
378 Object answer = ConvertCaseHelper(isolate, *s, *result, length, mapping);
379 if (answer->IsException(isolate) || answer->IsString()) return answer;
380
381 DCHECK(answer->IsSmi());
382 length = Smi::ToInt(answer);
383 if (s->IsOneByteRepresentation() && length > 0) {
384 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
385 isolate, result, isolate->factory()->NewRawOneByteString(length));
386 } else {
387 if (length < 0) length = -length;
388 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
389 isolate, result, isolate->factory()->NewRawTwoByteString(length));
390 }
391 return ConvertCaseHelper(isolate, *s, *result, length, mapping);
392}
393
394} // namespace
395
396BUILTIN(StringPrototypeToLocaleLowerCase) {
397 HandleScope scope(isolate);
398 TO_THIS_STRING(string, "String.prototype.toLocaleLowerCase");
399 return ConvertCase(string, isolate,
400 isolate->runtime_state()->to_lower_mapping());
401}
402
403BUILTIN(StringPrototypeToLocaleUpperCase) {
404 HandleScope scope(isolate);
405 TO_THIS_STRING(string, "String.prototype.toLocaleUpperCase");
406 return ConvertCase(string, isolate,
407 isolate->runtime_state()->to_upper_mapping());
408}
409
410BUILTIN(StringPrototypeToLowerCase) {
411 HandleScope scope(isolate);
412 TO_THIS_STRING(string, "String.prototype.toLowerCase");
413 return ConvertCase(string, isolate,
414 isolate->runtime_state()->to_lower_mapping());
415}
416
417BUILTIN(StringPrototypeToUpperCase) {
418 HandleScope scope(isolate);
419 TO_THIS_STRING(string, "String.prototype.toUpperCase");
420 return ConvertCase(string, isolate,
421 isolate->runtime_state()->to_upper_mapping());
422}
423#endif // !V8_INTL_SUPPORT
424
// ES6 #sec-string.raw
426BUILTIN(StringRaw) {
427 HandleScope scope(isolate);
428 Handle<Object> templ = args.atOrUndefined(isolate, 1);
429 const uint32_t argc = args.length();
430 Handle<String> raw_string =
431 isolate->factory()->NewStringFromAsciiChecked("raw");
432
433 Handle<Object> cooked;
434 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, cooked,
435 Object::ToObject(isolate, templ));
436
437 Handle<Object> raw;
438 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
439 isolate, raw, Object::GetProperty(isolate, cooked, raw_string));
440 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw,
441 Object::ToObject(isolate, raw));
442 Handle<Object> raw_len;
443 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
444 isolate, raw_len,
445 Object::GetProperty(isolate, raw, isolate->factory()->length_string()));
446
447 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw_len,
448 Object::ToLength(isolate, raw_len));
449
450 IncrementalStringBuilder result_builder(isolate);
451 // Intentional spec violation: we ignore {length} values >= 2^32, because
452 // assuming non-empty chunks they would generate too-long strings anyway.
453 const double raw_len_number = raw_len->Number();
454 const uint32_t length = raw_len_number > std::numeric_limits<uint32_t>::max()
455 ? std::numeric_limits<uint32_t>::max()
456 : static_cast<uint32_t>(raw_len_number);
457 if (length > 0) {
458 Handle<Object> first_element;
459 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, first_element,
460 Object::GetElement(isolate, raw, 0));
461
462 Handle<String> first_string;
463 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
464 isolate, first_string, Object::ToString(isolate, first_element));
465 result_builder.AppendString(first_string);
466
467 for (uint32_t i = 1, arg_i = 2; i < length; i++, arg_i++) {
468 if (arg_i < argc) {
469 Handle<String> argument_string;
470 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
471 isolate, argument_string,
472 Object::ToString(isolate, args.at(arg_i)));
473 result_builder.AppendString(argument_string);
474 }
475
476 Handle<Object> element;
477 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element,
478 Object::GetElement(isolate, raw, i));
479
480 Handle<String> element_string;
481 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element_string,
482 Object::ToString(isolate, element));
483 result_builder.AppendString(element_string);
484 }
485 }
486
487 RETURN_RESULT_OR_FAILURE(isolate, result_builder.Finish());
488}
489
490} // namespace internal
491} // namespace v8
492