builtins-string.cc source code [v8/src/builtins/builtins-string.cc]

1	// Copyright 2016 the V8 project authors. All rights reserved.
2	// Use of this source code is governed by a BSD-style license that can be
3	// found in the LICENSE file.
4
5	#include "src/builtins/builtins-utils-inl.h"
6	#include "src/builtins/builtins.h"
7	#include "src/conversions.h"
8	#include "src/counters.h"
9	#include "src/heap/heap-inl.h" // For ToBoolean. TODO(jkummerow): Drop.
10	#include "src/objects-inl.h"
11	#ifdef V8_INTL_SUPPORT
12	#include "src/objects/intl-objects.h"
13	#endif
14	#include "src/regexp/regexp-utils.h"
15	#include "src/string-builder-inl.h"
16	#include "src/string-case.h"
17	#include "src/unicode-inl.h"
18	#include "src/unicode.h"
19
20	namespace v8 {
21	namespace internal {
22
23	namespace { // for String.fromCodePoint
24
25	bool IsValidCodePoint(Isolate* isolate, Handle<Object> value) {
26	if (!value ->IsNumber() &&
27	!Object::ToNumber(isolate, value).ToHandle(&value)) {
28	return false;
29	}
30
31	if (Object::ToInteger(isolate, value).ToHandleChecked()->Number() !=
32	value ->Number()) {
33	return false;
34	}
35
36	if (value ->Number() < `0` \|\| value ->Number() > `0x10FFFF`) {
37	return false;
38	}
39
40	return true;
41	}
42
43	uc32 NextCodePoint(Isolate* isolate, BuiltinArguments args, int index) {
44	Handle<Object> value = args.at(`1` + index);
45	ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, value,
46	Object::ToNumber(isolate, value), -`1`);
47	if (!IsValidCodePoint(isolate, value)) {
48	isolate->Throw(*isolate->factory()->NewRangeError(
49	MessageTemplate::kInvalidCodePoint, value));
50	return -`1`;
51	}
52	return DoubleToUint32(value ->Number());
53	}
54
55	} // namespace
56
57	// ES6 section 21.1.2.2 String.fromCodePoint ( ...codePoints )
58	BUILTIN(StringFromCodePoint) {
59	HandleScope scope(isolate);
60	int const length = args.length() - `1`;
61	if (length == `0`) return ReadOnlyRoots (isolate).empty_string();
62	DCHECK_LT(`0`, length);
63
64	// Optimistically assume that the resulting String contains only one byte
65	// characters.
66	std::vector<uint8_t> one_byte_buffer;
67	one_byte_buffer.reserve(length);
68	uc32 code = `0`;
69	int index;
70	for (index = `0`; index < length; index++) {
71	code = NextCodePoint(isolate, args, index);
72	if (code < `0`) {
73	return ReadOnlyRoots (isolate).exception();
74	}
75	if (code > String::kMaxOneByteCharCode) {
76	break;
77	}
78	one_byte_buffer.push_back(code);
79	}
80
81	if (index == length) {
82	RETURN_RESULT_OR_FAILURE(
83	isolate, isolate->factory()->NewStringFromOneByte(Vector<uint8_t>(
84	one_byte_buffer.data(), one_byte_buffer.size())));
85	}
86
87	std::vector<uc16> two_byte_buffer;
88	two_byte_buffer.reserve(length - index);
89
90	while (true) {
91	if (code <= static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
92	two_byte_buffer.push_back(code);
93	} else {
94	two_byte_buffer.push_back(unibrow::Utf16::LeadSurrogate(code));
95	two_byte_buffer.push_back(unibrow::Utf16::TrailSurrogate(code));
96	}
97
98	if (++index == length) {
99	break;
100	}
101	code = NextCodePoint(isolate, args, index);
102	if (code < `0`) {
103	return ReadOnlyRoots (isolate).exception();
104	}
105	}
106
107	Handle<SeqTwoByteString> result;
108	ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
109	isolate, result,
110	isolate->factory()->NewRawTwoByteString(
111	static_cast<int>(one_byte_buffer.size() + two_byte_buffer.size())));
112
113	DisallowHeapAllocation no_gc;
114	CopyChars(result ->GetChars(no_gc), one_byte_buffer.data(),
115	one_byte_buffer.size());
116	CopyChars(result ->GetChars(no_gc) + one_byte_buffer.size(),
117	two_byte_buffer.data(), two_byte_buffer.size());
118
119	return *result;
120	}
121
122	// ES6 section 21.1.3.9
123	// String.prototype.lastIndexOf ( searchString [ , position ] )
124	BUILTIN(StringPrototypeLastIndexOf) {
125	HandleScope handle_scope(isolate);
126	return String::LastIndexOf(isolate, args.receiver(),
127	args.atOrUndefined(isolate, `1`),
128	args.atOrUndefined(isolate, `2`));
129	}
130
131	// ES6 section 21.1.3.10 String.prototype.localeCompare ( that )
132	//
133	// This function is implementation specific. For now, we do not
134	// do anything locale specific.
135	BUILTIN(StringPrototypeLocaleCompare) {
136	HandleScope handle_scope(isolate);
137
138	isolate->CountUsage(v8::Isolate::UseCounterFeature::kStringLocaleCompare);
139
140	#ifdef V8_INTL_SUPPORT
141	TO_THIS_STRING(str1, "String.prototype.localeCompare");
142	Handle<String> str2;
143	ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
144	isolate, str2, Object::ToString(isolate, args.atOrUndefined(isolate, `1`)));
145	RETURN_RESULT_OR_FAILURE(
146	isolate, Intl::StringLocaleCompare(isolate, str1, str2,
147	args.atOrUndefined(isolate, `2`),
148	args.atOrUndefined(isolate, `3`)));
149	#else
150	DCHECK_EQ(`2`, args.length());
151
152	TO_THIS_STRING(str1, "String.prototype.localeCompare");
153	Handle<String> str2;
154	ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, str2,
155	Object::ToString(isolate, args.at(`1`)));
156
157	if (str1.is_identical_to(str2)) return Smi::kZero; // Equal.
158	int str1_length = str1->length();
159	int str2_length = str2->length();
160
161	// Decide trivial cases without flattening.
162	if (str1_length == `0`) {
163	if (str2_length == `0`) return Smi::kZero; // Equal.
164	return Smi::FromInt(-str2_length);
165	} else {
166	if (str2_length == `0`) return Smi::FromInt(str1_length);
167	}
168
169	int end = str1_length < str2_length ? str1_length : str2_length;
170
171	// No need to flatten if we are going to find the answer on the first
172	// character. At this point we know there is at least one character
173	// in each string, due to the trivial case handling above.
174	int d = str1->Get(`0`) - str2->Get(`0`);
175	if (d != `0`) return Smi::FromInt(d);
176
177	str1 = String::Flatten(isolate, str1);
178	str2 = String::Flatten(isolate, str2);
179
180	DisallowHeapAllocation no_gc;
181	String::FlatContent flat1 = str1->GetFlatContent(no_gc);
182	String::FlatContent flat2 = str2->GetFlatContent(no_gc);
183
184	for (int i = `0`; i < end; i++) {
185	if (flat1.Get(i) != flat2.Get(i)) {
186	return Smi::FromInt(flat1.Get(i) - flat2.Get(i));
187	}
188	}
189
190	return Smi::FromInt(str1_length - str2_length);
191	#endif // !V8_INTL_SUPPORT
192	}
193
194	#ifndef V8_INTL_SUPPORT
195	// ES6 section 21.1.3.12 String.prototype.normalize ( [form] )
196	//
197	// Simply checks the argument is valid and returns the string itself.
198	// If internationalization is enabled, then intl.js will override this function
199	// and provide the proper functionality, so this is just a fallback.
200	BUILTIN(StringPrototypeNormalize) {
201	HandleScope handle_scope(isolate);
202	TO_THIS_STRING(string, "String.prototype.normalize");
203
204	Handle<Object> form_input = args.atOrUndefined(isolate, `1`);
205	if (form_input->IsUndefined(isolate)) return *string;
206
207	Handle<String> form;
208	ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, form,
209	Object::ToString(isolate, form_input));
210
211	if (!(String::Equals(isolate, form,
212	isolate->factory()->NewStringFromStaticChars("NFC")) \|\|
213	String::Equals(isolate, form,
214	isolate->factory()->NewStringFromStaticChars("NFD")) \|\|
215	String::Equals(isolate, form,
216	isolate->factory()->NewStringFromStaticChars("NFKC")) \|\|
217	String::Equals(isolate, form,
218	isolate->factory()->NewStringFromStaticChars("NFKD")))) {
219	Handle<String> valid_forms =
220	isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
221	THROW_NEW_ERROR_RETURN_FAILURE(
222	isolate,
223	NewRangeError(MessageTemplate::kNormalizationForm, valid_forms));
224	}
225
226	return *string;
227	}
228	#endif // !V8_INTL_SUPPORT
229
230
231	#ifndef V8_INTL_SUPPORT
232	namespace {
233
234	inline bool ToUpperOverflows(uc32 character) {
235	// y with umlauts and the micro sign are the only characters that stop
236	// fitting into one-byte when converting to uppercase.
237	static const uc32 yuml_code = `0xFF`;
238	static const uc32 micro_code = `0xB5`;
239	return (character == yuml_code \|\| character == micro_code);
240	}
241
242	template <class Converter>
243	V8_WARN_UNUSED_RESULT static Object ConvertCaseHelper(
244	Isolate* isolate, String string, SeqString result, int result_length,
245	unibrow::Mapping<Converter, `128`>* mapping) {
246	DisallowHeapAllocation no_gc;
247	// We try this twice, once with the assumption that the result is no longer
248	// than the input and, if that assumption breaks, again with the exact
249	// length. This may not be pretty, but it is nicer than what was here before
250	// and I hereby claim my vaffel-is.
251	//
252	// NOTE: This assumes that the upper/lower case of an ASCII
253	// character is also ASCII. This is currently the case, but it
254	// might break in the future if we implement more context and locale
255	// dependent upper/lower conversions.
256	bool has_changed_character = false;
257
258	// Convert all characters to upper case, assuming that they will fit
259	// in the buffer
260	StringCharacterStream stream(string);
261	unibrow::uchar chars[Converter::kMaxWidth];
262	// We can assume that the string is not empty
263	uc32 current = stream.GetNext();
264	bool ignore_overflow = Converter::kIsToLower \|\| result->IsSeqTwoByteString();
265	for (int i = `0`; i < result_length;) {
266	bool has_next = stream.HasMore();
267	uc32 next = has_next ? stream.GetNext() : `0`;
268	int char_length = mapping->get(current, next, chars);
269	if (char_length == `0`) {
270	// The case conversion of this character is the character itself.
271	result->Set(i, current);
272	i++;
273	} else if (char_length == `1` &&
274	(ignore_overflow \|\| !ToUpperOverflows(current))) {
275	// Common case: converting the letter resulted in one character.
276	DCHECK(static_cast<uc32>(chars[`0`]) != current);
277	result->Set(i, chars[`0`]);
278	has_changed_character = true;
279	i++;
280	} else if (result_length == string->length()) {
281	bool overflows = ToUpperOverflows(current);
282	// We've assumed that the result would be as long as the
283	// input but here is a character that converts to several
284	// characters. No matter, we calculate the exact length
285	// of the result and try the whole thing again.
286	//
287	// Note that this leaves room for optimization. We could just
288	// memcpy what we already have to the result string. Also,
289	// the result string is the last object allocated we could
290	// "realloc" it and probably, in the vast majority of cases,
291	// extend the existing string to be able to hold the full
292	// result.
293	int next_length = `0`;
294	if (has_next) {
295	next_length = mapping->get(next, `0`, chars);
296	if (next_length == `0`) next_length = `1`;
297	}
298	int current_length = i + char_length + next_length;
299	while (stream.HasMore()) {
300	current = stream.GetNext();
301	overflows \|= ToUpperOverflows(current);
302	// NOTE: we use 0 as the next character here because, while
303	// the next character may affect what a character converts to,
304	// it does not in any case affect the length of what it convert
305	// to.
306	int char_length = mapping->get(current, `0`, chars);
307	if (char_length == `0`) char_length = `1`;
308	current_length += char_length;
309	if (current_length > String::kMaxLength) {
310	AllowHeapAllocation allocate_error_and_return;
311	THROW_NEW_ERROR_RETURN_FAILURE(isolate,
312	NewInvalidStringLengthError());
313	}
314	}
315	// Try again with the real length. Return signed if we need
316	// to allocate a two-byte string for to uppercase.
317	return (overflows && !ignore_overflow) ? Smi::FromInt(-current_length)
318	: Smi::FromInt(current_length);
319	} else {
320	for (int j = `0`; j < char_length; j++) {
321	result->Set(i, chars[j]);
322	i++;
323	}
324	has_changed_character = true;
325	}
326	current = next;
327	}
328	if (has_changed_character) {
329	return result;
330	} else {
331	// If we didn't actually change anything in doing the conversion
332	// we simple return the result and let the converted string
333	// become garbage; there is no reason to keep two identical strings
334	// alive.
335	return string;
336	}
337	}
338
339	template <class Converter>
340	V8_WARN_UNUSED_RESULT static Object ConvertCase(
341	Handle<String> s, Isolate* isolate,
342	unibrow::Mapping<Converter, `128`>* mapping) {
343	s = String::Flatten(isolate, s);
344	int length = s->length();
345	// Assume that the string is not empty; we need this assumption later
346	if (length == `0`) return *s;
347
348	// Simpler handling of ASCII strings.
349	//
350	// NOTE: This assumes that the upper/lower case of an ASCII
351	// character is also ASCII. This is currently the case, but it
352	// might break in the future if we implement more context and locale
353	// dependent upper/lower conversions.
354	if (String::IsOneByteRepresentationUnderneath(*s)) {
355	// Same length as input.
356	Handle<SeqOneByteString> result =
357	isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
358	DisallowHeapAllocation no_gc;
359	String::FlatContent flat_content = s->GetFlatContent(no_gc);
360	DCHECK(flat_content.IsFlat());
361	bool has_changed_character = false;
362	int index_to_first_unprocessed = FastAsciiConvert<Converter::kIsToLower>(
363	reinterpret_cast<char*>(result->GetChars(no_gc)),
364	reinterpret_cast<const char*>(flat_content.ToOneByteVector().start()),
365	length, &has_changed_character);
366	// If not ASCII, we discard the result and take the 2 byte path.
367	if (index_to_first_unprocessed == length)
368	return has_changed_character ? result : s;
369	}
370
371	Handle<SeqString> result; // Same length as input.
372	if (s->IsOneByteRepresentation()) {
373	result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
374	} else {
375	result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked();
376	}
377
378	Object answer = ConvertCaseHelper(isolate, s, result, length, mapping);
379	if (answer->IsException(isolate) \|\| answer->IsString()) return answer;
380
381	DCHECK(answer->IsSmi());
382	length = Smi::ToInt(answer);
383	if (s->IsOneByteRepresentation() && length > `0`) {
384	ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
385	isolate, result, isolate->factory()->NewRawOneByteString(length));
386	} else {
387	if (length < `0`) length = -length;
388	ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
389	isolate, result, isolate->factory()->NewRawTwoByteString(length));
390	}
391	return ConvertCaseHelper(isolate, s, result, length, mapping);
392	}
393
394	} // namespace
395
396	BUILTIN(StringPrototypeToLocaleLowerCase) {
397	HandleScope scope(isolate);
398	TO_THIS_STRING(string, "String.prototype.toLocaleLowerCase");
399	return ConvertCase(string, isolate,
400	isolate->runtime_state()->to_lower_mapping());
401	}
402
403	BUILTIN(StringPrototypeToLocaleUpperCase) {
404	HandleScope scope(isolate);
405	TO_THIS_STRING(string, "String.prototype.toLocaleUpperCase");
406	return ConvertCase(string, isolate,
407	isolate->runtime_state()->to_upper_mapping());
408	}
409
410	BUILTIN(StringPrototypeToLowerCase) {
411	HandleScope scope(isolate);
412	TO_THIS_STRING(string, "String.prototype.toLowerCase");
413	return ConvertCase(string, isolate,
414	isolate->runtime_state()->to_lower_mapping());
415	}
416
417	BUILTIN(StringPrototypeToUpperCase) {
418	HandleScope scope(isolate);
419	TO_THIS_STRING(string, "String.prototype.toUpperCase");
420	return ConvertCase(string, isolate,
421	isolate->runtime_state()->to_upper_mapping());
422	}
423	#endif // !V8_INTL_SUPPORT
424
425	// ES6 #sec-string.prototype.raw
426	BUILTIN(StringRaw) {
427	HandleScope scope(isolate);
428	Handle<Object> templ = args.atOrUndefined(isolate, `1`);
429	const uint32_t argc = args.length();
430	Handle<String> raw_string =
431	isolate->factory()->NewStringFromAsciiChecked("raw");
432
433	Handle<Object> cooked;
434	ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, cooked,
435	Object::ToObject(isolate, templ));
436
437	Handle<Object> raw;
438	ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
439	isolate, raw, Object::GetProperty(isolate, cooked, raw_string));
440	ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw,
441	Object::ToObject(isolate, raw));
442	Handle<Object> raw_len;
443	ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
444	isolate, raw_len,
445	Object::GetProperty(isolate, raw, isolate->factory()->length_string()));
446
447	ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw_len,
448	Object::ToLength(isolate, raw_len));
449
450	IncrementalStringBuilder result_builder(isolate);
451	// Intentional spec violation: we ignore {length} values >= 2^32, because
452	// assuming non-empty chunks they would generate too-long strings anyway.
453	const double raw_len_number = raw_len ->Number();
454	const uint32_t length = raw_len_number > std::numeric_limits<uint32_t>::max()
455	? std::numeric_limits<uint32_t>::max()
456	: static_cast<uint32_t>(raw_len_number);
457	if (length > `0`) {
458	Handle<Object> first_element;
459	ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, first_element,
460	Object::GetElement(isolate, raw, `0`));
461
462	Handle<String> first_string;
463	ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
464	isolate, first_string, Object::ToString(isolate, first_element));
465	result_builder.AppendString(first_string);
466
467	for (uint32_t i = `1`, arg_i = `2`; i < length; i++, arg_i++) {
468	if (arg_i < argc) {
469	Handle<String> argument_string;
470	ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
471	isolate, argument_string,
472	Object::ToString(isolate, args.at(arg_i)));
473	result_builder.AppendString(argument_string);
474	}
475
476	Handle<Object> element;
477	ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element,
478	Object::GetElement(isolate, raw, i));
479
480	Handle<String> element_string;
481	ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element_string,
482	Object::ToString(isolate, element));
483	result_builder.AppendString(element_string);
484	}
485	}
486
487	RETURN_RESULT_OR_FAILURE(isolate, result_builder.Finish());
488	}
489
490	} // namespace internal
491	} // namespace v8
492

Browse the source code of v8/src/builtins/builtins-string.cc