1// Copyright 2017 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_OBJECTS_STRING_H_
6#define V8_OBJECTS_STRING_H_
7
8#include "src/base/bits.h"
9#include "src/base/export-template.h"
10#include "src/objects/instance-type.h"
11#include "src/objects/name.h"
12#include "src/objects/smi.h"
13#include "src/unicode-decoder.h"
14
15// Has to be the last include (doesn't have include guards):
16#include "src/objects/object-macros.h"
17
18namespace v8 {
19namespace internal {
20
// Opaque declaration of InstanceType with a fixed 16-bit underlying type; the
// enumerators are declared elsewhere.
21enum InstanceType : uint16_t;
22
// Type of String::ToCString's allow_nulls parameter.
// NOTE(review): presumably selects whether embedded nulls are kept in the
// produced C string - confirm against the ToCString implementation.
23enum AllowNullsFlag { ALLOW_NULLS, DISALLOW_NULLS };
// Type of String::ToCString's robustness_flag parameter. According to the
// comment on ToCString, ROBUST_STRING_TRAVERSAL handles unexpected data without
// assert failures and does no heap allocation; FAST_STRING_TRAVERSAL is the
// default of the three-argument overload.
24enum RobustnessFlag { ROBUST_STRING_TRAVERSAL, FAST_STRING_TRAVERSAL };
25
26// The characteristics of a string are stored in its map. Retrieving these
27// few bits of information is moderately expensive, involving two memory
28// loads where the second is dependent on the first. To improve efficiency
29// the shape of the string is given its own class so that it can be retrieved
30// once and used for several string operations. A StringShape is small enough
31// to be passed by value and is immutable, but be aware that flattening a
32// string can potentially alter its shape. Also be aware that a GC caused by
33// something else can alter the shape of a string due to ConsString
34// shortcutting. Keeping these restrictions in mind has proven to be error-
35// prone and so we no longer put StringShapes in variables unless there is a
36// concrete performance benefit at that particular point in the code.
37class StringShape {
38 public:
  // A shape can be built from a string, from a map, or directly from an
  // instance type. All constructors are explicit, so no implicit conversions
  // to StringShape take place.
39 inline explicit StringShape(const String s);
40 inline explicit StringShape(Map s);
41 inline explicit StringShape(InstanceType t);
  // Representation predicates. All are defined inline elsewhere.
42 inline bool IsSequential();
43 inline bool IsExternal();
44 inline bool IsCons();
45 inline bool IsSliced();
46 inline bool IsThin();
47 inline bool IsIndirect();
  // Combined representation + encoding predicates.
48 inline bool IsExternalOneByte();
49 inline bool IsExternalTwoByte();
50 inline bool IsSequentialOneByte();
51 inline bool IsSequentialTwoByte();
52 inline bool IsInternalized();
  // Accessors for the individual tag portions of the captured type bits.
53 inline StringRepresentationTag representation_tag();
54 inline uint32_t encoding_tag();
55 inline uint32_t full_representation_tag();
56#ifdef DEBUG
  // Debug builds carry a validity flag next to the type bits: invalidate()
  // clears it and valid() reports it. type() exposes the raw bits.
57 inline uint32_t type() { return type_; }
58 inline void invalidate() { valid_ = false; }
59 inline bool valid() { return valid_; }
60#else
  // Release builds have no validity flag, so invalidate() is a no-op and
  // type()/valid() do not exist.
61 inline void invalidate() {}
62#endif
63
64 private:
  // Type bits captured at construction.
  // NOTE(review): presumably derived from the instance type - confirm in the
  // inline constructor definitions.
65 uint32_t type_;
66#ifdef DEBUG
  // Marks the shape as valid; only has an effect in debug builds.
67 inline void set_valid() { valid_ = true; }
68 bool valid_;
69#else
70 inline void set_valid() {}
71#endif
72};
73
74// The String abstract class captures JavaScript string values:
75//
76// Ecma-262:
77// 4.3.16 String Value
78// A string value is a member of the type String and is a finite
79// ordered sequence of zero or more 16-bit unsigned integer values.
80//
81// All string values have a length field.
82class String : public Name {
83 public:
  // The two character widths a string's content can be stored in.
84 enum Encoding { ONE_BYTE_ENCODING, TWO_BYTE_ENCODING };
85
86 // Representation of the flat content of a String.
87 // A non-flat string doesn't have flat content.
88 // A flat string has content that's encoded as a sequence of either
89 // one-byte chars or two-byte UC16.
90 // Returned by String::GetFlatContent().
91 class FlatContent {
92 public:
93 // Returns true if the string is flat and this structure contains content.
94 bool IsFlat() const { return state_ != NON_FLAT; }
95 // Returns true if the structure contains one-byte content.
96 bool IsOneByte() const { return state_ == ONE_BYTE; }
97 // Returns true if the structure contains two-byte content.
98 bool IsTwoByte() const { return state_ == TWO_BYTE; }
99
100 // Return the one byte content of the string. Only use if IsOneByte()
101 // returns true.
102 Vector<const uint8_t> ToOneByteVector() const {
103 DCHECK_EQ(ONE_BYTE, state_);
104 return Vector<const uint8_t>(onebyte_start, length_);
105 }
106 // Return the two-byte content of the string. Only use if IsTwoByte()
107 // returns true.
108 Vector<const uc16> ToUC16Vector() const {
109 DCHECK_EQ(TWO_BYTE, state_);
110 return Vector<const uc16>(twobyte_start, length_);
111 }
112
    // Returns the code unit at index i, reading from whichever buffer matches
    // the current state. Debug-checks that i < length_ and that the content is
    // flat; a negative i is not checked.
113 uc16 Get(int i) const {
114 DCHECK(i < length_);
115 DCHECK(state_ != NON_FLAT);
116 if (state_ == ONE_BYTE) return onebyte_start[i];
117 return twobyte_start[i];
118 }
119
    // Compares only the start pointers, always through the one-byte member of
    // the union; neither length nor state is compared.
120 bool UsesSameString(const FlatContent& other) const {
121 return onebyte_start == other.onebyte_start;
122 }
123
124 private:
125 enum State { NON_FLAT, ONE_BYTE, TWO_BYTE };
126
127 // Constructors only used by String::GetFlatContent().
128 explicit FlatContent(const uint8_t* start, int length)
129 : onebyte_start(start), length_(length), state_(ONE_BYTE) {}
130 explicit FlatContent(const uc16* start, int length)
131 : twobyte_start(start), length_(length), state_(TWO_BYTE) {}
    // The default-constructed value represents "not flat": null start, zero
    // length.
132 FlatContent() : onebyte_start(nullptr), length_(0), state_(NON_FLAT) {}
133
    // Start of the character data; state_ says which member is meaningful.
134 union {
135 const uint8_t* onebyte_start;
136 const uc16* twobyte_start;
137 };
138 int length_;
139 State state_;
140
141 friend class String;
142 friend class IterableSubString;
143 };
144
  // NOTE(review): defined elsewhere; presumably returns the string's
  // characters as a Vector of Char while no_gc is held - confirm the
  // preconditions (flatness, matching encoding) in the inline definition.
145 template <typename Char>
146 V8_INLINE Vector<const Char> GetCharVector(
147 const DisallowHeapAllocation& no_gc);
148
149 // Get and set the length of the string.
150 inline int length() const;
151 inline void set_length(int value);
152
153 // Get and set the length of the string using acquire loads and release
154 // stores.
155 inline int synchronized_length() const;
156 inline void synchronized_set_length(int value);
157
158 // Returns whether this string has only one-byte chars, i.e. all of them can
159 // be one-byte encoded. This might be the case even if the string is
160 // two-byte. Such strings may appear when the embedder prefers
161 // two-byte external representations even for one-byte data.
162 inline bool IsOneByteRepresentation() const;
163 inline bool IsTwoByteRepresentation() const;
164
165 // Cons and slices have an encoding flag that may not represent the actual
166 // encoding of the underlying string. This is taken into account here.
167 // This function is static because that helps it get inlined.
168 // Requires: string.IsFlat()
169 static inline bool IsOneByteRepresentationUnderneath(String string);
170
171 // Get and set individual two byte chars in the string.
172 inline void Set(int index, uint16_t value);
173 // Get individual two byte char in the string. Repeated calls
174 // to this method are not efficient unless the string is flat.
175 V8_INLINE uint16_t Get(int index);
176
177 // ES6 section 7.1.3.1 ToNumber Applied to the String Type
178 static Handle<Object> ToNumber(Isolate* isolate, Handle<String> subject);
179
180 // Flattens the string. Checks first inline to see if it is
181 // necessary. Does nothing if the string is not a cons string.
182 // Flattening allocates a sequential string with the same data as
183 // the given string and mutates the cons string to a degenerate
184 // form, where the first component is the new sequential string and
185 // the second component is the empty string. If allocation fails,
186 // this function returns a failure. If flattening succeeds, this
187 // function returns the sequential string that is now the first
188 // component of the cons string.
189 //
190 // Degenerate cons strings are handled specially by the garbage
191 // collector (see IsShortcutCandidate).
192
193 static inline Handle<String> Flatten(
194 Isolate* isolate, Handle<String> string,
195 AllocationType allocation = AllocationType::kYoung);
196
197 // Tries to return the content of a flat string as a structure holding either
198 // a flat vector of char or of uc16.
199 // If the string isn't flat, and therefore doesn't have flat content, the
200 // returned structure will report so, and can't provide a vector of either
201 // kind.
202 V8_EXPORT_PRIVATE FlatContent
203 GetFlatContent(const DisallowHeapAllocation& no_gc);
204
205 // Returns the parent of a sliced string or first part of a flat cons string.
206 // Requires: StringShape(this).IsIndirect() && this->IsFlat()
207 inline String GetUnderlying();
208
209 // String relational comparison, implemented according to ES6 section 7.2.11
210 // Abstract Relational Comparison (step 5): The comparison of Strings uses a
211 // simple lexicographic ordering on sequences of code unit values. There is no
212 // attempt to use the more complex, semantically oriented definitions of
213 // character or string equality and collating order defined in the Unicode
214 // specification. Therefore String values that are canonically equal according
215 // to the Unicode standard could test as unequal. In effect this algorithm
216 // assumes that both Strings are already in normalized form. Also, note that
217 // for strings containing supplementary characters, lexicographic ordering on
218 // sequences of UTF-16 code unit values differs from that on sequences of code
219 // point values.
220 V8_WARN_UNUSED_RESULT static ComparisonResult Compare(Isolate* isolate,
221 Handle<String> x,
222 Handle<String> y);
223
224 // Perform ES6 21.1.3.8, including checking arguments.
225 static Object IndexOf(Isolate* isolate, Handle<Object> receiver,
226 Handle<Object> search, Handle<Object> position);
227 // Perform string match of pattern on subject, starting at start index.
228 // Caller must ensure that 0 <= start_index <= sub->length(), as this does not
229 // check any arguments.
230 static int IndexOf(Isolate* isolate, Handle<String> receiver,
231 Handle<String> search, int start_index);
232
  // Object-taking counterpart of IndexOf for searching from the end.
  // NOTE(review): presumably implements String.prototype.lastIndexOf including
  // argument checking - confirm in the implementation.
233 static Object LastIndexOf(Isolate* isolate, Handle<Object> receiver,
234 Handle<Object> search, Handle<Object> position);
235
236 // Encapsulates logic related to a match and its capture groups as required
237 // by GetSubstitution.
238 class Match {
239 public:
    // Pure-virtual interface: implementers supply the matched text and the
    // text before and after it.
240 virtual Handle<String> GetMatch() = 0;
241 virtual Handle<String> GetPrefix() = 0;
242 virtual Handle<String> GetSuffix() = 0;
243
244 // A named capture can be invalid (if it is not specified in the pattern),
245 // unmatched (specified but not matched in the current string), and matched.
246 enum CaptureState { INVALID, UNMATCHED, MATCHED };
247
248 virtual int CaptureCount() = 0;
249 virtual bool HasNamedCaptures() = 0;
    // Capture lookup by index or by name; the out-parameter reports whether
    // (or in which CaptureState) the capture exists.
250 virtual MaybeHandle<String> GetCapture(int i, bool* capture_exists) = 0;
251 virtual MaybeHandle<String> GetNamedCapture(Handle<String> name,
252 CaptureState* state) = 0;
253
    // Virtual destructor so implementations can be destroyed through Match*.
254 virtual ~Match() = default;
255 };
256
257 // ES#sec-getsubstitution
258 // GetSubstitution(matched, str, position, captures, replacement)
259 // Expand the $-expressions in the string and return a new string with
260 // the result.
261 // A {start_index} can be passed to specify where to start scanning the
262 // replacement string.
263 V8_WARN_UNUSED_RESULT static MaybeHandle<String> GetSubstitution(
264 Isolate* isolate, Match* match, Handle<String> replacement,
265 int start_index = 0);
266
267 // String equality operations.
268 inline bool Equals(String other);
269 inline static bool Equals(Isolate* isolate, Handle<String> one,
270 Handle<String> two);
271 V8_EXPORT_PRIVATE bool IsUtf8EqualTo(Vector<const char> str,
272 bool allow_prefix_match = false);
273
274 // Dispatches to Is{One,Two}ByteEqualTo.
275 template <typename Char>
276 bool IsEqualTo(Vector<const Char> str);
277
278 V8_EXPORT_PRIVATE bool IsOneByteEqualTo(Vector<const uint8_t> str);
279 bool IsTwoByteEqualTo(Vector<const uc16> str);
280
281 // Return a UTF8 representation of the string. The string is null
282 // terminated but may optionally contain nulls. Length is returned
283 // in length_output if length_output is not a null pointer. The string
284 // should be nearly flat, otherwise the performance of this method may
285 // be very slow (quadratic in the length). Setting robustness_flag to
286 // ROBUST_STRING_TRAVERSAL invokes behaviour that is robust. This means it
287 // handles unexpected data without causing assert failures and it does not
288 // do any heap allocations. This is useful when printing stack traces.
289 std::unique_ptr<char[]> ToCString(AllowNullsFlag allow_nulls,
290 RobustnessFlag robustness_flag, int offset,
291 int length, int* length_output = nullptr);
292 V8_EXPORT_PRIVATE std::unique_ptr<char[]> ToCString(
293 AllowNullsFlag allow_nulls = DISALLOW_NULLS,
294 RobustnessFlag robustness_flag = FAST_STRING_TRAVERSAL,
295 int* length_output = nullptr);
296
  // NOTE(review): presumably parses the string as an array index into *index
  // and returns whether that succeeded - confirm in the implementation.
297 bool ComputeArrayIndex(uint32_t* index);
298
299 // Externalization.
300 V8_EXPORT_PRIVATE bool MakeExternal(
301 v8::String::ExternalStringResource* resource);
302 V8_EXPORT_PRIVATE bool MakeExternal(
303 v8::String::ExternalOneByteStringResource* resource);
304 bool SupportsExternalization();
305
306 // Conversion.
  // AsArrayIndex has a private out-of-line slow case, SlowAsArrayIndex.
307 inline bool AsArrayIndex(uint32_t* index);
308 uint32_t inline ToValidIndex(Object number);
309
310 // Trimming.
311 enum TrimMode { kTrim, kTrimStart, kTrimEnd };
312 static Handle<String> Trim(Isolate* isolate, Handle<String> string,
313 TrimMode mode);
314
315 DECL_CAST(String)
316
317 V8_EXPORT_PRIVATE void PrintOn(FILE* out);
318
319 // For use during stack traces. Performs rudimentary sanity check.
320 bool LooksValid();
321
322 // Dispatched behavior.
323 void StringShortPrint(StringStream* accumulator, bool show_details = true);
324 void PrintUC16(std::ostream& os, int start = 0, int end = -1); // NOLINT
325#if defined(DEBUG) || defined(OBJECT_PRINT)
  // Only declared in debug or object-printing builds.
326 char* ToAsciiArray();
327#endif
328 DECL_PRINTER(String)
329 DECL_VERIFIER(String)
330
331 inline bool IsFlat();
332
  // Field offsets are laid out after Name's header; the field list itself is
  // supplied by the TORQUE_GENERATED_STRING_FIELDS macro.
333 DEFINE_FIELD_OFFSET_CONSTANTS(Name::kHeaderSize,
334 TORQUE_GENERATED_STRING_FIELDS)
335
336 static const int kHeaderSize = kSize;
337
338 // Max char codes.
339 static const int32_t kMaxOneByteCharCode = unibrow::Latin1::kMaxChar;
340 static const uint32_t kMaxOneByteCharCodeU = unibrow::Latin1::kMaxChar;
341 static const int kMaxUtf16CodeUnit = 0xffff;
342 static const uint32_t kMaxUtf16CodeUnitU = kMaxUtf16CodeUnit;
343 static const uc32 kMaxCodePoint = 0x10ffff;
344
345 // Maximal string length.
346 // The max length is different on 32 and 64 bit platforms. Max length for a
347 // 32-bit platform is ~268.4M chars. On 64-bit platforms, max length is
348 // ~1.073B chars. The limit on 64-bit is so that SeqTwoByteString::kMaxSize
349 // can fit in a 32bit int: 2^31 - 1 is the max positive int, minus one bit as
350 // each char needs two bytes, subtract 24 bytes for the string header size.
351
352 // See include/v8.h for the definition.
353 static const int kMaxLength = v8::String::kMaxLength;
354 static_assert(kMaxLength <= (Smi::kMaxValue / 2 - kHeaderSize),
355 "Unexpected max String length");
356
357 // Max length for computing hash. For strings longer than this limit the
358 // string length is used as the hash value.
359 static const int kMaxHashCalcLength = 16383;
360
361 // Limit for truncation in short printing.
362 static const int kMaxShortPrintLength = 1024;
363
364 // Helper function for flattening strings.
365 template <typename sinkchar>
366 EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
367 static void WriteToFlat(String source, sinkchar* sink, int from, int to);
368
369 // The return value may point to the first aligned word containing the first
370 // non-one-byte character, rather than directly to the non-one-byte character.
371 // If the return value is >= the passed length, the entire string was
372 // one-byte.
373 static inline int NonAsciiStart(const char* chars, int length) {
374 const char* start = chars;
375 const char* limit = chars + length;
376
    // The word-at-a-time path is only entered when at least kIntptrSize bytes
    // are available. The alignment loop below does not test against limit, so
    // it relies on this guard (assuming kIntptrSize == sizeof(uintptr_t)).
377 if (length >= kIntptrSize) {
378 // Check unaligned bytes.
379 while (!IsAligned(reinterpret_cast<intptr_t>(chars), sizeof(uintptr_t))) {
380 if (static_cast<uint8_t>(*chars) > unibrow::Utf8::kMaxOneByteChar) {
381 return static_cast<int>(chars - start);
382 }
383 ++chars;
384 }
385 // Check aligned words.
386 DCHECK_EQ(unibrow::Utf8::kMaxOneByteChar, 0x7F);
      // With kUintptrAllBitsSet being all ones this is 0x80 repeated in every
      // byte, i.e. the high bit of each byte in the word.
387 const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFF * 0x80;
388 while (chars + sizeof(uintptr_t) <= limit) {
389 if (*reinterpret_cast<const uintptr_t*>(chars) & non_one_byte_mask) {
          // Reports the start of the offending word, not the exact byte.
390 return static_cast<int>(chars - start);
391 }
392 chars += sizeof(uintptr_t);
393 }
394 }
395 // Check remaining unaligned bytes.
396 while (chars < limit) {
397 if (static_cast<uint8_t>(*chars) > unibrow::Utf8::kMaxOneByteChar) {
398 return static_cast<int>(chars - start);
399 }
400 ++chars;
401 }
402
    // Nothing above the limit was found: chars == limit, so this is length.
403 return static_cast<int>(chars - start);
404 }
405
  // True when NonAsciiStart finds no byte above Utf8::kMaxOneByteChar in
  // chars[0, length).
406 static inline bool IsAscii(const char* chars, int length) {
407 return NonAsciiStart(chars, length) >= length;
408 }
409
  // uint8_t overload; forwards to NonAsciiStart after a pointer cast.
410 static inline bool IsAscii(const uint8_t* chars, int length) {
411 return NonAsciiStart(reinterpret_cast<const char*>(chars), length) >=
412 length;
413 }
414
  // Returns the index of the first code unit greater than
  // kMaxOneByteCharCodeU, or length if there is none. Unlike NonAsciiStart the
  // scan is unit by unit, so the result is exact.
415 static inline int NonOneByteStart(const uc16* chars, int length) {
416 const uc16* limit = chars + length;
417 const uc16* start = chars;
418 while (chars < limit) {
419 if (*chars > kMaxOneByteCharCodeU) return static_cast<int>(chars - start);
420 ++chars;
421 }
422 return static_cast<int>(chars - start);
423 }
424
  // True when every code unit in chars[0, length) is at most
  // kMaxOneByteCharCodeU.
425 static inline bool IsOneByte(const uc16* chars, int length) {
426 return NonOneByteStart(chars, length) >= length;
427 }
428
  // NOTE(review): defined elsewhere; presumably hands flat content to the
  // visitor and returns a ConsString when the string could not be visited
  // flat - confirm in the inline definition.
429 template <class Visitor>
430 static inline ConsString VisitFlat(Visitor* visitor, String string,
431 int offset = 0);
432
  // NOTE(review): presumably returns the positions of line terminators in
  // string - confirm the meaning of include_ending_line in the implementation.
433 static Handle<FixedArray> CalculateLineEnds(Isolate* isolate,
434 Handle<String> string,
435 bool include_ending_line);
436
437 private:
438 friend class Name;
439 friend class StringTableInsertionKey;
440 friend class InternalizedStringKey;
441
  // Out-of-line slow path taking a ConsString; the public entry point is the
  // inline Flatten above.
442 V8_EXPORT_PRIVATE static Handle<String> SlowFlatten(
443 Isolate* isolate, Handle<ConsString> cons, AllocationType allocation);
444
445 // Slow case of String::Equals. This implementation works on any strings
446 // but it is most efficient on strings that are almost flat.
447 V8_EXPORT_PRIVATE bool SlowEquals(String other);
448
449 V8_EXPORT_PRIVATE static bool SlowEquals(Isolate* isolate, Handle<String> one,
450 Handle<String> two);
451
452 // Slow case of AsArrayIndex.
453 V8_EXPORT_PRIVATE bool SlowAsArrayIndex(uint32_t* index);
454
455 // Compute and set the hash code.
456 V8_EXPORT_PRIVATE uint32_t ComputeAndSetHash();
457
458 OBJECT_CONSTRUCTORS(String, Name);
459};
460
461// clang-format off
// Explicit instantiation declaration of String::WriteToFlat for a uint16_t
// sink: translation units including this header do not instantiate it
// implicitly. The matching explicit instantiation is not in this header.
462extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
463void String::WriteToFlat(String source, uint16_t* sink, int from, int to);
464// clang-format on
465
// Iterable range over a string, exposing begin()/end() iterators.
// NOTE(review): presumably covers `length` code units starting at `first`,
// with the default length of -1 meaning "to the end of the string" - confirm
// in the inline definitions.
466class SubStringRange {
467 public:
468 inline SubStringRange(String string, const DisallowHeapAllocation& no_gc,
469 int first = 0, int length = -1);
470 class iterator;
471 inline iterator begin();
472 inline iterator end();
473
474 private:
475 String string_;
476 int first_;
477 int length_;
  // Stored by reference, so the DisallowHeapAllocation scope passed to the
  // constructor must outlive this range.
478 const DisallowHeapAllocation& no_gc_;
479};
480
481// The SeqString abstract class captures sequential string values.
482class SeqString : public String {
483 public:
484 DECL_CAST(SeqString)
485
486 // Truncate the string in-place if possible and return the result.
487 // In case of new_length == 0, the empty string is returned without
488 // truncating the original string.
  // The result must not be ignored (V8_WARN_UNUSED_RESULT): as the comment
  // above says, it is not always the original string.
489 V8_WARN_UNUSED_RESULT static Handle<String> Truncate(Handle<SeqString> string,
490 int new_length);
491
492 OBJECT_CONSTRUCTORS(SeqString, String);
493};
494
// String subclass that currently adds only the cast helper and constructors
// on top of String; it declares no members of its own.
495class InternalizedString : public String {
496 public:
497 DECL_CAST(InternalizedString)
498 // TODO(neis): Possibly move some stuff from String here.
499
500 OBJECT_CONSTRUCTORS(InternalizedString, String);
501};
502
503// The OneByteString class captures sequential one-byte string objects.
504// Each character in the OneByteString is a one-byte character.
505class SeqOneByteString : public SeqString {
506 public:
  // Compile-time encoding marker; SeqTwoByteString defines the same constant
  // as false.
507 static const bool kHasOneByteEncoding = true;
508
509 // Dispatched behavior.
510 inline uint16_t SeqOneByteStringGet(int index);
511 inline void SeqOneByteStringSet(int index, uint16_t value);
512
513 // Get the address of the characters in this string.
514 inline Address GetCharsAddress();
515
  // Character pointer accessor; requires a DisallowHeapAllocation scope.
516 inline uint8_t* GetChars(const DisallowHeapAllocation& no_gc);
517
518 // Clear uninitialized padding space. This ensures that the snapshot content
519 // is deterministic.
520 void clear_padding();
521
522 DECL_CAST(SeqOneByteString)
523
524 // Garbage collection support. This method is called by the
525 // garbage collector to compute the actual size of a OneByteString
526 // instance.
527 inline int SeqOneByteStringSize(InstanceType instance_type);
528
529 // Computes the size for a OneByteString instance of a given length.
  // The result is kHeaderSize + length * kCharSize passed through
  // OBJECT_POINTER_ALIGN; length is not range-checked here.
530 static int SizeFor(int length) {
531 return OBJECT_POINTER_ALIGN(kHeaderSize + length * kCharSize);
532 }
533
534 // Maximal memory usage for a single sequential one-byte string.
535 static const int kMaxCharsSize = kMaxLength;
536 static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize);
  // Compile-time check that a string of kMaxLength characters fits in
  // kMaxSize.
537 STATIC_ASSERT((kMaxSize - kHeaderSize) >= String::kMaxLength);
538
539 class BodyDescriptor;
540
541 OBJECT_CONSTRUCTORS(SeqOneByteString, SeqString);
542};
543
544// The TwoByteString class captures sequential unicode string objects.
545// Each character in the TwoByteString is a two-byte uint16_t.
546class SeqTwoByteString : public SeqString {
547 public:
  // Compile-time encoding marker; SeqOneByteString defines the same constant
  // as true.
548 static const bool kHasOneByteEncoding = false;
549
550 // Dispatched behavior.
551 inline uint16_t SeqTwoByteStringGet(int index);
552 inline void SeqTwoByteStringSet(int index, uint16_t value);
553
554 // Get the address of the characters in this string.
555 inline Address GetCharsAddress();
556
  // Character pointer accessor; requires a DisallowHeapAllocation scope.
557 inline uc16* GetChars(const DisallowHeapAllocation& no_gc);
558
559 // Clear uninitialized padding space. This ensures that the snapshot content
560 // is deterministic.
561 void clear_padding();
562
563 DECL_CAST(SeqTwoByteString)
564
565 // Garbage collection support. This method is called by the
566 // garbage collector to compute the actual size of a TwoByteString
567 // instance.
568 inline int SeqTwoByteStringSize(InstanceType instance_type);
569
570 // Computes the size for a TwoByteString instance of a given length.
  // The result is kHeaderSize + length * kShortSize passed through
  // OBJECT_POINTER_ALIGN; length is not range-checked here.
571 static int SizeFor(int length) {
572 return OBJECT_POINTER_ALIGN(kHeaderSize + length * kShortSize);
573 }
574
575 // Maximal memory usage for a single sequential two-byte string.
576 static const int kMaxCharsSize = kMaxLength * 2;
577 static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize);
  // Compile-time check that kMaxLength two-byte characters fit in kMaxSize.
578 STATIC_ASSERT(static_cast<int>((kMaxSize - kHeaderSize) / sizeof(uint16_t)) >=
579 String::kMaxLength);
580
581 class BodyDescriptor;
582
583 OBJECT_CONSTRUCTORS(SeqTwoByteString, SeqString);
584};
585
586// The ConsString class describes string values built by using the
587// addition operator on strings. A ConsString is a pair where the
588// first and second components are pointers to other string values.
589// One or both components of a ConsString can be pointers to other
590// ConsStrings, creating a binary tree of ConsStrings where the leaves
591// are non-ConsString string values. The string value represented by
592// a ConsString can be obtained by concatenating the leaf string
593// values in a left-to-right depth-first traversal of the tree.
594class ConsString : public String {
595 public:
596 // First string of the cons cell.
597 inline String first();
598 // Doesn't check that the result is a string, even in debug mode. This is
599 // useful during GC where the mark bits confuse the checks.
600 inline Object unchecked_first();
  // Setter; the write barrier mode defaults to UPDATE_WRITE_BARRIER.
601 inline void set_first(Isolate* isolate, String first,
602 WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
603
604 // Second string of the cons cell.
605 inline String second();
606 // Doesn't check that the result is a string, even in debug mode. This is
607 // useful during GC where the mark bits confuse the checks.
608 inline Object unchecked_second();
  // Setter; the write barrier mode defaults to UPDATE_WRITE_BARRIER.
609 inline void set_second(Isolate* isolate, String second,
610 WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
611
612 // Dispatched behavior.
613 V8_EXPORT_PRIVATE uint16_t ConsStringGet(int index);
614
615 DECL_CAST(ConsString)
616
  // Field offsets follow String's header; the field list comes from the
  // TORQUE_GENERATED_CONS_STRING_FIELDS macro.
617 DEFINE_FIELD_OFFSET_CONSTANTS(String::kHeaderSize,
618 TORQUE_GENERATED_CONS_STRING_FIELDS)
619
620 // Minimum length for a cons string.
621 static const int kMinLength = 13;
622
  // Body descriptor parameterised with kFirstOffset and kSize.
623 using BodyDescriptor = FixedBodyDescriptor<kFirstOffset, kSize, kSize>;
624
625 DECL_VERIFIER(ConsString)
626
627 OBJECT_CONSTRUCTORS(ConsString, String);
628};
629
630// The ThinString class describes string objects that are just references
631// to another string object. They are used for in-place internalization when
632// the original string cannot actually be internalized in-place: in these
633// cases, the original string is converted to a ThinString pointing at its
634// internalized version (which is allocated as a new object).
635// In terms of memory layout and most algorithms operating on strings,
636// ThinStrings can be thought of as "one-part cons strings".
637class ThinString : public String {
638 public:
639 // Actual string that this ThinString refers to.
640 inline String actual() const;
  // Variant returning HeapObject rather than String.
  // NOTE(review): presumably skips the type check, like
  // ConsString::unchecked_first - confirm in the inline definition.
641 inline HeapObject unchecked_actual() const;
  // Setter; the write barrier mode defaults to UPDATE_WRITE_BARRIER.
642 inline void set_actual(String s,
643 WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
644
  // Character access by index, defined out of line.
645 V8_EXPORT_PRIVATE uint16_t ThinStringGet(int index);
646
647 DECL_CAST(ThinString)
648 DECL_VERIFIER(ThinString)
649
  // Field offsets follow String's header; the field list comes from the
  // TORQUE_GENERATED_THIN_STRING_FIELDS macro.
650 DEFINE_FIELD_OFFSET_CONSTANTS(String::kHeaderSize,
651 TORQUE_GENERATED_THIN_STRING_FIELDS)
652
  // Body descriptor parameterised with kActualOffset and kSize.
653 using BodyDescriptor = FixedBodyDescriptor<kActualOffset, kSize, kSize>;
654
655 OBJECT_CONSTRUCTORS(ThinString, String);
656};
657
658// The Sliced String class describes strings that are substrings of another
659// sequential string. The motivation is to save time and memory when creating
660// a substring. A Sliced String is described as a pointer to the parent,
661// the offset from the start of the parent string and the length. Using
662// a Sliced String therefore requires unpacking of the parent string and
663// adding the offset to the start address. Substrings of a Sliced String
664// are not nested, since the double indirection is simplified when creating
665// such a substring.
666// Currently missing features are:
667// - handling externalized parent strings
668// - external strings as parent
669// - truncating sliced string to enable otherwise unneeded parent to be GC'ed.
670class SlicedString : public String {
671 public:
  // Accessors for the parent string; the setter's write barrier mode defaults
  // to UPDATE_WRITE_BARRIER.
672 inline String parent();
673 inline void set_parent(Isolate* isolate, String parent,
674 WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
  // Accessors for the offset of this slice.
675 inline int offset() const;
676 inline void set_offset(int offset);
677
678 // Dispatched behavior.
679 V8_EXPORT_PRIVATE uint16_t SlicedStringGet(int index);
680
681 DECL_CAST(SlicedString)
682
  // Field offsets follow String's header; the field list comes from the
  // TORQUE_GENERATED_SLICED_STRING_FIELDS macro.
683 DEFINE_FIELD_OFFSET_CONSTANTS(String::kHeaderSize,
684 TORQUE_GENERATED_SLICED_STRING_FIELDS)
685
686 // Minimum length for a sliced string.
687 static const int kMinLength = 13;
688
  // Body descriptor parameterised with kParentOffset and kSize.
689 using BodyDescriptor = FixedBodyDescriptor<kParentOffset, kSize, kSize>;
690
691 DECL_VERIFIER(SlicedString)
692
693 OBJECT_CONSTRUCTORS(SlicedString, String);
694};
695
696// The ExternalString class describes string values that are backed by
697// a string resource that lies outside the V8 heap. ExternalStrings
698// consist of the length field common to all strings and a pointer to the
699// external resource. It is important to ensure (externally) that the
700// resource is not deallocated while the ExternalString is live in the
701// V8 heap.
702//
703// The API expects that all ExternalStrings are created through the
704// API. Therefore, ExternalStrings should not be used internally.
705class ExternalString : public String {
706 public:
707 DECL_CAST(ExternalString)
708
  // Field offsets follow String's header; the field list comes from the
  // TORQUE_GENERATED_EXTERNAL_STRING_FIELDS macro.
709 DEFINE_FIELD_OFFSET_CONSTANTS(String::kHeaderSize,
710 TORQUE_GENERATED_EXTERNAL_STRING_FIELDS)
711
712 // Size of uncached external strings.
  // Computed as the end of the resource field, i.e. an uncached external
  // string ends right after its resource pointer.
713 static const int kUncachedSize =
714 kResourceOffset + FIELD_SIZE(kResourceOffset);
715
716 // Return whether the external string data pointer is not cached.
717 inline bool is_uncached() const;
718 // Size in bytes of the external payload.
719 int ExternalPayloadSize() const;
720
721 // Used in the serializer/deserializer.
  // These read or overwrite the resource field as a raw Address or as a
  // uint32_t.
722 inline Address resource_as_address();
723 inline void set_address_as_resource(Address address);
724 inline uint32_t resource_as_uint32();
725 inline void set_uint32_as_resource(uint32_t value);
726
727 // Disposes string's resource object if it has not already been disposed.
728 inline void DisposeResource();
729
  // The resource field offset must agree with the constant in Internals.
730 STATIC_ASSERT(kResourceOffset == Internals::kStringResourceOffset);
731
732 OBJECT_CONSTRUCTORS(ExternalString, String);
733};
734
735// The ExternalOneByteString class is an external string backed by a
736// one-byte string.
737class ExternalOneByteString : public ExternalString {
738 public:
  // Compile-time encoding marker; ExternalTwoByteString defines the same
  // constant as false.
739 static const bool kHasOneByteEncoding = true;
740
741 using Resource = v8::String::ExternalOneByteStringResource;
742
743 // The underlying resource.
744 inline const Resource* resource();
745
746 // It is assumed that the previous resource is null. If it is not null, then
747 // it is the responsibility of the caller to handle the previous resource.
748 inline void SetResource(Isolate* isolate, const Resource* buffer);
749 // Used only during serialization.
750 inline void set_resource(const Resource* buffer);
751
752 // Update the pointer cache to the external character array.
753 // The cached pointer is always valid, as the external character array does
754 // not move during lifetime. Deserialization is the only exception, after
755 // which the pointer cache has to be refreshed.
756 inline void update_data_cache();
757
  // Pointer to the one-byte character data.
758 inline const uint8_t* GetChars();
759
760 // Dispatched behavior.
761 inline uint16_t ExternalOneByteStringGet(int index);
762
763 DECL_CAST(ExternalOneByteString)
764
765 class BodyDescriptor;
766
767 OBJECT_CONSTRUCTORS(ExternalOneByteString, ExternalString);
768};
769
770// The ExternalTwoByteString class is an external string backed by a UTF-16
771// encoded string.
772class ExternalTwoByteString : public ExternalString {
773 public:
  // Compile-time encoding marker; ExternalOneByteString defines the same
  // constant as true.
774 static const bool kHasOneByteEncoding = false;
775
776 using Resource = v8::String::ExternalStringResource;
777
778 // The underlying string resource.
779 inline const Resource* resource();
780
781 // It is assumed that the previous resource is null. If it is not null, then
782 // it is the responsibility of the caller to handle the previous resource.
783 inline void SetResource(Isolate* isolate, const Resource* buffer);
784 // Used only during serialization.
785 inline void set_resource(const Resource* buffer);
786
787 // Update the pointer cache to the external character array.
788 // The cached pointer is always valid, as the external character array does
789 // not move during lifetime. Deserialization is the only exception, after
790 // which the pointer cache has to be refreshed.
791 inline void update_data_cache();
792
  // Pointer to the two-byte character data.
793 inline const uint16_t* GetChars();
794
795 // Dispatched behavior.
796 inline uint16_t ExternalTwoByteStringGet(int index);
797
798 // For regexp code.
  // NOTE(review): presumably returns GetChars() advanced by `start` code
  // units - confirm in the inline definition.
799 inline const uint16_t* ExternalTwoByteStringGetData(unsigned start);
800
801 DECL_CAST(ExternalTwoByteString)
802
803 class BodyDescriptor;
804
805 OBJECT_CONSTRUCTORS(ExternalTwoByteString, ExternalString);
806};
807
808// A flat string reader provides random access to the contents of a
809// string independent of the character width of the string. The handle
810// must be valid as long as the reader is being used.
811class V8_EXPORT_PRIVATE FlatStringReader : public Relocatable {
812 public:
  // A reader can be built over a heap string handle or over a char vector.
813 FlatStringReader(Isolate* isolate, Handle<String> str);
814 FlatStringReader(Isolate* isolate, Vector<const char> input);
  // Relocatable hook, overridden here.
  // NOTE(review): presumably refreshes start_ after the string may have
  // moved - confirm in the implementation.
815 void PostGarbageCollection() override;
  // Character access by index, either widened to uc32 or as a caller-chosen
  // Char type.
816 inline uc32 Get(int index);
817 template <typename Char>
818 inline Char Get(int index);
  // Returns the stored length.
819 int length() { return length_; }
820
821 private:
  // NOTE(review): presumably the handle location of the string being read, or
  // null for the char-vector constructor - confirm in the implementation.
822 Address* str_;
823 bool is_one_byte_;
824 int length_;
  // Untyped start of the character data; is_one_byte_ tells how to read it.
825 const void* start_;
826};
827
828// This maintains an off-stack representation of the stack frames required
829// to traverse a ConsString, allowing an entirely iterative and restartable
830// traversal of the entire string.
831class ConsStringIterator {
832 public:
  // The defaulted constructor leaves the members uninitialized (there are no
  // in-class initializers); call Reset() before Next().
833 inline ConsStringIterator() = default;
834 inline explicit ConsStringIterator(ConsString cons_string, int offset = 0) {
835 Reset(cons_string, offset);
836 }
  // Restarts iteration over cons_string. A null cons_string leaves the
  // iterator empty (depth_ == 0).
837 inline void Reset(ConsString cons_string, int offset = 0) {
838 depth_ = 0;
839 // Next will always return a null String (String()).
840 if (cons_string.is_null()) return;
841 Initialize(cons_string, offset);
842 }
843 // Returns a null String (String()) when complete.
  // *offset_out is always written: it is zeroed first and then left to
  // Continue() when there is more to visit.
844 inline String Next(int* offset_out) {
845 *offset_out = 0;
846 if (depth_ == 0) return String();
847 return Continue(offset_out);
848 }
849
850 private:
851 static const int kStackSize = 32;
852 // Use a mask instead of doing modulo operations for stack wrapping.
853 static const int kDepthMask = kStackSize - 1;
854 static_assert(base::bits::IsPowerOfTwo(kStackSize),
855 "kStackSize must be power of two");
856 static inline int OffsetForDepth(int depth);
857
858 inline void PushLeft(ConsString string);
859 inline void PushRight(ConsString string);
860 inline void AdjustMaximumDepth();
861 inline void Pop();
  // True once the deepest point reached is a full kStackSize frames beyond the
  // current depth.
862 inline bool StackBlown() { return maximum_depth_ - depth_ == kStackSize; }
863 V8_EXPORT_PRIVATE void Initialize(ConsString cons_string, int offset);
864 V8_EXPORT_PRIVATE String Continue(int* offset_out);
865 String NextLeaf(bool* blew_stack);
866 String Search(int* offset_out);
867
868 // Stack must always contain only frames for which right traversal
869 // has not yet been performed.
870 ConsString frames_[kStackSize];
871 ConsString root_;
872 int depth_;
873 int maximum_depth_;
874 int consumed_;
875 DISALLOW_COPY_AND_ASSIGN(ConsStringIterator);
876};
877
// Sequential reader over a string's code units: HasMore() / GetNext() pull
// characters, Reset() re-targets the stream.
// NOTE(review): the VisitOneByteString / VisitTwoByteString callbacks are
// presumably invoked by String::VisitFlat to install the current buffer -
// confirm in the inline definitions.
878class StringCharacterStream {
879 public:
880 inline explicit StringCharacterStream(String string, int offset = 0);
881 inline uint16_t GetNext();
882 inline bool HasMore();
883 inline void Reset(String string, int offset = 0);
884 inline void VisitOneByteString(const uint8_t* chars, int length);
885 inline void VisitTwoByteString(const uint16_t* chars, int length);
886
887 private:
  // Iterator used when the string is a ConsString tree.
888 ConsStringIterator iter_;
  // Selects which member of the union below is meaningful.
889 bool is_one_byte_;
890 union {
891 const uint8_t* buffer8_;
892 const uint16_t* buffer16_;
893 };
  // End of the current buffer, kept as a byte pointer for both encodings.
894 const uint8_t* end_;
895 DISALLOW_COPY_AND_ASSIGN(StringCharacterStream);
896};
897
898} // namespace internal
899} // namespace v8
900
901#include "src/objects/object-macros-undef.h"
902
903#endif // V8_OBJECTS_STRING_H_
904