1 | // Copyright 2014 the V8 project authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. |
4 | |
5 | |
6 | #include "src/unicode-inl.h" |
7 | #include "src/unicode-decoder.h" |
8 | #include <stdio.h> |
9 | #include <stdlib.h> |
10 | |
11 | namespace unibrow { |
12 | |
13 | uint16_t Utf8Iterator::operator*() { |
14 | if (V8_UNLIKELY(char_ > Utf16::kMaxNonSurrogateCharCode)) { |
15 | return trailing_ ? Utf16::TrailSurrogate(char_) |
16 | : Utf16::LeadSurrogate(char_); |
17 | } |
18 | |
19 | DCHECK_EQ(trailing_, false); |
20 | return char_; |
21 | } |
22 | |
23 | Utf8Iterator& Utf8Iterator::operator++() { |
24 | if (V8_UNLIKELY(this->Done())) { |
25 | char_ = Utf8::kBufferEmpty; |
26 | return *this; |
27 | } |
28 | |
29 | if (V8_UNLIKELY(char_ > Utf16::kMaxNonSurrogateCharCode && !trailing_)) { |
30 | trailing_ = true; |
31 | return *this; |
32 | } |
33 | |
34 | trailing_ = false; |
35 | offset_ = cursor_; |
36 | |
37 | char_ = |
38 | Utf8::ValueOf(reinterpret_cast<const uint8_t*>(stream_.begin()) + cursor_, |
39 | stream_.length() - cursor_, &cursor_); |
40 | return *this; |
41 | } |
42 | |
43 | Utf8Iterator Utf8Iterator::operator++(int) { |
44 | Utf8Iterator old(*this); |
45 | ++*this; |
46 | return old; |
47 | } |
48 | |
49 | bool Utf8Iterator::Done() { |
50 | return offset_ == static_cast<size_t>(stream_.length()); |
51 | } |
52 | |
53 | void Utf8DecoderBase::Reset(uint16_t* buffer, size_t buffer_length, |
54 | const v8::internal::Vector<const char>& stream) { |
55 | size_t utf16_length = 0; |
56 | |
57 | Utf8Iterator it = Utf8Iterator(stream); |
58 | // Loop until stream is read, writing to buffer as long as buffer has space. |
59 | while (utf16_length < buffer_length && !it.Done()) { |
60 | *buffer++ = *it; |
61 | ++it; |
62 | utf16_length++; |
63 | } |
64 | bytes_read_ = it.Offset(); |
65 | trailing_ = it.Trailing(); |
66 | chars_written_ = utf16_length; |
67 | |
68 | // Now that writing to buffer is done, we just need to calculate utf16_length |
69 | while (!it.Done()) { |
70 | ++it; |
71 | utf16_length++; |
72 | } |
73 | utf16_length_ = utf16_length; |
74 | } |
75 | |
76 | void Utf8DecoderBase::WriteUtf16Slow( |
77 | uint16_t* data, size_t length, |
78 | const v8::internal::Vector<const char>& stream, size_t offset, |
79 | bool trailing) { |
80 | Utf8Iterator it = Utf8Iterator(stream, offset, trailing); |
81 | while (!it.Done()) { |
82 | DCHECK_GT(length--, 0); |
83 | *data++ = *it; |
84 | ++it; |
85 | } |
86 | } |
87 | |
88 | } // namespace unibrow |
89 | |