1// Copyright 2014 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5
6#include "src/unicode-inl.h"
7#include "src/unicode-decoder.h"
8#include <stdio.h>
9#include <stdlib.h>
10
11namespace unibrow {
12
13uint16_t Utf8Iterator::operator*() {
14 if (V8_UNLIKELY(char_ > Utf16::kMaxNonSurrogateCharCode)) {
15 return trailing_ ? Utf16::TrailSurrogate(char_)
16 : Utf16::LeadSurrogate(char_);
17 }
18
19 DCHECK_EQ(trailing_, false);
20 return char_;
21}
22
23Utf8Iterator& Utf8Iterator::operator++() {
24 if (V8_UNLIKELY(this->Done())) {
25 char_ = Utf8::kBufferEmpty;
26 return *this;
27 }
28
29 if (V8_UNLIKELY(char_ > Utf16::kMaxNonSurrogateCharCode && !trailing_)) {
30 trailing_ = true;
31 return *this;
32 }
33
34 trailing_ = false;
35 offset_ = cursor_;
36
37 char_ =
38 Utf8::ValueOf(reinterpret_cast<const uint8_t*>(stream_.begin()) + cursor_,
39 stream_.length() - cursor_, &cursor_);
40 return *this;
41}
42
43Utf8Iterator Utf8Iterator::operator++(int) {
44 Utf8Iterator old(*this);
45 ++*this;
46 return old;
47}
48
49bool Utf8Iterator::Done() {
50 return offset_ == static_cast<size_t>(stream_.length());
51}
52
53void Utf8DecoderBase::Reset(uint16_t* buffer, size_t buffer_length,
54 const v8::internal::Vector<const char>& stream) {
55 size_t utf16_length = 0;
56
57 Utf8Iterator it = Utf8Iterator(stream);
58 // Loop until stream is read, writing to buffer as long as buffer has space.
59 while (utf16_length < buffer_length && !it.Done()) {
60 *buffer++ = *it;
61 ++it;
62 utf16_length++;
63 }
64 bytes_read_ = it.Offset();
65 trailing_ = it.Trailing();
66 chars_written_ = utf16_length;
67
68 // Now that writing to buffer is done, we just need to calculate utf16_length
69 while (!it.Done()) {
70 ++it;
71 utf16_length++;
72 }
73 utf16_length_ = utf16_length;
74}
75
76void Utf8DecoderBase::WriteUtf16Slow(
77 uint16_t* data, size_t length,
78 const v8::internal::Vector<const char>& stream, size_t offset,
79 bool trailing) {
80 Utf8Iterator it = Utf8Iterator(stream, offset, trailing);
81 while (!it.Done()) {
82 DCHECK_GT(length--, 0);
83 *data++ = *it;
84 ++it;
85 }
86}
87
88} // namespace unibrow
89