| 1 | /* |
| 2 | * Copyright (C) 2015 Andy VanWagoner (andy@vanwagoner.family) |
| 3 | * Copyright (C) 2015 Sukolsak Sakshuwong (sukolsak@gmail.com) |
| 4 | * Copyright (C) 2016-2019 Apple Inc. All Rights Reserved. |
| 5 | * |
| 6 | * Redistribution and use in source and binary forms, with or without |
| 7 | * modification, are permitted provided that the following conditions |
| 8 | * are met: |
| 9 | * 1. Redistributions of source code must retain the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer. |
| 11 | * 2. Redistributions in binary form must reproduce the above copyright |
| 12 | * notice, this list of conditions and the following disclaimer in the |
| 13 | * documentation and/or other materials provided with the distribution. |
| 14 | * |
| 15 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' |
| 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
| 17 | * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS |
| 19 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 20 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 21 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 22 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 23 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 24 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
| 25 | * THE POSSIBILITY OF SUCH DAMAGE. |
| 26 | */ |
| 27 | |
| 28 | #include "config.h" |
| 29 | #include "IntlCollator.h" |
| 30 | |
| 31 | #if ENABLE(INTL) |
| 32 | |
| 33 | #include "CatchScope.h" |
| 34 | #include "Error.h" |
| 35 | #include "IntlCollatorConstructor.h" |
| 36 | #include "IntlObject.h" |
| 37 | #include "JSBoundFunction.h" |
| 38 | #include "JSCInlines.h" |
| 39 | #include "ObjectConstructor.h" |
| 40 | #include "SlotVisitorInlines.h" |
| 41 | #include "StructureInlines.h" |
| 42 | #include <unicode/ucol.h> |
| 43 | #include <wtf/unicode/Collator.h> |
| 44 | |
| 45 | namespace JSC { |
| 46 | |
| 47 | const ClassInfo IntlCollator::s_info = { "Object" , &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(IntlCollator) }; |
| 48 | |
| 49 | static const char* const relevantCollatorExtensionKeys[3] = { "co" , "kn" , "kf" }; |
| 50 | static const size_t indexOfExtensionKeyCo = 0; |
| 51 | static const size_t indexOfExtensionKeyKn = 1; |
| 52 | static const size_t indexOfExtensionKeyKf = 2; |
| 53 | |
| 54 | void IntlCollator::UCollatorDeleter::operator()(UCollator* collator) const |
| 55 | { |
| 56 | if (collator) |
| 57 | ucol_close(collator); |
| 58 | } |
| 59 | |
| 60 | IntlCollator* IntlCollator::create(VM& vm, Structure* structure) |
| 61 | { |
| 62 | IntlCollator* format = new (NotNull, allocateCell<IntlCollator>(vm.heap)) IntlCollator(vm, structure); |
| 63 | format->finishCreation(vm); |
| 64 | return format; |
| 65 | } |
| 66 | |
| 67 | Structure* IntlCollator::createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype) |
| 68 | { |
| 69 | return Structure::create(vm, globalObject, prototype, TypeInfo(ObjectType, StructureFlags), info()); |
| 70 | } |
| 71 | |
| 72 | IntlCollator::IntlCollator(VM& vm, Structure* structure) |
| 73 | : JSDestructibleObject(vm, structure) |
| 74 | { |
| 75 | } |
| 76 | |
| 77 | void IntlCollator::finishCreation(VM& vm) |
| 78 | { |
| 79 | Base::finishCreation(vm); |
| 80 | ASSERT(inherits(vm, info())); |
| 81 | } |
| 82 | |
| 83 | void IntlCollator::destroy(JSCell* cell) |
| 84 | { |
| 85 | static_cast<IntlCollator*>(cell)->IntlCollator::~IntlCollator(); |
| 86 | } |
| 87 | |
| 88 | void IntlCollator::visitChildren(JSCell* cell, SlotVisitor& visitor) |
| 89 | { |
| 90 | IntlCollator* thisObject = jsCast<IntlCollator*>(cell); |
| 91 | ASSERT_GC_OBJECT_INHERITS(thisObject, info()); |
| 92 | |
| 93 | Base::visitChildren(thisObject, visitor); |
| 94 | |
| 95 | visitor.append(thisObject->m_boundCompare); |
| 96 | } |
| 97 | |
| 98 | static Vector<String> sortLocaleData(const String& locale, size_t keyIndex) |
| 99 | { |
| 100 | // 9.1 Internal slots of Service Constructors & 10.2.3 Internal slots (ECMA-402 2.0) |
| 101 | Vector<String> keyLocaleData; |
| 102 | switch (keyIndex) { |
| 103 | case indexOfExtensionKeyCo: { |
| 104 | // 10.2.3 "The first element of [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co must be null for all locale values." |
| 105 | keyLocaleData.append({ }); |
| 106 | |
| 107 | UErrorCode status = U_ZERO_ERROR; |
| 108 | UEnumeration* enumeration = ucol_getKeywordValuesForLocale("collation" , locale.utf8().data(), false, &status); |
| 109 | if (U_SUCCESS(status)) { |
| 110 | const char* collation; |
| 111 | while ((collation = uenum_next(enumeration, nullptr, &status)) && U_SUCCESS(status)) { |
| 112 | // 10.2.3 "The values "standard" and "search" must not be used as elements in any [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co array." |
| 113 | if (!strcmp(collation, "standard" ) || !strcmp(collation, "search" )) |
| 114 | continue; |
| 115 | |
| 116 | // Map keyword values to BCP 47 equivalents. |
| 117 | if (!strcmp(collation, "dictionary" )) |
| 118 | collation = "dict" ; |
| 119 | else if (!strcmp(collation, "gb2312han" )) |
| 120 | collation = "gb2312" ; |
| 121 | else if (!strcmp(collation, "phonebook" )) |
| 122 | collation = "phonebk" ; |
| 123 | else if (!strcmp(collation, "traditional" )) |
| 124 | collation = "trad" ; |
| 125 | |
| 126 | keyLocaleData.append(collation); |
| 127 | } |
| 128 | uenum_close(enumeration); |
| 129 | } |
| 130 | break; |
| 131 | } |
| 132 | case indexOfExtensionKeyKn: |
| 133 | keyLocaleData.reserveInitialCapacity(2); |
| 134 | keyLocaleData.uncheckedAppend("false"_s ); |
| 135 | keyLocaleData.uncheckedAppend("true"_s ); |
| 136 | break; |
| 137 | case indexOfExtensionKeyKf: |
| 138 | keyLocaleData.reserveInitialCapacity(3); |
| 139 | keyLocaleData.uncheckedAppend("false"_s ); |
| 140 | keyLocaleData.uncheckedAppend("lower"_s ); |
| 141 | keyLocaleData.uncheckedAppend("upper"_s ); |
| 142 | break; |
| 143 | default: |
| 144 | ASSERT_NOT_REACHED(); |
| 145 | } |
| 146 | return keyLocaleData; |
| 147 | } |
| 148 | |
| 149 | static Vector<String> searchLocaleData(const String&, size_t keyIndex) |
| 150 | { |
| 151 | // 9.1 Internal slots of Service Constructors & 10.2.3 Internal slots (ECMA-402 2.0) |
| 152 | Vector<String> keyLocaleData; |
| 153 | switch (keyIndex) { |
| 154 | case indexOfExtensionKeyCo: |
| 155 | // 10.2.3 "The first element of [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co must be null for all locale values." |
| 156 | keyLocaleData.reserveInitialCapacity(1); |
| 157 | keyLocaleData.append({ }); |
| 158 | break; |
| 159 | case indexOfExtensionKeyKn: |
| 160 | keyLocaleData.reserveInitialCapacity(2); |
| 161 | keyLocaleData.uncheckedAppend("false"_s ); |
| 162 | keyLocaleData.uncheckedAppend("true"_s ); |
| 163 | break; |
| 164 | case indexOfExtensionKeyKf: |
| 165 | keyLocaleData.reserveInitialCapacity(3); |
| 166 | keyLocaleData.uncheckedAppend("false"_s ); |
| 167 | keyLocaleData.uncheckedAppend("lower"_s ); |
| 168 | keyLocaleData.uncheckedAppend("upper"_s ); |
| 169 | break; |
| 170 | default: |
| 171 | ASSERT_NOT_REACHED(); |
| 172 | } |
| 173 | return keyLocaleData; |
| 174 | } |
| 175 | |
| 176 | void IntlCollator::initializeCollator(JSGlobalObject* globalObject, JSValue locales, JSValue optionsValue) |
| 177 | { |
| 178 | VM& vm = globalObject->vm(); |
| 179 | auto scope = DECLARE_THROW_SCOPE(vm); |
| 180 | |
| 181 | // 10.1.1 InitializeCollator (collator, locales, options) (ECMA-402) |
| 182 | // https://tc39.github.io/ecma402/#sec-initializecollator |
| 183 | |
| 184 | auto requestedLocales = canonicalizeLocaleList(globalObject, locales); |
| 185 | RETURN_IF_EXCEPTION(scope, void()); |
| 186 | |
| 187 | JSValue options = optionsValue; |
| 188 | if (!optionsValue.isUndefined()) { |
| 189 | options = optionsValue.toObject(globalObject); |
| 190 | RETURN_IF_EXCEPTION(scope, void()); |
| 191 | } |
| 192 | |
| 193 | String usageString = intlStringOption(globalObject, options, vm.propertyNames->usage, { "sort" , "search" }, "usage must be either \"sort\" or \"search\"" , "sort" ); |
| 194 | RETURN_IF_EXCEPTION(scope, void()); |
| 195 | if (usageString == "sort" ) |
| 196 | m_usage = Usage::Sort; |
| 197 | else if (usageString == "search" ) |
| 198 | m_usage = Usage::Search; |
| 199 | else |
| 200 | ASSERT_NOT_REACHED(); |
| 201 | |
| 202 | auto localeData = (m_usage == Usage::Sort) ? sortLocaleData : searchLocaleData; |
| 203 | |
| 204 | HashMap<String, String> opt; |
| 205 | |
| 206 | String matcher = intlStringOption(globalObject, options, vm.propertyNames->localeMatcher, { "lookup" , "best fit" }, "localeMatcher must be either \"lookup\" or \"best fit\"" , "best fit" ); |
| 207 | RETURN_IF_EXCEPTION(scope, void()); |
| 208 | opt.add("localeMatcher"_s , matcher); |
| 209 | |
| 210 | { |
| 211 | String numericString; |
| 212 | bool usesFallback; |
| 213 | bool numeric = intlBooleanOption(globalObject, options, vm.propertyNames->numeric, usesFallback); |
| 214 | RETURN_IF_EXCEPTION(scope, void()); |
| 215 | if (!usesFallback) |
| 216 | numericString = numeric ? "true"_s : "false"_s ; |
| 217 | if (!numericString.isNull()) |
| 218 | opt.add("kn"_s , numericString); |
| 219 | } |
| 220 | { |
| 221 | String caseFirst = intlStringOption(globalObject, options, vm.propertyNames->caseFirst, { "upper" , "lower" , "false" }, "caseFirst must be either \"upper\", \"lower\", or \"false\"" , nullptr); |
| 222 | RETURN_IF_EXCEPTION(scope, void()); |
| 223 | if (!caseFirst.isNull()) |
| 224 | opt.add("kf"_s , caseFirst); |
| 225 | } |
| 226 | |
| 227 | auto& availableLocales = globalObject->intlCollatorAvailableLocales(); |
| 228 | auto result = resolveLocale(globalObject, availableLocales, requestedLocales, opt, relevantCollatorExtensionKeys, WTF_ARRAY_LENGTH(relevantCollatorExtensionKeys), localeData); |
| 229 | |
| 230 | m_locale = result.get("locale"_s ); |
| 231 | if (m_locale.isEmpty()) { |
| 232 | throwTypeError(globalObject, scope, "failed to initialize Collator due to invalid locale"_s ); |
| 233 | return; |
| 234 | } |
| 235 | |
| 236 | const String& collation = result.get("co"_s ); |
| 237 | m_collation = collation.isNull() ? "default"_s : collation; |
| 238 | m_numeric = result.get("kn"_s ) == "true" ; |
| 239 | |
| 240 | const String& caseFirst = result.get("kf"_s ); |
| 241 | if (caseFirst == "lower" ) |
| 242 | m_caseFirst = CaseFirst::Lower; |
| 243 | else if (caseFirst == "upper" ) |
| 244 | m_caseFirst = CaseFirst::Upper; |
| 245 | else |
| 246 | m_caseFirst = CaseFirst::False; |
| 247 | |
| 248 | String sensitivityString = intlStringOption(globalObject, options, vm.propertyNames->sensitivity, { "base" , "accent" , "case" , "variant" }, "sensitivity must be either \"base\", \"accent\", \"case\", or \"variant\"" , nullptr); |
| 249 | RETURN_IF_EXCEPTION(scope, void()); |
| 250 | if (sensitivityString == "base" ) |
| 251 | m_sensitivity = Sensitivity::Base; |
| 252 | else if (sensitivityString == "accent" ) |
| 253 | m_sensitivity = Sensitivity::Accent; |
| 254 | else if (sensitivityString == "case" ) |
| 255 | m_sensitivity = Sensitivity::Case; |
| 256 | else |
| 257 | m_sensitivity = Sensitivity::Variant; |
| 258 | |
| 259 | bool usesFallback; |
| 260 | bool ignorePunctuation = intlBooleanOption(globalObject, options, vm.propertyNames->ignorePunctuation, usesFallback); |
| 261 | if (usesFallback) |
| 262 | ignorePunctuation = false; |
| 263 | RETURN_IF_EXCEPTION(scope, void()); |
| 264 | m_ignorePunctuation = ignorePunctuation; |
| 265 | |
| 266 | m_initializedCollator = true; |
| 267 | } |
| 268 | |
| 269 | void IntlCollator::createCollator(JSGlobalObject* globalObject) |
| 270 | { |
| 271 | VM& vm = globalObject->vm(); |
| 272 | auto scope = DECLARE_CATCH_SCOPE(vm); |
| 273 | ASSERT(!m_collator); |
| 274 | |
| 275 | if (!m_initializedCollator) { |
| 276 | initializeCollator(globalObject, jsUndefined(), jsUndefined()); |
| 277 | scope.assertNoException(); |
| 278 | } |
| 279 | |
| 280 | UErrorCode status = U_ZERO_ERROR; |
| 281 | auto collator = std::unique_ptr<UCollator, UCollatorDeleter>(ucol_open(m_locale.utf8().data(), &status)); |
| 282 | if (U_FAILURE(status)) |
| 283 | return; |
| 284 | |
| 285 | UColAttributeValue strength = UCOL_PRIMARY; |
| 286 | UColAttributeValue caseLevel = UCOL_OFF; |
| 287 | UColAttributeValue caseFirst = UCOL_OFF; |
| 288 | switch (m_sensitivity) { |
| 289 | case Sensitivity::Base: |
| 290 | break; |
| 291 | case Sensitivity::Accent: |
| 292 | strength = UCOL_SECONDARY; |
| 293 | break; |
| 294 | case Sensitivity::Case: |
| 295 | caseLevel = UCOL_ON; |
| 296 | break; |
| 297 | case Sensitivity::Variant: |
| 298 | strength = UCOL_TERTIARY; |
| 299 | break; |
| 300 | } |
| 301 | switch (m_caseFirst) { |
| 302 | case CaseFirst::False: |
| 303 | break; |
| 304 | case CaseFirst::Lower: |
| 305 | caseFirst = UCOL_LOWER_FIRST; |
| 306 | break; |
| 307 | case CaseFirst::Upper: |
| 308 | caseFirst = UCOL_UPPER_FIRST; |
| 309 | break; |
| 310 | } |
| 311 | |
| 312 | ucol_setAttribute(collator.get(), UCOL_STRENGTH, strength, &status); |
| 313 | ucol_setAttribute(collator.get(), UCOL_CASE_LEVEL, caseLevel, &status); |
| 314 | ucol_setAttribute(collator.get(), UCOL_CASE_FIRST, caseFirst, &status); |
| 315 | ucol_setAttribute(collator.get(), UCOL_NUMERIC_COLLATION, m_numeric ? UCOL_ON : UCOL_OFF, &status); |
| 316 | |
| 317 | // FIXME: Setting UCOL_ALTERNATE_HANDLING to UCOL_SHIFTED causes punctuation and whitespace to be |
| 318 | // ignored. There is currently no way to ignore only punctuation. |
| 319 | ucol_setAttribute(collator.get(), UCOL_ALTERNATE_HANDLING, m_ignorePunctuation ? UCOL_SHIFTED : UCOL_DEFAULT, &status); |
| 320 | |
| 321 | // "The method is required to return 0 when comparing Strings that are considered canonically |
| 322 | // equivalent by the Unicode standard." |
| 323 | ucol_setAttribute(collator.get(), UCOL_NORMALIZATION_MODE, UCOL_ON, &status); |
| 324 | if (U_FAILURE(status)) |
| 325 | return; |
| 326 | |
| 327 | m_collator = WTFMove(collator); |
| 328 | } |
| 329 | |
| 330 | JSValue IntlCollator::compareStrings(JSGlobalObject* globalObject, StringView x, StringView y) |
| 331 | { |
| 332 | VM& vm = globalObject->vm(); |
| 333 | auto scope = DECLARE_THROW_SCOPE(vm); |
| 334 | |
| 335 | // 10.3.4 CompareStrings abstract operation (ECMA-402 2.0) |
| 336 | if (!m_collator) { |
| 337 | createCollator(globalObject); |
| 338 | if (!m_collator) |
| 339 | return throwException(globalObject, scope, createError(globalObject, "Failed to compare strings."_s )); |
| 340 | } |
| 341 | |
| 342 | UErrorCode status = U_ZERO_ERROR; |
| 343 | UCollationResult result = UCOL_EQUAL; |
| 344 | if (x.is8Bit() && y.is8Bit() && x.isAllASCII() && y.isAllASCII()) |
| 345 | result = ucol_strcollUTF8(m_collator.get(), bitwise_cast<const char*>(x.characters8()), x.length(), bitwise_cast<const char*>(y.characters8()), y.length(), &status); |
| 346 | else { |
| 347 | auto getCharacters = [&] (const StringView& view, Vector<UChar>& buffer) -> const UChar* { |
| 348 | if (!view.is8Bit()) |
| 349 | return view.characters16(); |
| 350 | buffer.resize(view.length()); |
| 351 | StringImpl::copyCharacters(buffer.data(), view.characters8(), view.length()); |
| 352 | return buffer.data(); |
| 353 | }; |
| 354 | |
| 355 | Vector<UChar> xBuffer; |
| 356 | Vector<UChar> yBuffer; |
| 357 | const UChar* xCharacters = getCharacters(x, xBuffer); |
| 358 | const UChar* yCharacters = getCharacters(y, yBuffer); |
| 359 | result = ucol_strcoll(m_collator.get(), xCharacters, x.length(), yCharacters, y.length()); |
| 360 | } |
| 361 | if (U_FAILURE(status)) |
| 362 | return throwException(globalObject, scope, createError(globalObject, "Failed to compare strings."_s )); |
| 363 | return jsNumber(result); |
| 364 | } |
| 365 | |
| 366 | ASCIILiteral IntlCollator::usageString(Usage usage) |
| 367 | { |
| 368 | switch (usage) { |
| 369 | case Usage::Sort: |
| 370 | return "sort"_s ; |
| 371 | case Usage::Search: |
| 372 | return "search"_s ; |
| 373 | } |
| 374 | ASSERT_NOT_REACHED(); |
| 375 | return ASCIILiteral::null(); |
| 376 | } |
| 377 | |
| 378 | ASCIILiteral IntlCollator::sensitivityString(Sensitivity sensitivity) |
| 379 | { |
| 380 | switch (sensitivity) { |
| 381 | case Sensitivity::Base: |
| 382 | return "base"_s ; |
| 383 | case Sensitivity::Accent: |
| 384 | return "accent"_s ; |
| 385 | case Sensitivity::Case: |
| 386 | return "case"_s ; |
| 387 | case Sensitivity::Variant: |
| 388 | return "variant"_s ; |
| 389 | } |
| 390 | ASSERT_NOT_REACHED(); |
| 391 | return ASCIILiteral::null(); |
| 392 | } |
| 393 | |
| 394 | ASCIILiteral IntlCollator::caseFirstString(CaseFirst caseFirst) |
| 395 | { |
| 396 | switch (caseFirst) { |
| 397 | case CaseFirst::False: |
| 398 | return "false"_s ; |
| 399 | case CaseFirst::Lower: |
| 400 | return "lower"_s ; |
| 401 | case CaseFirst::Upper: |
| 402 | return "upper"_s ; |
| 403 | } |
| 404 | ASSERT_NOT_REACHED(); |
| 405 | return ASCIILiteral::null(); |
| 406 | } |
| 407 | |
| 408 | JSObject* IntlCollator::resolvedOptions(JSGlobalObject* globalObject) |
| 409 | { |
| 410 | VM& vm = globalObject->vm(); |
| 411 | auto scope = DECLARE_THROW_SCOPE(vm); |
| 412 | |
| 413 | // 10.3.5 Intl.Collator.prototype.resolvedOptions() (ECMA-402 2.0) |
| 414 | // The function returns a new object whose properties and attributes are set as if |
| 415 | // constructed by an object literal assigning to each of the following properties the |
| 416 | // value of the corresponding internal slot of this Collator object (see 10.4): locale, |
| 417 | // usage, sensitivity, ignorePunctuation, collation, as well as those properties shown |
| 418 | // in Table 1 whose keys are included in the %Collator%[[relevantExtensionKeys]] |
| 419 | // internal slot of the standard built-in object that is the initial value of |
| 420 | // Intl.Collator. |
| 421 | |
| 422 | if (!m_initializedCollator) { |
| 423 | initializeCollator(globalObject, jsUndefined(), jsUndefined()); |
| 424 | scope.assertNoException(); |
| 425 | } |
| 426 | |
| 427 | JSObject* options = constructEmptyObject(globalObject); |
| 428 | options->putDirect(vm, vm.propertyNames->locale, jsString(vm, m_locale)); |
| 429 | options->putDirect(vm, vm.propertyNames->usage, jsNontrivialString(vm, usageString(m_usage))); |
| 430 | options->putDirect(vm, vm.propertyNames->sensitivity, jsNontrivialString(vm, sensitivityString(m_sensitivity))); |
| 431 | options->putDirect(vm, vm.propertyNames->ignorePunctuation, jsBoolean(m_ignorePunctuation)); |
| 432 | options->putDirect(vm, vm.propertyNames->collation, jsString(vm, m_collation)); |
| 433 | options->putDirect(vm, vm.propertyNames->numeric, jsBoolean(m_numeric)); |
| 434 | options->putDirect(vm, vm.propertyNames->caseFirst, jsNontrivialString(vm, caseFirstString(m_caseFirst))); |
| 435 | return options; |
| 436 | } |
| 437 | |
| 438 | void IntlCollator::setBoundCompare(VM& vm, JSBoundFunction* format) |
| 439 | { |
| 440 | m_boundCompare.set(vm, this, format); |
| 441 | } |
| 442 | |
| 443 | } // namespace JSC |
| 444 | |
| 445 | #endif // ENABLE(INTL) |
| 446 | |