1/*
2 * Copyright (C) 2004-2017 Apple Inc. All rights reserved.
3 * Copyright (C) 2008, 2009, 2010, 2011 Google Inc. All rights reserved.
4 * Copyright (C) 2011 Igalia S.L.
5 * Copyright (C) 2011 Motorola Mobility. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
20 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "config.h"
30#include "markup.h"
31
32#include "ArchiveResource.h"
33#include "CSSPrimitiveValue.h"
34#include "CSSPropertyNames.h"
35#include "CSSValue.h"
36#include "CSSValueKeywords.h"
37#include "CacheStorageProvider.h"
38#include "ChildListMutationScope.h"
39#include "Comment.h"
40#include "ComposedTreeIterator.h"
41#include "CustomHeaderFields.h"
42#include "DocumentFragment.h"
43#include "DocumentLoader.h"
44#include "DocumentType.h"
45#include "Editing.h"
46#include "Editor.h"
47#include "EditorClient.h"
48#include "ElementIterator.h"
49#include "EmptyClients.h"
50#include "File.h"
51#include "Frame.h"
52#include "FrameLoader.h"
53#include "HTMLAttachmentElement.h"
54#include "HTMLBRElement.h"
55#include "HTMLBodyElement.h"
56#include "HTMLDivElement.h"
57#include "HTMLHeadElement.h"
58#include "HTMLHtmlElement.h"
59#include "HTMLImageElement.h"
60#include "HTMLNames.h"
61#include "HTMLStyleElement.h"
62#include "HTMLTableElement.h"
63#include "HTMLTextAreaElement.h"
64#include "HTMLTextFormControlElement.h"
65#include "LibWebRTCProvider.h"
66#include "MarkupAccumulator.h"
67#include "NodeList.h"
68#include "Page.h"
69#include "PageConfiguration.h"
70#include "Range.h"
71#include "RenderBlock.h"
72#include "RuntimeEnabledFeatures.h"
73#include "Settings.h"
74#include "SocketProvider.h"
75#include "StyleProperties.h"
76#include "TextIterator.h"
77#include "TypedElementDescendantIterator.h"
78#include "VisibleSelection.h"
79#include "VisibleUnits.h"
80#include <wtf/StdLibExtras.h>
81#include <wtf/URL.h>
82#include <wtf/URLParser.h>
83#include <wtf/text/StringBuilder.h>
84#include <platform/PasteboardItemInfo.h>
85
86namespace WebCore {
87
88using namespace HTMLNames;
89
90static bool propertyMissingOrEqualToNone(StyleProperties*, CSSPropertyID);
91
92class AttributeChange {
93public:
94 AttributeChange()
95 : m_name(nullAtom(), nullAtom(), nullAtom())
96 {
97 }
98
99 AttributeChange(Element* element, const QualifiedName& name, const String& value)
100 : m_element(element), m_name(name), m_value(value)
101 {
102 }
103
104 void apply()
105 {
106 m_element->setAttribute(m_name, m_value);
107 }
108
109private:
110 RefPtr<Element> m_element;
111 QualifiedName m_name;
112 String m_value;
113};
114
115static void completeURLs(DocumentFragment* fragment, const String& baseURL)
116{
117 Vector<AttributeChange> changes;
118
119 URL parsedBaseURL({ }, baseURL);
120
121 for (auto& element : descendantsOfType<Element>(*fragment)) {
122 if (!element.hasAttributes())
123 continue;
124 for (const Attribute& attribute : element.attributesIterator()) {
125 if (element.attributeContainsURL(attribute) && !attribute.value().isEmpty())
126 changes.append(AttributeChange(&element, attribute.name(), element.completeURLsInAttributeValue(parsedBaseURL, attribute)));
127 }
128 }
129
130 for (auto& change : changes)
131 change.apply();
132}
133
134void replaceSubresourceURLs(Ref<DocumentFragment>&& fragment, HashMap<AtomString, AtomString>&& replacementMap)
135{
136 Vector<AttributeChange> changes;
137 for (auto& element : descendantsOfType<Element>(fragment)) {
138 if (!element.hasAttributes())
139 continue;
140 for (const Attribute& attribute : element.attributesIterator()) {
141 // FIXME: This won't work for srcset.
142 if (element.attributeContainsURL(attribute) && !attribute.value().isEmpty()) {
143 auto replacement = replacementMap.get(attribute.value());
144 if (!replacement.isNull())
145 changes.append({ &element, attribute.name(), replacement });
146 }
147 }
148 }
149 for (auto& change : changes)
150 change.apply();
151}
152
153struct ElementAttribute {
154 Ref<Element> element;
155 QualifiedName attributeName;
156};
157
158void removeSubresourceURLAttributes(Ref<DocumentFragment>&& fragment, WTF::Function<bool(const URL&)> shouldRemoveURL)
159{
160 Vector<ElementAttribute> attributesToRemove;
161 for (auto& element : descendantsOfType<Element>(fragment)) {
162 if (!element.hasAttributes())
163 continue;
164 for (const Attribute& attribute : element.attributesIterator()) {
165 // FIXME: This won't work for srcset.
166 if (element.attributeContainsURL(attribute) && !attribute.value().isEmpty()) {
167 URL url({ }, attribute.value());
168 if (shouldRemoveURL(url))
169 attributesToRemove.append({ element, attribute.name() });
170 }
171 }
172 }
173 for (auto& item : attributesToRemove)
174 item.element->removeAttribute(item.attributeName);
175}
176
177std::unique_ptr<Page> createPageForSanitizingWebContent()
178{
179 auto pageConfiguration = pageConfigurationWithEmptyClients();
180
181 auto page = std::make_unique<Page>(WTFMove(pageConfiguration));
182 page->settings().setMediaEnabled(false);
183 page->settings().setScriptEnabled(false);
184 page->settings().setPluginsEnabled(false);
185 page->settings().setAcceleratedCompositingEnabled(false);
186
187 Frame& frame = page->mainFrame();
188 frame.setView(FrameView::create(frame, IntSize { 800, 600 }));
189 frame.init();
190
191 FrameLoader& loader = frame.loader();
192 static char markup[] = "<!DOCTYPE html><html><body></body></html>";
193 ASSERT(loader.activeDocumentLoader());
194 auto& writer = loader.activeDocumentLoader()->writer();
195 writer.setMIMEType("text/html");
196 writer.begin();
197 writer.insertDataSynchronously(String(markup));
198 writer.end();
199 RELEASE_ASSERT(page->mainFrame().document()->body());
200
201 return page;
202}
203
204String sanitizeMarkup(const String& rawHTML, MSOListQuirks msoListQuirks, Optional<WTF::Function<void(DocumentFragment&)>> fragmentSanitizer)
205{
206 auto page = createPageForSanitizingWebContent();
207 Document* stagingDocument = page->mainFrame().document();
208 ASSERT(stagingDocument);
209
210 auto fragment = createFragmentFromMarkup(*stagingDocument, rawHTML, emptyString(), DisallowScriptingAndPluginContent);
211
212 if (fragmentSanitizer)
213 (*fragmentSanitizer)(fragment);
214
215 return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, msoListQuirks, rawHTML);
216}
217
// Selects whether StyledMarkupAccumulator runs its MSO list preservation logic
// (see m_shouldPreserveMSOList).
enum class MSOListMode {
    Preserve,
    DoNotPreserve
};
219class StyledMarkupAccumulator final : public MarkupAccumulator {
220public:
221 enum RangeFullySelectsNode { DoesFullySelectNode, DoesNotFullySelectNode };
222
223 StyledMarkupAccumulator(const Position& start, const Position& end, Vector<Node*>* nodes, ResolveURLs, SerializeComposedTree,
224 AnnotateForInterchange, MSOListMode, bool needsPositionStyleConversion, Node* highestNodeToBeSerialized = nullptr);
225
226 Node* serializeNodes(const Position& start, const Position& end);
227 void wrapWithNode(Node&, bool convertBlocksToInlines = false, RangeFullySelectsNode = DoesFullySelectNode);
228 void wrapWithStyleNode(StyleProperties*, Document&, bool isBlock = false);
229 String takeResults();
230
231 bool needRelativeStyleWrapper() const { return m_needRelativeStyleWrapper; }
232 bool needClearingDiv() const { return m_needClearingDiv; }
233
234 using MarkupAccumulator::appendString;
235
236 ContainerNode* parentNode(Node& node)
237 {
238 if (UNLIKELY(m_useComposedTree))
239 return node.parentInComposedTree();
240 return node.parentOrShadowHostNode();
241 }
242
243private:
244 void appendStyleNodeOpenTag(StringBuilder&, StyleProperties*, Document&, bool isBlock = false);
245 const String& styleNodeCloseTag(bool isBlock = false);
246
247 String renderedTextRespectingRange(const Text&);
248 String textContentRespectingRange(const Text&);
249
250 bool shouldPreserveMSOListStyleForElement(const Element&);
251
252 void appendStartTag(StringBuilder& out, const Element&, bool addDisplayInline, RangeFullySelectsNode);
253 void appendEndTag(StringBuilder& out, const Element&) override;
254 void appendCustomAttributes(StringBuilder&, const Element&, Namespaces*) override;
255
256 void appendText(StringBuilder& out, const Text&) override;
257 void appendStartTag(StringBuilder& out, const Element& element, Namespaces*) override
258 {
259 appendStartTag(out, element, false, DoesFullySelectNode);
260 }
261
262 Node* firstChild(Node& node)
263 {
264 if (UNLIKELY(m_useComposedTree))
265 return firstChildInComposedTreeIgnoringUserAgentShadow(node);
266 return node.firstChild();
267 }
268
269 Node* nextSibling(Node& node)
270 {
271 if (UNLIKELY(m_useComposedTree))
272 return nextSiblingInComposedTreeIgnoringUserAgentShadow(node);
273 return node.nextSibling();
274 }
275
276 Node* nextSkippingChildren(Node& node)
277 {
278 if (UNLIKELY(m_useComposedTree))
279 return nextSkippingChildrenInComposedTreeIgnoringUserAgentShadow(node);
280 return NodeTraversal::nextSkippingChildren(node);
281 }
282
283 bool hasChildNodes(Node& node)
284 {
285 if (UNLIKELY(m_useComposedTree))
286 return firstChildInComposedTreeIgnoringUserAgentShadow(node);
287 return node.hasChildNodes();
288 }
289
290 bool isDescendantOf(Node& node, Node& possibleAncestor)
291 {
292 if (UNLIKELY(m_useComposedTree))
293 return node.isDescendantOrShadowDescendantOf(&possibleAncestor);
294 return node.isDescendantOf(&possibleAncestor);
295 }
296
297 enum class NodeTraversalMode { EmitString, DoNotEmitString };
298 Node* traverseNodesForSerialization(Node* startNode, Node* pastEnd, NodeTraversalMode);
299
300 bool appendNodeToPreserveMSOList(Node&);
301
302 bool shouldAnnotate()
303 {
304 return m_annotate == AnnotateForInterchange::Yes;
305 }
306
307 bool shouldApplyWrappingStyle(const Node& node) const
308 {
309 return m_highestNodeToBeSerialized && m_highestNodeToBeSerialized->parentNode() == node.parentNode() && m_wrappingStyle && m_wrappingStyle->style();
310 }
311
312 Position m_start;
313 Position m_end;
314 Vector<String> m_reversedPrecedingMarkup;
315 const AnnotateForInterchange m_annotate;
316 RefPtr<Node> m_highestNodeToBeSerialized;
317 RefPtr<EditingStyle> m_wrappingStyle;
318 bool m_useComposedTree;
319 bool m_needsPositionStyleConversion;
320 bool m_needRelativeStyleWrapper { false };
321 bool m_needClearingDiv { false };
322 bool m_shouldPreserveMSOList;
323 bool m_inMSOList { false };
324};
325
326inline StyledMarkupAccumulator::StyledMarkupAccumulator(const Position& start, const Position& end, Vector<Node*>* nodes, ResolveURLs urlsToResolve, SerializeComposedTree serializeComposedTree,
327 AnnotateForInterchange annotate, MSOListMode msoListMode, bool needsPositionStyleConversion, Node* highestNodeToBeSerialized)
328 : MarkupAccumulator(nodes, urlsToResolve)
329 , m_start(start)
330 , m_end(end)
331 , m_annotate(annotate)
332 , m_highestNodeToBeSerialized(highestNodeToBeSerialized)
333 , m_useComposedTree(serializeComposedTree == SerializeComposedTree::Yes)
334 , m_needsPositionStyleConversion(needsPositionStyleConversion)
335 , m_shouldPreserveMSOList(msoListMode == MSOListMode::Preserve)
336{
337}
338
339void StyledMarkupAccumulator::wrapWithNode(Node& node, bool convertBlocksToInlines, RangeFullySelectsNode rangeFullySelectsNode)
340{
341 StringBuilder markup;
342 if (is<Element>(node))
343 appendStartTag(markup, downcast<Element>(node), convertBlocksToInlines && isBlock(&node), rangeFullySelectsNode);
344 else
345 appendNonElementNode(markup, node, nullptr);
346 m_reversedPrecedingMarkup.append(markup.toString());
347 endAppendingNode(node);
348 if (m_nodes)
349 m_nodes->append(&node);
350}
351
352void StyledMarkupAccumulator::wrapWithStyleNode(StyleProperties* style, Document& document, bool isBlock)
353{
354 StringBuilder openTag;
355 appendStyleNodeOpenTag(openTag, style, document, isBlock);
356 m_reversedPrecedingMarkup.append(openTag.toString());
357 appendString(styleNodeCloseTag(isBlock));
358}
359
360void StyledMarkupAccumulator::appendStyleNodeOpenTag(StringBuilder& out, StyleProperties* style, Document& document, bool isBlock)
361{
362 // wrappingStyleForSerialization should have removed -webkit-text-decorations-in-effect
363 ASSERT(propertyMissingOrEqualToNone(style, CSSPropertyWebkitTextDecorationsInEffect));
364 if (isBlock)
365 out.appendLiteral("<div style=\"");
366 else
367 out.appendLiteral("<span style=\"");
368 appendAttributeValue(out, style->asText(), document.isHTMLDocument());
369 out.appendLiteral("\">");
370}
371
372const String& StyledMarkupAccumulator::styleNodeCloseTag(bool isBlock)
373{
374 static NeverDestroyed<const String> divClose(MAKE_STATIC_STRING_IMPL("</div>"));
375 static NeverDestroyed<const String> styleSpanClose(MAKE_STATIC_STRING_IMPL("</span>"));
376 return isBlock ? divClose : styleSpanClose;
377}
378
379String StyledMarkupAccumulator::takeResults()
380{
381 StringBuilder result;
382 result.reserveCapacity(totalLength(m_reversedPrecedingMarkup) + length());
383
384 for (size_t i = m_reversedPrecedingMarkup.size(); i > 0; --i)
385 result.append(m_reversedPrecedingMarkup[i - 1]);
386
387 concatenateMarkup(result);
388
389 // We remove '\0' characters because they are not visibly rendered to the user.
390 return result.toString().replaceWithLiteral('\0', "");
391}
392
393void StyledMarkupAccumulator::appendText(StringBuilder& out, const Text& text)
394{
395 const bool parentIsTextarea = is<HTMLTextAreaElement>(text.parentElement());
396 const bool wrappingSpan = shouldApplyWrappingStyle(text) && !parentIsTextarea;
397 if (wrappingSpan) {
398 RefPtr<EditingStyle> wrappingStyle = m_wrappingStyle->copy();
399 // FIXME: <rdar://problem/5371536> Style rules that match pasted content can change it's appearance
400 // Make sure spans are inline style in paste side e.g. span { display: block }.
401 wrappingStyle->forceInline();
402 // FIXME: Should this be included in forceInline?
403 wrappingStyle->style()->setProperty(CSSPropertyFloat, CSSValueNone);
404
405 appendStyleNodeOpenTag(out, wrappingStyle->style(), text.document());
406 }
407
408 if (!shouldAnnotate() || parentIsTextarea) {
409 auto content = textContentRespectingRange(text);
410 appendCharactersReplacingEntities(out, content, 0, content.length(), entityMaskForText(text));
411 } else {
412 const bool useRenderedText = !enclosingElementWithTag(firstPositionInNode(const_cast<Text*>(&text)), selectTag);
413 String content = useRenderedText ? renderedTextRespectingRange(text) : textContentRespectingRange(text);
414 StringBuilder buffer;
415 appendCharactersReplacingEntities(buffer, content, 0, content.length(), EntityMaskInPCDATA);
416 out.append(convertHTMLTextToInterchangeFormat(buffer.toString(), &text));
417 }
418
419 if (wrappingSpan)
420 out.append(styleNodeCloseTag());
421}
422
423String StyledMarkupAccumulator::renderedTextRespectingRange(const Text& text)
424{
425 TextIteratorBehavior behavior = TextIteratorDefaultBehavior;
426 Position start = &text == m_start.containerNode() ? m_start : firstPositionInNode(const_cast<Text*>(&text));
427 Position end;
428 if (&text == m_end.containerNode())
429 end = m_end;
430 else {
431 end = lastPositionInNode(const_cast<Text*>(&text));
432 if (!m_end.isNull())
433 behavior = TextIteratorBehavesAsIfNodesFollowing;
434 }
435
436 return plainText(Range::create(text.document(), start, end).ptr(), behavior);
437}
438
439String StyledMarkupAccumulator::textContentRespectingRange(const Text& text)
440{
441 if (m_start.isNull() && m_end.isNull())
442 return text.data();
443
444 unsigned start = 0;
445 unsigned end = std::numeric_limits<unsigned>::max();
446 if (&text == m_start.containerNode())
447 start = m_start.offsetInContainerNode();
448 if (&text == m_end.containerNode())
449 end = m_end.offsetInContainerNode();
450 ASSERT(start < end);
451 return text.data().substring(start, end - start);
452}
453
454void StyledMarkupAccumulator::appendCustomAttributes(StringBuilder& out, const Element& element, Namespaces* namespaces)
455{
456#if ENABLE(ATTACHMENT_ELEMENT)
457 if (!RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled())
458 return;
459
460 if (is<HTMLAttachmentElement>(element)) {
461 auto& attachment = downcast<HTMLAttachmentElement>(element);
462 appendAttribute(out, element, { webkitattachmentidAttr, attachment.uniqueIdentifier() }, namespaces);
463 if (auto* file = attachment.file()) {
464 // These attributes are only intended for File deserialization, and are removed from the generated attachment
465 // element after we've deserialized and set its backing File, in restoreAttachmentElementsInFragment.
466 appendAttribute(out, element, { webkitattachmentpathAttr, file->path() }, namespaces);
467 appendAttribute(out, element, { webkitattachmentbloburlAttr, file->url().string() }, namespaces);
468 }
469 } else if (is<HTMLImageElement>(element)) {
470 if (auto attachment = downcast<HTMLImageElement>(element).attachmentElement())
471 appendAttribute(out, element, { webkitattachmentidAttr, attachment->uniqueIdentifier() }, namespaces);
472 }
473#else
474 UNUSED_PARAM(out);
475 UNUSED_PARAM(element);
476 UNUSED_PARAM(namespaces);
477#endif
478}
479
480bool StyledMarkupAccumulator::shouldPreserveMSOListStyleForElement(const Element& element)
481{
482 if (m_inMSOList)
483 return true;
484 if (m_shouldPreserveMSOList) {
485 auto style = element.getAttribute(styleAttr);
486 return style.startsWith("mso-list:") || style.contains(";mso-list:") || style.contains("\nmso-list:");
487 }
488 return false;
489}
490
491void StyledMarkupAccumulator::appendStartTag(StringBuilder& out, const Element& element, bool addDisplayInline, RangeFullySelectsNode rangeFullySelectsNode)
492{
493 const bool documentIsHTML = element.document().isHTMLDocument();
494 const bool isSlotElement = is<HTMLSlotElement>(element);
495 if (UNLIKELY(isSlotElement))
496 out.append("<span");
497 else
498 appendOpenTag(out, element, nullptr);
499
500 appendCustomAttributes(out, element, nullptr);
501
502 const bool shouldAnnotateOrForceInline = element.isHTMLElement() && (shouldAnnotate() || addDisplayInline);
503 bool shouldOverrideStyleAttr = (shouldAnnotateOrForceInline || shouldApplyWrappingStyle(element) || isSlotElement) && !shouldPreserveMSOListStyleForElement(element);
504 if (element.hasAttributes()) {
505 for (const Attribute& attribute : element.attributesIterator()) {
506 // We'll handle the style attribute separately, below.
507 if (attribute.name() == styleAttr && shouldOverrideStyleAttr)
508 continue;
509 if (element.isEventHandlerAttribute(attribute) || element.isJavaScriptURLAttribute(attribute))
510 continue;
511 appendAttribute(out, element, attribute, 0);
512 }
513 }
514
515 if (shouldOverrideStyleAttr) {
516 RefPtr<EditingStyle> newInlineStyle;
517
518 if (shouldApplyWrappingStyle(element)) {
519 newInlineStyle = m_wrappingStyle->copy();
520 newInlineStyle->removePropertiesInElementDefaultStyle(*const_cast<Element*>(&element));
521 newInlineStyle->removeStyleConflictingWithStyleOfNode(*const_cast<Element*>(&element));
522 } else
523 newInlineStyle = EditingStyle::create();
524
525 if (isSlotElement)
526 newInlineStyle->addDisplayContents();
527
528 if (is<StyledElement>(element) && downcast<StyledElement>(element).inlineStyle())
529 newInlineStyle->overrideWithStyle(*downcast<StyledElement>(element).inlineStyle());
530
531 if (shouldAnnotateOrForceInline) {
532 if (shouldAnnotate())
533 newInlineStyle->mergeStyleFromRulesForSerialization(downcast<HTMLElement>(*const_cast<Element*>(&element)));
534
535 if (addDisplayInline)
536 newInlineStyle->forceInline();
537
538 if (m_needsPositionStyleConversion) {
539 m_needRelativeStyleWrapper |= newInlineStyle->convertPositionStyle();
540 m_needClearingDiv |= newInlineStyle->isFloating();
541 }
542
543 // If the node is not fully selected by the range, then we don't want to keep styles that affect its relationship to the nodes around it
544 // only the ones that affect it and the nodes within it.
545 if (rangeFullySelectsNode == DoesNotFullySelectNode && newInlineStyle->style())
546 newInlineStyle->style()->removeProperty(CSSPropertyFloat);
547 }
548
549 if (!newInlineStyle->isEmpty()) {
550 out.appendLiteral(" style=\"");
551 appendAttributeValue(out, newInlineStyle->style()->asText(), documentIsHTML);
552 out.append('\"');
553 }
554 }
555
556 appendCloseTag(out, element);
557}
558
559void StyledMarkupAccumulator::appendEndTag(StringBuilder& out, const Element& element)
560{
561 if (UNLIKELY(is<HTMLSlotElement>(element)))
562 out.append("</span>");
563 else
564 MarkupAccumulator::appendEndTag(out, element);
565}
566
567Node* StyledMarkupAccumulator::serializeNodes(const Position& start, const Position& end)
568{
569 ASSERT(comparePositions(start, end) <= 0);
570 auto startNode = start.firstNode();
571 Node* pastEnd = end.computeNodeAfterPosition();
572 if (!pastEnd && end.containerNode())
573 pastEnd = nextSkippingChildren(*end.containerNode());
574
575 if (!m_highestNodeToBeSerialized) {
576 Node* lastClosed = traverseNodesForSerialization(startNode.get(), pastEnd, NodeTraversalMode::DoNotEmitString);
577 m_highestNodeToBeSerialized = lastClosed;
578 }
579
580 if (m_highestNodeToBeSerialized && m_highestNodeToBeSerialized->parentNode())
581 m_wrappingStyle = EditingStyle::wrappingStyleForSerialization(*m_highestNodeToBeSerialized->parentNode(), shouldAnnotate());
582
583 return traverseNodesForSerialization(startNode.get(), pastEnd, NodeTraversalMode::EmitString);
584}
585
586Node* StyledMarkupAccumulator::traverseNodesForSerialization(Node* startNode, Node* pastEnd, NodeTraversalMode traversalMode)
587{
588 const bool shouldEmit = traversalMode == NodeTraversalMode::EmitString;
589
590 m_inMSOList = false;
591
592 unsigned depth = 0;
593 auto enterNode = [&] (Node& node) {
594 if (UNLIKELY(m_shouldPreserveMSOList) && shouldEmit) {
595 if (appendNodeToPreserveMSOList(node))
596 return false;
597 }
598
599 bool isDisplayContents = is<Element>(node) && downcast<Element>(node).hasDisplayContents();
600 if (!node.renderer() && !isDisplayContents && !enclosingElementWithTag(firstPositionInOrBeforeNode(&node), selectTag))
601 return false;
602
603 ++depth;
604 if (shouldEmit)
605 startAppendingNode(node);
606
607 return true;
608 };
609
610 Node* lastClosed = nullptr;
611 auto exitNode = [&] (Node& node) {
612 bool closing = depth;
613 if (depth)
614 --depth;
615 if (shouldEmit) {
616 if (closing)
617 endAppendingNode(node);
618 else
619 wrapWithNode(node);
620 }
621 lastClosed = &node;
622 };
623
624 Node* lastNode = nullptr;
625 Node* next = nullptr;
626 for (auto* n = startNode; n != pastEnd; lastNode = n, n = next) {
627
628 Vector<Node*, 8> exitedAncestors;
629 next = nullptr;
630 if (auto* child = firstChild(*n))
631 next = child;
632 else if (auto* sibling = nextSibling(*n))
633 next = sibling;
634 else {
635 for (auto* ancestor = parentNode(*n); ancestor; ancestor = parentNode(*ancestor)) {
636 exitedAncestors.append(ancestor);
637 if (auto* sibling = nextSibling(*ancestor)) {
638 next = sibling;
639 break;
640 }
641 }
642 }
643 ASSERT(next || !pastEnd);
644
645 if (isBlock(n) && canHaveChildrenForEditing(*n) && next == pastEnd) {
646 // Don't write out empty block containers that aren't fully selected.
647 continue;
648 }
649
650 if (!enterNode(*n)) {
651 next = nextSkippingChildren(*n);
652 // Don't skip over pastEnd.
653 if (pastEnd && isDescendantOf(*pastEnd, *n))
654 next = pastEnd;
655 ASSERT(next || !pastEnd);
656 } else {
657 if (!hasChildNodes(*n))
658 exitNode(*n);
659 }
660
661 for (auto* ancestor : exitedAncestors) {
662 if (!depth && next == pastEnd)
663 break;
664 exitNode(*ancestor);
665 }
666 }
667
668 ASSERT(lastNode || !depth);
669 if (depth) {
670 for (auto* ancestor = parentNode(pastEnd ? *pastEnd : *lastNode); ancestor && depth; ancestor = parentNode(*ancestor))
671 exitNode(*ancestor);
672 }
673
674 return lastClosed;
675}
676
677bool StyledMarkupAccumulator::appendNodeToPreserveMSOList(Node& node)
678{
679 if (is<Comment>(node)) {
680 auto& commentNode = downcast<Comment>(node);
681 if (!m_inMSOList && commentNode.data() == "[if !supportLists]")
682 m_inMSOList = true;
683 else if (m_inMSOList && commentNode.data() == "[endif]")
684 m_inMSOList = false;
685 else
686 return false;
687 startAppendingNode(commentNode);
688 return true;
689 }
690 if (is<HTMLStyleElement>(node)) {
691 auto* firstChild = node.firstChild();
692 if (!is<Text>(firstChild))
693 return false;
694
695 auto& textChild = downcast<Text>(*firstChild);
696 auto& styleContent = textChild.data();
697
698 const auto msoStyleDefinitionsStart = styleContent.find("/* Style Definitions */");
699 const auto msoListDefinitionsStart = styleContent.find("/* List Definitions */");
700 const auto lastListItem = styleContent.reverseFind("\n@list");
701 if (msoListDefinitionsStart == notFound || lastListItem == notFound)
702 return false;
703 const auto start = msoStyleDefinitionsStart != notFound && msoStyleDefinitionsStart < msoListDefinitionsStart ? msoStyleDefinitionsStart : msoListDefinitionsStart;
704
705 const auto msoListDefinitionsEnd = styleContent.find(";}\n", lastListItem);
706 if (msoListDefinitionsEnd == notFound || start >= msoListDefinitionsEnd)
707 return false;
708
709 appendString("<head><style class=\"" WebKitMSOListQuirksStyle "\">\n<!--\n");
710 appendStringView(StringView(textChild.data()).substring(start, msoListDefinitionsEnd - start + 3));
711 appendString("\n-->\n</style></head>");
712
713 return true;
714 }
715 return false;
716}
717
718static Node* ancestorToRetainStructureAndAppearanceForBlock(Node* commonAncestorBlock)
719{
720 if (!commonAncestorBlock)
721 return nullptr;
722
723 if (commonAncestorBlock->hasTagName(tbodyTag) || commonAncestorBlock->hasTagName(trTag)) {
724 ContainerNode* table = commonAncestorBlock->parentNode();
725 while (table && !is<HTMLTableElement>(*table))
726 table = table->parentNode();
727
728 return table;
729 }
730
731 if (isNonTableCellHTMLBlockElement(commonAncestorBlock))
732 return commonAncestorBlock;
733
734 return nullptr;
735}
736
737static inline Node* ancestorToRetainStructureAndAppearance(Node* commonAncestor)
738{
739 return ancestorToRetainStructureAndAppearanceForBlock(enclosingBlock(commonAncestor));
740}
741
742static bool propertyMissingOrEqualToNone(StyleProperties* style, CSSPropertyID propertyID)
743{
744 if (!style)
745 return false;
746 RefPtr<CSSValue> value = style->getPropertyCSSValue(propertyID);
747 if (!value)
748 return true;
749 if (!is<CSSPrimitiveValue>(*value))
750 return false;
751 return downcast<CSSPrimitiveValue>(*value).valueID() == CSSValueNone;
752}
753
754static bool needInterchangeNewlineAfter(const VisiblePosition& v)
755{
756 VisiblePosition next = v.next();
757 Node* upstreamNode = next.deepEquivalent().upstream().deprecatedNode();
758 Node* downstreamNode = v.deepEquivalent().downstream().deprecatedNode();
759 // Add an interchange newline if a paragraph break is selected and a br won't already be added to the markup to represent it.
760 return isEndOfParagraph(v) && isStartOfParagraph(next) && !(upstreamNode->hasTagName(brTag) && upstreamNode == downstreamNode);
761}
762
763static RefPtr<EditingStyle> styleFromMatchedRulesAndInlineDecl(Node& node)
764{
765 if (!is<HTMLElement>(node))
766 return nullptr;
767
768 auto& element = downcast<HTMLElement>(node);
769 auto style = EditingStyle::create(element.inlineStyle());
770 style->mergeStyleFromRules(element);
771 return style;
772}
773
774static bool isElementPresentational(const Node* node)
775{
776 return node->hasTagName(uTag) || node->hasTagName(sTag) || node->hasTagName(strikeTag)
777 || node->hasTagName(iTag) || node->hasTagName(emTag) || node->hasTagName(bTag) || node->hasTagName(strongTag);
778}
779
780static Node* highestAncestorToWrapMarkup(const Position& start, const Position& end, Node& commonAncestor, AnnotateForInterchange annotate)
781{
782 Node* specialCommonAncestor = nullptr;
783 if (annotate == AnnotateForInterchange::Yes) {
784 // Include ancestors that aren't completely inside the range but are required to retain
785 // the structure and appearance of the copied markup.
786 specialCommonAncestor = ancestorToRetainStructureAndAppearance(&commonAncestor);
787
788 if (auto* parentListNode = enclosingNodeOfType(start, isListItem)) {
789 if (!editingIgnoresContent(*parentListNode) && VisibleSelection::selectionFromContentsOfNode(parentListNode) == VisibleSelection(start, end)) {
790 specialCommonAncestor = parentListNode->parentNode();
791 while (specialCommonAncestor && !isListHTMLElement(specialCommonAncestor))
792 specialCommonAncestor = specialCommonAncestor->parentNode();
793 }
794 }
795
796 // Retain the Mail quote level by including all ancestor mail block quotes.
797 if (Node* highestMailBlockquote = highestEnclosingNodeOfType(start, isMailBlockquote, CanCrossEditingBoundary))
798 specialCommonAncestor = highestMailBlockquote;
799 }
800
801 auto* checkAncestor = specialCommonAncestor ? specialCommonAncestor : &commonAncestor;
802 if (checkAncestor->renderer() && checkAncestor->renderer()->containingBlock()) {
803 Node* newSpecialCommonAncestor = highestEnclosingNodeOfType(firstPositionInNode(checkAncestor), &isElementPresentational, CanCrossEditingBoundary, checkAncestor->renderer()->containingBlock()->element());
804 if (newSpecialCommonAncestor)
805 specialCommonAncestor = newSpecialCommonAncestor;
806 }
807
808 // If a single tab is selected, commonAncestor will be a text node inside a tab span.
809 // If two or more tabs are selected, commonAncestor will be the tab span.
810 // In either case, if there is a specialCommonAncestor already, it will necessarily be above
811 // any tab span that needs to be included.
812 if (!specialCommonAncestor && isTabSpanTextNode(&commonAncestor))
813 specialCommonAncestor = commonAncestor.parentNode();
814 if (!specialCommonAncestor && isTabSpanNode(&commonAncestor))
815 specialCommonAncestor = &commonAncestor;
816
817 if (auto* enclosingAnchor = enclosingElementWithTag(firstPositionInNode(specialCommonAncestor ? specialCommonAncestor : &commonAncestor), aTag))
818 specialCommonAncestor = enclosingAnchor;
819
820 return specialCommonAncestor;
821}
822
823static String serializePreservingVisualAppearanceInternal(const Position& start, const Position& end, Vector<Node*>* nodes, ResolveURLs urlsToResolve, SerializeComposedTree serializeComposedTree,
824 AnnotateForInterchange annotate, ConvertBlocksToInlines convertBlocksToInlines, MSOListMode msoListMode)
825{
826 static NeverDestroyed<const String> interchangeNewlineString(MAKE_STATIC_STRING_IMPL("<br class=\"" AppleInterchangeNewline "\">"));
827
828 if (!comparePositions(start, end))
829 return emptyString();
830
831 RefPtr<Node> commonAncestor = commonShadowIncludingAncestor(start, end);
832 if (!commonAncestor)
833 return emptyString();
834
835 auto& document = *start.document();
836 document.updateLayoutIgnorePendingStylesheets();
837
838 VisiblePosition visibleStart { start };
839 VisiblePosition visibleEnd { end };
840
841 auto body = makeRefPtr(enclosingElementWithTag(firstPositionInNode(commonAncestor.get()), bodyTag));
842 RefPtr<Element> fullySelectedRoot;
843 // FIXME: Do this for all fully selected blocks, not just the body.
844 if (body && VisiblePosition(firstPositionInNode(body.get())) == visibleStart && VisiblePosition(lastPositionInNode(body.get())) == visibleEnd)
845 fullySelectedRoot = body;
846 bool needsPositionStyleConversion = body && fullySelectedRoot == body && document.settings().shouldConvertPositionStyleOnCopy();
847
848 Node* specialCommonAncestor = highestAncestorToWrapMarkup(start, end, *commonAncestor, annotate);
849
850 StyledMarkupAccumulator accumulator(start, end, nodes, urlsToResolve, serializeComposedTree, annotate, msoListMode, needsPositionStyleConversion, specialCommonAncestor);
851
852 Position startAdjustedForInterchangeNewline = start;
853 if (annotate == AnnotateForInterchange::Yes && needInterchangeNewlineAfter(visibleStart)) {
854 if (visibleStart == visibleEnd.previous())
855 return interchangeNewlineString;
856
857 accumulator.appendString(interchangeNewlineString);
858 startAdjustedForInterchangeNewline = visibleStart.next().deepEquivalent();
859
860 if (comparePositions(startAdjustedForInterchangeNewline, end) >= 0)
861 return interchangeNewlineString;
862 }
863
864 Node* lastClosed = accumulator.serializeNodes(startAdjustedForInterchangeNewline, end);
865
866 if (specialCommonAncestor && lastClosed) {
867 // Also include all of the ancestors of lastClosed up to this special ancestor.
868 for (ContainerNode* ancestor = accumulator.parentNode(*lastClosed); ancestor; ancestor = accumulator.parentNode(*ancestor)) {
869 if (ancestor == fullySelectedRoot && convertBlocksToInlines == ConvertBlocksToInlines::No) {
870 RefPtr<EditingStyle> fullySelectedRootStyle = styleFromMatchedRulesAndInlineDecl(*fullySelectedRoot);
871
872 // Bring the background attribute over, but not as an attribute because a background attribute on a div
873 // appears to have no effect.
874 if ((!fullySelectedRootStyle || !fullySelectedRootStyle->style() || !fullySelectedRootStyle->style()->getPropertyCSSValue(CSSPropertyBackgroundImage))
875 && fullySelectedRoot->hasAttributeWithoutSynchronization(backgroundAttr))
876 fullySelectedRootStyle->style()->setProperty(CSSPropertyBackgroundImage, "url('" + fullySelectedRoot->getAttribute(backgroundAttr) + "')");
877
878 if (fullySelectedRootStyle->style()) {
879 // Reset the CSS properties to avoid an assertion error in addStyleMarkup().
880 // This assertion is caused at least when we select all text of a <body> element whose
881 // 'text-decoration' property is "inherit", and copy it.
882 if (!propertyMissingOrEqualToNone(fullySelectedRootStyle->style(), CSSPropertyTextDecoration))
883 fullySelectedRootStyle->style()->setProperty(CSSPropertyTextDecoration, CSSValueNone);
884 if (!propertyMissingOrEqualToNone(fullySelectedRootStyle->style(), CSSPropertyWebkitTextDecorationsInEffect))
885 fullySelectedRootStyle->style()->setProperty(CSSPropertyWebkitTextDecorationsInEffect, CSSValueNone);
886 accumulator.wrapWithStyleNode(fullySelectedRootStyle->style(), document, true);
887 }
888 } else {
889 // Since this node and all the other ancestors are not in the selection we want to set RangeFullySelectsNode to DoesNotFullySelectNode
890 // so that styles that affect the exterior of the node are not included.
891 accumulator.wrapWithNode(*ancestor, convertBlocksToInlines == ConvertBlocksToInlines::Yes, StyledMarkupAccumulator::DoesNotFullySelectNode);
892 }
893 if (nodes)
894 nodes->append(ancestor);
895
896 if (ancestor == specialCommonAncestor)
897 break;
898 }
899 }
900
901 if (accumulator.needRelativeStyleWrapper() && needsPositionStyleConversion) {
902 if (accumulator.needClearingDiv())
903 accumulator.appendString("<div style=\"clear: both;\"></div>");
904 RefPtr<EditingStyle> positionRelativeStyle = styleFromMatchedRulesAndInlineDecl(*body);
905 positionRelativeStyle->style()->setProperty(CSSPropertyPosition, CSSValueRelative);
906 accumulator.wrapWithStyleNode(positionRelativeStyle->style(), document, true);
907 }
908
909 // FIXME: The interchange newline should be placed in the block that it's in, not after all of the content, unconditionally.
910 if (annotate == AnnotateForInterchange::Yes && needInterchangeNewlineAfter(visibleEnd.previous()))
911 accumulator.appendString(interchangeNewlineString);
912
913 return accumulator.takeResults();
914}
915
916String serializePreservingVisualAppearance(const Range& range, Vector<Node*>* nodes, AnnotateForInterchange annotate, ConvertBlocksToInlines convertBlocksToInlines, ResolveURLs urlsToReslve)
917{
918 return serializePreservingVisualAppearanceInternal(range.startPosition(), range.endPosition(), nodes, urlsToReslve, SerializeComposedTree::No,
919 annotate, convertBlocksToInlines, MSOListMode::DoNotPreserve);
920}
921
922String serializePreservingVisualAppearance(const VisibleSelection& selection, ResolveURLs resolveURLs, SerializeComposedTree serializeComposedTree, Vector<Node*>* nodes)
923{
924 return serializePreservingVisualAppearanceInternal(selection.start(), selection.end(), nodes, resolveURLs, serializeComposedTree,
925 AnnotateForInterchange::Yes, ConvertBlocksToInlines::No, MSOListMode::DoNotPreserve);
926}
927
928
929static bool shouldPreserveMSOLists(const String& markup)
930{
931 if (!markup.startsWith("<html xmlns:"))
932 return false;
933 auto tagClose = markup.find('>');
934 if (tagClose == notFound)
935 return false;
936 auto htmlTag = markup.substring(0, tagClose);
937 return htmlTag.contains("xmlns:o=\"urn:schemas-microsoft-com:office:office\"")
938 && htmlTag.contains("xmlns:w=\"urn:schemas-microsoft-com:office:word\"");
939}
940
941String sanitizedMarkupForFragmentInDocument(Ref<DocumentFragment>&& fragment, Document& document, MSOListQuirks msoListQuirks, const String& originalMarkup)
942{
943 MSOListMode msoListMode = msoListQuirks == MSOListQuirks::CheckIfNeeded && shouldPreserveMSOLists(originalMarkup)
944 ? MSOListMode::Preserve : MSOListMode::DoNotPreserve;
945
946 auto bodyElement = makeRefPtr(document.body());
947 ASSERT(bodyElement);
948 bodyElement->appendChild(fragment.get());
949
950 // SerializeComposedTree::No because there can't be a shadow tree in the pasted fragment.
951 auto result = serializePreservingVisualAppearanceInternal(firstPositionInNode(bodyElement.get()), lastPositionInNode(bodyElement.get()), nullptr,
952 ResolveURLs::YesExcludingLocalFileURLsForPrivacy, SerializeComposedTree::No, AnnotateForInterchange::Yes, ConvertBlocksToInlines::No, msoListMode);
953
954 if (msoListMode == MSOListMode::Preserve) {
955 StringBuilder builder;
956 builder.appendLiteral("<html xmlns:o=\"urn:schemas-microsoft-com:office:office\"\n"
957 "xmlns:w=\"urn:schemas-microsoft-com:office:word\"\n"
958 "xmlns:m=\"http://schemas.microsoft.com/office/2004/12/omml\"\n"
959 "xmlns=\"http://www.w3.org/TR/REC-html40\">");
960 builder.append(result);
961 builder.appendLiteral("</html>");
962 return builder.toString();
963 }
964
965 return result;
966}
967
968static void restoreAttachmentElementsInFragment(DocumentFragment& fragment)
969{
970#if ENABLE(ATTACHMENT_ELEMENT)
971 if (!RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled())
972 return;
973
974 // When creating a fragment we must strip the webkit-attachment-path attribute after restoring the File object.
975 Vector<Ref<HTMLAttachmentElement>> attachments;
976 for (auto& attachment : descendantsOfType<HTMLAttachmentElement>(fragment))
977 attachments.append(attachment);
978
979 for (auto& attachment : attachments) {
980 attachment->setUniqueIdentifier(attachment->attributeWithoutSynchronization(webkitattachmentidAttr));
981
982 auto attachmentPath = attachment->attachmentPath();
983 auto blobURL = attachment->blobURL();
984 if (!attachmentPath.isEmpty())
985 attachment->setFile(File::create(attachmentPath));
986 else if (!blobURL.isEmpty())
987 attachment->setFile(File::deserialize({ }, blobURL, attachment->attachmentType(), attachment->attachmentTitle()));
988
989 // Remove temporary attributes that were previously added in StyledMarkupAccumulator::appendCustomAttributes.
990 attachment->removeAttribute(webkitattachmentidAttr);
991 attachment->removeAttribute(webkitattachmentpathAttr);
992 attachment->removeAttribute(webkitattachmentbloburlAttr);
993 }
994
995 Vector<Ref<HTMLImageElement>> images;
996 for (auto& image : descendantsOfType<HTMLImageElement>(fragment))
997 images.append(image);
998
999 for (auto& image : images) {
1000 auto attachmentIdentifier = image->attributeWithoutSynchronization(webkitattachmentidAttr);
1001 if (attachmentIdentifier.isEmpty())
1002 continue;
1003
1004 auto attachment = HTMLAttachmentElement::create(HTMLNames::attachmentTag, *fragment.ownerDocument());
1005 attachment->setUniqueIdentifier(attachmentIdentifier);
1006 image->setAttachmentElement(WTFMove(attachment));
1007 image->removeAttribute(webkitattachmentidAttr);
1008 }
1009#else
1010 UNUSED_PARAM(fragment);
1011#endif
1012}
1013
1014Ref<DocumentFragment> createFragmentFromMarkup(Document& document, const String& markup, const String& baseURL, ParserContentPolicy parserContentPolicy)
1015{
1016 // We use a fake body element here to trick the HTML parser into using the InBody insertion mode.
1017 auto fakeBody = HTMLBodyElement::create(document);
1018 auto fragment = DocumentFragment::create(document);
1019
1020 fragment->parseHTML(markup, fakeBody.ptr(), parserContentPolicy);
1021 restoreAttachmentElementsInFragment(fragment);
1022 if (!baseURL.isEmpty() && baseURL != WTF::blankURL() && baseURL != document.baseURL())
1023 completeURLs(fragment.ptr(), baseURL);
1024
1025 return fragment;
1026}
1027
1028String serializeFragment(const Node& node, SerializedNodes root, Vector<Node*>* nodes, ResolveURLs urlsToResolve, Vector<QualifiedName>* tagNamesToSkip, SerializationSyntax serializationSyntax)
1029{
1030 MarkupAccumulator accumulator(nodes, urlsToResolve, serializationSyntax);
1031 return accumulator.serializeNodes(const_cast<Node&>(node), root, tagNamesToSkip);
1032}
1033
1034static void fillContainerFromString(ContainerNode& paragraph, const String& string)
1035{
1036 Document& document = paragraph.document();
1037
1038 if (string.isEmpty()) {
1039 paragraph.appendChild(createBlockPlaceholderElement(document));
1040 return;
1041 }
1042
1043 ASSERT(string.find('\n') == notFound);
1044
1045 Vector<String> tabList = string.splitAllowingEmptyEntries('\t');
1046 String tabText = emptyString();
1047 bool first = true;
1048 size_t numEntries = tabList.size();
1049 for (size_t i = 0; i < numEntries; ++i) {
1050 const String& s = tabList[i];
1051
1052 // append the non-tab textual part
1053 if (!s.isEmpty()) {
1054 if (!tabText.isEmpty()) {
1055 paragraph.appendChild(createTabSpanElement(document, tabText));
1056 tabText = emptyString();
1057 }
1058 Ref<Node> textNode = document.createTextNode(stringWithRebalancedWhitespace(s, first, i + 1 == numEntries));
1059 paragraph.appendChild(textNode);
1060 }
1061
1062 // there is a tab after every entry, except the last entry
1063 // (if the last character is a tab, the list gets an extra empty entry)
1064 if (i + 1 != numEntries)
1065 tabText.append('\t');
1066 else if (!tabText.isEmpty())
1067 paragraph.appendChild(createTabSpanElement(document, tabText));
1068
1069 first = false;
1070 }
1071}
1072
1073bool isPlainTextMarkup(Node* node)
1074{
1075 ASSERT(node);
1076 if (!is<HTMLDivElement>(*node))
1077 return false;
1078
1079 HTMLDivElement& element = downcast<HTMLDivElement>(*node);
1080 if (element.hasAttributes())
1081 return false;
1082
1083 Node* firstChild = element.firstChild();
1084 if (!firstChild)
1085 return false;
1086
1087 Node* secondChild = firstChild->nextSibling();
1088 if (!secondChild)
1089 return firstChild->isTextNode() || firstChild->firstChild();
1090
1091 if (secondChild->nextSibling())
1092 return false;
1093
1094 return isTabSpanTextNode(firstChild->firstChild()) && secondChild->isTextNode();
1095}
1096
1097static bool contextPreservesNewline(const Range& context)
1098{
1099 VisiblePosition position(context.startPosition());
1100 Node* container = position.deepEquivalent().containerNode();
1101 if (!container || !container->renderer())
1102 return false;
1103
1104 return container->renderer()->style().preserveNewline();
1105}
1106
1107Ref<DocumentFragment> createFragmentFromText(Range& context, const String& text)
1108{
1109 Document& document = context.ownerDocument();
1110 Ref<DocumentFragment> fragment = document.createDocumentFragment();
1111
1112 if (text.isEmpty())
1113 return fragment;
1114
1115 String string = text;
1116 string.replace("\r\n", "\n");
1117 string.replace('\r', '\n');
1118
1119 auto createHTMLBRElement = [&document]() {
1120 auto element = HTMLBRElement::create(document);
1121 element->setAttributeWithoutSynchronization(classAttr, AppleInterchangeNewline);
1122 return element;
1123 };
1124
1125 if (contextPreservesNewline(context)) {
1126 fragment->appendChild(document.createTextNode(string));
1127 if (string.endsWith('\n')) {
1128 fragment->appendChild(createHTMLBRElement());
1129 }
1130 return fragment;
1131 }
1132
1133 // A string with no newlines gets added inline, rather than being put into a paragraph.
1134 if (string.find('\n') == notFound) {
1135 fillContainerFromString(fragment, string);
1136 return fragment;
1137 }
1138
1139 if (string.length() == 1 && string[0] == '\n') {
1140 // This is a single newline char, thus just create one HTMLBRElement.
1141 fragment->appendChild(createHTMLBRElement());
1142 return fragment;
1143 }
1144
1145 // Break string into paragraphs. Extra line breaks turn into empty paragraphs.
1146 Node* blockNode = enclosingBlock(context.firstNode());
1147 Element* block = downcast<Element>(blockNode);
1148 bool useClonesOfEnclosingBlock = blockNode
1149 && blockNode->isElementNode()
1150 && !block->hasTagName(bodyTag)
1151 && !block->hasTagName(htmlTag)
1152 && block != editableRootForPosition(context.startPosition());
1153 bool useLineBreak = enclosingTextFormControl(context.startPosition());
1154
1155 Vector<String> list = string.splitAllowingEmptyEntries('\n');
1156 size_t numLines = list.size();
1157 for (size_t i = 0; i < numLines; ++i) {
1158 const String& s = list[i];
1159
1160 RefPtr<Element> element;
1161 if (s.isEmpty() && i + 1 == numLines) {
1162 // For last line, use the "magic BR" rather than a P.
1163 element = createHTMLBRElement();
1164 } else if (useLineBreak) {
1165 element = HTMLBRElement::create(document);
1166 fillContainerFromString(fragment, s);
1167 } else {
1168 if (useClonesOfEnclosingBlock)
1169 element = block->cloneElementWithoutChildren(document);
1170 else
1171 element = createDefaultParagraphElement(document);
1172 fillContainerFromString(*element, s);
1173 }
1174 fragment->appendChild(*element);
1175 }
1176 return fragment;
1177}
1178
1179String documentTypeString(const Document& document)
1180{
1181 DocumentType* documentType = document.doctype();
1182 if (!documentType)
1183 return emptyString();
1184 return serializeFragment(*documentType, SerializedNodes::SubtreeIncludingNode);
1185}
1186
1187String urlToMarkup(const URL& url, const String& title)
1188{
1189 StringBuilder markup;
1190 markup.appendLiteral("<a href=\"");
1191 markup.append(url.string());
1192 markup.appendLiteral("\">");
1193 MarkupAccumulator::appendCharactersReplacingEntities(markup, title, 0, title.length(), EntityMaskInPCDATA);
1194 markup.appendLiteral("</a>");
1195 return markup.toString();
1196}
1197
1198ExceptionOr<Ref<DocumentFragment>> createFragmentForInnerOuterHTML(Element& contextElement, const String& markup, ParserContentPolicy parserContentPolicy)
1199{
1200 auto* document = &contextElement.document();
1201 if (contextElement.hasTagName(templateTag))
1202 document = &document->ensureTemplateDocument();
1203 auto fragment = DocumentFragment::create(*document);
1204
1205 if (document->isHTMLDocument()) {
1206 fragment->parseHTML(markup, &contextElement, parserContentPolicy);
1207 return fragment;
1208 }
1209
1210 bool wasValid = fragment->parseXML(markup, &contextElement, parserContentPolicy);
1211 if (!wasValid)
1212 return Exception { SyntaxError };
1213 return fragment;
1214}
1215
1216RefPtr<DocumentFragment> createFragmentForTransformToFragment(Document& outputDoc, const String& sourceString, const String& sourceMIMEType)
1217{
1218 RefPtr<DocumentFragment> fragment = outputDoc.createDocumentFragment();
1219
1220 if (sourceMIMEType == "text/html") {
1221 // As far as I can tell, there isn't a spec for how transformToFragment is supposed to work.
1222 // Based on the documentation I can find, it looks like we want to start parsing the fragment in the InBody insertion mode.
1223 // Unfortunately, that's an implementation detail of the parser.
1224 // We achieve that effect here by passing in a fake body element as context for the fragment.
1225 auto fakeBody = HTMLBodyElement::create(outputDoc);
1226 fragment->parseHTML(sourceString, fakeBody.ptr());
1227 } else if (sourceMIMEType == "text/plain")
1228 fragment->parserAppendChild(Text::create(outputDoc, sourceString));
1229 else {
1230 bool successfulParse = fragment->parseXML(sourceString, 0);
1231 if (!successfulParse)
1232 return nullptr;
1233 }
1234
1235 // FIXME: Do we need to mess with URLs here?
1236
1237 return fragment;
1238}
1239
1240Ref<DocumentFragment> createFragmentForImageAndURL(Document& document, const String& url, PresentationSize preferredSize)
1241{
1242 auto imageElement = HTMLImageElement::create(document);
1243 imageElement->setAttributeWithoutSynchronization(HTMLNames::srcAttr, url);
1244 if (preferredSize.width)
1245 imageElement->setAttributeWithoutSynchronization(HTMLNames::widthAttr, AtomString::number(*preferredSize.width));
1246 if (preferredSize.height)
1247 imageElement->setAttributeWithoutSynchronization(HTMLNames::heightAttr, AtomString::number(*preferredSize.height));
1248 auto fragment = document.createDocumentFragment();
1249 fragment->appendChild(imageElement);
1250
1251 return fragment;
1252}
1253
1254static Vector<Ref<HTMLElement>> collectElementsToRemoveFromFragment(ContainerNode& container)
1255{
1256 Vector<Ref<HTMLElement>> toRemove;
1257 for (auto& element : childrenOfType<HTMLElement>(container)) {
1258 if (is<HTMLHtmlElement>(element)) {
1259 toRemove.append(element);
1260 collectElementsToRemoveFromFragment(element);
1261 continue;
1262 }
1263 if (is<HTMLHeadElement>(element) || is<HTMLBodyElement>(element))
1264 toRemove.append(element);
1265 }
1266 return toRemove;
1267}
1268
1269static void removeElementFromFragmentPreservingChildren(DocumentFragment& fragment, HTMLElement& element)
1270{
1271 RefPtr<Node> nextChild;
1272 for (RefPtr<Node> child = element.firstChild(); child; child = nextChild) {
1273 nextChild = child->nextSibling();
1274 element.removeChild(*child);
1275 fragment.insertBefore(*child, &element);
1276 }
1277 fragment.removeChild(element);
1278}
1279
1280ExceptionOr<Ref<DocumentFragment>> createContextualFragment(Element& element, const String& markup, ParserContentPolicy parserContentPolicy)
1281{
1282 auto result = createFragmentForInnerOuterHTML(element, markup, parserContentPolicy);
1283 if (result.hasException())
1284 return result.releaseException();
1285
1286 auto fragment = result.releaseReturnValue();
1287
1288 // We need to pop <html> and <body> elements and remove <head> to
1289 // accommodate folks passing complete HTML documents to make the
1290 // child of an element.
1291 auto toRemove = collectElementsToRemoveFromFragment(fragment);
1292 for (auto& element : toRemove)
1293 removeElementFromFragmentPreservingChildren(fragment, element);
1294
1295 return fragment;
1296}
1297
1298static inline bool hasOneChild(ContainerNode& node)
1299{
1300 Node* firstChild = node.firstChild();
1301 return firstChild && !firstChild->nextSibling();
1302}
1303
1304static inline bool hasOneTextChild(ContainerNode& node)
1305{
1306 return hasOneChild(node) && node.firstChild()->isTextNode();
1307}
1308
1309static inline bool hasMutationEventListeners(const Document& document)
1310{
1311 return document.hasListenerType(Document::DOMSUBTREEMODIFIED_LISTENER)
1312 || document.hasListenerType(Document::DOMNODEINSERTED_LISTENER)
1313 || document.hasListenerType(Document::DOMNODEREMOVED_LISTENER)
1314 || document.hasListenerType(Document::DOMNODEREMOVEDFROMDOCUMENT_LISTENER)
1315 || document.hasListenerType(Document::DOMCHARACTERDATAMODIFIED_LISTENER);
1316}
1317
1318// We can use setData instead of replacing Text node as long as script can't observe the difference.
1319static inline bool canUseSetDataOptimization(const Text& containerChild, const ChildListMutationScope& mutationScope)
1320{
1321 bool authorScriptMayHaveReference = containerChild.refCount();
1322 return !authorScriptMayHaveReference && !mutationScope.canObserve() && !hasMutationEventListeners(containerChild.document());
1323}
1324
1325ExceptionOr<void> replaceChildrenWithFragment(ContainerNode& container, Ref<DocumentFragment>&& fragment)
1326{
1327 Ref<ContainerNode> containerNode(container);
1328 ChildListMutationScope mutation(containerNode);
1329
1330 if (!fragment->firstChild()) {
1331 containerNode->removeChildren();
1332 return { };
1333 }
1334
1335 auto* containerChild = containerNode->firstChild();
1336 if (containerChild && !containerChild->nextSibling()) {
1337 if (is<Text>(*containerChild) && hasOneTextChild(fragment) && canUseSetDataOptimization(downcast<Text>(*containerChild), mutation)) {
1338 ASSERT(!fragment->firstChild()->refCount());
1339 downcast<Text>(*containerChild).setData(downcast<Text>(*fragment->firstChild()).data());
1340 return { };
1341 }
1342
1343 return containerNode->replaceChild(fragment, *containerChild);
1344 }
1345
1346 containerNode->removeChildren();
1347 return containerNode->appendChild(fragment);
1348}
1349
1350}
1351