1 | /* |
2 | * Copyright (C) 2004-2017 Apple Inc. All rights reserved. |
3 | * Copyright (C) 2008, 2009, 2010, 2011 Google Inc. All rights reserved. |
4 | * Copyright (C) 2011 Igalia S.L. |
5 | * Copyright (C) 2011 Motorola Mobility. All rights reserved. |
6 | * |
7 | * Redistribution and use in source and binary forms, with or without |
8 | * modification, are permitted provided that the following conditions |
9 | * are met: |
10 | * 1. Redistributions of source code must retain the above copyright |
11 | * notice, this list of conditions and the following disclaimer. |
12 | * 2. Redistributions in binary form must reproduce the above copyright |
13 | * notice, this list of conditions and the following disclaimer in the |
14 | * documentation and/or other materials provided with the distribution. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
17 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
19 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
20 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
21 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
22 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
23 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
24 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
26 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #include "config.h" |
30 | #include "markup.h" |
31 | |
32 | #include "ArchiveResource.h" |
33 | #include "CSSPrimitiveValue.h" |
34 | #include "CSSPropertyNames.h" |
35 | #include "CSSValue.h" |
36 | #include "CSSValueKeywords.h" |
37 | #include "CacheStorageProvider.h" |
38 | #include "ChildListMutationScope.h" |
39 | #include "Comment.h" |
40 | #include "ComposedTreeIterator.h" |
41 | #include "CustomHeaderFields.h" |
42 | #include "DocumentFragment.h" |
43 | #include "DocumentLoader.h" |
44 | #include "DocumentType.h" |
45 | #include "Editing.h" |
46 | #include "Editor.h" |
47 | #include "EditorClient.h" |
48 | #include "ElementIterator.h" |
49 | #include "EmptyClients.h" |
50 | #include "File.h" |
51 | #include "Frame.h" |
52 | #include "FrameLoader.h" |
53 | #include "HTMLAttachmentElement.h" |
54 | #include "HTMLBRElement.h" |
55 | #include "HTMLBodyElement.h" |
56 | #include "HTMLDivElement.h" |
57 | #include "HTMLHeadElement.h" |
58 | #include "HTMLHtmlElement.h" |
59 | #include "HTMLImageElement.h" |
60 | #include "HTMLNames.h" |
61 | #include "HTMLStyleElement.h" |
62 | #include "HTMLTableElement.h" |
63 | #include "HTMLTextAreaElement.h" |
64 | #include "HTMLTextFormControlElement.h" |
65 | #include "LibWebRTCProvider.h" |
66 | #include "MarkupAccumulator.h" |
67 | #include "NodeList.h" |
68 | #include "Page.h" |
69 | #include "PageConfiguration.h" |
70 | #include "Range.h" |
71 | #include "RenderBlock.h" |
72 | #include "RuntimeEnabledFeatures.h" |
73 | #include "Settings.h" |
74 | #include "SocketProvider.h" |
75 | #include "StyleProperties.h" |
76 | #include "TextIterator.h" |
77 | #include "TypedElementDescendantIterator.h" |
78 | #include "VisibleSelection.h" |
79 | #include "VisibleUnits.h" |
80 | #include <wtf/StdLibExtras.h> |
81 | #include <wtf/URL.h> |
82 | #include <wtf/URLParser.h> |
83 | #include <wtf/text/StringBuilder.h> |
84 | #include <platform/PasteboardItemInfo.h> |
85 | |
86 | namespace WebCore { |
87 | |
88 | using namespace HTMLNames; |
89 | |
90 | static bool propertyMissingOrEqualToNone(StyleProperties*, CSSPropertyID); |
91 | |
92 | class AttributeChange { |
93 | public: |
94 | AttributeChange() |
95 | : m_name(nullAtom(), nullAtom(), nullAtom()) |
96 | { |
97 | } |
98 | |
99 | AttributeChange(Element* element, const QualifiedName& name, const String& value) |
100 | : m_element(element), m_name(name), m_value(value) |
101 | { |
102 | } |
103 | |
104 | void apply() |
105 | { |
106 | m_element->setAttribute(m_name, m_value); |
107 | } |
108 | |
109 | private: |
110 | RefPtr<Element> m_element; |
111 | QualifiedName m_name; |
112 | String m_value; |
113 | }; |
114 | |
115 | static void completeURLs(DocumentFragment* fragment, const String& baseURL) |
116 | { |
117 | Vector<AttributeChange> changes; |
118 | |
119 | URL parsedBaseURL({ }, baseURL); |
120 | |
121 | for (auto& element : descendantsOfType<Element>(*fragment)) { |
122 | if (!element.hasAttributes()) |
123 | continue; |
124 | for (const Attribute& attribute : element.attributesIterator()) { |
125 | if (element.attributeContainsURL(attribute) && !attribute.value().isEmpty()) |
126 | changes.append(AttributeChange(&element, attribute.name(), element.completeURLsInAttributeValue(parsedBaseURL, attribute))); |
127 | } |
128 | } |
129 | |
130 | for (auto& change : changes) |
131 | change.apply(); |
132 | } |
133 | |
134 | void replaceSubresourceURLs(Ref<DocumentFragment>&& fragment, HashMap<AtomString, AtomString>&& replacementMap) |
135 | { |
136 | Vector<AttributeChange> changes; |
137 | for (auto& element : descendantsOfType<Element>(fragment)) { |
138 | if (!element.hasAttributes()) |
139 | continue; |
140 | for (const Attribute& attribute : element.attributesIterator()) { |
141 | // FIXME: This won't work for srcset. |
142 | if (element.attributeContainsURL(attribute) && !attribute.value().isEmpty()) { |
143 | auto replacement = replacementMap.get(attribute.value()); |
144 | if (!replacement.isNull()) |
145 | changes.append({ &element, attribute.name(), replacement }); |
146 | } |
147 | } |
148 | } |
149 | for (auto& change : changes) |
150 | change.apply(); |
151 | } |
152 | |
153 | struct ElementAttribute { |
154 | Ref<Element> element; |
155 | QualifiedName attributeName; |
156 | }; |
157 | |
158 | void removeSubresourceURLAttributes(Ref<DocumentFragment>&& fragment, WTF::Function<bool(const URL&)> shouldRemoveURL) |
159 | { |
160 | Vector<ElementAttribute> attributesToRemove; |
161 | for (auto& element : descendantsOfType<Element>(fragment)) { |
162 | if (!element.hasAttributes()) |
163 | continue; |
164 | for (const Attribute& attribute : element.attributesIterator()) { |
165 | // FIXME: This won't work for srcset. |
166 | if (element.attributeContainsURL(attribute) && !attribute.value().isEmpty()) { |
167 | URL url({ }, attribute.value()); |
168 | if (shouldRemoveURL(url)) |
169 | attributesToRemove.append({ element, attribute.name() }); |
170 | } |
171 | } |
172 | } |
173 | for (auto& item : attributesToRemove) |
174 | item.element->removeAttribute(item.attributeName); |
175 | } |
176 | |
177 | std::unique_ptr<Page> createPageForSanitizingWebContent() |
178 | { |
179 | auto pageConfiguration = pageConfigurationWithEmptyClients(); |
180 | |
181 | auto page = std::make_unique<Page>(WTFMove(pageConfiguration)); |
182 | page->settings().setMediaEnabled(false); |
183 | page->settings().setScriptEnabled(false); |
184 | page->settings().setPluginsEnabled(false); |
185 | page->settings().setAcceleratedCompositingEnabled(false); |
186 | |
187 | Frame& frame = page->mainFrame(); |
188 | frame.setView(FrameView::create(frame, IntSize { 800, 600 })); |
189 | frame.init(); |
190 | |
191 | FrameLoader& loader = frame.loader(); |
192 | static char markup[] = "<!DOCTYPE html><html><body></body></html>" ; |
193 | ASSERT(loader.activeDocumentLoader()); |
194 | auto& writer = loader.activeDocumentLoader()->writer(); |
195 | writer.setMIMEType("text/html" ); |
196 | writer.begin(); |
197 | writer.insertDataSynchronously(String(markup)); |
198 | writer.end(); |
199 | RELEASE_ASSERT(page->mainFrame().document()->body()); |
200 | |
201 | return page; |
202 | } |
203 | |
204 | String sanitizeMarkup(const String& rawHTML, MSOListQuirks msoListQuirks, Optional<WTF::Function<void(DocumentFragment&)>> fragmentSanitizer) |
205 | { |
206 | auto page = createPageForSanitizingWebContent(); |
207 | Document* stagingDocument = page->mainFrame().document(); |
208 | ASSERT(stagingDocument); |
209 | |
210 | auto fragment = createFragmentFromMarkup(*stagingDocument, rawHTML, emptyString(), DisallowScriptingAndPluginContent); |
211 | |
212 | if (fragmentSanitizer) |
213 | (*fragmentSanitizer)(fragment); |
214 | |
215 | return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, msoListQuirks, rawHTML); |
216 | } |
217 | |
218 | enum class MSOListMode { Preserve, DoNotPreserve }; |
219 | class StyledMarkupAccumulator final : public MarkupAccumulator { |
220 | public: |
221 | enum RangeFullySelectsNode { DoesFullySelectNode, DoesNotFullySelectNode }; |
222 | |
223 | StyledMarkupAccumulator(const Position& start, const Position& end, Vector<Node*>* nodes, ResolveURLs, SerializeComposedTree, |
224 | AnnotateForInterchange, MSOListMode, bool needsPositionStyleConversion, Node* highestNodeToBeSerialized = nullptr); |
225 | |
226 | Node* serializeNodes(const Position& start, const Position& end); |
227 | void wrapWithNode(Node&, bool convertBlocksToInlines = false, RangeFullySelectsNode = DoesFullySelectNode); |
228 | void wrapWithStyleNode(StyleProperties*, Document&, bool isBlock = false); |
229 | String takeResults(); |
230 | |
231 | bool needRelativeStyleWrapper() const { return m_needRelativeStyleWrapper; } |
232 | bool needClearingDiv() const { return m_needClearingDiv; } |
233 | |
234 | using MarkupAccumulator::appendString; |
235 | |
236 | ContainerNode* parentNode(Node& node) |
237 | { |
238 | if (UNLIKELY(m_useComposedTree)) |
239 | return node.parentInComposedTree(); |
240 | return node.parentOrShadowHostNode(); |
241 | } |
242 | |
243 | private: |
244 | void appendStyleNodeOpenTag(StringBuilder&, StyleProperties*, Document&, bool isBlock = false); |
245 | const String& styleNodeCloseTag(bool isBlock = false); |
246 | |
247 | String renderedTextRespectingRange(const Text&); |
248 | String textContentRespectingRange(const Text&); |
249 | |
250 | bool shouldPreserveMSOListStyleForElement(const Element&); |
251 | |
252 | void appendStartTag(StringBuilder& out, const Element&, bool addDisplayInline, RangeFullySelectsNode); |
253 | void appendEndTag(StringBuilder& out, const Element&) override; |
254 | void appendCustomAttributes(StringBuilder&, const Element&, Namespaces*) override; |
255 | |
256 | void appendText(StringBuilder& out, const Text&) override; |
257 | void appendStartTag(StringBuilder& out, const Element& element, Namespaces*) override |
258 | { |
259 | appendStartTag(out, element, false, DoesFullySelectNode); |
260 | } |
261 | |
262 | Node* firstChild(Node& node) |
263 | { |
264 | if (UNLIKELY(m_useComposedTree)) |
265 | return firstChildInComposedTreeIgnoringUserAgentShadow(node); |
266 | return node.firstChild(); |
267 | } |
268 | |
269 | Node* nextSibling(Node& node) |
270 | { |
271 | if (UNLIKELY(m_useComposedTree)) |
272 | return nextSiblingInComposedTreeIgnoringUserAgentShadow(node); |
273 | return node.nextSibling(); |
274 | } |
275 | |
276 | Node* nextSkippingChildren(Node& node) |
277 | { |
278 | if (UNLIKELY(m_useComposedTree)) |
279 | return nextSkippingChildrenInComposedTreeIgnoringUserAgentShadow(node); |
280 | return NodeTraversal::nextSkippingChildren(node); |
281 | } |
282 | |
283 | bool hasChildNodes(Node& node) |
284 | { |
285 | if (UNLIKELY(m_useComposedTree)) |
286 | return firstChildInComposedTreeIgnoringUserAgentShadow(node); |
287 | return node.hasChildNodes(); |
288 | } |
289 | |
290 | bool isDescendantOf(Node& node, Node& possibleAncestor) |
291 | { |
292 | if (UNLIKELY(m_useComposedTree)) |
293 | return node.isDescendantOrShadowDescendantOf(&possibleAncestor); |
294 | return node.isDescendantOf(&possibleAncestor); |
295 | } |
296 | |
297 | enum class NodeTraversalMode { EmitString, DoNotEmitString }; |
298 | Node* traverseNodesForSerialization(Node* startNode, Node* pastEnd, NodeTraversalMode); |
299 | |
300 | bool appendNodeToPreserveMSOList(Node&); |
301 | |
302 | bool shouldAnnotate() |
303 | { |
304 | return m_annotate == AnnotateForInterchange::Yes; |
305 | } |
306 | |
307 | bool shouldApplyWrappingStyle(const Node& node) const |
308 | { |
309 | return m_highestNodeToBeSerialized && m_highestNodeToBeSerialized->parentNode() == node.parentNode() && m_wrappingStyle && m_wrappingStyle->style(); |
310 | } |
311 | |
312 | Position m_start; |
313 | Position m_end; |
314 | Vector<String> m_reversedPrecedingMarkup; |
315 | const AnnotateForInterchange m_annotate; |
316 | RefPtr<Node> m_highestNodeToBeSerialized; |
317 | RefPtr<EditingStyle> m_wrappingStyle; |
318 | bool m_useComposedTree; |
319 | bool m_needsPositionStyleConversion; |
320 | bool m_needRelativeStyleWrapper { false }; |
321 | bool m_needClearingDiv { false }; |
322 | bool m_shouldPreserveMSOList; |
323 | bool m_inMSOList { false }; |
324 | }; |
325 | |
326 | inline StyledMarkupAccumulator::StyledMarkupAccumulator(const Position& start, const Position& end, Vector<Node*>* nodes, ResolveURLs urlsToResolve, SerializeComposedTree serializeComposedTree, |
327 | AnnotateForInterchange annotate, MSOListMode msoListMode, bool needsPositionStyleConversion, Node* highestNodeToBeSerialized) |
328 | : MarkupAccumulator(nodes, urlsToResolve) |
329 | , m_start(start) |
330 | , m_end(end) |
331 | , m_annotate(annotate) |
332 | , m_highestNodeToBeSerialized(highestNodeToBeSerialized) |
333 | , m_useComposedTree(serializeComposedTree == SerializeComposedTree::Yes) |
334 | , m_needsPositionStyleConversion(needsPositionStyleConversion) |
335 | , m_shouldPreserveMSOList(msoListMode == MSOListMode::Preserve) |
336 | { |
337 | } |
338 | |
339 | void StyledMarkupAccumulator::wrapWithNode(Node& node, bool convertBlocksToInlines, RangeFullySelectsNode rangeFullySelectsNode) |
340 | { |
341 | StringBuilder markup; |
342 | if (is<Element>(node)) |
343 | appendStartTag(markup, downcast<Element>(node), convertBlocksToInlines && isBlock(&node), rangeFullySelectsNode); |
344 | else |
345 | appendNonElementNode(markup, node, nullptr); |
346 | m_reversedPrecedingMarkup.append(markup.toString()); |
347 | endAppendingNode(node); |
348 | if (m_nodes) |
349 | m_nodes->append(&node); |
350 | } |
351 | |
352 | void StyledMarkupAccumulator::wrapWithStyleNode(StyleProperties* style, Document& document, bool isBlock) |
353 | { |
354 | StringBuilder openTag; |
355 | appendStyleNodeOpenTag(openTag, style, document, isBlock); |
356 | m_reversedPrecedingMarkup.append(openTag.toString()); |
357 | appendString(styleNodeCloseTag(isBlock)); |
358 | } |
359 | |
360 | void StyledMarkupAccumulator::appendStyleNodeOpenTag(StringBuilder& out, StyleProperties* style, Document& document, bool isBlock) |
361 | { |
362 | // wrappingStyleForSerialization should have removed -webkit-text-decorations-in-effect |
363 | ASSERT(propertyMissingOrEqualToNone(style, CSSPropertyWebkitTextDecorationsInEffect)); |
364 | if (isBlock) |
365 | out.appendLiteral("<div style=\"" ); |
366 | else |
367 | out.appendLiteral("<span style=\"" ); |
368 | appendAttributeValue(out, style->asText(), document.isHTMLDocument()); |
369 | out.appendLiteral("\">" ); |
370 | } |
371 | |
372 | const String& StyledMarkupAccumulator::styleNodeCloseTag(bool isBlock) |
373 | { |
374 | static NeverDestroyed<const String> divClose(MAKE_STATIC_STRING_IMPL("</div>" )); |
375 | static NeverDestroyed<const String> styleSpanClose(MAKE_STATIC_STRING_IMPL("</span>" )); |
376 | return isBlock ? divClose : styleSpanClose; |
377 | } |
378 | |
379 | String StyledMarkupAccumulator::takeResults() |
380 | { |
381 | StringBuilder result; |
382 | result.reserveCapacity(totalLength(m_reversedPrecedingMarkup) + length()); |
383 | |
384 | for (size_t i = m_reversedPrecedingMarkup.size(); i > 0; --i) |
385 | result.append(m_reversedPrecedingMarkup[i - 1]); |
386 | |
387 | concatenateMarkup(result); |
388 | |
389 | // We remove '\0' characters because they are not visibly rendered to the user. |
390 | return result.toString().replaceWithLiteral('\0', "" ); |
391 | } |
392 | |
393 | void StyledMarkupAccumulator::appendText(StringBuilder& out, const Text& text) |
394 | { |
395 | const bool parentIsTextarea = is<HTMLTextAreaElement>(text.parentElement()); |
396 | const bool wrappingSpan = shouldApplyWrappingStyle(text) && !parentIsTextarea; |
397 | if (wrappingSpan) { |
398 | RefPtr<EditingStyle> wrappingStyle = m_wrappingStyle->copy(); |
399 | // FIXME: <rdar://problem/5371536> Style rules that match pasted content can change it's appearance |
400 | // Make sure spans are inline style in paste side e.g. span { display: block }. |
401 | wrappingStyle->forceInline(); |
402 | // FIXME: Should this be included in forceInline? |
403 | wrappingStyle->style()->setProperty(CSSPropertyFloat, CSSValueNone); |
404 | |
405 | appendStyleNodeOpenTag(out, wrappingStyle->style(), text.document()); |
406 | } |
407 | |
408 | if (!shouldAnnotate() || parentIsTextarea) { |
409 | auto content = textContentRespectingRange(text); |
410 | appendCharactersReplacingEntities(out, content, 0, content.length(), entityMaskForText(text)); |
411 | } else { |
412 | const bool useRenderedText = !enclosingElementWithTag(firstPositionInNode(const_cast<Text*>(&text)), selectTag); |
413 | String content = useRenderedText ? renderedTextRespectingRange(text) : textContentRespectingRange(text); |
414 | StringBuilder buffer; |
415 | appendCharactersReplacingEntities(buffer, content, 0, content.length(), EntityMaskInPCDATA); |
416 | out.append(convertHTMLTextToInterchangeFormat(buffer.toString(), &text)); |
417 | } |
418 | |
419 | if (wrappingSpan) |
420 | out.append(styleNodeCloseTag()); |
421 | } |
422 | |
423 | String StyledMarkupAccumulator::renderedTextRespectingRange(const Text& text) |
424 | { |
425 | TextIteratorBehavior behavior = TextIteratorDefaultBehavior; |
426 | Position start = &text == m_start.containerNode() ? m_start : firstPositionInNode(const_cast<Text*>(&text)); |
427 | Position end; |
428 | if (&text == m_end.containerNode()) |
429 | end = m_end; |
430 | else { |
431 | end = lastPositionInNode(const_cast<Text*>(&text)); |
432 | if (!m_end.isNull()) |
433 | behavior = TextIteratorBehavesAsIfNodesFollowing; |
434 | } |
435 | |
436 | return plainText(Range::create(text.document(), start, end).ptr(), behavior); |
437 | } |
438 | |
439 | String StyledMarkupAccumulator::textContentRespectingRange(const Text& text) |
440 | { |
441 | if (m_start.isNull() && m_end.isNull()) |
442 | return text.data(); |
443 | |
444 | unsigned start = 0; |
445 | unsigned end = std::numeric_limits<unsigned>::max(); |
446 | if (&text == m_start.containerNode()) |
447 | start = m_start.offsetInContainerNode(); |
448 | if (&text == m_end.containerNode()) |
449 | end = m_end.offsetInContainerNode(); |
450 | ASSERT(start < end); |
451 | return text.data().substring(start, end - start); |
452 | } |
453 | |
454 | void StyledMarkupAccumulator::appendCustomAttributes(StringBuilder& out, const Element& element, Namespaces* namespaces) |
455 | { |
456 | #if ENABLE(ATTACHMENT_ELEMENT) |
457 | if (!RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled()) |
458 | return; |
459 | |
460 | if (is<HTMLAttachmentElement>(element)) { |
461 | auto& attachment = downcast<HTMLAttachmentElement>(element); |
462 | appendAttribute(out, element, { webkitattachmentidAttr, attachment.uniqueIdentifier() }, namespaces); |
463 | if (auto* file = attachment.file()) { |
464 | // These attributes are only intended for File deserialization, and are removed from the generated attachment |
465 | // element after we've deserialized and set its backing File, in restoreAttachmentElementsInFragment. |
466 | appendAttribute(out, element, { webkitattachmentpathAttr, file->path() }, namespaces); |
467 | appendAttribute(out, element, { webkitattachmentbloburlAttr, file->url().string() }, namespaces); |
468 | } |
469 | } else if (is<HTMLImageElement>(element)) { |
470 | if (auto attachment = downcast<HTMLImageElement>(element).attachmentElement()) |
471 | appendAttribute(out, element, { webkitattachmentidAttr, attachment->uniqueIdentifier() }, namespaces); |
472 | } |
473 | #else |
474 | UNUSED_PARAM(out); |
475 | UNUSED_PARAM(element); |
476 | UNUSED_PARAM(namespaces); |
477 | #endif |
478 | } |
479 | |
480 | bool StyledMarkupAccumulator::shouldPreserveMSOListStyleForElement(const Element& element) |
481 | { |
482 | if (m_inMSOList) |
483 | return true; |
484 | if (m_shouldPreserveMSOList) { |
485 | auto style = element.getAttribute(styleAttr); |
486 | return style.startsWith("mso-list:" ) || style.contains(";mso-list:" ) || style.contains("\nmso-list:" ); |
487 | } |
488 | return false; |
489 | } |
490 | |
491 | void StyledMarkupAccumulator::appendStartTag(StringBuilder& out, const Element& element, bool addDisplayInline, RangeFullySelectsNode rangeFullySelectsNode) |
492 | { |
493 | const bool documentIsHTML = element.document().isHTMLDocument(); |
494 | const bool isSlotElement = is<HTMLSlotElement>(element); |
495 | if (UNLIKELY(isSlotElement)) |
496 | out.append("<span" ); |
497 | else |
498 | appendOpenTag(out, element, nullptr); |
499 | |
500 | appendCustomAttributes(out, element, nullptr); |
501 | |
502 | const bool shouldAnnotateOrForceInline = element.isHTMLElement() && (shouldAnnotate() || addDisplayInline); |
503 | bool shouldOverrideStyleAttr = (shouldAnnotateOrForceInline || shouldApplyWrappingStyle(element) || isSlotElement) && !shouldPreserveMSOListStyleForElement(element); |
504 | if (element.hasAttributes()) { |
505 | for (const Attribute& attribute : element.attributesIterator()) { |
506 | // We'll handle the style attribute separately, below. |
507 | if (attribute.name() == styleAttr && shouldOverrideStyleAttr) |
508 | continue; |
509 | if (element.isEventHandlerAttribute(attribute) || element.isJavaScriptURLAttribute(attribute)) |
510 | continue; |
511 | appendAttribute(out, element, attribute, 0); |
512 | } |
513 | } |
514 | |
515 | if (shouldOverrideStyleAttr) { |
516 | RefPtr<EditingStyle> newInlineStyle; |
517 | |
518 | if (shouldApplyWrappingStyle(element)) { |
519 | newInlineStyle = m_wrappingStyle->copy(); |
520 | newInlineStyle->removePropertiesInElementDefaultStyle(*const_cast<Element*>(&element)); |
521 | newInlineStyle->removeStyleConflictingWithStyleOfNode(*const_cast<Element*>(&element)); |
522 | } else |
523 | newInlineStyle = EditingStyle::create(); |
524 | |
525 | if (isSlotElement) |
526 | newInlineStyle->addDisplayContents(); |
527 | |
528 | if (is<StyledElement>(element) && downcast<StyledElement>(element).inlineStyle()) |
529 | newInlineStyle->overrideWithStyle(*downcast<StyledElement>(element).inlineStyle()); |
530 | |
531 | if (shouldAnnotateOrForceInline) { |
532 | if (shouldAnnotate()) |
533 | newInlineStyle->mergeStyleFromRulesForSerialization(downcast<HTMLElement>(*const_cast<Element*>(&element))); |
534 | |
535 | if (addDisplayInline) |
536 | newInlineStyle->forceInline(); |
537 | |
538 | if (m_needsPositionStyleConversion) { |
539 | m_needRelativeStyleWrapper |= newInlineStyle->convertPositionStyle(); |
540 | m_needClearingDiv |= newInlineStyle->isFloating(); |
541 | } |
542 | |
543 | // If the node is not fully selected by the range, then we don't want to keep styles that affect its relationship to the nodes around it |
544 | // only the ones that affect it and the nodes within it. |
545 | if (rangeFullySelectsNode == DoesNotFullySelectNode && newInlineStyle->style()) |
546 | newInlineStyle->style()->removeProperty(CSSPropertyFloat); |
547 | } |
548 | |
549 | if (!newInlineStyle->isEmpty()) { |
550 | out.appendLiteral(" style=\"" ); |
551 | appendAttributeValue(out, newInlineStyle->style()->asText(), documentIsHTML); |
552 | out.append('\"'); |
553 | } |
554 | } |
555 | |
556 | appendCloseTag(out, element); |
557 | } |
558 | |
559 | void StyledMarkupAccumulator::appendEndTag(StringBuilder& out, const Element& element) |
560 | { |
561 | if (UNLIKELY(is<HTMLSlotElement>(element))) |
562 | out.append("</span>" ); |
563 | else |
564 | MarkupAccumulator::appendEndTag(out, element); |
565 | } |
566 | |
567 | Node* StyledMarkupAccumulator::serializeNodes(const Position& start, const Position& end) |
568 | { |
569 | ASSERT(comparePositions(start, end) <= 0); |
570 | auto startNode = start.firstNode(); |
571 | Node* pastEnd = end.computeNodeAfterPosition(); |
572 | if (!pastEnd && end.containerNode()) |
573 | pastEnd = nextSkippingChildren(*end.containerNode()); |
574 | |
575 | if (!m_highestNodeToBeSerialized) { |
576 | Node* lastClosed = traverseNodesForSerialization(startNode.get(), pastEnd, NodeTraversalMode::DoNotEmitString); |
577 | m_highestNodeToBeSerialized = lastClosed; |
578 | } |
579 | |
580 | if (m_highestNodeToBeSerialized && m_highestNodeToBeSerialized->parentNode()) |
581 | m_wrappingStyle = EditingStyle::wrappingStyleForSerialization(*m_highestNodeToBeSerialized->parentNode(), shouldAnnotate()); |
582 | |
583 | return traverseNodesForSerialization(startNode.get(), pastEnd, NodeTraversalMode::EmitString); |
584 | } |
585 | |
586 | Node* StyledMarkupAccumulator::traverseNodesForSerialization(Node* startNode, Node* pastEnd, NodeTraversalMode traversalMode) |
587 | { |
588 | const bool shouldEmit = traversalMode == NodeTraversalMode::EmitString; |
589 | |
590 | m_inMSOList = false; |
591 | |
592 | unsigned depth = 0; |
593 | auto enterNode = [&] (Node& node) { |
594 | if (UNLIKELY(m_shouldPreserveMSOList) && shouldEmit) { |
595 | if (appendNodeToPreserveMSOList(node)) |
596 | return false; |
597 | } |
598 | |
599 | bool isDisplayContents = is<Element>(node) && downcast<Element>(node).hasDisplayContents(); |
600 | if (!node.renderer() && !isDisplayContents && !enclosingElementWithTag(firstPositionInOrBeforeNode(&node), selectTag)) |
601 | return false; |
602 | |
603 | ++depth; |
604 | if (shouldEmit) |
605 | startAppendingNode(node); |
606 | |
607 | return true; |
608 | }; |
609 | |
610 | Node* lastClosed = nullptr; |
611 | auto exitNode = [&] (Node& node) { |
612 | bool closing = depth; |
613 | if (depth) |
614 | --depth; |
615 | if (shouldEmit) { |
616 | if (closing) |
617 | endAppendingNode(node); |
618 | else |
619 | wrapWithNode(node); |
620 | } |
621 | lastClosed = &node; |
622 | }; |
623 | |
624 | Node* lastNode = nullptr; |
625 | Node* next = nullptr; |
626 | for (auto* n = startNode; n != pastEnd; lastNode = n, n = next) { |
627 | |
628 | Vector<Node*, 8> exitedAncestors; |
629 | next = nullptr; |
630 | if (auto* child = firstChild(*n)) |
631 | next = child; |
632 | else if (auto* sibling = nextSibling(*n)) |
633 | next = sibling; |
634 | else { |
635 | for (auto* ancestor = parentNode(*n); ancestor; ancestor = parentNode(*ancestor)) { |
636 | exitedAncestors.append(ancestor); |
637 | if (auto* sibling = nextSibling(*ancestor)) { |
638 | next = sibling; |
639 | break; |
640 | } |
641 | } |
642 | } |
643 | ASSERT(next || !pastEnd); |
644 | |
645 | if (isBlock(n) && canHaveChildrenForEditing(*n) && next == pastEnd) { |
646 | // Don't write out empty block containers that aren't fully selected. |
647 | continue; |
648 | } |
649 | |
650 | if (!enterNode(*n)) { |
651 | next = nextSkippingChildren(*n); |
652 | // Don't skip over pastEnd. |
653 | if (pastEnd && isDescendantOf(*pastEnd, *n)) |
654 | next = pastEnd; |
655 | ASSERT(next || !pastEnd); |
656 | } else { |
657 | if (!hasChildNodes(*n)) |
658 | exitNode(*n); |
659 | } |
660 | |
661 | for (auto* ancestor : exitedAncestors) { |
662 | if (!depth && next == pastEnd) |
663 | break; |
664 | exitNode(*ancestor); |
665 | } |
666 | } |
667 | |
668 | ASSERT(lastNode || !depth); |
669 | if (depth) { |
670 | for (auto* ancestor = parentNode(pastEnd ? *pastEnd : *lastNode); ancestor && depth; ancestor = parentNode(*ancestor)) |
671 | exitNode(*ancestor); |
672 | } |
673 | |
674 | return lastClosed; |
675 | } |
676 | |
677 | bool StyledMarkupAccumulator::appendNodeToPreserveMSOList(Node& node) |
678 | { |
679 | if (is<Comment>(node)) { |
680 | auto& = downcast<Comment>(node); |
681 | if (!m_inMSOList && commentNode.data() == "[if !supportLists]" ) |
682 | m_inMSOList = true; |
683 | else if (m_inMSOList && commentNode.data() == "[endif]" ) |
684 | m_inMSOList = false; |
685 | else |
686 | return false; |
687 | startAppendingNode(commentNode); |
688 | return true; |
689 | } |
690 | if (is<HTMLStyleElement>(node)) { |
691 | auto* firstChild = node.firstChild(); |
692 | if (!is<Text>(firstChild)) |
693 | return false; |
694 | |
695 | auto& textChild = downcast<Text>(*firstChild); |
696 | auto& styleContent = textChild.data(); |
697 | |
698 | const auto msoStyleDefinitionsStart = styleContent.find("/* Style Definitions */" ); |
699 | const auto msoListDefinitionsStart = styleContent.find("/* List Definitions */" ); |
700 | const auto lastListItem = styleContent.reverseFind("\n@list" ); |
701 | if (msoListDefinitionsStart == notFound || lastListItem == notFound) |
702 | return false; |
703 | const auto start = msoStyleDefinitionsStart != notFound && msoStyleDefinitionsStart < msoListDefinitionsStart ? msoStyleDefinitionsStart : msoListDefinitionsStart; |
704 | |
705 | const auto msoListDefinitionsEnd = styleContent.find(";}\n" , lastListItem); |
706 | if (msoListDefinitionsEnd == notFound || start >= msoListDefinitionsEnd) |
707 | return false; |
708 | |
709 | appendString("<head><style class=\"" WebKitMSOListQuirksStyle "\">\n<!--\n" ); |
710 | appendStringView(StringView(textChild.data()).substring(start, msoListDefinitionsEnd - start + 3)); |
711 | appendString("\n-->\n</style></head>" ); |
712 | |
713 | return true; |
714 | } |
715 | return false; |
716 | } |
717 | |
718 | static Node* ancestorToRetainStructureAndAppearanceForBlock(Node* commonAncestorBlock) |
719 | { |
720 | if (!commonAncestorBlock) |
721 | return nullptr; |
722 | |
723 | if (commonAncestorBlock->hasTagName(tbodyTag) || commonAncestorBlock->hasTagName(trTag)) { |
724 | ContainerNode* table = commonAncestorBlock->parentNode(); |
725 | while (table && !is<HTMLTableElement>(*table)) |
726 | table = table->parentNode(); |
727 | |
728 | return table; |
729 | } |
730 | |
731 | if (isNonTableCellHTMLBlockElement(commonAncestorBlock)) |
732 | return commonAncestorBlock; |
733 | |
734 | return nullptr; |
735 | } |
736 | |
737 | static inline Node* ancestorToRetainStructureAndAppearance(Node* commonAncestor) |
738 | { |
739 | return ancestorToRetainStructureAndAppearanceForBlock(enclosingBlock(commonAncestor)); |
740 | } |
741 | |
742 | static bool propertyMissingOrEqualToNone(StyleProperties* style, CSSPropertyID propertyID) |
743 | { |
744 | if (!style) |
745 | return false; |
746 | RefPtr<CSSValue> value = style->getPropertyCSSValue(propertyID); |
747 | if (!value) |
748 | return true; |
749 | if (!is<CSSPrimitiveValue>(*value)) |
750 | return false; |
751 | return downcast<CSSPrimitiveValue>(*value).valueID() == CSSValueNone; |
752 | } |
753 | |
754 | static bool needInterchangeNewlineAfter(const VisiblePosition& v) |
755 | { |
756 | VisiblePosition next = v.next(); |
757 | Node* upstreamNode = next.deepEquivalent().upstream().deprecatedNode(); |
758 | Node* downstreamNode = v.deepEquivalent().downstream().deprecatedNode(); |
759 | // Add an interchange newline if a paragraph break is selected and a br won't already be added to the markup to represent it. |
760 | return isEndOfParagraph(v) && isStartOfParagraph(next) && !(upstreamNode->hasTagName(brTag) && upstreamNode == downstreamNode); |
761 | } |
762 | |
763 | static RefPtr<EditingStyle> styleFromMatchedRulesAndInlineDecl(Node& node) |
764 | { |
765 | if (!is<HTMLElement>(node)) |
766 | return nullptr; |
767 | |
768 | auto& element = downcast<HTMLElement>(node); |
769 | auto style = EditingStyle::create(element.inlineStyle()); |
770 | style->mergeStyleFromRules(element); |
771 | return style; |
772 | } |
773 | |
774 | static bool isElementPresentational(const Node* node) |
775 | { |
776 | return node->hasTagName(uTag) || node->hasTagName(sTag) || node->hasTagName(strikeTag) |
777 | || node->hasTagName(iTag) || node->hasTagName(emTag) || node->hasTagName(bTag) || node->hasTagName(strongTag); |
778 | } |
779 | |
780 | static Node* highestAncestorToWrapMarkup(const Position& start, const Position& end, Node& commonAncestor, AnnotateForInterchange annotate) |
781 | { |
782 | Node* specialCommonAncestor = nullptr; |
783 | if (annotate == AnnotateForInterchange::Yes) { |
784 | // Include ancestors that aren't completely inside the range but are required to retain |
785 | // the structure and appearance of the copied markup. |
786 | specialCommonAncestor = ancestorToRetainStructureAndAppearance(&commonAncestor); |
787 | |
788 | if (auto* parentListNode = enclosingNodeOfType(start, isListItem)) { |
789 | if (!editingIgnoresContent(*parentListNode) && VisibleSelection::selectionFromContentsOfNode(parentListNode) == VisibleSelection(start, end)) { |
790 | specialCommonAncestor = parentListNode->parentNode(); |
791 | while (specialCommonAncestor && !isListHTMLElement(specialCommonAncestor)) |
792 | specialCommonAncestor = specialCommonAncestor->parentNode(); |
793 | } |
794 | } |
795 | |
796 | // Retain the Mail quote level by including all ancestor mail block quotes. |
797 | if (Node* highestMailBlockquote = highestEnclosingNodeOfType(start, isMailBlockquote, CanCrossEditingBoundary)) |
798 | specialCommonAncestor = highestMailBlockquote; |
799 | } |
800 | |
801 | auto* checkAncestor = specialCommonAncestor ? specialCommonAncestor : &commonAncestor; |
802 | if (checkAncestor->renderer() && checkAncestor->renderer()->containingBlock()) { |
803 | Node* newSpecialCommonAncestor = highestEnclosingNodeOfType(firstPositionInNode(checkAncestor), &isElementPresentational, CanCrossEditingBoundary, checkAncestor->renderer()->containingBlock()->element()); |
804 | if (newSpecialCommonAncestor) |
805 | specialCommonAncestor = newSpecialCommonAncestor; |
806 | } |
807 | |
808 | // If a single tab is selected, commonAncestor will be a text node inside a tab span. |
809 | // If two or more tabs are selected, commonAncestor will be the tab span. |
810 | // In either case, if there is a specialCommonAncestor already, it will necessarily be above |
811 | // any tab span that needs to be included. |
812 | if (!specialCommonAncestor && isTabSpanTextNode(&commonAncestor)) |
813 | specialCommonAncestor = commonAncestor.parentNode(); |
814 | if (!specialCommonAncestor && isTabSpanNode(&commonAncestor)) |
815 | specialCommonAncestor = &commonAncestor; |
816 | |
817 | if (auto* enclosingAnchor = enclosingElementWithTag(firstPositionInNode(specialCommonAncestor ? specialCommonAncestor : &commonAncestor), aTag)) |
818 | specialCommonAncestor = enclosingAnchor; |
819 | |
820 | return specialCommonAncestor; |
821 | } |
822 | |
823 | static String serializePreservingVisualAppearanceInternal(const Position& start, const Position& end, Vector<Node*>* nodes, ResolveURLs urlsToResolve, SerializeComposedTree serializeComposedTree, |
824 | AnnotateForInterchange annotate, ConvertBlocksToInlines convertBlocksToInlines, MSOListMode msoListMode) |
825 | { |
826 | static NeverDestroyed<const String> interchangeNewlineString(MAKE_STATIC_STRING_IMPL("<br class=\"" AppleInterchangeNewline "\">" )); |
827 | |
828 | if (!comparePositions(start, end)) |
829 | return emptyString(); |
830 | |
831 | RefPtr<Node> commonAncestor = commonShadowIncludingAncestor(start, end); |
832 | if (!commonAncestor) |
833 | return emptyString(); |
834 | |
835 | auto& document = *start.document(); |
836 | document.updateLayoutIgnorePendingStylesheets(); |
837 | |
838 | VisiblePosition visibleStart { start }; |
839 | VisiblePosition visibleEnd { end }; |
840 | |
841 | auto body = makeRefPtr(enclosingElementWithTag(firstPositionInNode(commonAncestor.get()), bodyTag)); |
842 | RefPtr<Element> fullySelectedRoot; |
843 | // FIXME: Do this for all fully selected blocks, not just the body. |
844 | if (body && VisiblePosition(firstPositionInNode(body.get())) == visibleStart && VisiblePosition(lastPositionInNode(body.get())) == visibleEnd) |
845 | fullySelectedRoot = body; |
846 | bool needsPositionStyleConversion = body && fullySelectedRoot == body && document.settings().shouldConvertPositionStyleOnCopy(); |
847 | |
848 | Node* specialCommonAncestor = highestAncestorToWrapMarkup(start, end, *commonAncestor, annotate); |
849 | |
850 | StyledMarkupAccumulator accumulator(start, end, nodes, urlsToResolve, serializeComposedTree, annotate, msoListMode, needsPositionStyleConversion, specialCommonAncestor); |
851 | |
852 | Position startAdjustedForInterchangeNewline = start; |
853 | if (annotate == AnnotateForInterchange::Yes && needInterchangeNewlineAfter(visibleStart)) { |
854 | if (visibleStart == visibleEnd.previous()) |
855 | return interchangeNewlineString; |
856 | |
857 | accumulator.appendString(interchangeNewlineString); |
858 | startAdjustedForInterchangeNewline = visibleStart.next().deepEquivalent(); |
859 | |
860 | if (comparePositions(startAdjustedForInterchangeNewline, end) >= 0) |
861 | return interchangeNewlineString; |
862 | } |
863 | |
864 | Node* lastClosed = accumulator.serializeNodes(startAdjustedForInterchangeNewline, end); |
865 | |
866 | if (specialCommonAncestor && lastClosed) { |
867 | // Also include all of the ancestors of lastClosed up to this special ancestor. |
868 | for (ContainerNode* ancestor = accumulator.parentNode(*lastClosed); ancestor; ancestor = accumulator.parentNode(*ancestor)) { |
869 | if (ancestor == fullySelectedRoot && convertBlocksToInlines == ConvertBlocksToInlines::No) { |
870 | RefPtr<EditingStyle> fullySelectedRootStyle = styleFromMatchedRulesAndInlineDecl(*fullySelectedRoot); |
871 | |
872 | // Bring the background attribute over, but not as an attribute because a background attribute on a div |
873 | // appears to have no effect. |
874 | if ((!fullySelectedRootStyle || !fullySelectedRootStyle->style() || !fullySelectedRootStyle->style()->getPropertyCSSValue(CSSPropertyBackgroundImage)) |
875 | && fullySelectedRoot->hasAttributeWithoutSynchronization(backgroundAttr)) |
876 | fullySelectedRootStyle->style()->setProperty(CSSPropertyBackgroundImage, "url('" + fullySelectedRoot->getAttribute(backgroundAttr) + "')" ); |
877 | |
878 | if (fullySelectedRootStyle->style()) { |
879 | // Reset the CSS properties to avoid an assertion error in addStyleMarkup(). |
880 | // This assertion is caused at least when we select all text of a <body> element whose |
881 | // 'text-decoration' property is "inherit", and copy it. |
882 | if (!propertyMissingOrEqualToNone(fullySelectedRootStyle->style(), CSSPropertyTextDecoration)) |
883 | fullySelectedRootStyle->style()->setProperty(CSSPropertyTextDecoration, CSSValueNone); |
884 | if (!propertyMissingOrEqualToNone(fullySelectedRootStyle->style(), CSSPropertyWebkitTextDecorationsInEffect)) |
885 | fullySelectedRootStyle->style()->setProperty(CSSPropertyWebkitTextDecorationsInEffect, CSSValueNone); |
886 | accumulator.wrapWithStyleNode(fullySelectedRootStyle->style(), document, true); |
887 | } |
888 | } else { |
889 | // Since this node and all the other ancestors are not in the selection we want to set RangeFullySelectsNode to DoesNotFullySelectNode |
890 | // so that styles that affect the exterior of the node are not included. |
891 | accumulator.wrapWithNode(*ancestor, convertBlocksToInlines == ConvertBlocksToInlines::Yes, StyledMarkupAccumulator::DoesNotFullySelectNode); |
892 | } |
893 | if (nodes) |
894 | nodes->append(ancestor); |
895 | |
896 | if (ancestor == specialCommonAncestor) |
897 | break; |
898 | } |
899 | } |
900 | |
901 | if (accumulator.needRelativeStyleWrapper() && needsPositionStyleConversion) { |
902 | if (accumulator.needClearingDiv()) |
903 | accumulator.appendString("<div style=\"clear: both;\"></div>" ); |
904 | RefPtr<EditingStyle> positionRelativeStyle = styleFromMatchedRulesAndInlineDecl(*body); |
905 | positionRelativeStyle->style()->setProperty(CSSPropertyPosition, CSSValueRelative); |
906 | accumulator.wrapWithStyleNode(positionRelativeStyle->style(), document, true); |
907 | } |
908 | |
909 | // FIXME: The interchange newline should be placed in the block that it's in, not after all of the content, unconditionally. |
910 | if (annotate == AnnotateForInterchange::Yes && needInterchangeNewlineAfter(visibleEnd.previous())) |
911 | accumulator.appendString(interchangeNewlineString); |
912 | |
913 | return accumulator.takeResults(); |
914 | } |
915 | |
916 | String serializePreservingVisualAppearance(const Range& range, Vector<Node*>* nodes, AnnotateForInterchange annotate, ConvertBlocksToInlines convertBlocksToInlines, ResolveURLs urlsToReslve) |
917 | { |
918 | return serializePreservingVisualAppearanceInternal(range.startPosition(), range.endPosition(), nodes, urlsToReslve, SerializeComposedTree::No, |
919 | annotate, convertBlocksToInlines, MSOListMode::DoNotPreserve); |
920 | } |
921 | |
922 | String serializePreservingVisualAppearance(const VisibleSelection& selection, ResolveURLs resolveURLs, SerializeComposedTree serializeComposedTree, Vector<Node*>* nodes) |
923 | { |
924 | return serializePreservingVisualAppearanceInternal(selection.start(), selection.end(), nodes, resolveURLs, serializeComposedTree, |
925 | AnnotateForInterchange::Yes, ConvertBlocksToInlines::No, MSOListMode::DoNotPreserve); |
926 | } |
927 | |
928 | |
929 | static bool shouldPreserveMSOLists(const String& markup) |
930 | { |
931 | if (!markup.startsWith("<html xmlns:" )) |
932 | return false; |
933 | auto tagClose = markup.find('>'); |
934 | if (tagClose == notFound) |
935 | return false; |
936 | auto htmlTag = markup.substring(0, tagClose); |
937 | return htmlTag.contains("xmlns:o=\"urn:schemas-microsoft-com:office:office\"" ) |
938 | && htmlTag.contains("xmlns:w=\"urn:schemas-microsoft-com:office:word\"" ); |
939 | } |
940 | |
941 | String sanitizedMarkupForFragmentInDocument(Ref<DocumentFragment>&& fragment, Document& document, MSOListQuirks msoListQuirks, const String& originalMarkup) |
942 | { |
943 | MSOListMode msoListMode = msoListQuirks == MSOListQuirks::CheckIfNeeded && shouldPreserveMSOLists(originalMarkup) |
944 | ? MSOListMode::Preserve : MSOListMode::DoNotPreserve; |
945 | |
946 | auto bodyElement = makeRefPtr(document.body()); |
947 | ASSERT(bodyElement); |
948 | bodyElement->appendChild(fragment.get()); |
949 | |
950 | // SerializeComposedTree::No because there can't be a shadow tree in the pasted fragment. |
951 | auto result = serializePreservingVisualAppearanceInternal(firstPositionInNode(bodyElement.get()), lastPositionInNode(bodyElement.get()), nullptr, |
952 | ResolveURLs::YesExcludingLocalFileURLsForPrivacy, SerializeComposedTree::No, AnnotateForInterchange::Yes, ConvertBlocksToInlines::No, msoListMode); |
953 | |
954 | if (msoListMode == MSOListMode::Preserve) { |
955 | StringBuilder builder; |
956 | builder.appendLiteral("<html xmlns:o=\"urn:schemas-microsoft-com:office:office\"\n" |
957 | "xmlns:w=\"urn:schemas-microsoft-com:office:word\"\n" |
958 | "xmlns:m=\"http://schemas.microsoft.com/office/2004/12/omml\"\n" |
959 | "xmlns=\"http://www.w3.org/TR/REC-html40\">" ); |
960 | builder.append(result); |
961 | builder.appendLiteral("</html>" ); |
962 | return builder.toString(); |
963 | } |
964 | |
965 | return result; |
966 | } |
967 | |
968 | static void restoreAttachmentElementsInFragment(DocumentFragment& fragment) |
969 | { |
970 | #if ENABLE(ATTACHMENT_ELEMENT) |
971 | if (!RuntimeEnabledFeatures::sharedFeatures().attachmentElementEnabled()) |
972 | return; |
973 | |
974 | // When creating a fragment we must strip the webkit-attachment-path attribute after restoring the File object. |
975 | Vector<Ref<HTMLAttachmentElement>> attachments; |
976 | for (auto& attachment : descendantsOfType<HTMLAttachmentElement>(fragment)) |
977 | attachments.append(attachment); |
978 | |
979 | for (auto& attachment : attachments) { |
980 | attachment->setUniqueIdentifier(attachment->attributeWithoutSynchronization(webkitattachmentidAttr)); |
981 | |
982 | auto attachmentPath = attachment->attachmentPath(); |
983 | auto blobURL = attachment->blobURL(); |
984 | if (!attachmentPath.isEmpty()) |
985 | attachment->setFile(File::create(attachmentPath)); |
986 | else if (!blobURL.isEmpty()) |
987 | attachment->setFile(File::deserialize({ }, blobURL, attachment->attachmentType(), attachment->attachmentTitle())); |
988 | |
989 | // Remove temporary attributes that were previously added in StyledMarkupAccumulator::appendCustomAttributes. |
990 | attachment->removeAttribute(webkitattachmentidAttr); |
991 | attachment->removeAttribute(webkitattachmentpathAttr); |
992 | attachment->removeAttribute(webkitattachmentbloburlAttr); |
993 | } |
994 | |
995 | Vector<Ref<HTMLImageElement>> images; |
996 | for (auto& image : descendantsOfType<HTMLImageElement>(fragment)) |
997 | images.append(image); |
998 | |
999 | for (auto& image : images) { |
1000 | auto attachmentIdentifier = image->attributeWithoutSynchronization(webkitattachmentidAttr); |
1001 | if (attachmentIdentifier.isEmpty()) |
1002 | continue; |
1003 | |
1004 | auto attachment = HTMLAttachmentElement::create(HTMLNames::attachmentTag, *fragment.ownerDocument()); |
1005 | attachment->setUniqueIdentifier(attachmentIdentifier); |
1006 | image->setAttachmentElement(WTFMove(attachment)); |
1007 | image->removeAttribute(webkitattachmentidAttr); |
1008 | } |
1009 | #else |
1010 | UNUSED_PARAM(fragment); |
1011 | #endif |
1012 | } |
1013 | |
1014 | Ref<DocumentFragment> createFragmentFromMarkup(Document& document, const String& markup, const String& baseURL, ParserContentPolicy parserContentPolicy) |
1015 | { |
1016 | // We use a fake body element here to trick the HTML parser into using the InBody insertion mode. |
1017 | auto fakeBody = HTMLBodyElement::create(document); |
1018 | auto fragment = DocumentFragment::create(document); |
1019 | |
1020 | fragment->parseHTML(markup, fakeBody.ptr(), parserContentPolicy); |
1021 | restoreAttachmentElementsInFragment(fragment); |
1022 | if (!baseURL.isEmpty() && baseURL != WTF::blankURL() && baseURL != document.baseURL()) |
1023 | completeURLs(fragment.ptr(), baseURL); |
1024 | |
1025 | return fragment; |
1026 | } |
1027 | |
1028 | String serializeFragment(const Node& node, SerializedNodes root, Vector<Node*>* nodes, ResolveURLs urlsToResolve, Vector<QualifiedName>* tagNamesToSkip, SerializationSyntax serializationSyntax) |
1029 | { |
1030 | MarkupAccumulator accumulator(nodes, urlsToResolve, serializationSyntax); |
1031 | return accumulator.serializeNodes(const_cast<Node&>(node), root, tagNamesToSkip); |
1032 | } |
1033 | |
1034 | static void fillContainerFromString(ContainerNode& paragraph, const String& string) |
1035 | { |
1036 | Document& document = paragraph.document(); |
1037 | |
1038 | if (string.isEmpty()) { |
1039 | paragraph.appendChild(createBlockPlaceholderElement(document)); |
1040 | return; |
1041 | } |
1042 | |
1043 | ASSERT(string.find('\n') == notFound); |
1044 | |
1045 | Vector<String> tabList = string.splitAllowingEmptyEntries('\t'); |
1046 | String tabText = emptyString(); |
1047 | bool first = true; |
1048 | size_t numEntries = tabList.size(); |
1049 | for (size_t i = 0; i < numEntries; ++i) { |
1050 | const String& s = tabList[i]; |
1051 | |
1052 | // append the non-tab textual part |
1053 | if (!s.isEmpty()) { |
1054 | if (!tabText.isEmpty()) { |
1055 | paragraph.appendChild(createTabSpanElement(document, tabText)); |
1056 | tabText = emptyString(); |
1057 | } |
1058 | Ref<Node> textNode = document.createTextNode(stringWithRebalancedWhitespace(s, first, i + 1 == numEntries)); |
1059 | paragraph.appendChild(textNode); |
1060 | } |
1061 | |
1062 | // there is a tab after every entry, except the last entry |
1063 | // (if the last character is a tab, the list gets an extra empty entry) |
1064 | if (i + 1 != numEntries) |
1065 | tabText.append('\t'); |
1066 | else if (!tabText.isEmpty()) |
1067 | paragraph.appendChild(createTabSpanElement(document, tabText)); |
1068 | |
1069 | first = false; |
1070 | } |
1071 | } |
1072 | |
1073 | bool isPlainTextMarkup(Node* node) |
1074 | { |
1075 | ASSERT(node); |
1076 | if (!is<HTMLDivElement>(*node)) |
1077 | return false; |
1078 | |
1079 | HTMLDivElement& element = downcast<HTMLDivElement>(*node); |
1080 | if (element.hasAttributes()) |
1081 | return false; |
1082 | |
1083 | Node* firstChild = element.firstChild(); |
1084 | if (!firstChild) |
1085 | return false; |
1086 | |
1087 | Node* secondChild = firstChild->nextSibling(); |
1088 | if (!secondChild) |
1089 | return firstChild->isTextNode() || firstChild->firstChild(); |
1090 | |
1091 | if (secondChild->nextSibling()) |
1092 | return false; |
1093 | |
1094 | return isTabSpanTextNode(firstChild->firstChild()) && secondChild->isTextNode(); |
1095 | } |
1096 | |
1097 | static bool contextPreservesNewline(const Range& context) |
1098 | { |
1099 | VisiblePosition position(context.startPosition()); |
1100 | Node* container = position.deepEquivalent().containerNode(); |
1101 | if (!container || !container->renderer()) |
1102 | return false; |
1103 | |
1104 | return container->renderer()->style().preserveNewline(); |
1105 | } |
1106 | |
1107 | Ref<DocumentFragment> createFragmentFromText(Range& context, const String& text) |
1108 | { |
1109 | Document& document = context.ownerDocument(); |
1110 | Ref<DocumentFragment> fragment = document.createDocumentFragment(); |
1111 | |
1112 | if (text.isEmpty()) |
1113 | return fragment; |
1114 | |
1115 | String string = text; |
1116 | string.replace("\r\n" , "\n" ); |
1117 | string.replace('\r', '\n'); |
1118 | |
1119 | auto createHTMLBRElement = [&document]() { |
1120 | auto element = HTMLBRElement::create(document); |
1121 | element->setAttributeWithoutSynchronization(classAttr, AppleInterchangeNewline); |
1122 | return element; |
1123 | }; |
1124 | |
1125 | if (contextPreservesNewline(context)) { |
1126 | fragment->appendChild(document.createTextNode(string)); |
1127 | if (string.endsWith('\n')) { |
1128 | fragment->appendChild(createHTMLBRElement()); |
1129 | } |
1130 | return fragment; |
1131 | } |
1132 | |
1133 | // A string with no newlines gets added inline, rather than being put into a paragraph. |
1134 | if (string.find('\n') == notFound) { |
1135 | fillContainerFromString(fragment, string); |
1136 | return fragment; |
1137 | } |
1138 | |
1139 | if (string.length() == 1 && string[0] == '\n') { |
1140 | // This is a single newline char, thus just create one HTMLBRElement. |
1141 | fragment->appendChild(createHTMLBRElement()); |
1142 | return fragment; |
1143 | } |
1144 | |
1145 | // Break string into paragraphs. Extra line breaks turn into empty paragraphs. |
1146 | Node* blockNode = enclosingBlock(context.firstNode()); |
1147 | Element* block = downcast<Element>(blockNode); |
1148 | bool useClonesOfEnclosingBlock = blockNode |
1149 | && blockNode->isElementNode() |
1150 | && !block->hasTagName(bodyTag) |
1151 | && !block->hasTagName(htmlTag) |
1152 | && block != editableRootForPosition(context.startPosition()); |
1153 | bool useLineBreak = enclosingTextFormControl(context.startPosition()); |
1154 | |
1155 | Vector<String> list = string.splitAllowingEmptyEntries('\n'); |
1156 | size_t numLines = list.size(); |
1157 | for (size_t i = 0; i < numLines; ++i) { |
1158 | const String& s = list[i]; |
1159 | |
1160 | RefPtr<Element> element; |
1161 | if (s.isEmpty() && i + 1 == numLines) { |
1162 | // For last line, use the "magic BR" rather than a P. |
1163 | element = createHTMLBRElement(); |
1164 | } else if (useLineBreak) { |
1165 | element = HTMLBRElement::create(document); |
1166 | fillContainerFromString(fragment, s); |
1167 | } else { |
1168 | if (useClonesOfEnclosingBlock) |
1169 | element = block->cloneElementWithoutChildren(document); |
1170 | else |
1171 | element = createDefaultParagraphElement(document); |
1172 | fillContainerFromString(*element, s); |
1173 | } |
1174 | fragment->appendChild(*element); |
1175 | } |
1176 | return fragment; |
1177 | } |
1178 | |
1179 | String documentTypeString(const Document& document) |
1180 | { |
1181 | DocumentType* documentType = document.doctype(); |
1182 | if (!documentType) |
1183 | return emptyString(); |
1184 | return serializeFragment(*documentType, SerializedNodes::SubtreeIncludingNode); |
1185 | } |
1186 | |
1187 | String urlToMarkup(const URL& url, const String& title) |
1188 | { |
1189 | StringBuilder markup; |
1190 | markup.appendLiteral("<a href=\"" ); |
1191 | markup.append(url.string()); |
1192 | markup.appendLiteral("\">" ); |
1193 | MarkupAccumulator::appendCharactersReplacingEntities(markup, title, 0, title.length(), EntityMaskInPCDATA); |
1194 | markup.appendLiteral("</a>" ); |
1195 | return markup.toString(); |
1196 | } |
1197 | |
1198 | ExceptionOr<Ref<DocumentFragment>> createFragmentForInnerOuterHTML(Element& contextElement, const String& markup, ParserContentPolicy parserContentPolicy) |
1199 | { |
1200 | auto* document = &contextElement.document(); |
1201 | if (contextElement.hasTagName(templateTag)) |
1202 | document = &document->ensureTemplateDocument(); |
1203 | auto fragment = DocumentFragment::create(*document); |
1204 | |
1205 | if (document->isHTMLDocument()) { |
1206 | fragment->parseHTML(markup, &contextElement, parserContentPolicy); |
1207 | return fragment; |
1208 | } |
1209 | |
1210 | bool wasValid = fragment->parseXML(markup, &contextElement, parserContentPolicy); |
1211 | if (!wasValid) |
1212 | return Exception { SyntaxError }; |
1213 | return fragment; |
1214 | } |
1215 | |
1216 | RefPtr<DocumentFragment> createFragmentForTransformToFragment(Document& outputDoc, const String& sourceString, const String& sourceMIMEType) |
1217 | { |
1218 | RefPtr<DocumentFragment> fragment = outputDoc.createDocumentFragment(); |
1219 | |
1220 | if (sourceMIMEType == "text/html" ) { |
1221 | // As far as I can tell, there isn't a spec for how transformToFragment is supposed to work. |
1222 | // Based on the documentation I can find, it looks like we want to start parsing the fragment in the InBody insertion mode. |
1223 | // Unfortunately, that's an implementation detail of the parser. |
1224 | // We achieve that effect here by passing in a fake body element as context for the fragment. |
1225 | auto fakeBody = HTMLBodyElement::create(outputDoc); |
1226 | fragment->parseHTML(sourceString, fakeBody.ptr()); |
1227 | } else if (sourceMIMEType == "text/plain" ) |
1228 | fragment->parserAppendChild(Text::create(outputDoc, sourceString)); |
1229 | else { |
1230 | bool successfulParse = fragment->parseXML(sourceString, 0); |
1231 | if (!successfulParse) |
1232 | return nullptr; |
1233 | } |
1234 | |
1235 | // FIXME: Do we need to mess with URLs here? |
1236 | |
1237 | return fragment; |
1238 | } |
1239 | |
1240 | Ref<DocumentFragment> createFragmentForImageAndURL(Document& document, const String& url, PresentationSize preferredSize) |
1241 | { |
1242 | auto imageElement = HTMLImageElement::create(document); |
1243 | imageElement->setAttributeWithoutSynchronization(HTMLNames::srcAttr, url); |
1244 | if (preferredSize.width) |
1245 | imageElement->setAttributeWithoutSynchronization(HTMLNames::widthAttr, AtomString::number(*preferredSize.width)); |
1246 | if (preferredSize.height) |
1247 | imageElement->setAttributeWithoutSynchronization(HTMLNames::heightAttr, AtomString::number(*preferredSize.height)); |
1248 | auto fragment = document.createDocumentFragment(); |
1249 | fragment->appendChild(imageElement); |
1250 | |
1251 | return fragment; |
1252 | } |
1253 | |
1254 | static Vector<Ref<HTMLElement>> collectElementsToRemoveFromFragment(ContainerNode& container) |
1255 | { |
1256 | Vector<Ref<HTMLElement>> toRemove; |
1257 | for (auto& element : childrenOfType<HTMLElement>(container)) { |
1258 | if (is<HTMLHtmlElement>(element)) { |
1259 | toRemove.append(element); |
1260 | collectElementsToRemoveFromFragment(element); |
1261 | continue; |
1262 | } |
1263 | if (is<HTMLHeadElement>(element) || is<HTMLBodyElement>(element)) |
1264 | toRemove.append(element); |
1265 | } |
1266 | return toRemove; |
1267 | } |
1268 | |
1269 | static void removeElementFromFragmentPreservingChildren(DocumentFragment& fragment, HTMLElement& element) |
1270 | { |
1271 | RefPtr<Node> nextChild; |
1272 | for (RefPtr<Node> child = element.firstChild(); child; child = nextChild) { |
1273 | nextChild = child->nextSibling(); |
1274 | element.removeChild(*child); |
1275 | fragment.insertBefore(*child, &element); |
1276 | } |
1277 | fragment.removeChild(element); |
1278 | } |
1279 | |
1280 | ExceptionOr<Ref<DocumentFragment>> createContextualFragment(Element& element, const String& markup, ParserContentPolicy parserContentPolicy) |
1281 | { |
1282 | auto result = createFragmentForInnerOuterHTML(element, markup, parserContentPolicy); |
1283 | if (result.hasException()) |
1284 | return result.releaseException(); |
1285 | |
1286 | auto fragment = result.releaseReturnValue(); |
1287 | |
1288 | // We need to pop <html> and <body> elements and remove <head> to |
1289 | // accommodate folks passing complete HTML documents to make the |
1290 | // child of an element. |
1291 | auto toRemove = collectElementsToRemoveFromFragment(fragment); |
1292 | for (auto& element : toRemove) |
1293 | removeElementFromFragmentPreservingChildren(fragment, element); |
1294 | |
1295 | return fragment; |
1296 | } |
1297 | |
1298 | static inline bool hasOneChild(ContainerNode& node) |
1299 | { |
1300 | Node* firstChild = node.firstChild(); |
1301 | return firstChild && !firstChild->nextSibling(); |
1302 | } |
1303 | |
1304 | static inline bool hasOneTextChild(ContainerNode& node) |
1305 | { |
1306 | return hasOneChild(node) && node.firstChild()->isTextNode(); |
1307 | } |
1308 | |
1309 | static inline bool hasMutationEventListeners(const Document& document) |
1310 | { |
1311 | return document.hasListenerType(Document::DOMSUBTREEMODIFIED_LISTENER) |
1312 | || document.hasListenerType(Document::DOMNODEINSERTED_LISTENER) |
1313 | || document.hasListenerType(Document::DOMNODEREMOVED_LISTENER) |
1314 | || document.hasListenerType(Document::DOMNODEREMOVEDFROMDOCUMENT_LISTENER) |
1315 | || document.hasListenerType(Document::DOMCHARACTERDATAMODIFIED_LISTENER); |
1316 | } |
1317 | |
1318 | // We can use setData instead of replacing Text node as long as script can't observe the difference. |
1319 | static inline bool canUseSetDataOptimization(const Text& containerChild, const ChildListMutationScope& mutationScope) |
1320 | { |
1321 | bool authorScriptMayHaveReference = containerChild.refCount(); |
1322 | return !authorScriptMayHaveReference && !mutationScope.canObserve() && !hasMutationEventListeners(containerChild.document()); |
1323 | } |
1324 | |
1325 | ExceptionOr<void> replaceChildrenWithFragment(ContainerNode& container, Ref<DocumentFragment>&& fragment) |
1326 | { |
1327 | Ref<ContainerNode> containerNode(container); |
1328 | ChildListMutationScope mutation(containerNode); |
1329 | |
1330 | if (!fragment->firstChild()) { |
1331 | containerNode->removeChildren(); |
1332 | return { }; |
1333 | } |
1334 | |
1335 | auto* containerChild = containerNode->firstChild(); |
1336 | if (containerChild && !containerChild->nextSibling()) { |
1337 | if (is<Text>(*containerChild) && hasOneTextChild(fragment) && canUseSetDataOptimization(downcast<Text>(*containerChild), mutation)) { |
1338 | ASSERT(!fragment->firstChild()->refCount()); |
1339 | downcast<Text>(*containerChild).setData(downcast<Text>(*fragment->firstChild()).data()); |
1340 | return { }; |
1341 | } |
1342 | |
1343 | return containerNode->replaceChild(fragment, *containerChild); |
1344 | } |
1345 | |
1346 | containerNode->removeChildren(); |
1347 | return containerNode->appendChild(fragment); |
1348 | } |
1349 | |
1350 | } |
1351 | |