diff options
author | Benjamin Muskalla | 2014-01-08 12:17:44 +0000 |
---|---|---|
committer | Gerrit Code Review @ Eclipse.org | 2014-01-08 19:34:20 +0000 |
commit | ca6156ee4ba285e4b40300107a1e08a249f084fe (patch) | |
tree | 37f0d01a8bcfe596e31dd7a3fa802ecf607f84b1 | |
parent | 5ea4867c5d798e7fd3cf9bb1c007623ec0e583eb (diff) | |
download | org.eclipse.mylyn.docs-ca6156ee4ba285e4b40300107a1e08a249f084fe.tar.gz org.eclipse.mylyn.docs-ca6156ee4ba285e4b40300107a1e08a249f084fe.tar.xz org.eclipse.mylyn.docs-ca6156ee4ba285e4b40300107a1e08a249f084fe.zip |
425080: HTML cleanup should ignore insignificant whitespaces in lists
Change-Id: I6febcaf4ea664b6fa9b7ea545bbe30713115aa8e
Task-Url: https://bugs.eclipse.org/bugs/show_bug.cgi?id=425080
Signed-off-by: Benjamin Muskalla <benjamin.muskalla@tasktop.com>
2 files changed, 65 insertions, 0 deletions
diff --git a/org.eclipse.mylyn.wikitext.core.tests/src/org/eclipse/mylyn/internal/wikitext/core/parser/html/HtmlCleanerTest.java b/org.eclipse.mylyn.wikitext.core.tests/src/org/eclipse/mylyn/internal/wikitext/core/parser/html/HtmlCleanerTest.java index d161b3268..58418e2b9 100644 --- a/org.eclipse.mylyn.wikitext.core.tests/src/org/eclipse/mylyn/internal/wikitext/core/parser/html/HtmlCleanerTest.java +++ b/org.eclipse.mylyn.wikitext.core.tests/src/org/eclipse/mylyn/internal/wikitext/core/parser/html/HtmlCleanerTest.java @@ -195,6 +195,36 @@ public class HtmlCleanerTest { } @Test + public void testWhitespacesBetweenListItems() { + String result = cleanToBody("<body><ol><li>item 0</li> <li>item 1</li>\t\n <li>item 2</li></ol></body>"); + + assertEquals("<body><ol><li>item 0</li><li>item 1</li><li>item 2</li></ol></body>", result); + } + + @Test + public void testWhitespacesBetweenOrderedListAndItems() { + String result = cleanToBody("<body><ol> <li>item 0</li> \n</ol></body>"); + + assertEquals("<body><ol><li>item 0</li></ol></body>", result); + } + + @Test + public void testWhitespacesBetweenUnorderedListAndItems() { + String result = cleanToBody("<body><ul> <li>item 0</li> \n</ul></body>"); + + assertEquals("<body><ul><li>item 0</li></ul></body>", result); + } + + @Test + public void testWhitespacesBetweenTableCells() { + String result = cleanToBody("<body><table><tbody><tr><th>cell 0.0</th> <th>cell 0.1</th>\t\n <th>cell 0.2</th></tr> <tr><td>cell 1.0</td> <td>cell 1.1</td> \t\r\n<td>cell 1.2</td></tr> <tr><td>cell 2.0</td> <td>cell 2.1</td> <td>cell 2.2</td></th></tbody></table></body>"); + + assertEquals( + "<body><table><tbody><tr><th>cell 0.0</th><th>cell 0.1</th><th>cell 0.2</th></tr><tr><td>cell 1.0</td><td>cell 1.1</td><td>cell 1.2</td></tr><tr><td>cell 2.0</td><td>cell 2.1</td><td>cell 2.2</td></tr></tbody></table></body>", + result); + } + + @Test public void testTrailingWhitespaceBodyNoBlock_WhitespaceOutsideBody2() { // bug 406943 Document document = Document.createShell(""); diff --git a/org.eclipse.mylyn.wikitext.core/src/org/eclipse/mylyn/internal/wikitext/core/parser/html/WhitespaceCleanupProcessor.java b/org.eclipse.mylyn.wikitext.core/src/org/eclipse/mylyn/internal/wikitext/core/parser/html/WhitespaceCleanupProcessor.java index 0e5ccfcac..1063d8e1a 100644 --- a/org.eclipse.mylyn.wikitext.core/src/org/eclipse/mylyn/internal/wikitext/core/parser/html/WhitespaceCleanupProcessor.java +++ b/org.eclipse.mylyn.wikitext.core/src/org/eclipse/mylyn/internal/wikitext/core/parser/html/WhitespaceCleanupProcessor.java @@ -15,16 +15,22 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import org.jsoup.helper.StringUtil; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.nodes.Node; import org.jsoup.nodes.TextNode; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; + /** * @author David Green */ class WhitespaceCleanupProcessor extends DocumentProcessor { + private final Set<String> CHILD_TAGS = ImmutableSet.of("li", "th", "tr", "td"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ + @Override public void process(Document document) { Element body = document.body(); @@ -60,6 +66,7 @@ class WhitespaceCleanupProcessor extends DocumentProcessor { affectedParents.add(textNode.parent()); } } + normalizeEmptySpaceBetweenNodes(element); children = element.childNodes(); if (!children.isEmpty()) { @@ -102,6 +109,34 @@ class WhitespaceCleanupProcessor extends DocumentProcessor { } } + private void normalizeEmptySpaceBetweenNodes(Element parent) { + List<Node> children = parent.childNodes(); + if (!children.isEmpty()) { + children = Lists.newArrayList(children); + for (Node child : children) { + Node previousSibling = child.previousSibling(); + Node nextSibling = child.nextSibling(); + if (child instanceof TextNode && previousSibling instanceof Element && nextSibling instanceof Element) { + TextNode textNode = (TextNode) child; + Element prevElement = (Element) previousSibling; + Element nextElement = (Element) nextSibling; + normalizeTextBetweenNodes(textNode, prevElement, nextElement); + } + } + } + } + + private void normalizeTextBetweenNodes(TextNode textNode, Element prevElement, Element nextElement) { + String wholeText = StringUtil.normaliseWhitespace(textNode.getWholeText()).trim(); + if (wholeText.isEmpty()) { + boolean isSurroundedByEqualTags = nextElement.tagName().equals(prevElement.tagName()) + && CHILD_TAGS.contains(nextElement.tagName()); + if (isSurroundedByEqualTags) { + textNode.remove(); + } + } + } + private Element computeAfterTarget(Element element) { if (element.parent() != null && !element.nodeName().equalsIgnoreCase("html")) { //$NON-NLS-1$ List<Node> elementParentChildNodes = element.parent().childNodes(); |