From 5cc73fe224f37087e74a6443b1b69c59e14f7f32 Mon Sep 17 00:00:00 2001 From: mpotterc0k Date: Thu, 9 May 2013 14:46:58 -0700 Subject: feature[ats_A0KLY]: Add Normalize HTML utility class Change-Id: I320acbef928cc71dab5460e8281330a99e4e39a8 --- .../org.eclipse.osee.framework.feature/feature.xml | 3 +- .../skynet/core/AllSkynetCoreJunitTestSuite.java | 2 + .../skynet/core/utility/NormalizeHtmlTest.java | 71 ++++ .../skynet/core/utility/UtilityTestSuite.java | 23 ++ .../utility/support/NormalizeHtml_converted.htm | 427 +++++++++++++++++++++ .../utility/support/NormalizeHtml_test_doc.htm | 170 ++++++++ .../META-INF/MANIFEST.MF | 3 +- .../skynet/core/utility/NormalizeHtml.java | 195 ++++++++++ 8 files changed, 892 insertions(+), 2 deletions(-) create mode 100644 plugins/org.eclipse.osee.framework.skynet.core.test/src/org/eclipse/osee/framework/skynet/core/utility/NormalizeHtmlTest.java create mode 100644 plugins/org.eclipse.osee.framework.skynet.core.test/src/org/eclipse/osee/framework/skynet/core/utility/UtilityTestSuite.java create mode 100644 plugins/org.eclipse.osee.framework.skynet.core.test/src/org/eclipse/osee/framework/skynet/core/utility/support/NormalizeHtml_converted.htm create mode 100644 plugins/org.eclipse.osee.framework.skynet.core.test/src/org/eclipse/osee/framework/skynet/core/utility/support/NormalizeHtml_test_doc.htm create mode 100644 plugins/org.eclipse.osee.framework.skynet.core/src/org/eclipse/osee/framework/skynet/core/utility/NormalizeHtml.java diff --git a/features/org.eclipse.osee.framework.feature/feature.xml b/features/org.eclipse.osee.framework.feature/feature.xml index e4b6982e4b4..534eb4b2013 100644 --- a/features/org.eclipse.osee.framework.feature/feature.xml +++ b/features/org.eclipse.osee.framework.feature/feature.xml @@ -39,7 +39,8 @@ - + + '); + return input.substring(iBodyStart, iBodyEnd - 1); + } + +} diff --git 
a/plugins/org.eclipse.osee.framework.skynet.core.test/src/org/eclipse/osee/framework/skynet/core/utility/UtilityTestSuite.java b/plugins/org.eclipse.osee.framework.skynet.core.test/src/org/eclipse/osee/framework/skynet/core/utility/UtilityTestSuite.java new file mode 100644 index 00000000000..77ceddf5b2f --- /dev/null +++ b/plugins/org.eclipse.osee.framework.skynet.core.test/src/org/eclipse/osee/framework/skynet/core/utility/UtilityTestSuite.java @@ -0,0 +1,23 @@ +/******************************************************************************* + * Copyright (c) 2013 Boeing. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * Boeing - initial API and implementation + *******************************************************************************/ +package org.eclipse.osee.framework.skynet.core.utility; + +import org.junit.runner.RunWith; +import org.junit.runners.Suite; + +@RunWith(Suite.class) +@Suite.SuiteClasses({NormalizeHtmlTest.class}) +/** + * @author Marc A. Potter + */ +public class UtilityTestSuite { + // Test Suite +} diff --git a/plugins/org.eclipse.osee.framework.skynet.core.test/src/org/eclipse/osee/framework/skynet/core/utility/support/NormalizeHtml_converted.htm b/plugins/org.eclipse.osee.framework.skynet.core.test/src/org/eclipse/osee/framework/skynet/core/utility/support/NormalizeHtml_converted.htm new file mode 100644 index 00000000000..092f5e515e8 --- /dev/null +++ b/plugins/org.eclipse.osee.framework.skynet.core.test/src/org/eclipse/osee/framework/skynet/core/utility/support/NormalizeHtml_converted.htm @@ -0,0 +1,427 @@ + + + + + + + + + + + + + + + + + + +

+ + 6 pt font + +

+

+ + + 7 pt font bold + + +

+

+ + + + 8 pt font bold underline + + + +

+

+ + + + + + 9 pt font bold, unerline italic + + + + + +

+

+ + + + + + 10 pt font bold, underline, italic, red + + + + + +

+

+ + + 10.5 pt font normal + + +

+

+ + + + 11 pt font highlighted + + + +

+

+ + + + 12 pt font normal right justified + + + +

+

+ + + + + 13 pt font centered + + + + +

+

+ + + + 14 pt font justified (doesn't seem to work in HTML or OO) + + + +

+

+ + + + 15 pt font normal outline + + + +

+

+ + + + 16 pt font shadow + + + +

+

+ + + + + 18 point font strikeout + + + + +

+

+ + + + + 20 pt font + + + + + + super script + + + + + +

+

+ + + + + 22 pt font + + + + + + sub script + + + + + +

+

+ + + + 24 pt font normal + + + +

+ + + + + + + + + + + + + + + +
+

+ + 26 pt in table with border + +

+
+

+ + 26 pt in table with border + +

+
+

+ + 28 pt in table with border + +

+
+

+ + 28 pt in table with border + +

+
+

+
+

+

+ + + + 32 pt font + + + +

+ + + + + + + + + + + + + + + +
+

+ + 36 pt in table without border + +

+
+

+ + 36 pt in table without border + +

+
+

+ + 40 pt in table w/o border + +

+
+

+ + 40 pt in table w/o border + +

+
+

+
+

+

+ + + + 48 pt + + + +

+
    +
  1. +

    + + + + 54 pt numbered list + + + +

    +
  2. +
  3. +

    + + + + 60 pt + + + +

    +
  4. +
+

+
+

+
    +
  • +

    + + + + 66 pt bulleted + + + +

    +
  • +
  • +

    + + + + 72 pt + + + +

    +
  • +
+

+
+

+

+ + + + 80 pt indent right + + + +

+

+ + + + 88 pt + + + +

+

+ + + + 96 pt + + + +

+

+ + + + + + New font 10 pt with special chars !@#$%^&*()_+}{[]<>,.:";'/?~` + + + + + +

+

+
+

+

+
+

+

+
+

+

+
+

+

+ + + + + + This is a test: " this is a test " ' this isn't not a test' + + + + + +

+

+
+

+

+
+

+

+
+

+

+
+
+

+ + \ No newline at end of file diff --git a/plugins/org.eclipse.osee.framework.skynet.core.test/src/org/eclipse/osee/framework/skynet/core/utility/support/NormalizeHtml_test_doc.htm b/plugins/org.eclipse.osee.framework.skynet.core.test/src/org/eclipse/osee/framework/skynet/core/utility/support/NormalizeHtml_test_doc.htm new file mode 100644 index 00000000000..ef338dbdb40 --- /dev/null +++ b/plugins/org.eclipse.osee.framework.skynet.core.test/src/org/eclipse/osee/framework/skynet/core/utility/support/NormalizeHtml_test_doc.htm @@ -0,0 +1,170 @@ + + + + + + + + + + + + + + + + + + +

6 +pt font

+

7 +pt font bold +

+

8 +pt font bold underline +

+

9 +pt font bold, unerline italic +

+

10 +pt font bold, underline, italic, red +

+

10.5 +pt font normal

+

11 +pt font highlighted +

+

+12 +pt font normal right justified

+

+13 +pt font centered

+

14 +pt font justified (doesn't seem to work in HTML or OO)

+

15 +pt font normal outline

+

16 +pt font shadow

+

18 +point font strikeout

+

20 +pt font super +script

+

22 +pt font sub +script

+

24 +pt font normal +

+ + + + + + + + + + + +
+

26 + pt in table with border

+
+

26 + pt in table with border

+
+

28 + pt in table with border

+
+

28 + pt in table with border

+
+


+

+

32 +pt font

+ + + + + + + + + + + +
+

36 + pt in table without border

+
+

36 + pt in table without border

+
+

40 + pt in table w/o border

+
+

40 + pt in table w/o border

+
+


+

+

48 +pt

+
    +
  1. + 54 + pt numbered list

    +
  2. + 60 + pt

    +
+


+

+
    +
  • + 66 + pt bulleted

    +
  • + 72 + pt

    +
+


+

+

+80 +pt indent right +

+

88 +pt

+

96 +pt

+

New +font 10 pt with special chars !@#$%^&*()_+}{[]<>,.:”;'/?~`

+


+

+


+

+


+

+


+

+

This +is a test: “ this is a test ” ‘ this isn't not a test’

+


+

+


+

+


+

+



+

+ + \ No newline at end of file diff --git a/plugins/org.eclipse.osee.framework.skynet.core/META-INF/MANIFEST.MF b/plugins/org.eclipse.osee.framework.skynet.core/META-INF/MANIFEST.MF index 27deb6faf7e..9bc3c8ca5ad 100644 --- a/plugins/org.eclipse.osee.framework.skynet.core/META-INF/MANIFEST.MF +++ b/plugins/org.eclipse.osee.framework.skynet.core/META-INF/MANIFEST.MF @@ -13,7 +13,8 @@ Require-Bundle: org.eclipse.core.runtime, org.eclipse.osee.framework.database, org.eclipse.osee.framework.messaging.event.res, org.eclipse.osee.framework.messaging, - org.eclipse.osee.framework.lifecycle + org.eclipse.osee.framework.lifecycle, + org.jsoup;bundle-version="1.7.2" Export-Package: org.eclipse.osee.framework.skynet.core, org.eclipse.osee.framework.skynet.core.artifact, org.eclipse.osee.framework.skynet.core.artifact.factory, diff --git a/plugins/org.eclipse.osee.framework.skynet.core/src/org/eclipse/osee/framework/skynet/core/utility/NormalizeHtml.java b/plugins/org.eclipse.osee.framework.skynet.core/src/org/eclipse/osee/framework/skynet/core/utility/NormalizeHtml.java new file mode 100644 index 00000000000..6a88e13707b --- /dev/null +++ b/plugins/org.eclipse.osee.framework.skynet.core/src/org/eclipse/osee/framework/skynet/core/utility/NormalizeHtml.java @@ -0,0 +1,195 @@ +/******************************************************************************* + * Copyright (c) 2013 Boeing. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * Boeing - initial API and implementation + *******************************************************************************/ +package org.eclipse.osee.framework.skynet.core.utility; + +import java.util.TreeMap; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Attribute; +import org.jsoup.nodes.Attributes; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Document.OutputSettings; +import org.jsoup.nodes.Document.QuirksMode; +import org.jsoup.nodes.Element; +import org.jsoup.nodes.Entities.EscapeMode; +import org.jsoup.select.Elements; + +/** + * This class will convert an HTML string to a normalized format. This allows the same output from HTML regardless of + * the input editor. The least versatile editor will be used. Currently that is TinyMCE. The reason to do this is that + * options can be removed, but not added to the HTML + * + *
+ * Assumptions:
+ *    The input is valid HTML
+ * Items that change
+ *    HTML tags may be upper case, tinyMCE are lower
+ *    {@code <b>} is converted to {@code <strong>}
+ *    tinyMCE does NOT use font point only small, medium, ...
+ *          6-8 point == xx-small
+ *          9-11 point == small
+ *          12-13 point == medium
+ *          14-16 point == large
+ *          18-20 point == x-large
+ *          22-28 point == xx-large
+ *          >28 point == 300%
+ *    replace &ldquo; &rdquo; &lsquo; &rsquo; with " and '
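+ *    replace {@code <i>} with {@code <em>}
+ *    replace {@code <u>} with {@code <span style="text-decoration: underline;">}
+ *    replace {@code <strike>} with {@code <span style="text-decoration: line-through;">}
+ *    replace {@code <font>} with {@code <span>}, carrying face, color and size over as CSS in the style attribute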
+ *    remove bordercolor from table tag
+ * 
+ *
+ * @author Marc A. Potter
+ */
+public final class NormalizeHtml {
+
+   /**
+    * Largest point size (inclusive) of each bucket, mapped to the CSS font-size value written for that bucket. Every
+    * value already carries its trailing semicolon. The {@code Integer.MAX_VALUE} key guarantees that
+    * {@code ceilingEntry} always finds an entry.
+    */
+   private static final TreeMap<Integer, String> FONT_MAP = initializeFontMap();
+   /** Font-size values ordered smallest to largest; index {@code n - 1} serves {@code <font size="n">}. */
+   private static final String[] FONT_VALUES = FONT_MAP.values().toArray(new String[0]);
+   private static final String MEDIUM_FONT = "medium;";
+   private static final String CHARSET = "UTF-8";
+   private static final int INDENT_AMOUNT = 4;
+   // Typographic quotes are written as unicode escapes so this source file stays pure ASCII.
+   private static final String LDQUO = "\u201C";
+   private static final String RDQUO = "\u201D";
+   private static final String LSQUO = "\u2018";
+   private static final String RSQUO = "\u2019";
+
+   private NormalizeHtml() {
+      // Utility Class
+   }
+
+   /** Builds the point-size to font-size lookup table backing {@link #FONT_MAP}. */
+   private static TreeMap<Integer, String> initializeFontMap() {
+      TreeMap<Integer, String> map = new TreeMap<Integer, String>();
+      map.put(Integer.valueOf(8), "xx-small;");
+      map.put(Integer.valueOf(11), "small;");
+      map.put(Integer.valueOf(13), "medium;");
+      map.put(Integer.valueOf(16), "large;");
+      map.put(Integer.valueOf(20), "x-large;");
+      map.put(Integer.valueOf(28), "xx-large;");
+      map.put(Integer.valueOf(Integer.MAX_VALUE), "300%;");
+      return map;
+   }
+
+   /**
+    * Parses the given HTML and re-serializes it in normalized form: {@code <b>} becomes {@code <strong>},
+    * {@code <i>} becomes {@code <em>}, {@code <u>}, {@code <strike>} and {@code <font>} become styled
+    * {@code <span>} elements, and typographic quotes are replaced by their plain ASCII equivalents.
+    *
+    * @param inputHTML the HTML text to normalize
+    * @return the pretty-printed, normalized HTML document
+    */
+   public static String convertToNormalizedHTML(String inputHTML) {
+      Document doc = Jsoup.parse(inputHTML);
+      doc.quirksMode(QuirksMode.noQuirks);
+      OutputSettings outputSettings = doc.outputSettings();
+      outputSettings.charset(CHARSET);
+      outputSettings.escapeMode(EscapeMode.xhtml);
+      outputSettings.prettyPrint(true);
+      outputSettings.outline(true);
+      outputSettings.indentAmount(INDENT_AMOUNT);
+      doc.outputSettings(outputSettings);
+      replaceTag(doc, "b", "strong", null);
+      replaceTag(doc, "i", "em", null);
+      replaceTag(doc, "u", "span", "text-decoration: underline;");
+      replaceTag(doc, "strike", "span", "text-decoration: line-through;");
+      processFontTags(doc);
+      return processText(doc);
+   }
+
+   /**
+    * Renames every element matching {@code selector} to {@code newTag}; when {@code style} is not null it is set as
+    * the element's style attribute, replacing any style the element already had.
+    */
+   private static void replaceTag(Document doc, String selector, String newTag, String style) {
+      Elements matches = doc.select(selector);
+      for (Element e : matches) {
+         e.tagName(newTag);
+         if (style != null) {
+            e.attr("style", style);
+         }
+      }
+   }
+
+   /**
+    * Serializes the document and replaces typographic quotes with plain ones. Text nodes are not elements, so they
+    * cannot be reached with a selector; the substitution is therefore done on the serialized HTML.
+    */
+   private static String processText(Document doc) {
+      String theText = doc.outerHtml();
+
+      // Plain literal replacement is intended here, so replace() is used instead of the regex-based replaceAll().
+      theText = theText.replace(LDQUO, "\"");
+      theText = theText.replace(RDQUO, "\"");
+      theText = theText.replace(LSQUO, "'");
+      theText = theText.replace(RSQUO, "'");
+      // Jsoup writes the apostrophe as an entity in this escape mode; turn it back into the plain character.
+      theText = theText.replace("&apos;", "'");
+      return theText;
+   }
+
+   /**
+    * Converts every {@code <font>} element into a {@code <span>} whose style attribute carries the face, color and
+    * size of the original element. All other attributes of the font element are dropped.
+    */
+   private static void processFontTags(Document doc) {
+      Elements font = doc.select("font");
+      for (Element e : font) {
+         // Iterate over a copy because the attributes are removed from the element inside the loop.
+         Attributes attrs = e.attributes().clone();
+         StringBuilder styleString = new StringBuilder();
+         String theSizeString = "";
+         for (Attribute attribute : attrs) {
+            String attributeName = attribute.getKey();
+            String attributeValue = attribute.getValue();
+            if (attributeName.equalsIgnoreCase("face")) {
+               appendDeclaration(styleString, "font-family", attributeValue);
+            } else if (attributeName.equalsIgnoreCase("size")) {
+               theSizeString = fontSizeFromHtmlSize(attributeValue, theSizeString);
+            } else if (attributeName.equalsIgnoreCase("color")) {
+               appendDeclaration(styleString, "color", attributeValue);
+            } else if (attributeName.equalsIgnoreCase("style")) {
+               // possible that font size specified here (font-size: xxpt)
+               int size = attributeValue.indexOf("font-size:");
+               if (size != -1) {
+                  size += "font-size:".length();
+                  theSizeString = getFontSize(attributeValue.substring(size));
+               }
+            }
+            e.removeAttr(attributeName);
+         }
+         if (theSizeString.length() > 0) {
+            appendDeclaration(styleString, "font-size", theSizeString);
+         }
+         e.tagName("span");
+         e.attr("style", styleString.toString());
+      }
+   }
+
+   /**
+    * Appends {@code " property: value"} to the style being built. A semicolon is inserted first when the previous
+    * declaration was left unterminated, so several declarations on one element remain valid CSS while a single
+    * declaration is written exactly as before.
+    */
+   private static void appendDeclaration(StringBuilder style, String property, String value) {
+      int length = style.length();
+      if (length > 0 && style.charAt(length - 1) != ';') {
+         style.append(';');
+      }
+      style.append(' ').append(property).append(": ").append(value);
+   }
+
+   /**
+    * Maps the value of a {@code <font size="n">} attribute to the matching entry of {@link #FONT_VALUES}.
+    *
+    * @param sizeValue the raw attribute value
+    * @param fallback the value to return when {@code sizeValue} is not a number between 1 and the table length
+    * @return the font-size value for the given size, or {@code fallback}
+    */
+   private static String fontSizeFromHtmlSize(String sizeValue, String fallback) {
+      int theSize;
+      try {
+         theSize = Integer.parseInt(sizeValue.trim());
+      } catch (NumberFormatException ex) {
+         // Not a plain number; leave whatever size was already determined.
+         return fallback;
+      }
+      if (theSize < 1 || theSize > FONT_VALUES.length) {
+         return fallback;
+      }
+      return FONT_VALUES[theSize - 1];
+   }
+
+   /**
+    * Translates the text following {@code font-size:} into one of the {@link #FONT_MAP} values. The expected format
+    * is {@code NNpt}, optionally surrounded by white space and followed by further declarations (for example
+    * {@code 12pt; font-family: ...}). Fractional sizes are rounded up to the next whole point. If the text does not
+    * start with a point size, a medium font is returned as the default.
+    */
+   private static String getFontSize(String inputStyle) {
+      int length = inputStyle.length();
+      int numberStart = 0;
+      // The bounds check must come first, otherwise charAt fails on empty or all-blank input.
+      while (numberStart < length && Character.isWhitespace(inputStyle.charAt(numberStart))) {
+         numberStart++;
+      }
+      int numberEnd = numberStart;
+      boolean seenDigit = false;
+      boolean seenDot = false;
+      while (numberEnd < length) {
+         char c = inputStyle.charAt(numberEnd);
+         if (c >= '0' && c <= '9') {
+            seenDigit = true;
+         } else if (c == '.' && !seenDot) {
+            seenDot = true;
+         } else {
+            break;
+         }
+         numberEnd++;
+      }
+      int unitStart = numberEnd;
+      while (unitStart < length && Character.isWhitespace(inputStyle.charAt(unitStart))) {
+         unitStart++;
+      }
+      // Only the unit directly after the number counts; a later "pt" (e.g. inside a font name) is ignored.
+      if (!seenDigit || !inputStyle.regionMatches(true, unitStart, "pt", 0, 2)) {
+         return MEDIUM_FONT; // average middle font
+      }
+      double points = Double.parseDouble(inputStyle.substring(numberStart, numberEnd));
+      // The cast saturates at Integer.MAX_VALUE, which is itself a key of FONT_MAP.
+      int theSize = (int) Math.ceil(points);
+      return FONT_MAP.ceilingEntry(Integer.valueOf(theSize)).getValue();
+   }
+
+}
-- 
cgit v1.2.3