Skip to main content
summary refs log tree commit diff stats
diff options
context:
space:
mode:
author mpotterc0k 2013-09-05 15:17:28 +0000
committer mpotterc0k 2013-09-10 16:09:11 +0000
commit 5f99e29c5a3f035c7295f8e571dd4c093945acdb (patch)
tree d5981fea90bc120ba4fdb95ef1feb039f019982d
parent 6a110dcfe66720fc47705f3b0ea4972149a4cf71 (diff)
download org.eclipse.osee-5f99e29c5a3f035c7295f8e571dd4c093945acdb.tar.gz
org.eclipse.osee-5f99e29c5a3f035c7295f8e571dd4c093945acdb.tar.xz
org.eclipse.osee-5f99e29c5a3f035c7295f8e571dd4c093945acdb.zip
feature[ats_URKSZ]: Update WORDML conversion and DOORS importer
Change-Id: I470a35fa7c748331c16d5a73f33f837089dee3c0
Signed-off-by: mpotterc0k <marc.a.potter@boeing.com>
-rw-r--r-- plugins/org.eclipse.osee.client.integration.tests/src/org/eclipse/osee/client/integration/tests/integration/skynet/core/DoorsArtifactExtractorTest.java 37
-rw-r--r-- plugins/org.eclipse.osee.client.integration.tests/support/doorsArtifactExtractor/Expected_list.htm 16
-rw-r--r-- plugins/org.eclipse.osee.client.integration.tests/support/doorsArtifactExtractor/sample_DOORS_export.htm 390
-rw-r--r-- plugins/org.eclipse.osee.framework.skynet.core/src/org/eclipse/osee/framework/skynet/core/importing/parsers/DoorsArtifactExtractor.java 100
-rw-r--r-- plugins/org.eclipse.osee.framework.skynet.core/src/org/eclipse/osee/framework/skynet/core/utility/NormalizeHtml.java 65
5 files changed, 444 insertions, 164 deletions
diff --git a/plugins/org.eclipse.osee.client.integration.tests/src/org/eclipse/osee/client/integration/tests/integration/skynet/core/DoorsArtifactExtractorTest.java b/plugins/org.eclipse.osee.client.integration.tests/src/org/eclipse/osee/client/integration/tests/integration/skynet/core/DoorsArtifactExtractorTest.java
index be86cf15bc8..9ae52f8917c 100644
--- a/plugins/org.eclipse.osee.client.integration.tests/src/org/eclipse/osee/client/integration/tests/integration/skynet/core/DoorsArtifactExtractorTest.java
+++ b/plugins/org.eclipse.osee.client.integration.tests/src/org/eclipse/osee/client/integration/tests/integration/skynet/core/DoorsArtifactExtractorTest.java
@@ -29,6 +29,7 @@ import org.eclipse.osee.framework.core.operation.NullOperationLogger;
import org.eclipse.osee.framework.jdk.core.util.Lib;
import org.eclipse.osee.framework.skynet.core.importing.RoughArtifact;
import org.eclipse.osee.framework.skynet.core.importing.RoughArtifactKind;
+import org.eclipse.osee.framework.skynet.core.importing.RoughAttributeSet;
import org.eclipse.osee.framework.skynet.core.importing.operations.RoughArtifactCollector;
import org.eclipse.osee.framework.skynet.core.importing.parsers.DoorsArtifactExtractor;
import org.junit.Before;
@@ -49,20 +50,28 @@ public class DoorsArtifactExtractorTest {
private static final String IMAGE_CONTENT = "Image Content";
private static final String PRIME_ITEM_DIAGRAM = "Prime item diagram.";
private static final String COMPANY_DOCUMENTS = "Company documents.";
+ private static final String VOICE_STATUS = "Voice status.";
private static final String[] ARTIFACT_NAMES = {
- "Door_Requirements",
+ "Example_DOORS_File",
"SCOPE",
"APPLICABLE DOCUMENTS",
- "Non-Government documents.",
- "Company documents.",
+ "Government documents.",
+ "Specifications.",
+ "Military.",
+ "Standards.",
"REQUIREMENTS",
"Prime item definition.",
- PRIME_ITEM_DIAGRAM};
+ PRIME_ITEM_DIAGRAM,
+ VOICE_STATUS};
private static final String[] ATTRIBUTE_TYPE_LIST = {
"Name",
+ "Subsystem",
"Legacy Id",
+ "Verification Event",
"HTML Content",
IMAGE_CONTENT,
+ "Qualification Method",
+ "Verification Level",
"Paragraph Number"};
private static final RoughArtifactKind[] ARTIFACT_TYPES = {
@@ -72,8 +81,11 @@ public class DoorsArtifactExtractorTest {
RoughArtifactKind.SECONDARY,
RoughArtifactKind.SECONDARY,
RoughArtifactKind.SECONDARY,
+ RoughArtifactKind.SECONDARY,
+ RoughArtifactKind.SECONDARY,
+ RoughArtifactKind.PRIMARY,
RoughArtifactKind.PRIMARY,
- RoughArtifactKind.SECONDARY,};
+ RoughArtifactKind.PRIMARY};
private static final String DOCUMENT_APPLICABILITY = "Document 1";
@@ -83,6 +95,7 @@ public class DoorsArtifactExtractorTest {
private DoorsArtifactExtractor extractor = null;
private RoughArtifactCollector collector;
private static File doorHtmlExport;
+ private static File expectedList;
@BeforeClass
public static void setUpResources() throws IOException {
@@ -90,6 +103,8 @@ public class DoorsArtifactExtractorTest {
copyResource("sample_DOORS_export.htm", doorHtmlExport);
copyResource(THIS_IS_A_JPEG_IMAGE_JPG, folder.newFile(THIS_IS_A_JPEG_IMAGE_JPG));
copyResource(THIS_IS_A_PNG_IMAGE_PNG, folder.newFile(THIS_IS_A_PNG_IMAGE_PNG));
+ expectedList = folder.newFile("Expected_list.htm");
+ copyResource("Expected_list.htm", expectedList);
}
@Before
@@ -123,6 +138,8 @@ public class DoorsArtifactExtractorTest {
*/
if (PRIME_ITEM_DIAGRAM.equals(actualName)) {
checkPrimeItemDiagram(artifact);
+ } else if (VOICE_STATUS.equals(actualName)) {
+ checkList(artifact);
}
}
}
@@ -172,7 +189,7 @@ public class DoorsArtifactExtractorTest {
*/
if (COMPANY_DOCUMENTS.equals(actualName)) {
String theHtml = artifact.getRoughAttribute(CoreAttributeTypes.HTMLContent.getName());
- assertEquals("Document Applicability filter failed", theHtml.indexOf("ABC-DEF"), -1);
+ assertEquals("Document Applicability filter failed", theHtml.indexOf("XYZ-ABC"), -1);
}
}
}
@@ -202,4 +219,12 @@ public class DoorsArtifactExtractorTest {
Lib.close(outputStream);
}
}
+
+ private void checkList(RoughArtifact artifact) throws Exception {
+ RoughAttributeSet attributes = artifact.getAttributes();
+ String input = attributes.getSoleAttributeValue("HTML Content");
+ String expected = Lib.fileToString(expectedList);
+ assertEquals("Document Applicability filter failed", input, expected);
+
+ }
}
diff --git a/plugins/org.eclipse.osee.client.integration.tests/support/doorsArtifactExtractor/Expected_list.htm b/plugins/org.eclipse.osee.client.integration.tests/support/doorsArtifactExtractor/Expected_list.htm
new file mode 100644
index 00000000000..ae16ce4b6fd
--- /dev/null
+++ b/plugins/org.eclipse.osee.client.integration.tests/support/doorsArtifactExtractor/Expected_list.htm
@@ -0,0 +1,16 @@
+The following voice warning status shall be provided for each crewmember:
+ <br />
+ <ol type="a">
+ <li>
+ Aircraft voice warning. An audio indication to alert the crew of detected EXA threats, including the following:
+ <ol>
+ <li>Countermeasures maneuver cue (if enabled).</li>
+ <li>Threat type.</li>
+ <li>Threat azimuth.</li>
+ <li>Threat mode.</li>
+ </ol>
+ </li>
+ <li>EXA voice threshold. An indication as to the current voice threshold mode selected.</li>
+ <li>Maneuver cue. An indication as to whether the EXA maneuver cue voice warning is on or off.</li>
+ <li>HFI voice selection. An indication as to whether HFI voice warning is on or off.</li>
+ </ol> \ No newline at end of file
diff --git a/plugins/org.eclipse.osee.client.integration.tests/support/doorsArtifactExtractor/sample_DOORS_export.htm b/plugins/org.eclipse.osee.client.integration.tests/support/doorsArtifactExtractor/sample_DOORS_export.htm
index d8a0ee913e9..1a4ae9ae893 100644
--- a/plugins/org.eclipse.osee.client.integration.tests/support/doorsArtifactExtractor/sample_DOORS_export.htm
+++ b/plugins/org.eclipse.osee.client.integration.tests/support/doorsArtifactExtractor/sample_DOORS_export.htm
@@ -1,7 +1,8 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"><html><head>
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
-<title>Door Requirements</title></head>
+<title>Example_DOORS_File</title></head>
<BODY BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#EE0000" ALINK="#808080" VLINK="#808080">
+<!-- 09 August 2013 Miller, David W -->
<table border><tr bgcolor="#FFC0CB">
<th width=50 align=Right>ID</th>
<th width=420 align=Left>Requirements</th>
@@ -12,11 +13,12 @@
<th width=130 align=Left>Effectivity</th>
<th width=160 align=Left>Paragraph Heading</th>
<th width=160 align=Left>Document Applicability</th>
-<th width=142 align=Left>Verification Criteria (V-PIDS_Verification)</th>
+<th width=166 align=Left>Verification Criteria (V-PIDS_Verification)</th>
<th width=87 align=Left>Change Status</th>
<th width=111 align=Left>Proposed Object Heading</th>
<th width=116 align=Left>Proposed Object Text</th>
-<th width=119 align=Left>Change Rationale</th>
+<th width=182 align=Left>OSEE GUID</th>
+<th width=222 align=Left>Subsystem</th>
<th nowrap> Links </th>
</tr>
<tr><td>EXA-1</td>
@@ -26,7 +28,7 @@
</DIV></b><br></td>
<td>1</td>
<td>False</td>
-<td>Heading</td>
+<td>Header</td>
<td><br></td>
<td><br></td>
<td>SCOPE</td>
@@ -36,27 +38,27 @@
<td><br></td>
<td><br></td>
<td><br></td>
-<td nowrap align="left"><a href = "sample_DOORS_export#X5880"> EXA-1<br> (5880)</a>(IN)<br>
-<small>...</small></td>
+<td>Example</td>
+<td nowrap align="left"><small>...</small></td>
</tr>
-<tr><td>EXA-2</td>
-<td><a name="X2">
-</a>This specification establishes performance, design, development, and test requirements <br>
+<tr><td>EXA-368</td>
+<td><a name="X368">
+</a>This specification establishes performance, design, development, and test requirements.<br>
</td>
-<td>1.0-1</td>
+<td>1.0-3</td>
<td>False</td>
<td>Information</td>
<td><br></td>
<td><br></td>
<td>SCOPE</td>
-<td>Some Documented information</td>
+<td>Document 1</td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
-<td nowrap align="left"><a href = "sample_DOORS_export#X5881"> EXA-2<br> (5881)</a>(IN)<br>
-<small>...</small></td>
+<td>Example</td>
+<td nowrap align="left"><small>...</small></td>
</tr>
<tr><td>EXA-3</td>
<td><a name="X3">
@@ -65,7 +67,7 @@
</DIV></b><br></td>
<td>2</td>
<td>False</td>
-<td>Heading</td>
+<td>Header</td>
<td><br></td>
<td><br></td>
<td>APPLICABLE DOCUMENTS</td>
@@ -75,105 +77,145 @@
<td><br></td>
<td><br></td>
<td><br></td>
-<td nowrap align="left"><a href = "sample_DOORS_export#X5884"> EXA-3<br> (1234)</a>(IN)<br>
-<small>...</small></td>
+<td>Example</td>
+<td nowrap align="left"><small>...</small></td>
+</tr>
+<tr><td>EXA-4</td>
+<td><a name="X4">
+</a>The revisions or issues shown below of documents listed hereunder form a part of this specification to the extent invoked by specific reference in paragraphs of this specification.<br>
+</td>
+<td>2.0-1</td>
+<td>False</td>
+<td>Information</td>
+<td><br></td>
+<td><br></td>
+<td>APPLICABLE DOCUMENTS</td>
+<td>Document 1</td>
+<td><br></td>
+<td><br></td>
+<td><br></td>
+<td><br></td>
+<td><br></td>
+<td>Example</td>
+<td nowrap align="left"><small>...</small></td>
</tr>
-<tr><td>EXA-22</td>
-<td><a name="X22">
-</a><b>2.2
-<DIV style="margin-left: 36px">Non-Government documents.
+<tr><td>EXA-5</td>
+<td><a name="X5">
+</a><b>2.1
+<DIV style="margin-left: 36px">Government documents.
</DIV></b><br></td>
-<td>2.2</td>
+<td>2.1</td>
<td>False</td>
-<td>Heading</td>
+<td>Header</td>
<td><br></td>
<td><br></td>
-<td>Non-Government documents.</td>
+<td>Government documents.</td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
-<td nowrap align="left"><a href = "sample_DOORS_export#X5907"> EXA-22<br> (5907)</a>(IN)<br>
-<small>...</small></td>
+<td>Example</td>
+<td nowrap align="left"><small>...</small></td>
</tr>
-<tr><td>EXA-23</td>
-<td><a name="X23">
-</a><b>2.2.1
-<DIV style="margin-left: 36px">Company documents.
+<tr><td>EXA-6</td>
+<td><a name="X6">
+</a><b>2.1.1
+<DIV style="margin-left: 36px">Specifications.
</DIV></b><br></td>
-<td>2.2.1</td>
+<td>2.1.1</td>
<td>False</td>
-<td>Heading</td>
+<td>Header</td>
<td><br></td>
<td><br></td>
-<td>Company documents.</td>
+<td>Specifications.</td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
-<td nowrap align="left"><a href = "sample_DOORS_export#X5908"> EXA-23<br> (5908)</a>(IN)<br>
-<small>...</small></td>
+<td>Example</td>
+<td nowrap align="left"><small>...</small></td>
</tr>
-<tr><td>EXA-26</td>
-<td><a name="X26">
-</a>XYZ-123&nbsp;&nbsp;&nbsp;&#9;&nbsp;&nbsp;&nbsp;&#9;Latest Revision&nbsp;&nbsp;&nbsp;&#9;First Document<br>
+<tr><td>EXA-7</td>
+<td><a name="X7">
+</a><b>2.1.1.1
+<DIV style="margin-left: 36px">Military.
+</DIV></b><br></td>
+<td>2.1.1.1</td>
+<td>False</td>
+<td>Header</td>
+<td><br></td>
+<td><br></td>
+<td>Military.</td>
+<td><br></td>
+<td><br></td>
+<td><br></td>
+<td><br></td>
+<td><br></td>
+<td><br></td>
+<td>Example</td>
+<td nowrap align="left"><small>...</small></td>
+</tr>
+<tr><td>EXA-8</td>
+<td><a name="X8">
+</a>ABC-DEF&nbsp;&nbsp;&nbsp;&#9;01-Apr-87&nbsp;&nbsp;&nbsp;&#9;Specification<br>
</td>
-<td>2.2.1.0-3</td>
+<td>2.1.1.1.0-1</td>
<td>False</td>
<td>Information</td>
<td><br></td>
<td><br></td>
-<td>Company documents.</td>
+<td>Military.</td>
<td>Document 1</td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
-<td nowrap align="left"><a href = "sample_DOORS_export#X5911"> EXA-26<br> (5911)</a>(IN)<br>
-<small>...</small></td>
+<td>Example</td>
+<td nowrap align="left"><small>...</small></td>
</tr>
-<tr><td>EXA-29</td>
-<td><a name="X29">
-</a>ABC-DEF&nbsp;&nbsp;&nbsp;&#9;11-Nov-11&nbsp;&nbsp;&nbsp;&#9;Second Document<br>
+<tr><td>EXA-9</td>
+<td><a name="X9">
+</a>XYZ-ABC&nbsp;&nbsp;&nbsp;&#9;03-Jan-84&nbsp;&nbsp;&nbsp;&#9;Specification,<br>
</td>
-<td>2.2.1.0-6</td>
+<td>2.1.1.1.0-2</td>
<td>False</td>
<td>Information</td>
<td><br></td>
<td><br></td>
-<td>Company documents.</td>
+<td>Military.</td>
<td>Document 2</td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
-<td nowrap align="left"><a href = "sample_DOORS_export#X5914"> EXA-29<br> (5914)</a>(IN)<br>
-<small>...</small></td>
+<td>Example</td>
+<td nowrap align="left"><small>...</small></td>
</tr>
-<tr><td>EXA-30</td>
-<td><a name="X30">
-</a>ADOC-2222&nbsp;&nbsp;&nbsp;&#9;11-Nov-11&nbsp;&nbsp;&nbsp;&#9;Third Document<br>
-</td>
-<td>2.2.1.0-8</td>
+<tr><td>EXA-10</td>
+<td><a name="X10">
+</a><b>2.1.2
+<DIV style="margin-left: 36px">Standards.
+</DIV></b><br></td>
+<td>2.1.2</td>
<td>False</td>
-<td>Information</td>
+<td>Header</td>
<td><br></td>
<td><br></td>
-<td>Company documents.</td>
-<td>Document 3</td>
+<td>Standards.</td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
-<td nowrap align="left"><a href = "sample_DOORS_export#X5916"> EXA-30<br> (5916)</a>(IN)<br>
-<small>...</small></td>
+<td><br></td>
+<td>Example</td>
+<td nowrap align="left"><small>...</small></td>
</tr>
<tr><td>EXA-37</td>
<td><a name="X37">
@@ -182,7 +224,7 @@
</DIV></b><br></td>
<td>3</td>
<td>False</td>
-<td>Heading</td>
+<td>Header</td>
<td><br></td>
<td><br></td>
<td>REQUIREMENTS</td>
@@ -192,8 +234,8 @@
<td><br></td>
<td><br></td>
<td><br></td>
-<td nowrap align="left"><a href = "sample_DOORS_export#X5937"> EXA-37<br> (5937)</a>(IN)<br>
-<small>...</small></td>
+<td>Example</td>
+<td nowrap align="left"><small>...</small></td>
</tr>
<tr><td>EXA-38</td>
<td><a name="X38">
@@ -202,7 +244,7 @@
</DIV></b><br></td>
<td>3.1</td>
<td>False</td>
-<td>Heading</td>
+<td>Header</td>
<td><br></td>
<td><br></td>
<td>Prime item definition.</td>
@@ -212,127 +254,130 @@
<td><br></td>
<td><br></td>
<td><br></td>
-<td nowrap align="left"><a href = "sample_DOORS_export#X5938"> EXA-38<br> (5938)</a>(IN)<br>
-<small>...</small></td>
+<td>Example</td>
+<td nowrap align="left"><small>...</small></td>
</tr>
-<tr><td>EXA-39</td>
-<td><a name="X39">
-</a>The subsystem shall be integrated <br>
+<tr><td>EXA-380</td>
+<td><a name="X380">
+</a>The EXA subsystem shall be integrated automatically to support the interface configuration in accordance with table I.<br>
</td>
-<td>3.1.0-1</td>
-<td>False</td>
+<td>3.1.0-3</td>
+<td>True</td>
<td>Requirement</td>
-<td>SREQ-1234<br>SREQ-5678<br>SREQ-2468<br>SREQ-1357<br>SREQ-0987<br>SREQ-4321</td>
-<td>Earth</td>
+<td>SREQ-4191<br>SREQ-4200<br>SREQ-4217<br>SREQ-4224<br>SREQ-4235<br>SREQ-6242<br>SREQ-6244</td>
+<td>AB3 Lot 1</td>
<td>Prime item definition.</td>
-<td>DRC-P-H1M5000_Earth</td>
-<td>Effectivity: <br>Verf Method: Inspection<br>Verf Level: System<br>Verf Location: <br>Verf Type: <br>Verified By: Company<br>Criteria:</td>
+<td>Document 1</td>
+<td>Effectivity: <br>Verf Method: Test<br>Verf Level: System<br>Verf Location: Laboratory Test<br>Verf Type: <br>Verified By: Boeing<br>Criteria:</td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
-<td nowrap align="left"><a href = "_Block_III_Tier_2_V-PIDS_Verification.htm#X1495"> EXA-39<br> (1495)</a>(IN)<br>
-<a href = "_external doc.htm#X1234"> Complete provisions shall be provided for<br> (SREQ-1234)</a>(OUT)<br>
-<a href = "_external doc.htm#X5678"> Complete provisions shall be provided for<br> (SREQ-5678)</a>(OUT)<br>
-<a href = "_external doc.htm#X2468"> Complete provisions shall be provided to<br> (SREQ-2468)</a>(OUT)<br>
-<a href = "_external doc.htm#X1357"> Complete provisions shall be provided for<br> (SREQ-1357)</a>(OUT)<br>
-<a href = "_external doc.htm#X0987"> Complete provisions for one GFE AN/APR-3<br> (SREQ-0987)</a>(OUT)<br>
-<a href = "_external doc.htm#X4321"> Complete provisions shall be provided fo<br> (SREQ-4321)</a>(OUT)<br>
-<a href = "sample_DOORS_export#X5939"> EXA-39<br>1495<br> (5939)</a>(IN)<br>
+<td>Example</td>
+<td nowrap align="left"><a href = "_Block_III_Tier_2_PIDS_Change_Requests.htm#X247"> RCR-5-8223-1 Requirement Change Request <br> (CR-247)</a>(IN)<br>
+<a href = "_Block_III_Tier_1_R-Performance_Specification.htm#X6244"> Complete provisions shall be provided fo<br> (SREQ-6244)</a>(OUT)<br>
+<a href = "_Block_III_Tier_1_R-Performance_Specification.htm#X6242"> Complete provisions for one GOI-furnishe<br> (SREQ-6242)</a>(OUT)<br>
+<a href = "_Block_III_Tier_1_R-Performance_Specification.htm#X4224"> Complete provisions shall be provided to<br> (SREQ-4224)</a>(OUT)<br>
+<a href = "_Block_III_Tier_1_R-Performance_Specification.htm#X4217"> Complete provisions shall be provided fo<br> (SREQ-4217)</a>(OUT)<br>
+<a href = "_Block_III_Tier_1_R-Performance_Specification.htm#X4235"> Complete provisions shall be provided fo<br> (SREQ-4235)</a>(OUT)<br>
+<a href = "_Block_III_Tier_1_R-Performance_Specification.htm#X4200"> Complete provisions shall be provided fo<br> (SREQ-4200)</a>(OUT)<br>
+<a href = "_Block_III_Tier_1_R-Performance_Specification.htm#X4191"> Complete provisions for one GFE AN/APR-3<br> (SREQ-4191)</a>(OUT)<br>
+<a href = "_Block_III_Tier_2_V-PIDS_Verification.htm#X4483"> EXA-380<br> (4483)</a>(IN)<br>
<small>...</small></td>
</tr>
-<tr><td>EXA-523</td>
-<td><a name="X523">
+<tr><td>EXA-426</td>
+<td><a name="X426">
</a>EXA System Configuration<br>
</td>
-<td>3.1.0-4</td>
+<td>3.1.0-5</td>
<td>False</td>
-<td>Information</td>
+<td>Table</td>
<td><br></td>
<td><br></td>
<td>Prime item definition.</td>
-<td>DRC-P-H1M5000_Earth</td>
+<td>Document 1</td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
-<td nowrap align="left"><a href = "sample_DOORS_export#X5942"> EXA-523<br> (5942)</a>(IN)<br>
-<small>...</small></td>
+<td>Example</td>
+<td nowrap align="left"><small>...</small></td>
</tr>
-<tr><td>EXA-526</td>
-<td><a name="X524">
+<tr><td>EXA-429</td>
+<td><a name="X427">
</a><table border = 1 cellPadding = 0 cellSpacing = 0>
<tr>
-<td colSpan=200 valign="top"><a name="X526">
+<td colSpan=200 valign="top"><a name="X429">
</a>EXA<br>
</td>
-<td colSpan=200 valign="top"><a name="X527">
+<td colSpan=200 valign="top"><a name="X430">
</a>System ID<br>
</td>
</tr>
<tr>
-<td colSpan=200 valign="top"><a name="X529">
-</a>EXA AGP<br>
+<td colSpan=200 valign="top"><a name="X432">
+</a>EXA ABC<br>
</td>
-<td colSpan=200 valign="top"><a name="X530">
+<td colSpan=200 valign="top"><a name="X433">
</a>N/A<br>
</td>
</tr>
<tr>
-<td colSpan=200 valign="top"><a name="X532">
+<td colSpan=200 valign="top"><a name="X435">
</a>Warning <br>
</td>
-<td colSpan=200 valign="top"><a name="X533">
-</a>Sys - 1<br>
+<td colSpan=200 valign="top"><a name="X436">
+</a>APR-39A<br>
</td>
</tr>
<tr>
-<td colSpan=200 valign="top"><a name="X535">
+<td colSpan=200 valign="top"><a name="X438">
</a>Another Warning <br>
</td>
-<td colSpan=200 valign="top"><a name="X536">
-</a>Sys - 2<br>
+<td colSpan=200 valign="top"><a name="X439">
+</a>AVR-2A<br>AVR-2B<br>
</td>
</tr>
<tr>
-<td colSpan=200 valign="top"><a name="X538">
-</a>Still Another Warning <br>
+<td colSpan=200 valign="top"><a name="X441">
+</a>Still Another Warning<br>
</td>
-<td colSpan=200 valign="top"><a name="X539">
-</a>Sys - 3<br>
+<td colSpan=200 valign="top"><a name="X442">
+</a>AAR-57<br>
</td>
</tr>
<tr>
-<td colSpan=200 valign="top"><a name="X541">
+<td colSpan=200 valign="top"><a name="X444">
</a>Jammer<br>
</td>
-<td colSpan=200 valign="top"><a name="X542">
-</a>Sys - 4<br>
+<td colSpan=200 valign="top"><a name="X445">
+</a>ALQ-136<br>
</td>
</tr>
<tr>
-<td colSpan=200 valign="top"><a name="X544">
-</a>Countermeasures <br>
+<td colSpan=200 valign="top"><a name="X447">
+</a>Countermeasures<br>
</td>
-<td colSpan=200 valign="top"><a name="X545">
-</a>Man waving flag<br>
+<td colSpan=200 valign="top"><a name="X448">
+</a>ICMD (1 Chaff, 2 Flare)<br>
</td>
</tr>
</table>
</td>
-<td>1.2.3.4.5.6.7.8.9</td>
+<td>3.1.0-5.0-1.0-1.0-1</td>
<td>False</td>
<td>Table</td>
<td><br></td>
<td><br></td>
<td>Row</td>
-<td>DRC-P-H1M5000_Earth</td>
+<td>Document 1</td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
+<td>Example</td>
<td nowrap align="left"><small>...</small></td>
</tr>
<tr><td>EXA-41</td>
@@ -342,7 +387,7 @@
</DIV></b><br></td>
<td>3.1.1</td>
<td>False</td>
-<td>Heading</td>
+<td>Header</td>
<td><br></td>
<td><br></td>
<td>Prime item diagram.</td>
@@ -352,89 +397,138 @@
<td><br></td>
<td><br></td>
<td><br></td>
-<td nowrap align="left"><a href = "sample_DOORS_export#X5945"> EXA-41<br> (5945)</a>(IN)<br>
+<td>Example</td>
+<td nowrap align="left"><small>...</small></td>
+</tr>
+<tr><td>EXA-42</td>
+<td><a name="X42">
+</a>Figure 1 is a block diagram of the EXA subsystem.<br>
+</td>
+<td>3.1.1.0-1</td>
+<td>False</td>
+<td>Requirement</td>
+<td><br></td>
+<td>Taiwan</td>
+<td>Prime item diagram.</td>
+<td>Document 1</td>
+<td>Effectivity: <br>Verf Method: N/A<br>Verf Level: N/A<br>Verf Location: N/A<br>Verf Type: <br>Verified By: Boeing<br>Criteria:</td>
+<td><br></td>
+<td><br></td>
+<td><br></td>
+<td><br></td>
+<td>Example</td>
+<td nowrap align="left"><a href = "_Block_III_Tier_2_PIDS_Change_Requests.htm#X1304"> RCR-5-8224-1 Requirement Change Request <br> (CR-1304)</a>(IN)<br>
+<a href = "_Block_III_Tier_2_V-PIDS_Verification.htm#X1496"> EXA-42<br> (1496)</a>(IN)<br>
<small>...</small></td>
</tr>
-<tr><td>EXA-43</td>
-<td><a name="X43">
-</a><img src="This_is_a_JPEG_image.jpg" height=446 width=576 alt="OLE Object" title="OLE Object"> <br>
+<tr><td>EXA-521</td>
+<td><a name="X521">
+</a><img src="This_is_a_JPEG_image.jpg" height=468 width=575 alt="OLE Object" title="OLE Object"> <br>
</td>
-<td>3.1.1.0-3</td>
+<td>3.1.1.0-2</td>
<td>False</td>
<td>Figure</td>
<td><br></td>
<td><br></td>
<td>Prime item diagram.</td>
-<td>DRC-P-H1M5000_Earth</td>
+<td>Document 1</td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
-<td nowrap align="left"><a href = "sample_DOORS_export#X5948"> EXA-43<br> (5948)</a>(IN)<br>
+<td>Example</td>
+<td nowrap align="left"><a href = "_Block_III_Tier_2_PIDS_Change_Requests.htm#X1706"> <b>Note: </b>EXA-521-CR-1706<br><b>Requirement Change </b><br> (CR-1706)</a>(IN)<br>
<small>...</small></td>
</tr>
-<tr><td>EXA-43A</td>
-<td><a name="X43">
-</a><img src="This_is_a_PNG_image.png" height=446 width=576 alt="OLE Object" title="OLE Object"> <br>
+<tr><td>EXA-522</td>
+<td><a name="X522">
+</a><img src="This_is_a_PNG_image.png" height=468 width=575 alt="OLE Object" title="OLE Object"> <br>
</td>
-<td>3.1.1.0-3</td>
+<td>3.1.1.0-2</td>
<td>False</td>
<td>Figure</td>
<td><br></td>
<td><br></td>
<td>Prime item diagram.</td>
-<td>DRC-P-H1M5000_Earth</td>
+<td>Document 1</td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
-<td nowrap align="left"><a href = "sample_DOORS_export#X5948"> EXA-43<br> (5948)</a>(IN)<br>
+<td>Example</td>
+<td nowrap align="left"><a href = "_Block_III_Tier_2_PIDS_Change_Requests.htm#X1706"> <b>Note: </b>EXA-521-CR-1706<br><b>Requirement Change </b><br> (CR-1706)</a>(IN)<br>
<small>...</small></td>
</tr>
-
-<tr><td>EXA-1</td>
-<td><a name="X1">
-</a><b>1
-<DIV style="margin-left: 36px">SCOPE
+<tr><td>EXA-71</td>
+<td><a name="X71">
+</a><b>3.1.2.4.1.2
+<DIV style="margin-left: 36px">Voice status.
</DIV></b><br></td>
-<td>3.1.2</td>
+<td>3.1.2.4.1.2</td>
<td>False</td>
-<td>Not Defined</td>
+<td>Header</td>
<td><br></td>
<td><br></td>
-<td>Bad Row</td>
+<td>Voice status.</td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
-<td nowrap align="left"><a href = "sample_DOORS_export#X5880"> EXA-1<br> (5880)</a>(IN)<br>
+<td>Example</td>
+<td nowrap align="left"><small>...</small></td>
+</tr>
+<tr><td>EXA-713</td>
+<td><a name="X713">
+</a>The following voice warning status shall be provided for each crewmember:<br>
+</td>
+<td>3.1.2.4.1.2.0-2</td>
+<td>True</td>
+<td>Requirement</td>
+<td>SREQ-4195<br>SREQ-4200<br>SREQ-4221<br>SREQ-4235<br>SREQ-6244<br>SREQ-6361</td>
+<td>AB3 Lot 4</td>
+<td>Voice status.</td>
+<td>DRC-P-H1L5001_Lot4, DRC-P-H1L5002_Lot6</td>
+<td>Effectivity: <br>Verf Method: Test<br>Verf Level: System<br>Verf Location: Laboratory Test<br>Verf Type: <br>Verified By: Boeing<br>Criteria:</td>
+<td><br></td>
+<td><br></td>
+<td><br></td>
+<td>AAABHx7XiQYBDr4YDrfQnQ</td>
+<td>Example</td>
+<td nowrap align="left"><a href = "_Block_III_Tier_1_R-Performance_Specification.htm#X6244"> Complete provisions shall be provided fo<br> (SREQ-6244)</a>(OUT)<br>
+<a href = "_Block_III_Tier_1_R-Performance_Specification.htm#X4235"> Complete provisions shall be provided fo<br> (SREQ-4235)</a>(OUT)<br>
+<a href = "_Block_III_Tier_1_R-Performance_Specification.htm#X4200"> Complete provisions shall be provided fo<br> (SREQ-4200)</a>(OUT)<br>
+<a href = "_Block_III_Tier_1_R-Performance_Specification.htm#X6361"> Display and control shall be provided by<br> (SREQ-6361)</a>(OUT)<br>
+<a href = "_Block_III_Tier_1_R-Performance_Specification.htm#X4195"> Display and control shall be provided by<br> (SREQ-4195)</a>(OUT)<br>
+<a href = "_Block_III_Tier_1_R-Performance_Specification.htm#X4221"> Displays and controls shall be provided <br> (SREQ-4221)</a>(OUT)<br>
+<a href = "_Block_III_Tier_2_V-PIDS_Verification.htm#X7806"> EXA-713<br> (7806)</a>(IN)<br>
+<a href = "_Block_III_Functional_Analysis_AB3_Functional_Analysis.htm#X434"> HFI voice selection<br> (FA-434)</a>(IN)<br>
<small>...</small></td>
</tr>
-<tr><td>EXA-2</td>
-<td><a name="X2">
-</a>This verifies that a Not Defined row will not be imported <br>
+<tr><td>EXA-714</td>
+<td><a name="X714">
+</a>
+<DIV style="margin-left: 72px">a.&nbsp;&nbsp;&nbsp;&#9;Aircraft voice warning. An audio indication to alert the crew of detected EXA threats, including the following:<br>&nbsp;&nbsp;&nbsp;&#9;1.&nbsp;&nbsp;&nbsp;&#9;Countermeasures maneuver cue (if enabled).<br>&nbsp;&nbsp;&nbsp;&#9;2.&nbsp;&nbsp;&nbsp;&#9;Threat type.<br>&nbsp;&nbsp;&nbsp;&#9;3.&nbsp;&nbsp;&nbsp;&#9;Threat azimuth.<br>&nbsp;&nbsp;&nbsp;&#9;4.&nbsp;&nbsp;&nbsp;&#9;Threat mode.<br>b.&nbsp;&nbsp;&nbsp;&#9;EXA voice threshold. An indication as to the current voice threshold mode selected.<br>c.&nbsp;&nbsp;&nbsp;&#9;Maneuver cue. An indication as to whether the EXA maneuver cue voice warning is on or off.<br>d.&nbsp;&nbsp;&nbsp;&#9;HFI voice selection. An indication as to whether HFI voice warning is on or off.
+</DIV><br>
</td>
-<td>1.0-1</td>
+<td>3.1.2.4.1.2.0-2.0-1</td>
<td>False</td>
-<td>Not Defined</td>
+<td>List</td>
<td><br></td>
<td><br></td>
-<td>SCOPE</td>
-<td>Some Documented information</td>
+<td>Voice status.</td>
+<td>DRC-P-H1L5001_Lot4, DRC-P-H1L5002_Lot6</td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
<td><br></td>
-<td nowrap align="left"><a href = "sample_DOORS_export#X5881"> EXA-2<br> (5881)</a>(IN)<br>
-<small>...</small></td>
+<td>Example</td>
+<td nowrap align="left"><small>...</small></td>
</tr>
-
-
</table>
<DIV align="center"><p><a href = "http://www.ibm.com/software/awdtools/doors/">Produced by DOORS 9.3.0.6</a></DIV>
</body>
diff --git a/plugins/org.eclipse.osee.framework.skynet.core/src/org/eclipse/osee/framework/skynet/core/importing/parsers/DoorsArtifactExtractor.java b/plugins/org.eclipse.osee.framework.skynet.core/src/org/eclipse/osee/framework/skynet/core/importing/parsers/DoorsArtifactExtractor.java
index 30cbfcb8f16..fdf8ad08928 100644
--- a/plugins/org.eclipse.osee.framework.skynet.core/src/org/eclipse/osee/framework/skynet/core/importing/parsers/DoorsArtifactExtractor.java
+++ b/plugins/org.eclipse.osee.framework.skynet.core/src/org/eclipse/osee/framework/skynet/core/importing/parsers/DoorsArtifactExtractor.java
@@ -429,7 +429,7 @@ public class DoorsArtifactExtractor extends AbstractArtifactExtractor {
RowTypeEnum rowType = rowIndexToRowTypeMap.get(rowIndex);
if (rowType == RowTypeEnum.DOCUMENT_APPLICABILITY) {
String rowValue = row[rowIndex].toLowerCase().trim();
- if (rowValue.equals("") || rowValue.equals("<br></br>") || rowValue.equals("<br>")) {
+ if (rowValue.equals("") || rowValue.equals("<br></br>") || rowValue.equals("<br>") || rowValue.equals("<br />")) {
if (inArtifact) {
processArtifact();
}
@@ -593,6 +593,7 @@ public class DoorsArtifactExtractor extends AbstractArtifactExtractor {
private String processList(String inputValue) {
inputValue = normalizeHtml(inputValue);
+ inputValue = inputValue.replaceAll("\\s+", " ");
/**************************************************************************************
* The way Doors export works with lists is that there is badly spaced <div> statements -- remove them
*/
@@ -643,6 +644,7 @@ public class DoorsArtifactExtractor extends AbstractArtifactExtractor {
endOfList -= 2;
startOfNextList -= 2;
String insertValue = null;
+
if (isNumeric) {
insertValue = "<ol>";
} else if (isLowerCase) {
@@ -656,7 +658,9 @@ public class DoorsArtifactExtractor extends AbstractArtifactExtractor {
startOfNextList = startOfNextList + insertValue.length();
}
iPos += insertValue.length();
-
+ int adjust = removeForcedSpaces(returnString, iPos - 1, false);
+ startOfNextList -= adjust;
+ endOfList -= adjust;
listData theListData = new listData();
boolean lastWasSublist = false;
while (nextItem != -1) {
@@ -671,6 +675,9 @@ public class DoorsArtifactExtractor extends AbstractArtifactExtractor {
}
iPos += LIST_ITEM_TAG.length() - 1;
}
+ adjust = removeForcedSpaces(returnString, nextItem + LIST_ITEM_TAG.length(), false);
+ startOfNextList -= adjust;
+ endOfList -= adjust;
theChars = stringBuilderToChars(returnString);
nextItem = findNextListItem(theChars, iPos, isNumeric, isLowerCase, currentNumber, currentLetter, theListData);
if (nextItem == -1) {
@@ -679,26 +686,33 @@ public class DoorsArtifactExtractor extends AbstractArtifactExtractor {
if (theListData.getNewList()) {
int startPoint = (nextItem < startOfNextList) ? nextItem : startOfNextList;
+ int delta = removeForcedSpaces(returnString, startPoint - 1, true);
+ if (delta > 0) {
+ theChars = stringBuilderToChars(returnString);
+ startPoint -= delta;
+ endOfList -= delta;
+ }
String theSublist = returnString.substring(0, startPoint);
- int end = theListData.getNextItem();
+ int end = theListData.getNextItem() - delta;
+ if (theListData.getNextItem() != -1) {
+ theListData.setNextItem(end);
+ }
if (end >= returnString.length()) {
end = returnString.length() - 1;
}
-
String theRawSublist = new String(theChars, startPoint, end - startPoint + 1);
int initialLen = theRawSublist.length();
theRawSublist = processList(theRawSublist);
theSublist += theRawSublist;
theSublist += LIST_ITEM_END_TAG;
- int delta = (theRawSublist.length() - initialLen) + LIST_ITEM_END_TAG.length();
+ delta = (theRawSublist.length() - initialLen) + LIST_ITEM_END_TAG.length();
endOfList += delta;
startOfNextList += delta;
- if ((theListData.getNextItem() != -1) && (theListData.getNextItem() < returnString.length())) {
+ if ((theListData.getNextItem() != -1) && (theListData.getNextItem() < (returnString.length() - 1))) {
theSublist += returnString.substring(theListData.getNextItem() + 1);
}
returnString.delete(0, returnString.length());
returnString.append(theSublist);
-
} else {
if (isNumeric) {
currentNumber =
@@ -741,12 +755,12 @@ public class DoorsArtifactExtractor extends AbstractArtifactExtractor {
theChars = stringBuilderToChars(returnString);
}
// find the insertion point for list end
- String tokenToInsert = "</li></ol>";
+ String tokenToInsert = LIST_ITEM_END_TAG + "</ol>";
if (theListData.getNewList()) {
tokenToInsert = "</ol>";
}
- if (endOfList < theChars.length) {
+ if (endOfList < returnString.length()) {
returnString.insert(endOfList, tokenToInsert);
} else {
// verify the list doesn't end with <BR></BR>
@@ -1199,7 +1213,8 @@ public class DoorsArtifactExtractor extends AbstractArtifactExtractor {
private String normalizeHtml(String inputHtml) {
- String returnValue = NormalizeHtml.convertToNormalizedHTML(inputHtml, true, true, true);
+ String returnValue = preprocessHTML(inputHtml);
+ returnValue = NormalizeHtml.convertToNormalizedHTML(returnValue, true, true, true);
int bodyStart = returnValue.indexOf(BODY_START_TAG);
int bodyEnd = returnValue.indexOf(BODY_END_TAG);
if (bodyStart != -1) {
@@ -1240,6 +1255,71 @@ public class DoorsArtifactExtractor extends AbstractArtifactExtractor {
returnValue = returnValue.substring(0, brTag).trim();
brTag = returnValue.toLowerCase().lastIndexOf(BR_TAG);
}
+
+ //@formatter:off
+ /************************************************************************************
+ * change <br />spacespace
+ * to <br />space
+ */
+ //@formatter:on
+ returnValue = returnValue.replaceAll("<br /> ", "<br /> ");
return returnValue;
}
+
+ /**
+  * Applies plain textual fix-ups to the raw HTML before it is normalized: every tab character becomes a
+  * single space, and an empty {@code <BR></BR>} or {@code <br></br>} pair is collapsed into the
+  * self-closing tag of the same case.
+  *
+  * @param inputHTML the HTML text to clean up
+  * @return the text with the substitutions above applied
+  */
+ private String preprocessHTML(String inputHTML) {
+    // All three patterns are literals, so plain (non-regex) replacement yields the same result.
+    return inputHTML
+       .replace("\t", " ")
+       .replace("<BR></BR>", "<BR />")
+       .replace("<br></br>", "<br />");
+ }
+
+ /**
+  * Removes forced spacing next to a list boundary; the HTML list markup takes care of spacing itself.
+  * <p>
+  * In forward mode ({@code reverse == false}) every {@code &nbsp;} or {@code &#9;} entity that starts exactly
+  * at {@code iPos} is deleted. In reverse mode whitespace characters at {@code iPos} and to its left are deleted
+  * first, then every {@code &nbsp;} or {@code &#9;} entity that ends exactly at the resulting position.
+  * Nothing is changed when {@code iPos <= 0}.
+  *
+  * @param returnString the buffer, edited in place
+  * @param iPos the index at which stripping starts
+  * @param reverse {@code true} to strip leftwards from {@code iPos}, {@code false} to strip at {@code iPos}
+  * @return the number of characters removed, so that callers can shift the offsets they track
+  */
+ private int removeForcedSpaces(StringBuilder returnString, int iPos, boolean reverse) {
+    final String nbsp = "&nbsp;";
+    final String tab = "&#9;";
+    int adjust = 0;
+    if (iPos <= 0) {
+       return adjust;
+    }
+    if (reverse) {
+       // Re-check the bounds on every step: the scan can reach the front of the buffer, and a caller may
+       // pass an index beyond its end.
+       while (iPos >= 0 && iPos < returnString.length() && Character.isWhitespace(returnString.charAt(iPos))) {
+          returnString.deleteCharAt(iPos);
+          iPos--;
+          adjust++;
+       }
+       while (true) {
+          String token;
+          if (hasTokenAt(returnString, nbsp, iPos - nbsp.length() + 1)) {
+             token = nbsp;
+          } else if (hasTokenAt(returnString, tab, iPos - tab.length() + 1)) {
+             token = tab;
+          } else {
+             break;
+          }
+          // The entity occupies [iPos - length + 1, iPos]; drop it and step to the character before it.
+          returnString.delete(iPos - token.length() + 1, iPos + 1);
+          adjust += token.length();
+          iPos -= token.length();
+       }
+    } else {
+       while (true) {
+          String token;
+          if (hasTokenAt(returnString, nbsp, iPos)) {
+             token = nbsp;
+          } else if (hasTokenAt(returnString, tab, iPos)) {
+             token = tab;
+          } else {
+             break;
+          }
+          // iPos stays put: the text that followed the entity has shifted into its place.
+          returnString.delete(iPos, iPos + token.length());
+          adjust += token.length();
+       }
+    }
+    return adjust;
+ }
+
+ /**
+  * Tells whether {@code token} occurs in {@code text} starting exactly at {@code start}. Returns
+  * {@code false}, rather than throwing, when the token would not fit inside the buffer at that position.
+  */
+ private static boolean hasTokenAt(StringBuilder text, String token, int start) {
+    if (start < 0 || start + token.length() > text.length()) {
+       return false;
+    }
+    for (int i = 0; i < token.length(); i++) {
+       if (text.charAt(start + i) != token.charAt(i)) {
+          return false;
+       }
+    }
+    return true;
+ }
+
}
diff --git a/plugins/org.eclipse.osee.framework.skynet.core/src/org/eclipse/osee/framework/skynet/core/utility/NormalizeHtml.java b/plugins/org.eclipse.osee.framework.skynet.core/src/org/eclipse/osee/framework/skynet/core/utility/NormalizeHtml.java
index 04e3af90248..3994966464e 100644
--- a/plugins/org.eclipse.osee.framework.skynet.core/src/org/eclipse/osee/framework/skynet/core/utility/NormalizeHtml.java
+++ b/plugins/org.eclipse.osee.framework.skynet.core/src/org/eclipse/osee/framework/skynet/core/utility/NormalizeHtml.java
@@ -11,6 +11,7 @@
package org.eclipse.osee.framework.skynet.core.utility;
import java.util.ArrayList;
+import java.util.List;
import java.util.TreeMap;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attribute;
@@ -20,6 +21,8 @@ import org.jsoup.nodes.Document.OutputSettings;
import org.jsoup.nodes.Document.QuirksMode;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Entities.EscapeMode;
+import org.jsoup.nodes.Node;
+import org.jsoup.nodes.TextNode;
import org.jsoup.parser.Tag;
import org.jsoup.select.Elements;
@@ -144,9 +147,66 @@ public final class NormalizeHtml {
processFontTags(doc);
processInitialStyleTags(doc, removeInitialStyle);
processEmptyTags(doc, removeEmptyTags);
+ processSelfFormattingTags(doc);
return processText(doc);
}
+ /**
+  * Tidies the direct children of every {@code li}, {@code tr}, {@code td} and {@code table} element, which
+  * supply their own layout (documents converted from MS Word carry extra line breaks and padding there).
+  * <ul>
+  * <li>A child text node that holds nothing but whitespace / non-breaking spaces is removed.</li>
+  * <li>Any other child text node is replaced by a copy with leading and trailing whitespace and
+  * {@code &nbsp;} stripped.</li>
+  * <li>A direct {@code br} child element is removed.</li>
+  * </ul>
+  * Removals are collected while scanning and carried out once all elements for a tag have been visited.
+  *
+  * @param doc the parsed document, modified in place
+  */
+ static void processSelfFormattingTags(Document doc) {
+    final String nbspEntity = "&nbsp;";
+    for (String tagName : new String[] {"li", "tr", "td", "table"}) {
+       List<Node> doomed = new ArrayList<Node>();
+       for (Element container : doc.select(tagName)) {
+          for (Node child : container.childNodes()) {
+             if (child instanceof Element) {
+                if (((Element) child).tagName().equals("br")) {
+                   doomed.add(child);
+                }
+                continue;
+             }
+             if (!(child instanceof TextNode)) {
+                continue;
+             }
+             TextNode textNode = (TextNode) child;
+             String raw = textNode.text();
+             // First pass: does anything remain once every kind of spacing is stripped out?
+             String residue = swapNonBreakingSpaces(raw, " ").trim().replaceAll("\\s+", "");
+             if (residue.isEmpty()) {
+                doomed.add(textNode);
+                continue;
+             }
+             // Second pass: keep the content but peel padding off both ends.
+             String cleaned = swapNonBreakingSpaces(raw.trim(), nbspEntity);
+             while (cleaned.startsWith(nbspEntity)) {
+                cleaned = cleaned.substring(nbspEntity.length()).trim();
+             }
+             while (cleaned.endsWith(nbspEntity)) {
+                cleaned = cleaned.substring(0, cleaned.length() - nbspEntity.length()).trim();
+             }
+             if (cleaned.isEmpty()) {
+                doomed.add(textNode);
+             } else {
+                // NOTE(review): createFromEncoded presumably decodes entities in its argument, so text that
+                // already contains entity-like sequences may be decoded a second time -- confirm.
+                textNode.replaceWith(TextNode.createFromEncoded(cleaned, textNode.baseUri()));
+             }
+          }
+       }
+       for (Node node : doomed) {
+          node.remove();
+       }
+    }
+ }
+
+ /**
+  * Replaces each match of the class's NON_BREAK_* patterns (declared outside this method) with
+  * {@code replacement}, applying the patterns in a fixed order.
+  */
+ private static String swapNonBreakingSpaces(String text, String replacement) {
+    String result = text.replaceAll(NON_BREAK_SPACE, replacement);
+    result = result.replaceAll(NON_BREAK_FIGURE_SPACE, replacement);
+    result = result.replaceAll(NON_BREAK_NARROW_SPACE, replacement);
+    result = result.replaceAll(NON_BREAK_WORD_JOINER, replacement);
+    result = result.replaceAll(NON_BREAK_ZERO_WIDTH, replacement);
+    return result;
+ }
+
static void removeDepreactedTags(Document doc) {
Elements center = doc.select("center");
for (Element e : center) {
@@ -342,6 +402,11 @@ public final class NormalizeHtml {
theText = theText.replaceAll(NON_BREAK_NARROW_SPACE, "&nbsp;");
theText = theText.replaceAll(NON_BREAK_WORD_JOINER, "&nbsp;");
theText = theText.replaceAll(NON_BREAK_ZERO_WIDTH, "&nbsp;");
+
+ /***********************************************************************************
+ * Remove spaces after end of tags at end of lines
+ */
+ theText = theText.replaceAll("> {1,}\n", ">\n");
return theText;
}

Back to the top