Skip to main content
summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authormpotterc0k2013-05-06 14:54:00 +0000
committerRoberto E. Escobar2013-05-14 22:38:55 +0000
commit907e48cae845e082c4ae9cb330dc29d524db1c05 (patch)
tree070d4751a839395024df125e4169903523af6516
parenta9e0b3935b50ce923dfed1a9304e01b10caeea45 (diff)
downloadorg.eclipse.osee-907e48cae845e082c4ae9cb330dc29d524db1c05.tar.gz
org.eclipse.osee-907e48cae845e082c4ae9cb330dc29d524db1c05.tar.xz
org.eclipse.osee-907e48cae845e082c4ae9cb330dc29d524db1c05.zip
refinement[ats_76PJ4]: Change DOORS parsing to use robust SAX parser
-rw-r--r--plugins/org.eclipse.osee.framework.skynet.core/META-INF/MANIFEST.MF3
-rw-r--r--plugins/org.eclipse.osee.framework.skynet.core/src/org/eclipse/osee/framework/skynet/core/importing/parsers/DoorsArtifactExtractor.java704
2 files changed, 256 insertions, 451 deletions
diff --git a/plugins/org.eclipse.osee.framework.skynet.core/META-INF/MANIFEST.MF b/plugins/org.eclipse.osee.framework.skynet.core/META-INF/MANIFEST.MF
index 9bc3c8ca5ad..9b71593b1af 100644
--- a/plugins/org.eclipse.osee.framework.skynet.core/META-INF/MANIFEST.MF
+++ b/plugins/org.eclipse.osee.framework.skynet.core/META-INF/MANIFEST.MF
@@ -50,7 +50,10 @@ Export-Package: org.eclipse.osee.framework.skynet.core,
org.eclipse.osee.framework.skynet.core.word
Bundle-Vendor: Eclipse Open System Engineering Environment
Import-Package: org.apache.commons.lang;version="2.4.0",
+ org.apache.xerces.parsers;version="2.9.0",
+ org.apache.xerces.xni.parser;version="2.9.0",
org.apache.xml.serialize;version="2.9.0",
+ org.cyberneko.html.parsers,
org.eclipse.debug.core,
org.eclipse.osee.cache.admin,
org.eclipse.osee.framework.core.client,
diff --git a/plugins/org.eclipse.osee.framework.skynet.core/src/org/eclipse/osee/framework/skynet/core/importing/parsers/DoorsArtifactExtractor.java b/plugins/org.eclipse.osee.framework.skynet.core/src/org/eclipse/osee/framework/skynet/core/importing/parsers/DoorsArtifactExtractor.java
index 52cee3023b5..888c1f2302e 100644
--- a/plugins/org.eclipse.osee.framework.skynet.core/src/org/eclipse/osee/framework/skynet/core/importing/parsers/DoorsArtifactExtractor.java
+++ b/plugins/org.eclipse.osee.framework.skynet.core/src/org/eclipse/osee/framework/skynet/core/importing/parsers/DoorsArtifactExtractor.java
@@ -12,11 +12,6 @@ package org.eclipse.osee.framework.skynet.core.importing.parsers;
import java.io.File;
import java.io.FileFilter;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.Reader;
-import java.io.StringReader;
-import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
@@ -24,23 +19,21 @@ import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Vector;
-import javax.xml.stream.XMLEventReader;
-import javax.xml.stream.XMLInputFactory;
-import javax.xml.stream.events.Characters;
-import javax.xml.stream.events.EndElement;
-import javax.xml.stream.events.StartElement;
-import javax.xml.stream.events.XMLEvent;
+import org.cyberneko.html.parsers.SAXParser;
import org.eclipse.osee.framework.core.data.IAttributeType;
import org.eclipse.osee.framework.core.enums.CoreAttributeTypes;
-import org.eclipse.osee.framework.core.exception.OseeArgumentException;
import org.eclipse.osee.framework.core.exception.OseeCoreException;
import org.eclipse.osee.framework.core.operation.OperationLogger;
import org.eclipse.osee.framework.jdk.core.util.GUID;
import org.eclipse.osee.framework.jdk.core.util.Strings;
+import org.eclipse.osee.framework.jdk.core.util.io.xml.AbstractSaxHandler;
import org.eclipse.osee.framework.skynet.core.artifact.Artifact;
import org.eclipse.osee.framework.skynet.core.importing.RoughArtifact;
import org.eclipse.osee.framework.skynet.core.importing.RoughArtifactKind;
import org.eclipse.osee.framework.skynet.core.importing.operations.RoughArtifactCollector;
+import org.eclipse.osee.framework.skynet.core.utility.NormalizeHtml;
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
/**
* @author Marc A. Potter
@@ -50,9 +43,30 @@ public class DoorsArtifactExtractor extends AbstractArtifactExtractor {
private final Vector<String> postProcessGuids = new Vector<String>();
private final Map<Integer, RowTypeEnum> rowIndexToRowTypeMap = new HashMap<Integer, RowTypeEnum>();
private String[] headerRow;
- RoughArtifactCollector collector;
- private static final String imageBaseName = "Image Content_";
- private static int READ_BUFFER_LEN = 4096;
+ private RoughArtifactCollector collector;
+ private boolean inArtifact = false;
+ private final Vector<String> theArtifact = new Vector<String>();
+ private String paragraphNumber = "", paragraphName = "";
+ private String uriDirectoryName = "";
+ private final static String BODY_START_TAG = "<body>";
+ private final static String BODY_END_TAG = "</body>";
+ private static final String IMAGE_BASE_NAME = "Image Content_";
+ private final static String[] VERIFICATION_KEYWORDS = {
+ "Effectivity:",
+ "Verf Method:",
+ "Verf Level:",
+ "Verf Location:",
+ "Verf Type:",
+ "Verified By:",
+ "Criteria:"};
+ private final static IAttributeType[] FIELD_TYPE = {
+ null,
+ CoreAttributeTypes.QualificationMethod,
+ CoreAttributeTypes.VerificationLevel,
+ CoreAttributeTypes.VerificationEvent,
+ null,
+ null,
+ null}; // Last one is actually a string
@Override
public String getDescription() {
@@ -79,374 +93,191 @@ public class DoorsArtifactExtractor extends AbstractArtifactExtractor {
return false;
}
- @Override
- public void extractFromSource(OperationLogger logger, URI source, RoughArtifactCollector collector) throws Exception {
+ private class Handler extends AbstractSaxHandler {
- /**************************************************************
- * DOORS uses non standard HTML. Read in the file and standardize it
- */
- this.collector = collector;
- String fileName = source.getAuthority();
- if (fileName == null) {
- fileName = "";
+ private boolean isTitle = false;
+ private final Vector<String> currentRow = new Vector<String>();
+ private final StringBuilder cell = new StringBuilder("");
+ private boolean tableFound = false;
+ private int embededTableCount = 0;
+ private boolean inHeaderCell = false;
+
+ public Handler() {
}
- fileName += source.getPath();
- String standardHTML = standardizeDOORS(fileName);
-
- XMLInputFactory factory = XMLInputFactory.newInstance();
- Reader myStringReader = new StringReader(standardHTML);
- XMLEventReader reader = factory.createXMLEventReader(myStringReader);
- XMLEvent event = null;
-
- boolean tableFound = false, inHeaderCell = false;
- int embededTableCount = 0;
- String cell = "";
- String title = fileName;
- title = title.replaceAll("\\\\", "_");
- title = title.replaceAll("/", "_");
- title = title.replaceAll(" ", "_");
- Boolean isTitle = false;
- Vector<String> currentRow = new Vector<String>();
-
- while (reader.hasNext()) {
- event = reader.nextEvent();
- if (event.isStartElement()) {
- isTitle = false;
- StartElement startElement = (StartElement) event;
- String qName = startElement.getName().toString().trim();
- if (qName.equalsIgnoreCase("title")) {
- cell = "";
- isTitle = true;
- } else if (qName.equalsIgnoreCase("table")) {
- if (tableFound) {
- // table within the table
- cell += event.toString();
- embededTableCount++;
- } else {
- tableFound = true;
- }
- } else if (qName.equalsIgnoreCase("tr")) {
- // Do nothing here -- no processing needed
- } else if (qName.equalsIgnoreCase("th")) {
- if (embededTableCount > 0) {
- // table within the table
- cell += event.toString();
- } else {
- inHeaderCell = true;
- cell = "";
- }
- } else if (qName.equalsIgnoreCase("td")) {
- if (embededTableCount > 0) {
- // table within the table
- cell += event.toString();
- } else {
- cell = "";
+
+ private String elementToString(String qName, Attributes attributes, boolean isEndElement) {
+ StringBuilder returnValue = new StringBuilder("<");
+ if (isEndElement) {
+ returnValue.append('/');
+ }
+ returnValue.append(qName);
+ if (attributes != null) {
+ for (int i = 0; i < attributes.getLength(); i++) {
+ returnValue.append(" ");
+ returnValue.append(attributes.getQName(i));
+ String value = attributes.getValue(i);
+ if (Strings.isValid(value)) {
+ returnValue.append("=\"");
+ returnValue.append(value);
+ returnValue.append('"');
}
+ }
+ }
+ returnValue.append(">");
+ return returnValue.toString();
+ }
+
+ @Override
+ public void startElementFound(String uri, String localName, String qName, Attributes attributes) {
+ isTitle = false;
+ if (qName.equalsIgnoreCase("title")) {
+ cell.delete(0, cell.length());
+ isTitle = true;
+ } else if (qName.equalsIgnoreCase("table")) {
+ if (tableFound) {
+ // table within the table
+ cell.append(elementToString(qName, attributes, false));
+ embededTableCount++;
+ } else {
+ tableFound = true;
+ }
+ } else if (qName.equalsIgnoreCase("tr")) {
+ // Do nothing here -- no processing needed
+ } else if (qName.equalsIgnoreCase("th")) {
+ if (embededTableCount > 0) {
+ // table within the table
+ cell.append(elementToString(qName, attributes, false));
+ } else {
+ inHeaderCell = true;
+ cell.delete(0, cell.length());
+ }
+ } else if (qName.equalsIgnoreCase("td")) {
+ if (embededTableCount > 0) {
+ // table within the table
+ cell.append(elementToString(qName, attributes, false));
+ } else {
+ cell.delete(0, cell.length());
+ }
+ } else {
+ cell.append(elementToString(qName, attributes, false));
+ }
+ }
+
+ @Override
+ public void endElementFound(String uri, String localName, String qName) throws SAXException {
+ isTitle = false;
+ if (qName.equalsIgnoreCase("title")) {
+ foundStartOfWorksheet(cell.toString());
+ cell.delete(0, cell.length());
+ } else if (qName.equalsIgnoreCase("table")) {
+ if (embededTableCount > 0) {
+ // end of table within the table
+ cell.append(elementToString(qName, null, true));
+ embededTableCount--;
} else {
- cell += event.toString();
+ // we are done!
+ tableFound = false;
}
- } else if (event.isEndElement()) {
- isTitle = false;
- EndElement endElement = (EndElement) event;
- String qName = endElement.getName().toString().trim();
- if (qName.equalsIgnoreCase("title")) {
- foundStartOfWorksheet(cell);
- cell = "";
- } else if (qName.equalsIgnoreCase("table")) {
- if (embededTableCount > 0) {
- // end of table within the table
- cell += event.toString();
- embededTableCount--;
+ } else if (qName.equalsIgnoreCase("tr")) {
+ if (embededTableCount == 0) {
+ String[] row = new String[currentRow.size()];
+ row = currentRow.toArray(row);
+ currentRow.clear();
+ if (inHeaderCell) {
+ processHeaderRow(row);
+ inHeaderCell = false;
} else {
- // we are done!
- tableFound = false;
- }
- } else if (qName.equalsIgnoreCase("tr")) {
- if (embededTableCount == 0) {
- String[] row = new String[currentRow.size()];
- row = currentRow.toArray(row);
- currentRow.clear();
- if (inHeaderCell) {
- processHeaderRow(row);
- inHeaderCell = false;
- } else {
+ try {
processRow(row);
+ } catch (OseeCoreException ex) {
+ throw new SAXException(ex);
}
}
- } else if (qName.equalsIgnoreCase("th")) {
- if (embededTableCount > 0) {
- // table within the table
- cell += event.toString();
- } else {
- /***********************************************
- * In order to parse the DOORS import, ='xXx' had to be added to some keywords. This is because the
- * parser does not support simple keywords (e.g.
- * <table nowrap>
- * ) remove the additional code before adding it to the row
- */
-
- String StripCell = cell.replaceAll("=\'xXx\'", " ");
- currentRow.add(StripCell);
- cell = "";
- }
- } else if (qName.equalsIgnoreCase("td")) {
- if (embededTableCount > 0) {
- // table within the table
- cell += event.toString();
- } else {
- /***********************************************
- * In order to parse the DOORS import, ='xXx' had to be added to some keywords. This is because the
- * parser does not support simple keywords (e.g.
- * <table nowrap>
- * ) remove the additional code before adding it to the row
- */
-
- String StripCell = cell.replaceAll("=\'xXx\'", " ");
- currentRow.add(StripCell);
- cell = "";
- }
+ }
+ } else if (qName.equalsIgnoreCase("th")) {
+ if (embededTableCount > 0) {
+ // table within the table
+ cell.append(elementToString(qName, null, true));
} else {
- cell += event.toString();
+ currentRow.add(cell.toString());
+ cell.delete(0, cell.length());
}
+ } else if (qName.equalsIgnoreCase("td")) {
+ if (embededTableCount > 0) {
+ // table within the table
+ cell.append(elementToString(qName, null, true));
+ } else {
+ currentRow.add(cell.toString());
+ cell.delete(0, cell.length());
+ }
+ } else {
+ cell.append(elementToString(qName, null, true));
+ }
+ }
- } else if (event.isCharacters()) {
- Characters characters = (Characters) event;
- cell += characters.toString();
- if (isTitle) {
- title = cell;
- title = title.replaceAll("/", "_");
- title = title.replaceAll(" ", "_");
- if (title.equals("")) {
- title = "empty_title";
- }
+ @Override
+ public void characters(char ch[], int start, int length) {
+ for (int i = 0; i < length; i++) {
+ cell.append(Character.toString(ch[i + start]));
+ }
+ String title = "";
+ if (isTitle) {
+ title = cell.toString();
+ title = title.replaceAll("/", "_");
+ title = title.replaceAll(" ", "_");
+ if (title.equals("")) {
+ title = "empty_title";
+ }
+ try {
RoughArtifact roughArtifact = new RoughArtifact(RoughArtifactKind.CONTAINER);
roughArtifact.addAttribute(CoreAttributeTypes.Name, title.trim());
roughArtifact.setGuid(GUID.create());
roughArtifact.setSectionNumber("0");
collector.addRoughArtifact(roughArtifact);
isTitle = false;
+ } catch (OseeCoreException ex) {
+ // do nothing
}
}
}
- myStringReader.close();
- // Do last artifact
- processArtifact();
+ @Override
+ public void endDocument() throws SAXException {
+ try {
+ processArtifact();
+ } catch (OseeCoreException ex) {
+ throw new SAXException(ex);
+ }
+ }
}
- private String getToken(byte[] input, int startChar) {
- String returnValue = "";
- int iPos = 0;
- boolean inSingleQuote = false, inDoubleQuote = false;
- while (returnValue.equals("") && iPos < input.length - startChar) {
- char theChar = (char) input[iPos + startChar];
- switch (theChar) {
-
- case '\'':
- if (inSingleQuote) {
- // have to include closing ' (iPos + 1);
- returnValue = new String(input, startChar, iPos + 1);
- iPos++;
- inSingleQuote = false;
- } else {
- inSingleQuote = true;
- }
- break;
+ @Override
+ public void extractFromSource(OperationLogger logger, URI source, RoughArtifactCollector collector) throws Exception {
- case '\"':
- if (inDoubleQuote) {
- // have to include closing " (iPos + 1);
- returnValue = new String(input, startChar, iPos + 1);
- iPos++;
- inDoubleQuote = false;
- } else {
- inDoubleQuote = true;
- }
- break;
+ /**************************************************************
+ * DOORS uses non standard HTML. Read in the file and standardize it
+ */
- case '<':
- case '>':
- inDoubleQuote = inSingleQuote = false;
- case '=':
- case ' ':
- if ((!inSingleQuote) && (!inDoubleQuote)) {
- // end of token
- if (iPos == 0) {
- // starts with a terminator, token is 1 char
- returnValue = String.valueOf(theChar);
- } else {
- returnValue = new String(input, startChar, iPos);
- iPos++;
- }
- }
- break;
- }
- iPos++;
+ postProcessGuids.clear();
+ inArtifact = false;
+ theArtifact.clear();
+ paragraphNumber = "";
+ paragraphName = "";
+
+ this.collector = collector;
+ String fileName = source.getAuthority();
+ if (fileName == null) {
+ fileName = "";
}
- return returnValue;
- }
+ fileName += source.getPath();
+ fileName = "file://" + fileName;
+ uriDirectoryName = fileName.substring(0, fileName.lastIndexOf('/') + 1);
- /**********************************************************************
- * @param file name of the DOORS export file
- * @return HTML representation of the DOORS export in standard HTML format DOORS export uses nonstandard HTML, this
- * method standardizes it for use with the parser Known issues The opening <META> tab is not terminated (no </META>). <br>
- * tags are not terminated (no </br>) <img> tags are not terminated (no </img>)
- * <p>
- * not terminated (no
- * </p>
- * ). This is a problem because it would take a lot to determine the point where the </p> needs to go. However, the
- * way
- * <p>
- * is used it looks like it can simply be ignored (converted to
- * </p>
- * ) attributes within tags are not given values (<td border>instead of <td border='small'>) attributes values not
- * quoted (
- * <th width=50>instead of
- * <th width='50'>)
- */
- private String standardizeDOORS(String input) throws OseeArgumentException {
- StringBuilder rawValue = new StringBuilder(""), returnValue = new StringBuilder("");
- int iLastSlash = input.lastIndexOf('/'), iLastBackslash = input.lastIndexOf('\\');
- int iLast = (iLastBackslash > iLastSlash) ? iLastBackslash : iLastSlash;
- String filePath = input.substring(0, iLast + 1);
- FileInputStream readStream = null;
- try {
- readStream = new FileInputStream(input);
- int iRead = 0;
- byte[] readBytes = new byte[READ_BUFFER_LEN];
- iRead = READ_BUFFER_LEN;
- while (iRead == READ_BUFFER_LEN) {
- iRead = readStream.read(readBytes);
- String readString = new String(readBytes, 0, iRead);
- rawValue.append(readString);
- }
- } catch (Exception e) {
- e.printStackTrace();
- } finally {
- try {
- readStream.close();
- } catch (IOException e) {
+ SAXParser parser = new SAXParser();
+ Handler theHandler = new Handler();
+ parser.setContentHandler(theHandler);
+ parser.parse(fileName);
- }
- }
- // We now have the whole file as a string --
- // walk through it one token at a time
- int iStart = 0;
- boolean inTag = false, inMeta = false, inBr = false, inImg = false, inP = false, tagName = false, equalFound =
- false, attributeFound = false, LiteralTag = false, isSrcTag = false;
- byte[] rawBytes = null;
- try {
- rawBytes = rawValue.toString().getBytes("UTF-8");
- } catch (UnsupportedEncodingException e) {
- // use default encoding
- rawBytes = rawValue.toString().getBytes();
- }
- while (iStart < rawValue.length()) {
- String token = getToken(rawBytes, iStart);
- iStart += token.length();
- if (token.length() == 0) {
- // do nothing, we are done
- inTag = false; // breakpoint
- } else if (token.equals("<")) {
- if (inTag) {
- throw (new OseeArgumentException("< within a tag in HTML"));
- }
- inTag = true;
- } else if (token.equals(">")) {
- if (!inTag) {
- throw (new OseeArgumentException("> outside a tag in HTML"));
- }
- inTag = false;
- if (inMeta) {
- token = " />";
- inMeta = false;
- } else if (inBr) {
- token = " />";
- inBr = false;
- } else if (inImg) {
- token += "</img>";
- inImg = false;
- } else if (inP) {
- token = "/>";
- inP = false;
- } else if (attributeFound) {
- token = "='xXx'>";
- }
- tagName = false;
- attributeFound = false;
- LiteralTag = false;
- equalFound = false;
- } else if (token.equalsIgnoreCase("META") && inTag) {
- inMeta = true;
- tagName = true;
- } else if (token.equalsIgnoreCase("br") && inTag) {
- inBr = true;
- tagName = true;
- } else if (token.equalsIgnoreCase("img") && inTag) {
- inImg = true;
- tagName = true;
- } else if (token.equalsIgnoreCase("p") && inTag) {
- inP = true;
- tagName = true;
- } else if (token.equalsIgnoreCase("!DOCTYPE") && inTag) {
- LiteralTag = true;
- } else if (token.equalsIgnoreCase("BODY") && inTag) {
- // This is a parser issue has to be same case?
- token = "body";
- } else if (token.equalsIgnoreCase("/BODY") && inTag) {
- // This is a parser issue has to be same case?
- token = "/body";
- } else if (token.equalsIgnoreCase("!--") && inTag) {
- LiteralTag = true;
- } else if (token.equalsIgnoreCase("&nbsp") && !inTag) {
- // no closing semicolon
- token = "&nbsp;";
- } else if (!LiteralTag) {
- if (inTag) {
- /***************************************************
- * If this is an attribute verify that the value is of the form 'value' or "value"
- */
- if (!tagName) {
- tagName = true;
- } else if (!attributeFound) {
- attributeFound = !token.equals(" ");
- /************************************
- * for images, DOORS exports the file to the same directory as the HTML file and does not qualify the
- * src= keyword. This is fine for rendering in a browser, but in order to import the file later, it
- * must be qualified
- */
- if (attributeFound && token.equalsIgnoreCase("src")) {
- isSrcTag = true;
- } else {
- isSrcTag = false;
- }
- } else if (!equalFound) {
- if (token.equals("=")) {
- equalFound = true;
- } else if (!token.equals(" ")) {
- // this is just an attribute no =
- token = "='xXx' " + token;
- }
- } else if (!token.equals(" ")) {
- // is the value quoted?
- if (!((token.charAt(0) == '\'') || (token.charAt(0) == '"'))) {
- // add quotes
- token = "'" + token + "'";
- }
- if (isSrcTag) {
- // if not qualified, qualify it
- if ((token.indexOf('/') == -1) && (token.indexOf('\\') == -1) && token.indexOf("://") == -1) {
- token = token.substring(0, 1) + "file:///" + filePath + token.substring(1);
- }
- }
- attributeFound = false;
- equalFound = false;
- }
- }
- }
- returnValue.append(token);
- }
- return returnValue.toString();
}
@Override
@@ -462,16 +293,17 @@ public class DoorsArtifactExtractor extends AbstractArtifactExtractor {
for (String htmlVal : HTML) {
int iCount = 0;
for (Integer imageNumber : Ids) {
- htmlVal = htmlVal.replaceAll(imageBaseName + Integer.toString(iCount), imageNumber.toString());
+ htmlVal = htmlVal.replaceAll(IMAGE_BASE_NAME + Integer.toString(iCount), imageNumber.toString());
iCount++;
}
theArtifact.addAttribute(CoreAttributeTypes.HTMLContent, htmlVal);
}
} catch (OseeCoreException e) {
- e.printStackTrace();
+ // do nothing
}
}
}
+
private static enum RowTypeEnum {
ID("ID"),
REQUIREMENTS("Requirements"),
@@ -533,10 +365,6 @@ public class DoorsArtifactExtractor extends AbstractArtifactExtractor {
}
}
- private boolean inArtifact = false;
- private final Vector<String> theArtifact = new Vector<String>();
- private String paragraphNumber = "", paragraphName = "";
-
public void processRow(String[] row) throws OseeCoreException {
/***************************************************************
* First check the document applicability box, if it is empty this is a header row
@@ -546,8 +374,8 @@ public class DoorsArtifactExtractor extends AbstractArtifactExtractor {
for (rowIndex = 0; rowIndex < row.length; rowIndex++) {
RowTypeEnum rowType = rowIndexToRowTypeMap.get(rowIndex);
if (rowType == RowTypeEnum.DOCUMENT_APPLICABILITY) {
- String rowValue = row[rowIndex];
- if (rowValue.trim().equals("") || rowValue.trim().equals("<br></br>")) {
+ String rowValue = row[rowIndex].toLowerCase();
+ if (rowValue.trim().equals("") || rowValue.trim().equals("<br></br>") || rowValue.trim().equals("<br>")) {
if (inArtifact) {
processArtifact();
}
@@ -583,6 +411,7 @@ public class DoorsArtifactExtractor extends AbstractArtifactExtractor {
}
}
}
+ rowValue = "";
}
break;
@@ -618,10 +447,10 @@ public class DoorsArtifactExtractor extends AbstractArtifactExtractor {
if (inArtifact) {
ListIterator<String> iter = theArtifact.listIterator(rowIndex);
String theColumnValue = iter.next();
- theColumnValue += "\n" + rowValue;
+ theColumnValue += "\n" + rowValue.trim();
iter.set(theColumnValue);
} else {
- theArtifact.add(rowValue);
+ theArtifact.add(rowValue.trim());
}
}
@@ -646,7 +475,8 @@ public class DoorsArtifactExtractor extends AbstractArtifactExtractor {
case REQUIREMENTS:
StringBuffer imageFileList = new StringBuffer("");
- rowValue = translateRequirements(rowValue, imageFileList);
+ getImageList(rowValue, imageFileList);
+ rowValue = normailizeHtml(rowValue);
String imageFile = imageFileList.toString();
if (!imageFile.isEmpty()) {
String theImage;
@@ -656,23 +486,25 @@ public class DoorsArtifactExtractor extends AbstractArtifactExtractor {
do {
comma = imageFile.indexOf(',');
if (comma == -1) {
- theImage = imageFile;
+ theImage = uriDirectoryName + imageFile;
imageFile = " ";
} else {
- theImage = imageFile.substring(0, comma);
+ theImage = uriDirectoryName + imageFile.substring(0, comma);
imageFile = imageFile.substring(comma + 1);
}
try {
URI imageURI = new URI(theImage);
roughArtifact.addAttribute("Image Content", imageURI);
- rowValue = rowValue.replace(theImage, imageBaseName + Integer.toString(imageNumber));
+ rowValue = rowValue.replace(theImage, IMAGE_BASE_NAME + Integer.toString(imageNumber));
imageNumber++;
} catch (URISyntaxException e) {
e.printStackTrace();
}
} while (comma != -1);
}
- roughArtifact.addAttribute(CoreAttributeTypes.HTMLContent, rowValue);
+ if (Strings.isValid(rowValue)) {
+ roughArtifact.addAttribute(CoreAttributeTypes.HTMLContent, rowValue);
+ }
break;
case ID:
@@ -719,12 +551,7 @@ public class DoorsArtifactExtractor extends AbstractArtifactExtractor {
* The followings possibilities exist for this field 1) Field empty 2) Some/all keywords The keywords may not be
* filled in. In other words a keyword may be followed by a keyword instead of data.
*/
- String[] keywords =
- {"Effectivity:", "Verf Method:", "Verf Level:", "Verf Location:", "Verf Type:", "Verified By:",
- "Criteria:"};
- IAttributeType[] FieldType =
- {null, CoreAttributeTypes.QualificationMethod, CoreAttributeTypes.VerificationLevel,
- CoreAttributeTypes.VerificationEvent, null, null, null}; // Last one is actually a string
+
String trimmed = clearHTML(column);
if (trimmed.trim().isEmpty()) {
// empty
@@ -733,44 +560,45 @@ public class DoorsArtifactExtractor extends AbstractArtifactExtractor {
/*****************************************************************
* There are some keywords that do not map-- need them in the list to check if there is data
*/
- for (int i = 0; i < keywords.length; i++) {
+ for (int i = 0; i < VERIFICATION_KEYWORDS.length; i++) {
// special case Criteria is a string attribute
- if ((FieldType[i] == null) && (!keywords[i].equals("Criteria:"))) {
+ if ((FIELD_TYPE[i] == null) && (!VERIFICATION_KEYWORDS[i].equals("Criteria:"))) {
continue;
}
- int iStart = trimmed.indexOf(keywords[i]);
+ int iStart = trimmed.indexOf(VERIFICATION_KEYWORDS[i]);
if (iStart != -1) {
boolean dataFound = true;
// any data?
- String rest = trimmed.substring(iStart + keywords[i].length());
+ String rest = trimmed.substring(iStart + VERIFICATION_KEYWORDS[i].length());
rest = rest.trim();
// is it empty?
dataFound = !rest.isEmpty();
- for (int j = 0; (j < keywords.length) && dataFound; j++) {
- dataFound = !rest.startsWith(keywords[j]);
+ for (int j = 0; (j < VERIFICATION_KEYWORDS.length) && dataFound; j++) {
+ dataFound = !rest.startsWith(VERIFICATION_KEYWORDS[j]);
}
if (dataFound) {
// find the data
int colon = rest.indexOf(':');
if (colon == -1) {
- if (keywords[i].equals("Criteria:")) {
+ if (VERIFICATION_KEYWORDS[i].equals("Criteria:")) {
// special case Criteria is a string attribute
roughArtifact.addAttribute("Verification Acceptance Criteria", rest);
} else {
- roughArtifact.addAttribute(FieldType[i], rest);
+ roughArtifact.addAttribute(FIELD_TYPE[i], rest);
}
} else {
// find the start of the keyword
boolean foundKeyword = false;
- for (int j = 0; (j < keywords.length); j++) {
- if (rest.indexOf(keywords[j]) == (colon - keywords[j].length() + 1)) {
- roughArtifact.addAttribute(FieldType[i], rest.substring(0, rest.indexOf(keywords[j]) - 1));
+ for (int j = 0; (j < VERIFICATION_KEYWORDS.length); j++) {
+ if (rest.indexOf(VERIFICATION_KEYWORDS[j]) == (colon - VERIFICATION_KEYWORDS[j].length() + 1)) {
+ roughArtifact.addAttribute(FIELD_TYPE[i],
+ rest.substring(0, rest.indexOf(VERIFICATION_KEYWORDS[j]) - 1));
foundKeyword = true;
break;
}
}
if (!foundKeyword) {
- roughArtifact.addAttribute(FieldType[i], rest);
+ roughArtifact.addAttribute(FIELD_TYPE[i], rest);
}
}
@@ -785,83 +613,32 @@ public class DoorsArtifactExtractor extends AbstractArtifactExtractor {
/*************************************************************
* @param inputHTML Input value of the requirements field (as exported from DOORS)
- * @param imageFileList is a comma separated list of image file names in the HTML
- * @return HTML translated to common format Following transformations will be performed <code>
- * DOORS format Common format
- * <i></i> <em></em>
- * <b></b> <strong></strong>
- * &#9 &nbsp;&nbsp;&nbsp;&nbsp;
- * <br></br> <p>&nbsp;</p>
- *
- * DOORS also puts <div tags within other tags, For example
- * <b>xxx<div ... >yyy</div></b>.
- * Translate this to
- * <b>xxx</b><div ...><b>yyy</b></div>
- *
- * If there is an <img tag, add the file name of the image file in imageFile
- * </code>
+ * @param imageFileList is a comma separated list of image file names in the HTML If there is an <img tag, add the
+ * file name of the image file in imageFile
*/
- private String translateRequirements(String inputHTML, StringBuffer imageFileList) {
- String outputHTML = inputHTML;
- String imageFiles = "";
- outputHTML = outputHTML.replaceAll("<i>", "<em>");
- outputHTML = outputHTML.replaceAll("</i>", "</em>");
- outputHTML = outputHTML.replaceAll("<b>", "<strong>");
- outputHTML = outputHTML.replaceAll("</b>", "</strong>");
- outputHTML = outputHTML.replaceAll("&#9", "&nbsp;&nbsp;&nbsp;&nbsp;");
- outputHTML = outputHTML.replaceAll("<br></br>", "<p>&nbsp;</p>");
-
- String Lower = outputHTML.toLowerCase();
- if (Lower.indexOf("<div") != -1) {
- // is the DIV inside of a <strong> or <i>
- int div = Lower.indexOf("<div"), bold = Lower.indexOf("<strong>"), italic = Lower.indexOf("<em>");
- if ((bold != -1) && (bold < div)) {
- // find the </b>
- int boldEnd = Lower.indexOf("</strong>");
- if (boldEnd > div) {
- int divEND = Lower.indexOf(">", div), divClose = Lower.indexOf("</div>", div);
- Lower =
- outputHTML.substring(0, div - 1) + "</strong>" + outputHTML.substring(div - 1, divEND + 1) + "<strong>" + outputHTML.substring(
- divEND + 1, divClose) + "</strong>" + outputHTML.substring(divClose, boldEnd) + outputHTML.substring(boldEnd + "</strong>".length());
- outputHTML = Lower;
- // Set up in case there is also an <em>
- Lower = outputHTML.toLowerCase();
- italic = Lower.indexOf("<em>");
- div = Lower.indexOf("<div");
- }
- }
- if ((italic != -1) && (italic < div)) {
- // find the </b>
- int italicEnd = Lower.indexOf("</em>");
- if (italicEnd > div) {
- int divEND = Lower.indexOf(">", div), divClose = Lower.indexOf("</div>", div);
- Lower =
- outputHTML.substring(0, div - 1) + "</em>" + outputHTML.substring(div - 1, divEND + 1) + "<em>" + outputHTML.substring(
- divEND + 1, divClose) + "</em>" + outputHTML.substring(divClose, italicEnd) + outputHTML.substring(italicEnd + "</em>".length());
- outputHTML = Lower;
- }
- }
-
- }
- Lower = outputHTML.toLowerCase();
+ private void getImageList(String inputHTML, StringBuffer imageFileList) {
+ String outputHtml = inputHTML;
+ String Lower = outputHtml.toLowerCase();
int img = Lower.indexOf("img ");
+ imageFileList.setLength(0);
+ boolean first = true;
while (img != -1) {
int src = Lower.indexOf("src=", img);
if (src != -1) {
src += 4;
char qte = Lower.charAt(src);
int iEnd = Lower.indexOf(qte, src + 1);
- if (imageFiles.isEmpty()) {
- imageFiles = outputHTML.substring(src + 1, iEnd);
+ if (first) {
+ imageFileList.append(inputHTML.substring(src + 1, iEnd));
+ first = false;
} else {
- imageFiles += "," + outputHTML.substring(src + 1, iEnd);
+ imageFileList.append("," + outputHtml.substring(src + 1, iEnd));
}
img = Lower.indexOf("img ", src);
+ } else {
+ img = -1;
}
}
- imageFileList.append(imageFiles);
-
- return outputHTML;
}
/************************************************************
@@ -871,6 +648,9 @@ public class DoorsArtifactExtractor extends AbstractArtifactExtractor {
private String clearHTML(String input) {
String returnValue = "", processString = input;
int openBracket = processString.indexOf('<'), closeBracket;
+ if (openBracket == -1) {
+ returnValue = input;
+ }
while (openBracket >= 0) {
/************************************************************
* if the bracket doesn't start the string, copy the start to the return (plus a space). Find the close bracket
@@ -887,4 +667,26 @@ public class DoorsArtifactExtractor extends AbstractArtifactExtractor {
}
return returnValue;
}
+
+ private String normailizeHtml(String inputHtml) {
+
+ String returnValue = NormalizeHtml.convertToNormalizedHTML(inputHtml);
+ int bodyStart = returnValue.indexOf(BODY_START_TAG);
+ int bodyEnd = returnValue.indexOf(BODY_END_TAG);
+ if (bodyStart != -1) {
+ bodyStart += BODY_START_TAG.length();
+ if (bodyEnd == -1) {
+ bodyEnd = returnValue.length() - 1;
+ } else {
+ bodyEnd--;
+ }
+ if (bodyEnd <= bodyStart) {
+ returnValue = ""; // no body
+ } else {
+ returnValue = returnValue.substring(bodyStart, bodyEnd);
+ }
+ }
+ returnValue = returnValue.trim();
+ return returnValue;
+ }
}

Back to the top