Skip to main content
summary refs log tree commit diff stats
blob: a1b8ba1a5c2ce237de98ed7a66115ba7ed983e6a (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
/*******************************************************************************
 * Copyright (c) 2004, 2007 Boeing.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     Boeing - initial API and implementation
 *******************************************************************************/

package org.eclipse.osee.framework.ui.skynet.render.artifactElement;

import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.eclipse.osee.framework.jdk.core.type.OseeCoreException;
import org.eclipse.osee.framework.skynet.core.linking.OseeLinkBuilder;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Splits a Word XML document into per-artifact chunks. Each artifact's content is delimited by a
 * "start" and an "end" edit image: a {@code w:pict} element whose {@code w:binData} checksum matches
 * {@link OseeLinkBuilder#START_BIN_DATA} or {@link OseeLinkBuilder#END_BIN_DATA}. The artifact guid is
 * read from the {@code o:title} attribute of the picture's {@code v:imagedata} element.
 * <p>
 * All parse state (counters, current guid, matched image ids) is held per instance and is reset at
 * the beginning of every {@link #extractElements(OseeLinkBuilder)} call, so a failed extraction cannot
 * influence a later one. Instances are not designed to be used by several threads at once.
 *
 * @author Jeff C. Phillips
 */
public class WordImageArtifactElementExtractor implements IElementExtractor {
   private static final String SECTION_TAG = "wx:sect";
   private static final String SUB_SECTION_TAG = "wx:sub-section";
   private static final String BODY_TAG = "w:body";
   private static final String PICT = "w:pict";
   private static final String SRC = "src";
   private static final String BIN_DATA = "w:binData";
   private static final String IMAGE = "v:imagedata";
   private static final String TITLE = "o:title";

   /** Marker meaning "reference checksum not computed yet". */
   private static final int UNSET_CHECKSUM = -1;

   /** First {@code w:binData} element seen for each {@code v:imagedata} {@code src} value. */
   private final Map<String, Element> pictureMap;
   private final Document document;
   private Element oleDataElement;
   private int numberOfStartTags;
   private int numberOfEndTags;
   /** Guid taken from the most recently matched edit image. */
   private String guid;

   /** {@code src} of the start edit image once it has been identified by checksum; otherwise null. */
   private String startImageId;
   /** {@code src} of the end edit image once it has been identified by checksum; otherwise null. */
   private String endImageId;
   private int startImageChecksum = UNSET_CHECKSUM;
   private int endImageChecksum = UNSET_CHECKSUM;

   private enum ParseState {
      LOOKING_FOR_START,
      LOOKING_FOR_END
   }

   /** Which side of the edit image is retained when a boundary element is cloned. */
   private enum Side {
      LEFT,
      RIGHT
   }

   /**
    * @param document the parsed Word XML document that will be scanned for edit images
    */
   public WordImageArtifactElementExtractor(Document document) {
      this.document = document;
      this.numberOfEndTags = 0;
      this.numberOfStartTags = 0;
      this.pictureMap = new HashMap<>();
   }

   /**
    * @return the first {@code w:docOleData} element found by the last extraction, or null when none
    * was found or no extraction has run yet
    */
   @Override
   public Element getOleDataElement() {
      return oleDataElement;
   }

   /**
    * Runs {@link #extractElements(OseeLinkBuilder)} with a freshly created link builder.
    */
   @Override
   public List<WordExtractorData> extractElements() throws DOMException, OseeCoreException {
      OseeLinkBuilder linkBuilder = new OseeLinkBuilder();
      return extractElements(linkBuilder);
   }

   /** Returns every piece of per-extraction state to its initial value. */
   private void resetClassFields() {
      pictureMap.clear();
      numberOfStartTags = 0;
      numberOfEndTags = 0;
      oleDataElement = null;
      guid = null;
      // Also drop image ids left behind by an earlier extraction that ended with an exception.
      clearImageIds();
   }

   /**
    * Walks all elements of the document and groups the ones lying between a start and an end edit
    * image into one {@link WordExtractorData} per artifact.
    *
    * @param linkBuilder accepted for API compatibility; the current implementation does not consult it
    * @return one entry per start edit image, in document order
    * @throws OseeCoreException when no start image was found or the number of start and end images
    * differs
    */
   public List<WordExtractorData> extractElements(OseeLinkBuilder linkBuilder) throws DOMException, OseeCoreException {
      final List<WordExtractorData> artifactElements = new LinkedList<>();
      Element rootElement = document.getDocumentElement();
      resetClassFields();

      try {
         NodeList nodeList = rootElement.getElementsByTagName("*");

         // Must run before the scan: it re-attaches binary data to pictures that lack it.
         handleImages(rootElement);

         oleDataElement = (Element) getElement(rootElement, "w:docOleData");

         ParseState parseState = ParseState.LOOKING_FOR_START;
         WordExtractorData extractorData = null;

         // The list is live, so its length is read only after handleImages has added its nodes.
         int nodeSize = nodeList.getLength();
         for (int i = 0; i < nodeSize; i++) {
            Element element = (Element) nodeList.item(i);
            if (!properLevelChild(element)) {
               continue;
            }

            if (parseState == ParseState.LOOKING_FOR_END) {
               if (isArtifactEditTag(element, false)) {
                  parseState = handleEndElement(extractorData, element);
               } else {
                  extractorData.addChild(element.cloneNode(true));
               }
            } else if (isArtifactEditTag(element, true)) {
               parseState = ParseState.LOOKING_FOR_END;
               //This is where we create a new data object for each artifact
               extractorData = new WordExtractorData();
               handleStartElement(artifactElements, element, extractorData);
            }
         }

         validateEditTags();
      } finally {
         // Clear even when validation fails so the ids never outlive the document they belong to.
         clearImageIds();
      }
      return artifactElements;
   }

   /**
    * Records an end edit image and adds whatever precedes the image inside {@code element} to the
    * artifact being collected.
    *
    * @return the state the parser should continue in
    */
   private ParseState handleEndElement(WordExtractorData extractorData, Element element) {
      numberOfEndTags++;
      guid = null;

      Node clonedElement = cloneWithoutArtifactEditImage(element, Side.LEFT);
      if (elementHasGrandChildren(clonedElement)) {
         extractorData.addChild(clonedElement);
      }
      return ParseState.LOOKING_FOR_START;
   }

   /**
    * Records a start edit image: creates the parent element carrying the current guid, registers
    * {@code extractorData} in the result list and adds whatever follows the image inside
    * {@code element}.
    */
   private void handleStartElement(final List<WordExtractorData> artifactElements, Element element, WordExtractorData extractorData) {
      numberOfStartTags++;
      Element newArtifactElement = document.createElement("WordAttribute.WORD_TEMPLATE_CONTENT");
      populateNewArtifactElement(newArtifactElement);

      extractorData.setGuid(guid);
      extractorData.addParent(newArtifactElement);

      artifactElements.add(extractorData);

      Node clonedElement = cloneWithoutArtifactEditImage(element, Side.RIGHT);
      if (elementHasGrandChildren(clonedElement)) {
         extractorData.addChild(clonedElement);
      }
   }

   /** Forgets the matched image ids and the cached reference checksums. */
   private void clearImageIds() {
      startImageId = null;
      endImageId = null;
      startImageChecksum = UNSET_CHECKSUM;
      endImageChecksum = UNSET_CHECKSUM;
   }

   /** @return true when the first child of {@code element} exists and itself has at least one child */
   private boolean elementHasGrandChildren(Node element) {
      NodeList children = element.getChildNodes();
      return children.getLength() > 0 && children.item(0).getChildNodes().getLength() > 0;
   }

   /**
    * Deep-clones {@code element} and strips the edit images from the clone together with all
    * descendants on the side that is not kept: everything before the start image when keeping
    * {@link Side#RIGHT}, everything after the end image when keeping {@link Side#LEFT}.
    */
   private Node cloneWithoutArtifactEditImage(Element element, Side keepSide) {
      Collection<Node> removals = new LinkedList<>();

      Element clonedElement = (Element) element.cloneNode(true);
      boolean beforeEditTag = true;
      boolean afterEditTag = false;
      NodeList descendants = clonedElement.getElementsByTagName("*");
      int nodeSize = descendants.getLength();

      // Only collect here; removing while walking would shift the live node list.
      for (int i = 0; i < nodeSize; i++) {
         Node descendant = descendants.item(i);
         if (isEditStartImage(descendant)) {
            removals.add(descendant);
            beforeEditTag = false;
         } else if (isEditEndImage(descendant)) {
            removals.add(descendant);
            afterEditTag = true;
         } else if ((beforeEditTag && keepSide == Side.RIGHT) || (afterEditTag && keepSide == Side.LEFT)) {
            removals.add(descendant);
         }
      }

      for (Node remove : removals) {
         Node parentNode = remove.getParentNode();
         if (parentNode != null) {
            parentNode.removeChild(remove);
         }
      }
      return clonedElement;
   }

   private boolean isEditEndImage(Node descendant) {
      return isEditImage(descendant, false);
   }

   private boolean isEditStartImage(Node descendant) {
      return isEditImage(descendant, true);
   }

   /**
    * @return true when {@code element} is a {@code w:pict} holding the requested (start or end) edit
    * image
    */
   private boolean isEditImage(Node element, boolean lookingForStartImage) {
      if (!PICT.equals(element.getNodeName())) {
         return false;
      }
      return isImageBinData((Element) element, lookingForStartImage);
   }

   /**
    * Decides whether {@code pictElement} is the requested edit image. Until the image id is known
    * the binary data checksum is compared; afterwards the {@code src} id alone is compared. A match
    * also updates the current guid.
    */
   private boolean isImageBinData(Element pictElement, boolean lookingForStart) {
      String imageId = lookingForStart ? startImageId : endImageId;

      boolean foundBinData;
      if (imageId == null) {
         foundBinData = compareBinData(pictElement, lookingForStart);
      } else {
         foundBinData = imageId.equals(getImageId(pictElement));
      }

      if (foundBinData) {
         setGuid(pictElement);
      }
      return foundBinData;
   }

   /**
    * Compares the checksum of the picture's first {@code w:binData} with the reference checksum and,
    * on a match, remembers the picture's image id for later comparisons.
    *
    * @return false when the picture has no binary data text or the checksums differ
    */
   private boolean compareBinData(Element pictElement, boolean lookingForStart) {
      int imageCheckSum = getImageChecksum(lookingForStart);
      Node currentBinData = getElement(pictElement, BIN_DATA);
      if (currentBinData == null) {
         return false;
      }

      // An empty w:binData element has no child node at all; treat that as "not an edit image".
      Node binDataValue = currentBinData.getFirstChild();
      String encodedImage = binDataValue == null ? null : binDataValue.getNodeValue();
      if (encodedImage == null) {
         return false;
      }

      boolean foundBinData = getCheckSum(encodedImage) == imageCheckSum;
      if (foundBinData) {
         if (lookingForStart) {
            startImageId = getImageId(pictElement);
         } else {
            endImageId = getImageId(pictElement);
         }
      }
      return foundBinData;
   }

   /** @return the lazily computed checksum of the reference start or end image data */
   private int getImageChecksum(boolean lookingForStart) {
      if (lookingForStart) {
         if (startImageChecksum == UNSET_CHECKSUM) {
            startImageChecksum = getCheckSum(OseeLinkBuilder.START_BIN_DATA);
         }
         return startImageChecksum;
      }

      if (endImageChecksum == UNSET_CHECKSUM) {
         endImageChecksum = getCheckSum(OseeLinkBuilder.END_BIN_DATA);
      }
      return endImageChecksum;
   }

   /**
    * @return the {@code src} attribute of the picture's first {@code v:imagedata} element, or null
    * when the element or the attribute is missing
    */
   private String getImageId(Element pictElement) {
      Node imageData = getElement(pictElement, IMAGE);
      if (imageData == null) {
         return null;
      }
      Node srcAttribute = imageData.getAttributes().getNamedItem(SRC);
      return srcAttribute == null ? null : srcAttribute.getNodeValue();
   }

   /**
    * Copies the {@code o:title} attribute of the picture's first {@code v:imagedata} element into
    * the current guid. The guid is left untouched when the element or the attribute is missing.
    */
   private void setGuid(Element pictElement) {
      Node imageData = getElement(pictElement, IMAGE);
      if (imageData == null) {
         return;
      }
      Node titleAttribute = imageData.getAttributes().getNamedItem(TITLE);
      if (titleAttribute != null) {
         guid = titleAttribute.getNodeValue();
      }
   }

   /** @return the first descendant of {@code element} with the given tag name, or null */
   private Node getElement(Element element, String name) {
      NodeList descendants = element.getElementsByTagName(name);
      return descendants.getLength() > 0 ? descendants.item(0) : null;
   }

   /**
    * Sums the character codes of {@code data}, ignoring spaces, tabs, carriage returns and line
    * feeds. The sum starts at -1, so data without any counted character yields -1.
    */
   private static int getCheckSum(String data) {
      int checksum = -1;
      for (int index = 0; index < data.length(); index++) {
         char character = data.charAt(index);
         if (character != '\n' && character != '\t' && character != '\r' && character != ' ') {
            checksum += character;
         }
      }
      return checksum;
   }

   /**
    * @throws OseeCoreException when no start image was seen or start and end counts do not match
    */
   private void validateEditTags() throws OseeCoreException {
      if (numberOfStartTags == 0 || numberOfEndTags != numberOfStartTags) {
         throw new OseeCoreException(
            "This document is missing start/end edit tags, therefore the document will not be saved. You can re-edit the artifact and the edit tags should reappear.");
      }
   }

   /** Stamps the current guid onto the freshly created artifact element. */
   private void populateNewArtifactElement(Element newArtifactElement) throws DOMException {
      newArtifactElement.setAttribute("guid", guid);
   }

   /**
    * @return true when any {@code w:pict} descendant of {@code element} is the requested (start or
    * end) edit image
    */
   private boolean isArtifactEditTag(Element element, boolean lookingForStart) {
      NodeList descendants = element.getElementsByTagName(PICT);
      int nodeSize = descendants.getLength();
      for (int i = 0; i < nodeSize; i++) {
         if (isEditImage(descendants.item(i), lookingForStart)) {
            return true;
         }
      }
      return false;
   }

   /** @return true when {@code element} sits at a nesting level the scan treats as artifact content */
   private boolean properLevelChild(Element element) {
      return (properLevelChildWord2003(element) || properLevelChildWord2007(element));
   }

   /**
    * Makes sure every picture carries binary data: the first {@code w:binData} seen for an image
    * {@code src} is remembered, and a copy of it is appended to any later picture that references
    * the same {@code src} but has no {@code w:binData} of its own.
    */
   private void handleImages(Element element) {
      NodeList pictures = element.getElementsByTagName(PICT);
      int nodeSize = pictures.getLength();
      for (int i = 0; i < nodeSize; i++) {
         Element picture = (Element) pictures.item(i);
         NodeList imageDataElement = picture.getElementsByTagName(IMAGE);
         if (imageDataElement.getLength() == 0) {
            continue;
         }

         String imgKey = ((Element) imageDataElement.item(0)).getAttribute(SRC);
         Element storedPictureElement = pictureMap.get(imgKey);
         NodeList binDataElement = picture.getElementsByTagName(BIN_DATA);

         if (storedPictureElement != null) {
            if (binDataElement.getLength() == 0) {
               picture.appendChild(storedPictureElement.cloneNode(true));
            }
         } else if (binDataElement.getLength() > 0) {
            // Only remember real data; a picture without w:binData has nothing worth storing.
            pictureMap.put(imgKey, (Element) binDataElement.item(0));
         }
      }
   }

   /**
    * @return the node name of the ancestor {@code level} steps above {@code element}, or an empty
    * string when the tree is not that deep
    */
   private String getAncestorName(Element element, int level) {
      Node parent = element;
      for (int i = 0; i < level && parent != null; i++) {
         parent = parent.getParentNode();
      }
      return parent != null ? parent.getNodeName() : "";
   }

   /**
    * Accepts direct children of the body (other than sections and sub-sections), children of a
    * section that is itself a direct child of the body, and children of a sub-section; sub-section
    * elements themselves are never accepted. Judging by the method name this targets the Word 2003
    * layout; the code itself does not establish that.
    */
   private boolean properLevelChildWord2003(Element element) {
      String grandParentName = getAncestorName(element, 2);
      String parentName = getAncestorName(element, 1);
      String myName = element.getNodeName();

      boolean nonSubsectionBodyChild =
         parentName.equals(BODY_TAG) && !myName.equals(SUB_SECTION_TAG) && !myName.equals(SECTION_TAG);
      boolean sectionChild =
         grandParentName.equals(BODY_TAG) && parentName.equals(SECTION_TAG) && !myName.equals(SUB_SECTION_TAG);
      boolean subsectionChild = parentName.equals(SUB_SECTION_TAG) && !myName.equals(SUB_SECTION_TAG);

      return (nonSubsectionBodyChild || sectionChild || subsectionChild);
   }

   /**
    * Accepts direct children of the body (other than sections and sub-sections) and children of a
    * sub-section that are not sub-sections themselves. Judging by the method name this targets the
    * Word 2007 layout; the code itself does not establish that.
    */
   private boolean properLevelChildWord2007(Element element) {
      String parentName = getAncestorName(element, 1);
      String myName = element.getNodeName();

      boolean nonSubsectionBodyChild =
         parentName.equals(BODY_TAG) && !myName.equals(SUB_SECTION_TAG) && !myName.equals(SECTION_TAG);
      boolean subsectionChild = parentName.equals(SUB_SECTION_TAG) && !myName.equals(SUB_SECTION_TAG);

      return nonSubsectionBodyChild || subsectionChild;
   }
}

Back to the top