Skip to main content
summaryrefslogtreecommitdiffstats
blob: d2dd6451e1cdb60d540708f8d187b8f25af31169 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
/*******************************************************************************
 * Copyright (c) 2013 Boeing.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     Boeing - initial API and implementation
 *******************************************************************************/
package org.eclipse.osee.coverage.help.ui.util;

import java.io.BufferedInputStream;
import java.io.InputStream;
import java.net.URL;
import java.util.HashSet;
import java.util.Set;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.eclipse.osee.framework.jdk.core.util.Lib;
import org.eclipse.osee.framework.jdk.core.util.Strings;

;

/**
 * Collects the local resources referenced by an HTML page.
 * <p>
 * The page is read with a StAX {@link XMLStreamReader}, so it has to be well-formed XML. Every non-empty
 * <code>href</code> or <code>src</code> attribute found on an element other than <code>link</code> is collected,
 * unless its value contains <code>"://"</code>. Collected values are prefixed with a path derived from the page URL
 * and the path hint given at construction time.
 *
 * @author Angel Avila
 */
public class HtmlParser {

   // NOTE(review): the factory is used with its default settings. If pages can come from an untrusted source,
   // consider disabling DTD / external entity support - after confirming the pages do not rely on DTD-defined
   // entities.
   private static final XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();

   private static final String LINK_NODE = "link";
   private static final String HREF_TAG = "href";
   private static final String SRC_TAG = "src";

   private final String pathHint;

   /**
    * @param pathHint the URL path segment after which the reference prefix starts; segments up to and including the
    * first one equal to this value are dropped from the prefix
    */
   public HtmlParser(String pathHint) {
      this.pathHint = pathHint;
   }

   /**
    * Builds the prefix that is put in front of every reference found in a page.
    *
    * @param fullPath the page location, with segments separated by <code>'/'</code>
    * @return the segments that follow the first segment equal to the path hint, each followed by <code>'/'</code>;
    * segments ending in <code>".html"</code> are left out. Empty when no segment equals the path hint.
    */
   private String getPath(String fullPath) {
      StringBuilder builder = new StringBuilder();

      boolean found = false;
      for (String part : fullPath.split("/")) {
         if (found && !part.endsWith(".html")) {
            builder.append(part).append("/");
         }
         // Checked after the append so that the hint segment itself is not part of the prefix.
         if (pathHint.equals(part)) {
            found = true;
         }
      }
      return builder.toString();
   }

   /**
    * Reads the page at the given location and returns the local resources it references.
    *
    * @param url location of the page to read
    * @return the prefixed, normalized references found in the page; never <code>null</code>
    * @throws Exception if the page cannot be opened or is not well-formed XML
    */
   public Set<String> parse(URL url) throws Exception {
      Set<String> entries = new HashSet<String>();

      String pathPrefix = getPath(url.toString());

      InputStream inputStream = null;
      XMLStreamReader streamReader = null;
      try {
         inputStream = new BufferedInputStream(url.openStream());
         streamReader = xmlInputFactory.createXMLStreamReader(inputStream);
         // The current event is handled before advancing; the final END_DOCUMENT event carries no attributes, so
         // not handling it loses nothing.
         while (streamReader.hasNext()) {
            process(streamReader, pathPrefix, entries);
            streamReader.next();
         }
      } finally {
         // XMLStreamReader.close() does not close the underlying stream, so both have to be released.
         closeQuietly(streamReader);
         Lib.close(inputStream);
      }

      return entries;
   }

   /**
    * Releases the reader's resources without letting a failure to do so replace the outcome of parsing.
    *
    * @param reader the reader to close; may be <code>null</code>
    */
   private static void closeQuietly(XMLStreamReader reader) {
      if (reader != null) {
         try {
            reader.close();
         } catch (XMLStreamException ignored) {
            // Best-effort cleanup: nothing useful can be done if releasing the reader fails.
         }
      }
   }

   /**
    * Handles the reader's current event: for a start element other than <code>link</code>, collects every non-empty
    * <code>href</code> or <code>src</code> attribute value.
    *
    * @param reader reader positioned on the event to handle
    * @param pathPrefix prefix put in front of every collected reference
    * @param entries set receiving the collected references
    */
   private void process(XMLStreamReader reader, String pathPrefix, Set<String> entries) {
      if (reader.getEventType() != XMLStreamConstants.START_ELEMENT) {
         return;
      }
      // Attributes of <link> elements are never collected, so there is no need to look at them.
      if (LINK_NODE.equals(reader.getLocalName())) {
         return;
      }

      int attributeCount = reader.getAttributeCount();
      for (int index = 0; index < attributeCount; index++) {
         String attributeName = reader.getAttributeLocalName(index);
         String value = reader.getAttributeValue(index);

         boolean isReference = HREF_TAG.equals(attributeName) || SRC_TAG.equals(attributeName);
         if (isReference && Strings.isValid(value)) {
            processResource(pathPrefix, entries, value);
         }
      }
   }

   /**
    * Adds the normalized form of a reference to the given set, unless the reference is an external link.
    *
    * @param pathPrefix prefix put in front of the reference
    * @param references set receiving the reference
    * @param value raw attribute value
    */
   private void processResource(String pathPrefix, Set<String> references, String value) {
      if (!isExternalLink(value)) {
         references.add(normalizePath(pathPrefix, value));
      }
   }

   /**
    * Strips everything from the first <code>".html#"</code> onwards down to <code>".html"</code> and prepends the
    * prefix.
    *
    * @param pathPrefix prefix to prepend
    * @param reference raw reference
    * @return the prefixed reference without its in-page anchor
    */
   private String normalizePath(String pathPrefix, String reference) {
      String path = reference.replaceAll("\\.html#.*", ".html");
      return String.format("%s%s", pathPrefix, path);
   }

   /**
    * @param resource raw reference
    * @return <code>true</code> if the reference contains <code>"://"</code>
    */
   private boolean isExternalLink(String resource) {
      return resource.contains("://");
   }
}

Back to the top