001package com.ganteater.ae.util.xml.easyparser; 002 003import java.io.File; 004import java.io.FileInputStream; 005import java.io.IOException; 006import java.io.InputStream; 007import java.io.InputStreamReader; 008import java.net.URL; 009import java.text.ParseException; 010import java.util.ArrayList; 011 012import org.apache.commons.io.IOUtils; 013import org.apache.commons.lang.StringEscapeUtils; 014import org.apache.commons.lang.StringUtils; 015 016/** 017 * @author victort 018 */ 019public class EasyParser { 020 021 final static boolean debug = false; 022 023 public EasyParser() { 024 } 025 026 public Node getObject(InputStream aInputStream) throws ParserException { 027 try { 028 String xml = IOUtils.toString(new InputStreamReader(aInputStream, "UTF-8")); 029 030 if (xml.indexOf("<?xml") == 0) { 031 xml = StringUtils.substringAfter(xml, "?>"); 032 } 033 034 return getObject(xml); 035 036 } catch (Exception e) { 037 throw new ParserException(e); 038 } 039 } 040 041 public Node getObject(String theXML) { 042 043 if (theXML == null) { 044 return null; 045 } 046 047 int start = -1; 048 do { 049 start++; 050 start = theXML.indexOf("<", start); 051 } while (theXML.charAt(start + 1) == '!' && theXML.charAt(start + 2) == '-'); 052 053 if (start < 0) 054 start = 0; 055 056 theXML = replaceComment(theXML.substring(start)); 057 058 if (StringUtils.contains(theXML, "<")) { 059 theXML = theXML.trim(); 060 } 061 062 if (theXML.charAt(0) != '<' || theXML.charAt(theXML.length() - 1) != '>') { 063 // text block 064 int theEndText = theXML.indexOf('<'); 065 if (theEndText < 0) { 066 theEndText = theXML.length(); 067 } 068 069 Node theNode = new Node(Node.TEXT_TEAG_NAME); 070 String theText = StringEscapeUtils.unescapeXml(theXML.substring(0, theEndText)); 071 theNode.setText(theText); 072 return theNode; 073 } 074 075 theXML = theXML.substring(1); // .trim(); 076 077 int theEndPos; 078 079 for (theEndPos = 0; theEndPos < theXML.length() && theXML.charAt(theEndPos) != ' ' 080 && theXML.charAt(theEndPos) != '\r' && theXML.charAt(theEndPos) != '>' 081 && theXML.charAt(theEndPos) != '\n'; theEndPos++) { 082 if (theXML.charAt(theEndPos) == '/') { 083 Node theNode = new Node(theXML.substring(0, theEndPos)); 084 theNode.setNill(true); 085 return theNode; 086 } 087 } 088 089 if (theXML.charAt(theEndPos - 1) == '>') { 090 theEndPos--; 091 } 092 093 String theTagName = theXML.substring(0, theEndPos); 094 095 theXML = theXML.substring(theEndPos); // .trim(); 096 097 int theTagEndPos = theXML.indexOf('>'); 098 099 boolean theEmptyTag = theTagEndPos != 0 && theXML.charAt(theTagEndPos - 1) == '/'; 100 101 Node theNode = new Node(theTagName); 102 theNode.setNill(theEmptyTag); 103 104 String theAttributeText = theXML.substring(0, theTagEndPos).trim(); 105 parseAtributeText(theNode, theAttributeText); 106 107 theTagEndPos++; 108 109 theXML = theXML.substring(theTagEndPos); // .trim(); 110 111 if (theEmptyTag == false) { 112 113 int theInnerTagEndPos = getEndTagPosition(theTagName, theXML, 1); 114 115 if (theInnerTagEndPos < 0) { 116 throw new RuntimeException("Not found tag: " + "</" + theTagName + ">\nin text:\n" + theXML); 117 } 118 119 String theInnerText = theXML.substring(0, theInnerTagEndPos); 120 if (StringUtils.contains(theInnerText, "<")) { 121 theInnerText = theInnerText.trim(); 122 } 123 124 Node[] theNodeArray = getObjectArray(theInnerText); 125 if (theNodeArray.length > 0) { 126 for (int i = 0; i < theNodeArray.length; i++) { 127 theNode.addInnerTag(theNodeArray[i]); 128 } 129 } 130 } 131 132 return theNode; 133 } 134 135 private String replaceComment(String theXML) { 136 137 int theStartComment = 0; 138 int theEndComment = 0; 139 StringBuffer theBuffer = new StringBuffer(); 140 141 while ((theStartComment = theXML.indexOf("<!--", theEndComment)) >= 0) { 142 theBuffer.append(theXML.substring(theEndComment, theStartComment)); 143 theEndComment = theXML.indexOf("-->", theStartComment) + 3; 144 145 if (theEndComment > 0) { 146 theBuffer.append("<Comment>"); 147 theBuffer.append(theXML.substring(theStartComment + 4, theEndComment - 3).replaceAll("<", "<") 148 .replaceAll(">", ">")); 149 theBuffer.append("</Comment>"); 150 } 151 152 } 153 154 theBuffer.append(theXML.substring(theEndComment)); 155 String s = theBuffer.toString(); 156 theBuffer.setLength(0); 157 theBuffer = null; 158 159 return deleteDoctype(s); 160 161 } 162 163 private static String deleteDoctype(String theXML) { 164 int theStartComment = 0; 165 int theEndComment = 0; 166 StringBuffer theBuffer = new StringBuffer(); 167 168 while ((theStartComment = theXML.indexOf("<!DOCTYPE", theEndComment)) >= 0) { 169 theBuffer.append(theXML.substring(theEndComment, theStartComment)); 170 theEndComment = theXML.indexOf(">", theStartComment) + 1; 171 } 172 173 theBuffer.append(theXML.substring(theEndComment)); 174 String s = theBuffer.toString(); 175 theBuffer.setLength(0); 176 theBuffer = null; 177 178 return s; 179 } 180 181 private int getEndTagPosition(String theTagName, String theXML, int theLevet) { 182 183 int theTagEndPos = 0; 184 int theInnerTagEndPos = 0; 185 int theStart = 0; 186 187 int theCounter = theLevet; 188 189 String theTag = "</" + theTagName + ">"; 190 191 do { 192 theTagEndPos = theXML.indexOf(theTag, theStart); 193 194 theInnerTagEndPos = theXML.indexOf("<" + theTagName + " ", theStart); 195 if (theInnerTagEndPos < 0) { 196 theInnerTagEndPos = theXML.length(); 197 } 198 199 int theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + "/", theStart); 200 if (theInnerTagEndPos1 >= 0) { 201 theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos); 202 } 203 204 theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + ">", theStart); 205 if (theInnerTagEndPos1 >= 0) { 206 theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos); 207 } 208 209 theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + "\r", theStart); 210 if (theInnerTagEndPos1 >= 0) { 211 theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos); 212 } 213 214 theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + "\n", theStart); 215 if (theInnerTagEndPos1 >= 0) { 216 theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos); 217 } 218 219 if (theTagEndPos > theInnerTagEndPos && theInnerTagEndPos >= 0) { 220 // Check for an empty tag. 221 int theEndPbrk = theXML.indexOf(">", theInnerTagEndPos); 222 if (theXML.charAt(theEndPbrk - 1) != '/') { 223 theCounter++; 224 } 225 } else { 226 theCounter--; 227 } 228 229 theStart = Math.min(theTagEndPos, theInnerTagEndPos) + 1; 230 231 if (theTagEndPos < 0) { 232 throw new IllegalArgumentException("The closing tag " + theTag + " is not retrieved. "); 233 } 234 235 } while (theCounter > 0); 236 237 return theTagEndPos; 238 } 239 240 public Node[] getObjectArray(String theXML) { 241 if (theXML == null) { 242 return null; 243 } 244 245 theXML = replaceComment(theXML); 246 ArrayList<Node> theNodeArray = new ArrayList<Node>(); 247 248 // theXML = theXML.trim(); 249 250 while (theXML.length() > 0) { 251 252 Node theInnerTag = getObject(theXML); 253 theNodeArray.add(theInnerTag); 254 if (theInnerTag == null) { 255 break; 256 } 257 258 if (theInnerTag.isNill()) { 259 theXML = theXML.substring(theXML.indexOf("/>") + 2); 260 } 261 if (theInnerTag.isNill() == false && theInnerTag.getText() == null) { 262 int theInnerTagEndPos = getEndTagPosition(theInnerTag.getTag(), theXML, 0); 263 String theEndTag = "</" + theInnerTag.getTag() + ">"; 264 265 theXML = theXML.substring(theInnerTagEndPos + theEndTag.length()).trim(); 266 } 267 268 String theText = theInnerTag.getText(); 269 if (theInnerTag.isNill() && theText != null) { 270 int theEndText = theXML.indexOf('<'); 271 if (theEndText < 0) { 272 theEndText = theXML.length(); 273 } 274 theXML = theXML.substring(theEndText); 275 } 276 } 277 278 int theNumberNode = theNodeArray.size(); 279 280 Node[] theResult = new Node[theNumberNode]; 281 282 for (int i = 0; i < theNumberNode; i++) { 283 theResult[i] = (Node) theNodeArray.get(i); 284 } 285 286 return theResult; 287 } 288 289 public void parseAtributeText(Node aNode, String aAttributeText) { 290 291 if (aAttributeText.length() > 0 && aAttributeText.charAt(aAttributeText.length() - 1) == '/') { 292 aAttributeText = aAttributeText.substring(0, aAttributeText.length() - 1); 293 } 294 295 while (aAttributeText.length() > 0) { 296 297 aAttributeText = aAttributeText.trim(); 298 int theEndLine = aAttributeText.indexOf('='); 299 300 String theName = aAttributeText.substring(0, theEndLine).trim(); 301 aAttributeText = aAttributeText.substring(theEndLine + 1).trim(); 302 303 char theBChar = aAttributeText.charAt(0); 304 int theBeginValue = 1; 305 int theEndValue = 0; 306 307 if (theBChar == '"' || theBChar == '\'') { 308 theEndValue = aAttributeText.indexOf(theBChar, 1); 309 } else { 310 for (theEndValue = 0; theEndValue < aAttributeText.length() && aAttributeText.charAt(theEndValue) != ' ' 311 && aAttributeText.charAt(theEndValue) != '\r' 312 && aAttributeText.charAt(theEndValue) != '\n'; theEndValue++) { 313 ; 314 } 315 theBeginValue = 0; 316 } 317 318 String theValue = aAttributeText.substring(theBeginValue, theEndValue); 319 theValue = StringEscapeUtils.unescapeXml(theValue); 320 aNode.setAttribute(theName, theValue); 321 aAttributeText = aAttributeText.substring(theEndValue + 1); 322 } 323 } 324 325 public Node load(String filePath) throws ParserException, ParseException, IOException { 326 if (new File(filePath).exists()) { 327 return new EasyParser().getObject(new File(filePath)); 328 } 329 330 URL entryUrl = new URL(filePath); 331 InputStream is = entryUrl.openStream(); 332 return new EasyParser().getObject(is); 333 } 334 335 public Node getObject(File theXMLFile) throws IOException { 336 int theLength = (int) theXMLFile.length(); 337 byte[] theBuffer = new byte[theLength]; 338 Node object = null; 339 340 try (FileInputStream theInputStream = new FileInputStream(theXMLFile)) { 341 theInputStream.read(theBuffer); 342 theInputStream.close(); 343 344 String theEncode = "UTF-8"; 345 346 int theEndHead = 0; 347 348 String theDocHead = new String(theBuffer, "UTF-8"); 349 if (theDocHead.indexOf("<?xml") == 0) { 350 theEndHead = theDocHead.indexOf("?>"); 351 Node theHead = getObject(theDocHead.substring(0, theEndHead) + "/>"); 352 theEncode = theHead.getAttribute("encoding"); 353 theEndHead += 2; 354 } 355 356 if (theEncode == null) 357 theEncode = "UTF-8"; 358 String theXML = new String(theBuffer, theEndHead, theBuffer.length - theEndHead, theEncode); 359 theBuffer = null; 360 361 object = getObject(theXML); 362 } 363 364 return object; 365 } 366 367 public static String replaceProperties(String aValue, String aFragment, String aNewFragment) { 368 int theBeginPos = 0; 369 int theEndPos = 0; 370 371 if (aValue == null) { 372 return aValue; 373 } 374 375 StringBuffer theStringBuffer = new StringBuffer(); 376 377 while (true) { 378 theBeginPos = aValue.indexOf(aFragment, theEndPos); 379 if (theBeginPos < 0) { 380 break; 381 } 382 383 theStringBuffer.append(aValue.substring(theEndPos, theBeginPos)); 384 theEndPos = theBeginPos + aFragment.length(); 385 386 theStringBuffer.append(aNewFragment); 387 } 388 389 theStringBuffer.append(aValue.substring(theEndPos)); 390 391 return theStringBuffer.toString(); 392 } 393 394}