001package com.ganteater.ae.util.xml.easyparser; 002 003import java.io.File; 004import java.io.FileInputStream; 005import java.io.IOException; 006import java.io.InputStream; 007import java.net.URL; 008import java.text.ParseException; 009import java.util.ArrayList; 010 011import org.apache.commons.io.IOUtils; 012import org.apache.commons.lang.StringEscapeUtils; 013import org.apache.commons.lang.StringUtils; 014 015/** 016 * @author victort 017 */ 018public class EasyParser { 019 020 final static boolean debug = false; 021 022 public EasyParser() { 023 } 024 025 public Node getObject(InputStream inputStream) throws ParserException { 026 String xml = null; 027 try { 028 xml = IOUtils.toString(inputStream); 029 030 xml = StringUtils.trimToNull(xml); 031 Node object = null; 032 if (xml != null) { 033 if (xml.indexOf("<?xml") == 0) { 034 xml = StringUtils.substringAfter(xml, "?>"); 035 } 036 object = getObject(xml); 037 } 038 return object; 039 040 } catch (Exception e) { 041 throw new ParserException(e); 042 } 043 } 044 045 public Node getObject(String theXML) { 046 047 if (StringUtils.isBlank(theXML)) { 048 return null; 049 } 050 051 int start = -1; 052 do { 053 start++; 054 start = theXML.indexOf("<", start); 055 } while (theXML.charAt(start + 1) == '!' && theXML.charAt(start + 2) == '-'); 056 057 if (start < 0) 058 start = 0; 059 060 theXML = replaceComment(theXML.substring(start)); 061 062 if (StringUtils.contains(theXML, "<")) { 063 theXML = theXML.trim(); 064 } 065 066 if (theXML.charAt(0) != '<' || theXML.charAt(theXML.length() - 1) != '>') { 067 // text block 068 int theEndText = theXML.indexOf('<'); 069 if (theEndText < 0) { 070 theEndText = theXML.length(); 071 } 072 073 Node theNode = new Node(Node.TEXT_TEAG_NAME); 074 String theText = StringEscapeUtils.unescapeXml(theXML.substring(0, theEndText)); 075 theNode.setText(theText); 076 return theNode; 077 } 078 079 theXML = theXML.substring(1); // .trim(); 080 081 int theEndPos; 082 083 for (theEndPos = 0; theEndPos < theXML.length() && theXML.charAt(theEndPos) != ' ' 084 && theXML.charAt(theEndPos) != '\r' && theXML.charAt(theEndPos) != '>' 085 && theXML.charAt(theEndPos) != '\n'; theEndPos++) { 086 if (theXML.charAt(theEndPos) == '/') { 087 Node theNode = new Node(theXML.substring(0, theEndPos)); 088 theNode.setNill(true); 089 return theNode; 090 } 091 } 092 093 if (theXML.charAt(theEndPos - 1) == '>') { 094 theEndPos--; 095 } 096 097 String theTagName = theXML.substring(0, theEndPos); 098 099 theXML = theXML.substring(theEndPos); // .trim(); 100 101 int theTagEndPos = theXML.indexOf('>'); 102 103 boolean theEmptyTag = theTagEndPos != 0 && theXML.charAt(theTagEndPos - 1) == '/'; 104 105 Node theNode = new Node(theTagName); 106 theNode.setNill(theEmptyTag); 107 108 String theAttributeText = theXML.substring(0, theTagEndPos).trim(); 109 parseAtributeText(theNode, theAttributeText); 110 111 theTagEndPos++; 112 113 theXML = theXML.substring(theTagEndPos); // .trim(); 114 115 if (theEmptyTag == false) { 116 117 int theInnerTagEndPos = getEndTagPosition(theTagName, theXML, 1); 118 119 if (theInnerTagEndPos < 0) { 120 throw new RuntimeException("Not found tag: " + "</" + theTagName + ">\nin text:\n" + theXML); 121 } 122 123 String theInnerText = theXML.substring(0, theInnerTagEndPos); 124 if (StringUtils.contains(theInnerText, "<")) { 125 theInnerText = theInnerText.trim(); 126 } 127 128 Node[] theNodeArray = getObjectArray(theInnerText); 129 if (theNodeArray.length > 0) { 130 for (int i = 0; i < theNodeArray.length; i++) { 131 theNode.addInnerTag(theNodeArray[i]); 132 } 133 } 134 } 135 136 return theNode; 137 } 138 139 private String replaceComment(String theXML) { 140 141 int theStartComment = 0; 142 int theEndComment = 0; 143 StringBuffer theBuffer = new StringBuffer(); 144 145 while ((theStartComment = theXML.indexOf("<!--", theEndComment)) >= 0) { 146 theBuffer.append(theXML.substring(theEndComment, theStartComment)); 147 theEndComment = theXML.indexOf("-->", theStartComment) + 3; 148 149 if (theEndComment > 0) { 150 theBuffer.append("<Comment>"); 151 theBuffer.append(theXML.substring(theStartComment + 4, theEndComment - 3).replaceAll("<", "<") 152 .replaceAll(">", ">")); 153 theBuffer.append("</Comment>"); 154 } 155 156 } 157 158 theBuffer.append(theXML.substring(theEndComment)); 159 String s = theBuffer.toString(); 160 theBuffer.setLength(0); 161 theBuffer = null; 162 163 return deleteDoctype(s); 164 165 } 166 167 private static String deleteDoctype(String theXML) { 168 int theStartComment = 0; 169 int theEndComment = 0; 170 StringBuffer theBuffer = new StringBuffer(); 171 172 while ((theStartComment = theXML.indexOf("<!DOCTYPE", theEndComment)) >= 0) { 173 theBuffer.append(theXML.substring(theEndComment, theStartComment)); 174 theEndComment = theXML.indexOf(">", theStartComment) + 1; 175 } 176 177 theBuffer.append(theXML.substring(theEndComment)); 178 String s = theBuffer.toString(); 179 theBuffer.setLength(0); 180 theBuffer = null; 181 182 return s; 183 } 184 185 private int getEndTagPosition(String theTagName, String theXML, int theLevet) { 186 187 int theTagEndPos = 0; 188 int theInnerTagEndPos = 0; 189 int theStart = 0; 190 191 int theCounter = theLevet; 192 193 String theTag = "</" + theTagName + ">"; 194 195 do { 196 theTagEndPos = theXML.indexOf(theTag, theStart); 197 198 theInnerTagEndPos = theXML.indexOf("<" + theTagName + " ", theStart); 199 if (theInnerTagEndPos < 0) { 200 theInnerTagEndPos = theXML.length(); 201 } 202 203 int theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + "/", theStart); 204 if (theInnerTagEndPos1 >= 0) { 205 theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos); 206 } 207 208 theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + ">", theStart); 209 if (theInnerTagEndPos1 >= 0) { 210 theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos); 211 } 212 213 theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + "\r", theStart); 214 if (theInnerTagEndPos1 >= 0) { 215 theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos); 216 } 217 218 theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + "\n", theStart); 219 if (theInnerTagEndPos1 >= 0) { 220 theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos); 221 } 222 223 if (theTagEndPos > theInnerTagEndPos && theInnerTagEndPos >= 0) { 224 // Check for an empty tag. 225 int theEndPbrk = theXML.indexOf(">", theInnerTagEndPos); 226 if (theXML.charAt(theEndPbrk - 1) != '/') { 227 theCounter++; 228 } 229 } else { 230 theCounter--; 231 } 232 233 theStart = Math.min(theTagEndPos, theInnerTagEndPos) + 1; 234 235 if (theTagEndPos < 0) { 236 throw new IllegalArgumentException("The closing tag " + theTag + " is not retrieved. "); 237 } 238 239 } while (theCounter > 0); 240 241 return theTagEndPos; 242 } 243 244 public Node[] getObjectArray(String theXML) { 245 if (theXML == null) { 246 return null; 247 } 248 249 theXML = replaceComment(theXML); 250 ArrayList<Node> theNodeArray = new ArrayList<Node>(); 251 252 // theXML = theXML.trim(); 253 254 while (theXML.length() > 0) { 255 Node theInnerTag = getObject(theXML); 256 if (theInnerTag == null) { 257 break; 258 } 259 theNodeArray.add(theInnerTag); 260 if (theInnerTag.isNill()) { 261 theXML = theXML.substring(theXML.indexOf("/>") + 2); 262 } 263 if (theInnerTag.isNill() == false && theInnerTag.getText() == null) { 264 int theInnerTagEndPos = getEndTagPosition(theInnerTag.getTag(), theXML, 0); 265 String theEndTag = "</" + theInnerTag.getTag() + ">"; 266 267 theXML = theXML.substring(theInnerTagEndPos + theEndTag.length()).trim(); 268 } 269 270 String theText = theInnerTag.getText(); 271 if (theInnerTag.isNill() && theText != null) { 272 int theEndText = theXML.indexOf('<'); 273 if (theEndText < 0) { 274 theEndText = theXML.length(); 275 } 276 theXML = theXML.substring(theEndText); 277 } 278 } 279 280 int theNumberNode = theNodeArray.size(); 281 282 Node[] theResult = new Node[theNumberNode]; 283 284 for (int i = 0; i < theNumberNode; i++) { 285 theResult[i] = (Node) theNodeArray.get(i); 286 } 287 288 return theResult; 289 } 290 291 public void parseAtributeText(Node aNode, String aAttributeText) { 292 293 if (aAttributeText.length() > 0 && aAttributeText.charAt(aAttributeText.length() - 1) == '/') { 294 aAttributeText = aAttributeText.substring(0, aAttributeText.length() - 1); 295 } 296 297 while (aAttributeText.length() > 0) { 298 299 aAttributeText = aAttributeText.trim(); 300 int theEndLine = aAttributeText.indexOf('='); 301 302 try { 303 String theName = aAttributeText.substring(0, theEndLine).trim(); 304 aAttributeText = aAttributeText.substring(theEndLine + 1).trim(); 305 306 char theBChar = aAttributeText.charAt(0); 307 int theBeginValue = 1; 308 int theEndValue = 0; 309 310 if (theBChar == '"' || theBChar == '\'') { 311 theEndValue = aAttributeText.indexOf(theBChar, 1); 312 } else { 313 for (theEndValue = 0; theEndValue < aAttributeText.length() 314 && aAttributeText.charAt(theEndValue) != ' ' 315 && aAttributeText.charAt(theEndValue) != '\r' 316 && aAttributeText.charAt(theEndValue) != '\n'; theEndValue++) { 317 ; 318 } 319 theBeginValue = 0; 320 } 321 322 String theValue = aAttributeText.substring(theBeginValue, theEndValue); 323 theValue = StringEscapeUtils.unescapeXml(theValue); 324 aNode.setAttribute(theName, theValue); 325 326 aAttributeText = aAttributeText.substring(theEndValue + 1); 327 } catch (StringIndexOutOfBoundsException e) { 328 throw new IllegalArgumentException(aAttributeText, e); 329 } 330 } 331 } 332 333 public Node load(String filePath) throws ParserException, ParseException, IOException { 334 if (new File(filePath).exists()) { 335 return new EasyParser().getObject(new File(filePath)); 336 } 337 338 URL entryUrl = new URL(filePath); 339 try (InputStream is = entryUrl.openStream()) { 340 return new EasyParser().getObject(is); 341 } 342 } 343 344 public Node getObject(File theXMLFile) throws IOException { 345 int theLength = (int) theXMLFile.length(); 346 byte[] theBuffer = new byte[theLength]; 347 Node object = null; 348 349 try (FileInputStream theInputStream = new FileInputStream(theXMLFile)) { 350 theInputStream.read(theBuffer); 351 theInputStream.close(); 352 353 String theEncode = "UTF-8"; 354 355 int theEndHead = 0; 356 357 String theDocHead = new String(theBuffer, "UTF-8"); 358 if (theDocHead.indexOf("<?xml") == 0) { 359 theEndHead = theDocHead.indexOf("?>"); 360 Node theHead = getObject(theDocHead.substring(0, theEndHead) + "/>"); 361 theEncode = theHead.getAttribute("encoding"); 362 theEndHead += 2; 363 } 364 365 if (theEncode == null) 366 theEncode = "UTF-8"; 367 String theXML = new String(theBuffer, theEndHead, theBuffer.length - theEndHead, theEncode); 368 theBuffer = null; 369 370 object = getObject(theXML); 371 } 372 373 return object; 374 } 375 376 public static String replaceProperties(String aValue, String aFragment, String aNewFragment) { 377 int theBeginPos = 0; 378 int theEndPos = 0; 379 380 if (aValue == null) { 381 return aValue; 382 } 383 384 StringBuffer theStringBuffer = new StringBuffer(); 385 386 while (true) { 387 theBeginPos = aValue.indexOf(aFragment, theEndPos); 388 if (theBeginPos < 0) { 389 break; 390 } 391 392 theStringBuffer.append(aValue.substring(theEndPos, theBeginPos)); 393 theEndPos = theBeginPos + aFragment.length(); 394 395 theStringBuffer.append(aNewFragment); 396 } 397 398 theStringBuffer.append(aValue.substring(theEndPos)); 399 400 return theStringBuffer.toString(); 401 } 402 403}