001package com.ganteater.ae.util.xml.easyparser;
002
003import java.io.File;
004import java.io.FileInputStream;
005import java.io.IOException;
006import java.io.InputStream;
007import java.net.URL;
008import java.text.ParseException;
009import java.util.ArrayList;
010
011import org.apache.commons.io.IOUtils;
012import org.apache.commons.lang.StringEscapeUtils;
013import org.apache.commons.lang.StringUtils;
014
015/**
016 * @author victort
017 */
018public class EasyParser {
019
020        final static boolean debug = false;
021
022        public EasyParser() {
023        }
024
025        public Node getObject(InputStream inputStream) throws ParserException {
026                String xml = null;
027                try {
028                        xml = IOUtils.toString(inputStream);
029
030                        xml = StringUtils.trimToNull(xml);
031                        Node object = null;
032                        if (xml != null) {
033                                if (xml.indexOf("<?xml") == 0) {
034                                        xml = StringUtils.substringAfter(xml, "?>");
035                                }
036                                object = getObject(xml);
037                        }
038                        return object;
039
040                } catch (Exception e) {
041                        throw new ParserException(e);
042                }
043        }
044
045        public Node getObject(String theXML) {
046
047                if (StringUtils.isBlank(theXML)) {
048                        return null;
049                }
050
051                int start = -1;
052                do {
053                        start++;
054                        start = theXML.indexOf("<", start);
055                } while (theXML.charAt(start + 1) == '!' && theXML.charAt(start + 2) == '-');
056
057                if (start < 0)
058                        start = 0;
059
060                theXML = replaceComment(theXML.substring(start));
061
062                if (StringUtils.contains(theXML, "<")) {
063                        theXML = theXML.trim();
064                }
065
066                if (theXML.charAt(0) != '<' || theXML.charAt(theXML.length() - 1) != '>') {
067                        // text block
068                        int theEndText = theXML.indexOf('<');
069                        if (theEndText < 0) {
070                                theEndText = theXML.length();
071                        }
072
073                        Node theNode = new Node(Node.TEXT_TEAG_NAME);
074                        String theText = StringEscapeUtils.unescapeXml(theXML.substring(0, theEndText));
075                        theNode.setText(theText);
076                        return theNode;
077                }
078
079                theXML = theXML.substring(1); // .trim();
080
081                int theEndPos;
082
083                for (theEndPos = 0; theEndPos < theXML.length() && theXML.charAt(theEndPos) != ' '
084                                && theXML.charAt(theEndPos) != '\r' && theXML.charAt(theEndPos) != '>'
085                                && theXML.charAt(theEndPos) != '\n'; theEndPos++) {
086                        if (theXML.charAt(theEndPos) == '/') {
087                                Node theNode = new Node(theXML.substring(0, theEndPos));
088                                theNode.setNill(true);
089                                return theNode;
090                        }
091                }
092
093                if (theXML.charAt(theEndPos - 1) == '>') {
094                        theEndPos--;
095                }
096
097                String theTagName = theXML.substring(0, theEndPos);
098
099                theXML = theXML.substring(theEndPos); // .trim();
100
101                int theTagEndPos = theXML.indexOf('>');
102
103                boolean theEmptyTag = theTagEndPos != 0 && theXML.charAt(theTagEndPos - 1) == '/';
104
105                Node theNode = new Node(theTagName);
106                theNode.setNill(theEmptyTag);
107
108                String theAttributeText = theXML.substring(0, theTagEndPos).trim();
109                parseAtributeText(theNode, theAttributeText);
110
111                theTagEndPos++;
112
113                theXML = theXML.substring(theTagEndPos); // .trim();
114
115                if (theEmptyTag == false) {
116
117                        int theInnerTagEndPos = getEndTagPosition(theTagName, theXML, 1);
118
119                        if (theInnerTagEndPos < 0) {
120                                throw new RuntimeException("Not found tag: " + "</" + theTagName + ">\nin text:\n" + theXML);
121                        }
122
123                        String theInnerText = theXML.substring(0, theInnerTagEndPos);
124                        if (StringUtils.contains(theInnerText, "<")) {
125                                theInnerText = theInnerText.trim();
126                        }
127
128                        Node[] theNodeArray = getObjectArray(theInnerText);
129                        if (theNodeArray.length > 0) {
130                                for (int i = 0; i < theNodeArray.length; i++) {
131                                        theNode.addInnerTag(theNodeArray[i]);
132                                }
133                        }
134                }
135
136                return theNode;
137        }
138
139        private String replaceComment(String theXML) {
140
141                int theStartComment = 0;
142                int theEndComment = 0;
143                StringBuffer theBuffer = new StringBuffer();
144
145                while ((theStartComment = theXML.indexOf("<!--", theEndComment)) >= 0) {
146                        theBuffer.append(theXML.substring(theEndComment, theStartComment));
147                        theEndComment = theXML.indexOf("-->", theStartComment) + 3;
148
149                        if (theEndComment > 0) {
150                                theBuffer.append("<Comment>");
151                                theBuffer.append(theXML.substring(theStartComment + 4, theEndComment - 3).replaceAll("<", "&lt;")
152                                                .replaceAll(">", "&gt;"));
153                                theBuffer.append("</Comment>");
154                        }
155
156                }
157
158                theBuffer.append(theXML.substring(theEndComment));
159                String s = theBuffer.toString();
160                theBuffer.setLength(0);
161                theBuffer = null;
162
163                return deleteDoctype(s);
164
165        }
166
167        private static String deleteDoctype(String theXML) {
168                int theStartComment = 0;
169                int theEndComment = 0;
170                StringBuffer theBuffer = new StringBuffer();
171
172                while ((theStartComment = theXML.indexOf("<!DOCTYPE", theEndComment)) >= 0) {
173                        theBuffer.append(theXML.substring(theEndComment, theStartComment));
174                        theEndComment = theXML.indexOf(">", theStartComment) + 1;
175                }
176
177                theBuffer.append(theXML.substring(theEndComment));
178                String s = theBuffer.toString();
179                theBuffer.setLength(0);
180                theBuffer = null;
181
182                return s;
183        }
184
185        private int getEndTagPosition(String theTagName, String theXML, int theLevet) {
186
187                int theTagEndPos = 0;
188                int theInnerTagEndPos = 0;
189                int theStart = 0;
190
191                int theCounter = theLevet;
192
193                String theTag = "</" + theTagName + ">";
194
195                do {
196                        theTagEndPos = theXML.indexOf(theTag, theStart);
197
198                        theInnerTagEndPos = theXML.indexOf("<" + theTagName + " ", theStart);
199                        if (theInnerTagEndPos < 0) {
200                                theInnerTagEndPos = theXML.length();
201                        }
202
203                        int theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + "/", theStart);
204                        if (theInnerTagEndPos1 >= 0) {
205                                theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos);
206                        }
207
208                        theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + ">", theStart);
209                        if (theInnerTagEndPos1 >= 0) {
210                                theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos);
211                        }
212
213                        theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + "\r", theStart);
214                        if (theInnerTagEndPos1 >= 0) {
215                                theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos);
216                        }
217
218                        theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + "\n", theStart);
219                        if (theInnerTagEndPos1 >= 0) {
220                                theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos);
221                        }
222
223                        if (theTagEndPos > theInnerTagEndPos && theInnerTagEndPos >= 0) {
224                                // Check for an empty tag.
225                                int theEndPbrk = theXML.indexOf(">", theInnerTagEndPos);
226                                if (theXML.charAt(theEndPbrk - 1) != '/') {
227                                        theCounter++;
228                                }
229                        } else {
230                                theCounter--;
231                        }
232
233                        theStart = Math.min(theTagEndPos, theInnerTagEndPos) + 1;
234
235                        if (theTagEndPos < 0) {
236                                throw new IllegalArgumentException("The closing tag " + theTag + " is not retrieved. ");
237                        }
238
239                } while (theCounter > 0);
240
241                return theTagEndPos;
242        }
243
244        public Node[] getObjectArray(String theXML) {
245                if (theXML == null) {
246                        return null;
247                }
248
249                theXML = replaceComment(theXML);
250                ArrayList<Node> theNodeArray = new ArrayList<Node>();
251
252                // theXML = theXML.trim();
253
254                while (theXML.length() > 0) {
255                        Node theInnerTag = getObject(theXML);
256                        if (theInnerTag == null) {
257                                break;
258                        }
259                        theNodeArray.add(theInnerTag);
260                        if (theInnerTag.isNill()) {
261                                theXML = theXML.substring(theXML.indexOf("/>") + 2);
262                        }
263                        if (theInnerTag.isNill() == false && theInnerTag.getText() == null) {
264                                int theInnerTagEndPos = getEndTagPosition(theInnerTag.getTag(), theXML, 0);
265                                String theEndTag = "</" + theInnerTag.getTag() + ">";
266
267                                theXML = theXML.substring(theInnerTagEndPos + theEndTag.length()).trim();
268                        }
269
270                        String theText = theInnerTag.getText();
271                        if (theInnerTag.isNill() && theText != null) {
272                                int theEndText = theXML.indexOf('<');
273                                if (theEndText < 0) {
274                                        theEndText = theXML.length();
275                                }
276                                theXML = theXML.substring(theEndText);
277                        }
278                }
279
280                int theNumberNode = theNodeArray.size();
281
282                Node[] theResult = new Node[theNumberNode];
283
284                for (int i = 0; i < theNumberNode; i++) {
285                        theResult[i] = (Node) theNodeArray.get(i);
286                }
287
288                return theResult;
289        }
290
291        public void parseAtributeText(Node aNode, String aAttributeText) {
292
293                if (aAttributeText.length() > 0 && aAttributeText.charAt(aAttributeText.length() - 1) == '/') {
294                        aAttributeText = aAttributeText.substring(0, aAttributeText.length() - 1);
295                }
296
297                while (aAttributeText.length() > 0) {
298
299                        aAttributeText = aAttributeText.trim();
300                        int theEndLine = aAttributeText.indexOf('=');
301
302                        try {
303                                String theName = aAttributeText.substring(0, theEndLine).trim();
304                                aAttributeText = aAttributeText.substring(theEndLine + 1).trim();
305
306                                char theBChar = aAttributeText.charAt(0);
307                                int theBeginValue = 1;
308                                int theEndValue = 0;
309
310                                if (theBChar == '"' || theBChar == '\'') {
311                                        theEndValue = aAttributeText.indexOf(theBChar, 1);
312                                } else {
313                                        for (theEndValue = 0; theEndValue < aAttributeText.length()
314                                                        && aAttributeText.charAt(theEndValue) != ' '
315                                                        && aAttributeText.charAt(theEndValue) != '\r'
316                                                        && aAttributeText.charAt(theEndValue) != '\n'; theEndValue++) {
317                                                ;
318                                        }
319                                        theBeginValue = 0;
320                                }
321
322                                String theValue = aAttributeText.substring(theBeginValue, theEndValue);
323                                theValue = StringEscapeUtils.unescapeXml(theValue);
324                                aNode.setAttribute(theName, theValue);
325
326                                aAttributeText = aAttributeText.substring(theEndValue + 1);
327                        } catch (StringIndexOutOfBoundsException e) {
328                                throw new IllegalArgumentException(aAttributeText, e);
329                        }
330                }
331        }
332
333        public Node load(String filePath) throws ParserException, ParseException, IOException {
334                if (new File(filePath).exists()) {
335                        return new EasyParser().getObject(new File(filePath));
336                }
337
338                URL entryUrl = new URL(filePath);
339                try (InputStream is = entryUrl.openStream()) {
340                        return new EasyParser().getObject(is);
341                }
342        }
343
344        public Node getObject(File theXMLFile) throws IOException {
345                int theLength = (int) theXMLFile.length();
346                byte[] theBuffer = new byte[theLength];
347                Node object = null;
348
349                try (FileInputStream theInputStream = new FileInputStream(theXMLFile)) {
350                        theInputStream.read(theBuffer);
351                        theInputStream.close();
352
353                        String theEncode = "UTF-8";
354
355                        int theEndHead = 0;
356
357                        String theDocHead = new String(theBuffer, "UTF-8");
358                        if (theDocHead.indexOf("<?xml") == 0) {
359                                theEndHead = theDocHead.indexOf("?>");
360                                Node theHead = getObject(theDocHead.substring(0, theEndHead) + "/>");
361                                theEncode = theHead.getAttribute("encoding");
362                                theEndHead += 2;
363                        }
364
365                        if (theEncode == null)
366                                theEncode = "UTF-8";
367                        String theXML = new String(theBuffer, theEndHead, theBuffer.length - theEndHead, theEncode);
368                        theBuffer = null;
369
370                        object = getObject(theXML);
371                }
372
373                return object;
374        }
375
376        public static String replaceProperties(String aValue, String aFragment, String aNewFragment) {
377                int theBeginPos = 0;
378                int theEndPos = 0;
379
380                if (aValue == null) {
381                        return aValue;
382                }
383
384                StringBuffer theStringBuffer = new StringBuffer();
385
386                while (true) {
387                        theBeginPos = aValue.indexOf(aFragment, theEndPos);
388                        if (theBeginPos < 0) {
389                                break;
390                        }
391
392                        theStringBuffer.append(aValue.substring(theEndPos, theBeginPos));
393                        theEndPos = theBeginPos + aFragment.length();
394
395                        theStringBuffer.append(aNewFragment);
396                }
397
398                theStringBuffer.append(aValue.substring(theEndPos));
399
400                return theStringBuffer.toString();
401        }
402
403}