001package com.ganteater.ae.util.xml.easyparser;
002
003import java.io.File;
004import java.io.FileInputStream;
005import java.io.IOException;
006import java.io.InputStream;
007import java.io.InputStreamReader;
008import java.net.URL;
009import java.text.ParseException;
010import java.util.ArrayList;
011
012import org.apache.commons.io.IOUtils;
013import org.apache.commons.lang.StringEscapeUtils;
014import org.apache.commons.lang.StringUtils;
015
016/**
017 * @author victort
018 */
019public class EasyParser {
020
021        final static boolean debug = false;
022
023        public EasyParser() {
024        }
025
026        public Node getObject(InputStream aInputStream) throws ParserException {
027                try {
028                        String xml = IOUtils.toString(new InputStreamReader(aInputStream, "UTF-8"));
029
030                        if (xml.indexOf("<?xml") == 0) {
031                                xml = StringUtils.substringAfter(xml, "?>");
032                        }
033
034                        return getObject(xml);
035
036                } catch (Exception e) {
037                        throw new ParserException(e);
038                }
039        }
040
041        public Node getObject(String theXML) {
042
043                if (theXML == null) {
044                        return null;
045                }
046
047                int start = -1;
048                do {
049                        start++;
050                        start = theXML.indexOf("<", start);
051                } while (theXML.charAt(start + 1) == '!' && theXML.charAt(start + 2) == '-');
052
053                if (start < 0)
054                        start = 0;
055
056                theXML = replaceComment(theXML.substring(start));
057
058                if (StringUtils.contains(theXML, "<")) {
059                        theXML = theXML.trim();
060                }
061
062                if (theXML.charAt(0) != '<' || theXML.charAt(theXML.length() - 1) != '>') {
063                        // text block
064                        int theEndText = theXML.indexOf('<');
065                        if (theEndText < 0) {
066                                theEndText = theXML.length();
067                        }
068
069                        Node theNode = new Node(Node.TEXT_TEAG_NAME);
070                        String theText = StringEscapeUtils.unescapeXml(theXML.substring(0, theEndText));
071                        theNode.setText(theText);
072                        return theNode;
073                }
074
075                theXML = theXML.substring(1); // .trim();
076
077                int theEndPos;
078
079                for (theEndPos = 0; theEndPos < theXML.length() && theXML.charAt(theEndPos) != ' '
080                                && theXML.charAt(theEndPos) != '\r' && theXML.charAt(theEndPos) != '>'
081                                && theXML.charAt(theEndPos) != '\n'; theEndPos++) {
082                        if (theXML.charAt(theEndPos) == '/') {
083                                Node theNode = new Node(theXML.substring(0, theEndPos));
084                                theNode.setNill(true);
085                                return theNode;
086                        }
087                }
088
089                if (theXML.charAt(theEndPos - 1) == '>') {
090                        theEndPos--;
091                }
092
093                String theTagName = theXML.substring(0, theEndPos);
094
095                theXML = theXML.substring(theEndPos); // .trim();
096
097                int theTagEndPos = theXML.indexOf('>');
098
099                boolean theEmptyTag = theTagEndPos != 0 && theXML.charAt(theTagEndPos - 1) == '/';
100
101                Node theNode = new Node(theTagName);
102                theNode.setNill(theEmptyTag);
103
104                String theAttributeText = theXML.substring(0, theTagEndPos).trim();
105                parseAtributeText(theNode, theAttributeText);
106
107                theTagEndPos++;
108
109                theXML = theXML.substring(theTagEndPos); // .trim();
110
111                if (theEmptyTag == false) {
112
113                        int theInnerTagEndPos = getEndTagPosition(theTagName, theXML, 1);
114
115                        if (theInnerTagEndPos < 0) {
116                                throw new RuntimeException("Not found tag: " + "</" + theTagName + ">\nin text:\n" + theXML);
117                        }
118
119                        String theInnerText = theXML.substring(0, theInnerTagEndPos);
120                        if (StringUtils.contains(theInnerText, "<")) {
121                                theInnerText = theInnerText.trim();
122                        }
123                        
124                        Node[] theNodeArray = getObjectArray(theInnerText);
125                        if (theNodeArray.length > 0) {
126                                for (int i = 0; i < theNodeArray.length; i++) {
127                                        theNode.addInnerTag(theNodeArray[i]);
128                                }
129                        }
130                }
131
132                return theNode;
133        }
134
135        private String replaceComment(String theXML) {
136
137                int theStartComment = 0;
138                int theEndComment = 0;
139                StringBuffer theBuffer = new StringBuffer();
140
141                while ((theStartComment = theXML.indexOf("<!--", theEndComment)) >= 0) {
142                        theBuffer.append(theXML.substring(theEndComment, theStartComment));
143                        theEndComment = theXML.indexOf("-->", theStartComment) + 3;
144
145                        if (theEndComment > 0) {
146                                theBuffer.append("<Comment>");
147                                theBuffer.append(theXML.substring(theStartComment + 4, theEndComment - 3).replaceAll("<", "&lt;")
148                                                .replaceAll(">", "&gt;"));
149                                theBuffer.append("</Comment>");
150                        }
151
152                }
153
154                theBuffer.append(theXML.substring(theEndComment));
155                String s = theBuffer.toString();
156                theBuffer.setLength(0);
157                theBuffer = null;
158
159                return deleteDoctype(s);
160
161        }
162
163        private static String deleteDoctype(String theXML) {
164                int theStartComment = 0;
165                int theEndComment = 0;
166                StringBuffer theBuffer = new StringBuffer();
167
168                while ((theStartComment = theXML.indexOf("<!DOCTYPE", theEndComment)) >= 0) {
169                        theBuffer.append(theXML.substring(theEndComment, theStartComment));
170                        theEndComment = theXML.indexOf(">", theStartComment) + 1;
171                }
172
173                theBuffer.append(theXML.substring(theEndComment));
174                String s = theBuffer.toString();
175                theBuffer.setLength(0);
176                theBuffer = null;
177
178                return s;
179        }
180
181        private int getEndTagPosition(String theTagName, String theXML, int theLevet) {
182
183                int theTagEndPos = 0;
184                int theInnerTagEndPos = 0;
185                int theStart = 0;
186
187                int theCounter = theLevet;
188
189                String theTag = "</" + theTagName + ">";
190
191                do {
192                        theTagEndPos = theXML.indexOf(theTag, theStart);
193
194                        theInnerTagEndPos = theXML.indexOf("<" + theTagName + " ", theStart);
195                        if (theInnerTagEndPos < 0) {
196                                theInnerTagEndPos = theXML.length();
197                        }
198
199                        int theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + "/", theStart);
200                        if (theInnerTagEndPos1 >= 0) {
201                                theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos);
202                        }
203
204                        theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + ">", theStart);
205                        if (theInnerTagEndPos1 >= 0) {
206                                theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos);
207                        }
208
209                        theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + "\r", theStart);
210                        if (theInnerTagEndPos1 >= 0) {
211                                theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos);
212                        }
213
214                        theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + "\n", theStart);
215                        if (theInnerTagEndPos1 >= 0) {
216                                theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos);
217                        }
218
219                        if (theTagEndPos > theInnerTagEndPos && theInnerTagEndPos >= 0) {
220                                // Check for an empty tag.
221                                int theEndPbrk = theXML.indexOf(">", theInnerTagEndPos);
222                                if (theXML.charAt(theEndPbrk - 1) != '/') {
223                                        theCounter++;
224                                }
225                        } else {
226                                theCounter--;
227                        }
228
229                        theStart = Math.min(theTagEndPos, theInnerTagEndPos) + 1;
230
231                        if (theTagEndPos < 0) {
232                                throw new IllegalArgumentException("The closing tag " + theTag + " is not retrieved. ");
233                        }
234
235                } while (theCounter > 0);
236
237                return theTagEndPos;
238        }
239
240        public Node[] getObjectArray(String theXML) {
241                if (theXML == null) {
242                        return null;
243                }
244
245                theXML = replaceComment(theXML);
246                ArrayList<Node> theNodeArray = new ArrayList<Node>();
247
248                // theXML = theXML.trim();
249
250                while (theXML.length() > 0) {
251
252                        Node theInnerTag = getObject(theXML);
253                        theNodeArray.add(theInnerTag);
254                        if (theInnerTag == null) {
255                                break;
256                        }
257
258                        if (theInnerTag.isNill()) {
259                                theXML = theXML.substring(theXML.indexOf("/>") + 2);
260                        }
261                        if (theInnerTag.isNill() == false && theInnerTag.getText() == null) {
262                                int theInnerTagEndPos = getEndTagPosition(theInnerTag.getTag(), theXML, 0);
263                                String theEndTag = "</" + theInnerTag.getTag() + ">";
264
265                                theXML = theXML.substring(theInnerTagEndPos + theEndTag.length()).trim();
266                        }
267
268                        String theText = theInnerTag.getText();
269                        if (theInnerTag.isNill() && theText != null) {
270                                int theEndText = theXML.indexOf('<');
271                                if (theEndText < 0) {
272                                        theEndText = theXML.length();
273                                }
274                                theXML = theXML.substring(theEndText);
275                        }
276                }
277
278                int theNumberNode = theNodeArray.size();
279
280                Node[] theResult = new Node[theNumberNode];
281
282                for (int i = 0; i < theNumberNode; i++) {
283                        theResult[i] = (Node) theNodeArray.get(i);
284                }
285
286                return theResult;
287        }
288
289        public void parseAtributeText(Node aNode, String aAttributeText) {
290
291                if (aAttributeText.length() > 0 && aAttributeText.charAt(aAttributeText.length() - 1) == '/') {
292                        aAttributeText = aAttributeText.substring(0, aAttributeText.length() - 1);
293                }
294
295                while (aAttributeText.length() > 0) {
296
297                        aAttributeText = aAttributeText.trim();
298                        int theEndLine = aAttributeText.indexOf('=');
299
300                        String theName = aAttributeText.substring(0, theEndLine).trim();
301                        aAttributeText = aAttributeText.substring(theEndLine + 1).trim();
302
303                        char theBChar = aAttributeText.charAt(0);
304                        int theBeginValue = 1;
305                        int theEndValue = 0;
306
307                        if (theBChar == '"' || theBChar == '\'') {
308                                theEndValue = aAttributeText.indexOf(theBChar, 1);
309                        } else {
310                                for (theEndValue = 0; theEndValue < aAttributeText.length() && aAttributeText.charAt(theEndValue) != ' '
311                                                && aAttributeText.charAt(theEndValue) != '\r'
312                                                && aAttributeText.charAt(theEndValue) != '\n'; theEndValue++) {
313                                        ;
314                                }
315                                theBeginValue = 0;
316                        }
317
318                        String theValue = aAttributeText.substring(theBeginValue, theEndValue);
319                        theValue = StringEscapeUtils.unescapeXml(theValue);
320                        aNode.setAttribute(theName, theValue);
321                        aAttributeText = aAttributeText.substring(theEndValue + 1);
322                }
323        }
324
325        public Node load(String filePath) throws ParserException, ParseException, IOException {
326                if (new File(filePath).exists()) {
327                        return new EasyParser().getObject(new File(filePath));
328                }
329
330                URL entryUrl = new URL(filePath);
331                InputStream is = entryUrl.openStream();
332                return new EasyParser().getObject(is);
333        }
334
335        public Node getObject(File theXMLFile) throws IOException {
336                int theLength = (int) theXMLFile.length();
337                byte[] theBuffer = new byte[theLength];
338                Node object = null;
339
340                try (FileInputStream theInputStream = new FileInputStream(theXMLFile)) {
341                        theInputStream.read(theBuffer);
342                        theInputStream.close();
343
344                        String theEncode = "UTF-8";
345
346                        int theEndHead = 0;
347
348                        String theDocHead = new String(theBuffer, "UTF-8");
349                        if (theDocHead.indexOf("<?xml") == 0) {
350                                theEndHead = theDocHead.indexOf("?>");
351                                Node theHead = getObject(theDocHead.substring(0, theEndHead) + "/>");
352                                theEncode = theHead.getAttribute("encoding");
353                                theEndHead += 2;
354                        }
355
356                        if (theEncode == null)
357                                theEncode = "UTF-8";
358                        String theXML = new String(theBuffer, theEndHead, theBuffer.length - theEndHead, theEncode);
359                        theBuffer = null;
360
361                        object = getObject(theXML);
362                }
363
364                return object;
365        }
366
367        public static String replaceProperties(String aValue, String aFragment, String aNewFragment) {
368                int theBeginPos = 0;
369                int theEndPos = 0;
370
371                if (aValue == null) {
372                        return aValue;
373                }
374
375                StringBuffer theStringBuffer = new StringBuffer();
376
377                while (true) {
378                        theBeginPos = aValue.indexOf(aFragment, theEndPos);
379                        if (theBeginPos < 0) {
380                                break;
381                        }
382
383                        theStringBuffer.append(aValue.substring(theEndPos, theBeginPos));
384                        theEndPos = theBeginPos + aFragment.length();
385
386                        theStringBuffer.append(aNewFragment);
387                }
388
389                theStringBuffer.append(aValue.substring(theEndPos));
390
391                return theStringBuffer.toString();
392        }
393
394}