aspose file tools*
The moose likes XML and Related Technologies and the fly likes Explanation for Code : Parsing XML using Java Big Moose Saloon
  Search | Java FAQ | Recent Topics | Flagged Topics | Hot Topics | Zero Replies
Register / Login
JavaRanch » Java Forums » Engineering » XML and Related Technologies
Bookmark "Explanation for Code : Parsing XML using Java" Watch "Explanation for Code : Parsing XML using Java" New topic
Author

Explanation for Code : Parsing XML using Java

Raghuraman Muthuswamy
Ranch Hand

Joined: Mar 18, 2003
Posts: 73
This is Java code that does the parsing. Can somebody explain to me how the code works? Thanks.

Raghu

import java.io.*;
import java.util.*;

/**
 * Hand-rolled tokenizer for a single-line XML "packet".
 *
 * <p>{@link #parse(String)} walks the packet character by character, records
 * every tag it meets and, for a start tag that is directly followed by text,
 * the text up to the next tag. The result is exposed through
 * {@link #Retparse()} as a vector with one entry per tag, in document order.
 * Each entry is itself a vector:
 * <ol>
 *   <li>element 0 - the tag name as a String, normalised to {@code <name>},
 *       {@code </name>} or {@code <name />} (attributes stripped);</li>
 *   <li>element 1 - a Hashtable of attribute name to attribute value
 *       (empty when the tag has no attributes);</li>
 *   <li>element 2 - only present for a start tag with a text value: that
 *       text as a String.</li>
 * </ol>
 *
 * <p>Limitations visible in the code: scanning stops at the first newline,
 * attribute values must be double-quoted with no blanks around {@code =},
 * a {@code >} or {@code <} inside an attribute value is not supported, and
 * text that starts with a blank is not recorded as a value.
 *
 * <p>Raw {@code Vector}/{@code Hashtable} types and the package-private
 * member names are kept as they are so that existing callers keep compiling.
 */
class Parse
{
    /** Value returned by {@link #parse(String)} on success. */
    private static final int ON = 1;

    /** One Vector per tag: tag name, attribute Hashtable and, optionally, the tag value. */
    Vector G = new Vector();

    /** Indexes (into {@link #G}) of the start tags that are followed by a text value. */
    Vector x = new Vector();

    /** Offset in the packet of the opening '&lt;' of every tag, parallel to {@link #G}. */
    Vector Positions = new Vector();

    /** Length in characters of every tag as written in the packet, parallel to {@link #G}. */
    Vector taglengths = new Vector();

    /** Normalised name of the tag recorded most recently. */
    String TagName;

    /** The packet currently (or most recently) being parsed. */
    String a = "";

    /** Suffix (">" or " />") appended to the name of the last tag that had attributes. */
    String catstring = "";

    /** Value returned by {@link #parse(String)} when the packet is rejected. */
    int GERROR = -1;

    /**
     * Returns the tags collected by the most recent {@link #parse(String)} call.
     *
     * @return the live result vector; see the class comment for its layout
     */
    Vector Retparse()
    {
        return G;
    }

    /**
     * Parses one packet and stores the result for {@link #Retparse()}.
     *
     * <p>Every call starts from fresh result vectors, so an instance can be
     * reused and vectors handed out by an earlier call are left untouched.
     * The start and end time in milliseconds are printed to standard output,
     * as are diagnostics for rejected packets.
     *
     * @param s the packet text; scanning stops at its first newline
     * @return {@code 1} on success, {@link #GERROR} when the packet is
     *         {@code null}, malformed, or parsing fails unexpectedly
     */
    int parse(String s)
    {
        // Fresh vectors: x stores per-call tag indexes, so results of an
        // earlier call must never be mixed with the ones of this call.
        G = new Vector();
        x = new Vector();
        Positions = new Vector();
        taglengths = new Vector();

        if (s == null)
        {
            System.out.println("\nNull packet received\n");
            return GERROR;
        }

        try
        {
            a = s;

            // Index of the last '>' in the packet; nothing after it can be a tag.
            int lastClose = a.lastIndexOf(">");

            System.out.println("\nStartTime :");
            System.out.println(System.currentTimeMillis());

            int tagCount = 0;
            /* Loop until the end of the packet (or its first newline) is reached */
            for (int cursor = 0; cursor < lastClose && a.charAt(cursor) != '\n'; cursor++)
            {
                /* Only a '<' starts a tag */
                if (a.charAt(cursor) == '<')
                {
                    tagCount++;
                    if (!scanTag(cursor, lastClose, tagCount - 1))
                    {
                        return GERROR;
                    }
                }
            }

            if (!attachTagValues())
            {
                return GERROR;
            }

            System.out.println("\nEndTime :");
            System.out.println(System.currentTimeMillis());
        }
        catch (RuntimeException e)
        {
            // An unexpected failure means the result is incomplete: report it
            // to the caller instead of claiming success.
            e.printStackTrace();
            return GERROR;
        }
        return ON;
    }

    /**
     * Scans the tag whose '&lt;' is at {@code tagStart} and records it.
     *
     * @param tagStart  offset of the opening '&lt;'
     * @param lastClose offset of the last '&gt;' in the packet
     * @param tagIndex  zero-based index of this tag within the packet
     * @return {@code false} when the tag is malformed (a diagnostic has been printed)
     */
    private boolean scanTag(int tagStart, int lastClose, int tagIndex)
    {
        for (int pos = tagStart + 1; pos <= lastClose; pos++)
        {
            switch (a.charAt(pos))
            {
                case ' ':
                    // A blank ends the tag name; the rest up to '>' holds attributes.
                    return recordTagWithAttributes(tagStart, pos, lastClose, tagIndex);

                case '>':
                    recordPlainTag(tagStart, pos, lastClose, tagIndex);
                    return true;

                case '=':
                    System.out.println("\nEncountered Equalto sign with in a tag name\n");
                    return false;

                case '"':
                    System.out.println("\nEncountered doubles quotes with in a tag name\n");
                    return false;

                case '<':
                    System.out.println("\nTwo Start tags found before an end tag\n");
                    return false;

                default:
                    break;
            }
        }
        return true;
    }

    /**
     * Records a tag without attributes that spans {@code tagStart..closeIdx}.
     */
    private void recordPlainTag(int tagStart, int closeIdx, int lastClose, int tagIndex)
    {
        TagName = a.substring(tagStart, closeIdx + 1);

        Vector tag = new Vector();
        tag.addElement(TagName);
        tag.addElement(new Hashtable()); /* no attributes: empty table keeps the layout uniform */
        G.addElement(tag);
        Positions.addElement(Integer.valueOf(tagStart));
        taglengths.addElement(Integer.valueOf(TagName.length()));

        boolean selfClosing = a.charAt(closeIdx - 1) == '/';
        if (!selfClosing && textValueStartsAt(closeIdx + 1, lastClose))
        {
            x.addElement(Integer.valueOf(tagIndex));
        }
    }

    /**
     * Records a tag that has attributes; {@code nameEnd} is the offset of the
     * blank that terminates the tag name.
     *
     * @return {@code false} when the tag or its attributes are malformed
     */
    private boolean recordTagWithAttributes(int tagStart, int nameEnd, int lastClose, int tagIndex)
    {
        // Find the closing '>'. The character at lastClose is a '>' by
        // definition, so the scan always ends on one.
        int closeIdx = nameEnd;
        while (closeIdx < lastClose && a.charAt(closeIdx) != '>')
        {
            char c = a.charAt(closeIdx);
            if (c == '<' || c == '\0' || c == '\n')
            {
                System.out.println("\nTwo Start tags or Null Value or New line found before an End tag\n");
                return false;
            }
            closeIdx++;
        }

        boolean selfClosing = a.charAt(closeIdx - 1) == '/';
        // Attributes end before the '/' of a self-closing tag, else before the '>'.
        int attributesEnd = selfClosing ? closeIdx - 1 : closeIdx;

        Hashtable attributes = AttSeperator(nameEnd, attributesEnd);
        if (attributes == null)
        {
            System.out.println("\nError in parsing attributes\n");
            return false;
        }

        catstring = selfClosing ? " />" : ">";
        TagName = a.substring(tagStart, nameEnd) + catstring;

        Vector tag = new Vector();
        tag.addElement(TagName);
        tag.addElement(attributes);
        G.addElement(tag);
        Positions.addElement(Integer.valueOf(tagStart));
        taglengths.addElement(Integer.valueOf(closeIdx - tagStart + 1));

        if (!selfClosing && textValueStartsAt(closeIdx + 1, lastClose))
        {
            x.addElement(Integer.valueOf(tagIndex));
        }
        return true;
    }

    /**
     * Tells whether a text value starts at offset {@code next}, i.e. the
     * offset lies before the last '&gt;' and does not hold '&lt;', a blank,
     * NUL or a newline.
     */
    private boolean textValueStartsAt(int next, int lastClose)
    {
        if (next >= lastClose)
        {
            return false;
        }
        char c = a.charAt(next);
        return c != '<' && c != ' ' && c != '\0' && c != '\n';
    }

    /**
     * Appends the text value to every tag listed in {@link #x}. The value is
     * the text between the end of the start tag and the beginning of the tag
     * that follows it, which must be the matching end tag.
     *
     * @return {@code false} when a start tag with a value is not followed by
     *         its end tag
     */
    private boolean attachTagValues()
    {
        for (int k = 0; k < x.size(); k++)
        {
            int Pos = ((Integer) x.elementAt(k)).intValue();
            if (Pos + 1 >= G.size())
            {
                System.out.println("\nStart tag with a value but no end tag found\n");
                return false;
            }

            Vector startEntry = (Vector) G.elementAt(Pos);
            Vector endEntry = (Vector) G.elementAt(Pos + 1);
            String StartTag = startEntry.elementAt(0).toString();
            String EndTag = endEntry.elementAt(0).toString();

            // "<name>" must line up with "</name>": compare "name>" in both.
            if (!StartTag.regionMatches(1, EndTag, 2, StartTag.length() - 1))
            {
                System.out.println("\nEncountered two Sucesive Start tags when seraching for an end tag \n");
                System.out.println("||" + StartTag + "||" + EndTag);
                return false;
            }

            int valueStart = ((Integer) Positions.elementAt(Pos)).intValue()
                    + ((Integer) taglengths.elementAt(Pos)).intValue();
            int valueEnd = ((Integer) Positions.elementAt(Pos + 1)).intValue();

            /* Name and attribute table are already present, so the value becomes the third element */
            startEntry.addElement(a.substring(valueStart, valueEnd));
        }
        return true;
    }

    /**
     * Splits the attribute section of a tag into name/value pairs.
     *
     * <p>Reads the packet stored in {@link #a}. Each attribute must have the
     * form {@code name="value"} with no blanks around {@code =}; attributes
     * are separated by blanks. An {@code =} inside a quoted value is treated
     * as part of the value.
     *
     * @param Start offset of the blank that follows the tag name
     * @param End   offset just past the attribute section (exclusive)
     * @return the attributes found, or {@code null} when the section is
     *         malformed (a diagnostic has been printed)
     */
    Hashtable AttSeperator(int Start, int End)
    {
        Hashtable allatt = new Hashtable();

        // Offsets double as "seen" flags: 0 means "not met yet". This is safe
        // because an attribute section never starts at offset 0 of a tag.
        int EqualTo = 0;
        int AttValStart = 0;
        int AttValEnd = 0;
        int AttNameStart = Start + 1;

        for (int i = Start; i < End; i++)
        {
            char c = a.charAt(i);

            /* A blank after a complete attribute starts the next attribute */
            if (c == ' ' && EqualTo > 0 && AttValStart > 0 && AttValEnd > 0)
            {
                EqualTo = 0;
                AttValStart = 0;
                AttValEnd = 0;
                AttNameStart = i + 1;
            }

            switch (c)
            {
                case '=':
                    if (AttValStart > 0 && AttValEnd == 0)
                    {
                        // Inside an open quoted value: '=' is ordinary text.
                        break;
                    }

                    /* A second '=' within the same attribute is an error */
                    if (EqualTo > 0)
                    {
                        System.out.println("\n\nTwo equal signs encountered\n");
                        System.out.println(a.substring(Start, End));
                        return null;
                    }

                    if (i + 1 >= a.length() || a.charAt(i + 1) != '"')
                    {
                        System.out.println("\n\nEqualto sign followed by a charecter that is not a double quotes\n");
                        System.out.println(a.substring(Start, End));
                        return null;
                    }

                    EqualTo = i;
                    break;

                case '"':
                    if (EqualTo == 0)
                    {
                        System.out.println("\n\nDouble quotes AttValStarted without Equalto Sign\n");
                        System.out.println(a.substring(Start, End));
                        return null;
                    }

                    if (AttValStart == 0)
                    {
                        /* Opening quote: must come directly after the '=' */
                        if (a.charAt(i - 1) != '=')
                        {
                            System.out.println("\n\n AttValStarted(Double quotes) with previous charecter that is not an Equal to sign\n");
                            System.out.println(a.substring(Start, End));
                            return null;
                        }
                        AttValStart = i + 1;
                    }
                    else
                    {
                        /* Closing quote: '=', opening and closing quote have all been seen */
                        AttValEnd = i;
                        String attN = a.substring(AttNameStart, EqualTo).trim();
                        String attV = a.substring(AttValStart, AttValEnd);
                        allatt.put(attN, attV);
                    }
                    break;

                default:
                    break;
            }
        }

        if (EqualTo > 0 && AttValEnd == 0)
        {
            System.out.println("\n\nAtt Value with AttValStarting quotes but no AttValEnding quotes\n");
            System.out.println(a.substring(Start, End));
            return null;
        }
        return allatt;
    }
}
Lasse Koskela
author
Sheriff

Joined: Jan 23, 2002
Posts: 11962
    
    5
What is it that you have trouble understanding in this piece of code?


Author of Test Driven (2007) and Effective Unit Testing (2013) [Blog] [HowToAskQuestionsOnJavaRanch]
 
Consider Paul's rocket mass heater.
 
subject: Explanation for Code : Parsing XML using Java