View Single Post
  #1 (permalink)  
Old 05-15-2008, 06:36 PM
John_L John_L is offline
Newbie
 
Join Date: May 2008
Posts: 10
Credits: 0
Rep Power: 0
John_L is on a distinguished road
Default Need some help with my script

Basic XML parser. I noted an "EMPTY???" comment beside the area which doesn't print anything. It's strange because the variables do get filled properly...anyone have any ideas?

Code:
#!/usr/bin/perl -w 

my $title = ""; #title
my $link  = ""; #link
my $description = ""; #description
my $item_found = 0; # <item> element within the xml file

print "<html>\n";
print "\n\t<body>\n";


#read xml file from standard input
while( $line = <stdin> )
{
	
	#remove white space
	chomp( $line );
			
	if( $item_found == 0 )
	{	
		$start = index( $line, "item" ); #find an item tag
		
		if( $start != -1 )
		{
			$item_found = 1; #beginning of item
		}
	}
	
	if( $item_found == 1 )
	{

		#parse data that we want
				
		$title = &process_element( $line, "title" ); 
		$link = &process_element( $line, "link" );
		$description = &process_element( $line, "description" );
				
		#look for the end of item tag
		$end = index( $line, "/item" );
			
		if( end != -1 )
		{
			
			#print the gathered information to the html file			
			if( length( $title ) != 0 || length( $link ) != 0 || length( $description ) != 0 )
			{
				print "\n\t\t<p> \n\t \t\t<a href= \"$link\"> $title </a> \n\t\t\t $description \n\t\t</p>\n\n" ; #EMPTY????
			}
			
			#clear stored information
			$title = "";
			$link = "";
			$description = "";
			
			$chann_found = 0; #end of channel
			$item_found = 0; #end of item
		}
		
	}
	
	

}

#$_[0] = line, $_[1] = tag
sub process_element
{	
	#my ( $start, $end, $length );		#private local variables 
	#my $information = "";
	
	#Grab the string between these tags
	$start = index( $_[0], "$_[1]" );	#assume this is how a beginning tag will always look
	$end = rindex( $_[0], "</" );		#this is how an ending tag will always look
			
	if( $start != -1 )
	{
		$start = index( $_[0], ">" ) + 1; #advance start to the end of the tag
		$end = rindex( $_[0], "<" ); #advance start to the beginning of the closing tag
			
		$length = length( $_[0] );
			
		#end tag wasn't on the same line
		if( $end == -1 )
		{
			$end = $length;
		}
			
		$length = $end - $start; #the length of the substring
		
		$information = substr( $_[0], $start, $length );
		
		#replace encoded characters if any exist
		$information =~ s/&lt;/</;
		$information =~ s/&gt;/>/;
		$information =~ s/&amp;/&/;
					
		return $information
		
	}
}

print "\n\t</body>\n";
print "\n</html>\n\n";
Reply With Quote

Sponsored Links