Basic XML parser. I noted an "EMPTY???" comment beside the area which doesn't print anything. It's strange because the variables do get filled properly...anyone have any ideas?
Code:
#!/usr/bin/perl -w
my $title = ""; #title
my $link = ""; #link
my $description = ""; #description
my $item_found = 0; # <item> element within the xml file
print "<html>\n";
print "\n\t<body>\n";
#read xml file from standard input
while( $line = <stdin> )
{
#remove white space
chomp( $line );
if( $item_found == 0 )
{
$start = index( $line, "item" ); #find an item tag
if( $start != -1 )
{
$item_found = 1; #beginning of item
}
}
if( $item_found == 1 )
{
#parse data that we want
$title = &process_element( $line, "title" );
$link = &process_element( $line, "link" );
$description = &process_element( $line, "description" );
#look for the end of item tag
$end = index( $line, "/item" );
if( end != -1 )
{
#print the gathered information to the html file
if( length( $title ) != 0 || length( $link ) != 0 || length( $description ) != 0 )
{
print "\n\t\t<p> \n\t \t\t<a href= \"$link\"> $title </a> \n\t\t\t $description \n\t\t</p>\n\n" ; #EMPTY????
}
#clear stored information
$title = "";
$link = "";
$description = "";
$chann_found = 0; #end of channel
$item_found = 0; #end of item
}
}
}
#$_[0] = line, $_[1] = tag
sub process_element
{
#my ( $start, $end, $length ); #private local variables
#my $information = "";
#Grab the string between these tags
$start = index( $_[0], "$_[1]" ); #assume this is how a beginning tag will always look
$end = rindex( $_[0], "</" ); #this is how an ending tag will always look
if( $start != -1 )
{
$start = index( $_[0], ">" ) + 1; #advance start to the end of the tag
$end = rindex( $_[0], "<" ); #advance start to the beginning of the closing tag
$length = length( $_[0] );
#end tag wasn't on the same line
if( $end == -1 )
{
$end = $length;
}
$length = $end - $start; #the length of the substring
$information = substr( $_[0], $start, $length );
#replace encoded characters if any exist
$information =~ s/</</;
$information =~ s/>/>/;
$information =~ s/&/&/;
return $information
}
}
print "\n\t</body>\n";
print "\n</html>\n\n";