tags to line breaks.
$entry =~ s/
/\n/gis;
$entry =~ s/<\/?(p|div|tr)([^>]*?\"[^\"]*\")*[^>]*>/\n\n/gis;
# Kill the rest of the HTML markup.
$entry =~ s/<([^>]*?\"[^\"]*\")*[^>]*>//gs;
# Kill anything that looks like a date
# $entry =~ s/^\s*\d+[\.-]\d+[\.-]\d+\s*//s;
# Kill weird punctuation
$entry =~ s/\s[\.:\[\]\(\)\\\/;]\s/ /gs;
# Kill leading weirdness
$entry =~ s/^[^A-Z0-9a-záéíóúýþæöðÁÉÍÓÚÝÞÆÖÐ_-]*//s;
# Chop everything but the first sentence away, compact white space.
$entry =~ s/\n.*$//s;
$entry =~ s/\s+/ /gs;
$entry =~ s/([^\d][\!\.\?]+)\s.*$/$1/s;
$entry =~ s/^(.{50,65})\s+.*$/$1/;
$entry =~ s/^(.{65,65}).*$/$1/;
# Replace empty entries with "no subject found"
$entry =~ s/^\s*$/(no subject found)/s;
return $entry;
}
# getdesc(ENTRY, TITLE)
#
# Builds a short plain-text description from a fragment of HTML.
#   ENTRY - the HTML fragment to summarise (first positional argument).
#   TITLE - text that is cut from the front of the description, together
#           with whatever precedes it (second positional argument).
# Returns the cleaned-up, possibly truncated description string with a
# bracketed, numbered marker appended for each URL that was collected.
#
# NOTE(review): several statements below look damaged (patterns that
# appear to have lost literal markup, and one statement that is not
# terminated) - compare against a pristine copy of this file before
# relying on the exact behaviour.
sub getdesc
{
my $entry = shift;
my $title = shift;
# Drop a trailing, unterminated tag (a "<" with no closing ">" before
# the end of the string), i.e. HTML which got truncated.
$entry =~ s/<[^>]+$//s;
# Grab URLs for later processing; the loop condition caps the list at 5.
# NOTE(review): the statement below is incomplete as shown - the while
# condition is never closed, no loop body is visible, and it runs
# straight into what looks like a separate paired-tag removal. Confirm
# against the original source.
my @urls = ( );
while ((@urls < 5) && ($entry =~ s/<(?:a|img)\s+.*?(?:href|src)=\"?\s*([^\">]+)\"?/]+|\"[^\"]*\")*[^>]*>.*?<\/\1[^>]*>//gis;
# NOTE(review): as written this replaces a space with a space; presumably
# the pattern originally named an HTML entity - TODO confirm.
$entry =~ s/ / /gsi;
# Block-level tags (p, br, hr, div) are replaced by a line break.
$entry =~ s/<(p|br|hr|div)[^>]*>/
/gis;
# NOTE(review): the pattern of the next substitution is garbled; judging
# by the "[img]" replacement it presumably targeted image tags - TODO
# confirm.
$entry =~ s/
![]()
]>/[img]/gis;
# Remove whatever tags are left, allowing for ">" inside quoted
# attribute values.
$entry =~ s/<([^>]*?\"[^\"]*\")*[^>]*>//gs;
# Remove/compact white space: every whitespace run becomes one space,
# then line-break runs are collapsed and stripped from both ends.
$entry =~ s/\s+/ /gs;
$entry =~ s/(
\s*)+/
/gs;
$entry =~ s/^\s*(
\s*)*//;
$entry =~ s/(
\s*)*$//;
# Kill title: drop everything up to and including the first occurrence
# of $title (matched literally thanks to \Q...\E), then any line breaks
# left at the front.
$entry =~ s/^.*?\Q$title\E//;
$entry =~ s/^\s*(
\s*)+//;
# Truncate: when the substitution matches, only the first 124 characters
# are kept, a tag cut in half by the truncation is removed, and " ..."
# is appended to mark the cut.
if ($entry =~ s/^(.{124,124}).*/$1/)
{
$entry =~ s/<[^>]+$//i;
$entry .= " ...";
}
# Append URLs to description. Entries starting with "javascript" or
# "mailto:" are skipped and do not consume a number.
# NOTE(review): $url is made absolute via addprefix() but is not
# referenced in the appended string as shown; presumably link markup
# around $i was lost - TODO confirm.
my $i = 1;
foreach my $url (@urls)
{
next if ($url =~ /^(javascript|mailto:)/i);
$url = addprefix($url);
$entry .= " [
$i]";
$i++;
}
return $entry;
}
# addprefix(URL)
#
# Turns a link taken from a page into an absolute one.
#   URL - the link text as found in the markup.
# Returns URL unchanged when it already starts with one of the known
# schemes (http, https, ftp, mailto, javascript). Otherwise returns
# $prefix_root . URL when URL starts with "/", and $prefix_dir . URL for
# everything else.
#
# $prefix_root and $prefix_dir are not declared here; they are expected to
# be set elsewhere in this file (presumably to the site root and to the
# directory of the page being processed - verify against where they are
# assigned).
sub addprefix
{
    my $url = shift;

    # Already absolute. "https" has to be listed explicitly: a plain
    # "http" followed by ":" does not match "https:", which would cause
    # secure links to be treated as relative and get a prefix prepended.
    return $url if ($url =~ /^(?:https?|ftp|mailto|javascript):/i);

    # Root-relative path.
    return $prefix_root.$url if ($url =~ /^\//);

    # Anything else is relative to the current directory.
    return $prefix_dir.$url;
}
# xmlesc(TEXT)
#
# Escapes the characters that are significant in XML character data so
# that TEXT can be embedded in an XML document.
#   TEXT - the string to escape.
# Returns a copy of TEXT with "&", "<" and ">" replaced by "&amp;", "&lt;"
# and "&gt;" respectively.
sub xmlesc
{
    my $text = shift;

    # The ampersand must be handled first; otherwise the "&" introduced by
    # the two replacements below would itself be escaped again.
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;

    return $text;
}