#!/usr/bin/perl

use strict;
use warnings;

use Getopt::Long;
use Pod::Usage;
use Path::Class qw(dir file);
use meon::Web::Config;
use XML::Chain qw(xc);
use DateTime;
use URI;
use URI::Escape qw(uri_escape);
use DateTime::Format::Strptime;
use HTML::Entities qw(encode_entities);
use File::MimeInfo qw(mimetype);

use 5.010;

# Maximum number of <item> elements written to the feed.
our $MAX_FEED_ITEMS_COUNT = 20;

# Parser for the w:created timestamps ("%FT%T", interpreted as UTC).
# Configured to return undef instead of dying on unparsable input.
my $strptime_iso8601 = DateTime::Format::Strptime->new(
    pattern   => '%FT%T',
    time_zone => 'UTC',
    on_error  => 'undef',
);

exit main();

# Entry point: parses the command line, collects timeline entries from the
# four-digit (year) sub-folders of the hostname content folder and stores
# them as an RSS 2.0 feed in rss.xml inside that content folder.
# Returns 0 (used as the process exit code).
sub main {
    my $help;
    my $dst_domain;
    my $title;
    GetOptions(
        'help|h'     => \$help,
        'hostname=s' => \$dst_domain,
        'title=s'    => \$title,
    ) or pod2usage;
    pod2usage if $help;
    pod2usage unless defined $dst_domain;
    pod2usage unless defined $title;

    # validate the hostname before its folder is used to build any path
    my $hostname_folder = meon::Web::Config->hostname_to_folder($dst_domain);
    die 'no such hostname ' . $dst_domain unless $hostname_folder;
    my $dst_hostname_dir =
        dir(meon::Web::SPc->srvdir, 'www', 'meon-web', $hostname_folder, 'content',);

    my $channel =
        xc('rss', version => "2.0", 'xmlns:atom' => "http://www.w3.org/2005/Atom")->t("\n")
        ->set_io_any($dst_hostname_dir->file('rss.xml'))->c('channel')->t("\n")
        ->a('atom:link',
            href => 'https://' . $dst_domain . '/rss.xml',
            rel  => 'self',
            type => 'application/rss+xml'
        )->t("\n")
        ->a('title', '-' => $title)->t("\n")
        ->a('link',  '-' => 'http://' . $dst_domain . '/')->t("\n")
        ->a('description', '-' => 'news feed')->t("\n");

    # walk year folders newest first; scanning stops after the first year
    # folder that brings the collected total up to the feed limit
    my @feed_items;
    foreach my $year_dir (sort { $b cmp $a } $dst_hostname_dir->children) {
        next unless $year_dir->is_dir;
        next unless $year_dir->basename =~ m/^\d{4}$/;
        $year_dir->recurse(
            callback => sub {
                my ($file) = @_;
                my $feed_item = _feed_item_for_file($file, $dst_hostname_dir, $dst_domain);
                push(@feed_items, $feed_item) if $feed_item;
            }
        );
        last if @feed_items >= $MAX_FEED_ITEMS_COUNT;
    }

    # sort BEFORE truncating, otherwise the traversal order (not the creation
    # time) would decide which entries make it into the feed
    my @newest_items = sort { $b->{created} <=> $a->{created} } @feed_items;
    splice(@newest_items, $MAX_FEED_ITEMS_COUNT)
        if @newest_items > $MAX_FEED_ITEMS_COUNT;

    foreach my $feed_item (@newest_items) {
        my $created = DateTime->from_epoch(epoch => $feed_item->{created});
        my $item    = $channel->c('item')->t("\n");
        $item->a('title', '-' => $feed_item->{title})->t("\n")
            ->a('link', '-' => $feed_item->{link})->t("\n")
            ->a('guid', '-' => $feed_item->{link})->t("\n");
        $item->a('description', '-' => $feed_item->{description}->as_string)->t("\n")
            if $feed_item->{description};
        $item->a('enclosure', %{$feed_item->{enclosure}})->t("\n")
            if $feed_item->{enclosure};
        $item->a('pubDate', '-' => $created->strftime('%a, %d %b %Y %H:%M:%S %z'))->t("\n");
        $channel->t("\n");
    }

    $channel->store;

    return 0;
}

# Builds the feed item hash (link, created, updated, title, description,
# enclosure) for one content file.
# Returns nothing when $file is a folder, is not an .xml file, is the root
# index or the sitemap, or has no w:timeline-entry element.
sub _feed_item_for_file {
    my ($file, $dst_hostname_dir, $dst_domain) = @_;

    return if $file->is_dir;
    return if $file !~ m/\.xml$/;

    # path relative to the content folder, without "index.xml"/".xml" suffix
    my $rel_file = $file->stringify;
    $rel_file =~ s/(index)?\.xml$//;
    # \Q..\E: the folder path is literal text, not a regular expression
    $rel_file =~ s/^\Q$dst_hostname_dir\E//;
    return if $rel_file eq '/';
    return if $rel_file eq '/sitemap';

    my $content_xml = xc($file);
    $content_xml->reg_global_ns('w' => 'http://web.meon.eu/');
    my $tl_entry = $content_xml->find('/w:page/w:content//w:timeline-entry')->first;
    return unless $tl_entry->count;

    my $updated = $file->stat->mtime;

    # creation time comes from w:created, falling back to the file mtime
    # when the element is empty or can not be parsed
    my $created;
    my $created_iso8601 = $tl_entry->find('w:created')->text_content;
    if ($created_iso8601) {
        my $created_dt = eval { $strptime_iso8601->parse_datetime($created_iso8601) };
        if ($created_dt) {
            $created = $created_dt->epoch;
        }
        else {
            # $@ is inspected only right after the eval that may have set it
            warn(($@ || 'failed to parse created "' . $created_iso8601 . '"') . ' - ' . $file);
        }
    }
    $created //= $updated;

    my $url = URI->new(
        'http://' . $dst_domain . join('/', map { uri_escape($_) } file($rel_file)->components));

    my $title = $tl_entry->find('w:title')->text_content;
    my $text  = $tl_entry->find('w:text')->text_content;
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    my $intro = $tl_entry->find('w:intro')->text_content;
    $intro =~ s/^\s+//;
    $intro =~ s/\s+$//;

    my $description = xc('div', xmlns => 'http://www.w3.org/1999/xhtml');
    my $enclosure   = _thumbnail_enclosure(
        $tl_entry->find('w:img-thumb')->text_content,
        $rel_file, $dst_hostname_dir, $dst_domain,
    );
    if ($enclosure) {
        $description->c('div')->c('a', href => $url)
            ->a('img', width => '32', height => '32', src => $enclosure->{url});
    }
    $description->c('p')->c('i', '-' => $intro) if $intro;
    if ($text) {
        # blank-line separated chunks of w:text become separate paragraphs
        foreach my $para (split(/\n\n/, $text)) {
            $description->c('p', '-' => $para);
        }
    }

    return {
        link        => $url,
        created     => $created,
        updated     => $updated,
        title       => $title,
        description => $description,
        enclosure   => $enclosure,
    };
}

# Builds the attributes of the RSS <enclosure> element for a thumbnail.
# External thumbnails ("http://", "https://" or protocol-relative "//") get
# only a url; local ones also get length (file size) and type (mime type
# guessed from the file name).
# Returns a hash reference, or undef when $img_thumb is empty.
# Dies when a local thumbnail file can not be stat-ed.
sub _thumbnail_enclosure {
    my ($img_thumb, $rel_file, $dst_hostname_dir, $dst_domain) = @_;

    return undef unless $img_thumb;

    if ($img_thumb =~ m{^(?<scheme>https?:)?//}) {
        # protocol-relative link, make it absolute
        $img_thumb = 'http:' . $img_thumb unless $+{scheme};
        return {url => $img_thumb};
    }

    my $thumb_abs = ($img_thumb =~ m{^/});
    my $is_static = ($img_thumb =~ m{^/static});
    my $rel_link  = (
        $thumb_abs
        ? $img_thumb
        : $rel_file . '/' . uri_escape($img_thumb)
    );

    # "/static..." thumbnails are looked up in the "www" folder next to the
    # content folder, all other ones inside the content folder
    my $img_file = file(
        ($is_static ? $dst_hostname_dir->parent->subdir('www') : $dst_hostname_dir),
        ($thumb_abs ? () : $rel_file),
        $img_thumb
    );
    my $img_stat = $img_file->stat
        or die 'thumbnail file ' . $img_file . ' not found';
    my $mime = mimetype(file($img_thumb)->basename);

    return {
        url    => 'http://' . $dst_domain . $rel_link,
        length => $img_stat->size,
        type   => $mime,
    };
}

=head1 NAME

meon-web-rss2-feed - generate rss.xml (RSS 2.0 feed) from timeline entries

=head1 SYNOPSIS

    meon-web-rss2-feed --hostname domain --title "title of the feed"

        --hostname domain   hostname
        --title $           title of the feed

=head1 DESCRIPTION

Scans the year folders of the hostname content folder for pages with
a timeline entry and stores the newest ones as F<rss.xml> in the content
folder.

=cut