Back to documentation
package Statocles::Plugin::LinkCheck;
our $VERSION = '0.094';
# ABSTRACT: Check links and images for validity during build
use Statocles::Base 'Class';
with 'Statocles::Plugin';
use Mojo::Util qw( url_escape url_unescape );
=attr ignore
An array of URL patterns to ignore. These are interpreted as regular expressions,
and are anchored to the beginning of the URL.
For example:
/broken will match "/broken.html" "/broken/page.html" but not "/page/broken"
.*/broken will match "/broken.html" "/broken/page.html" and "/page/broken"
=cut
has ignore => (
is => 'ro',
isa => ArrayRef[Str],
default => sub { [] },
);
=method check_pages
$plugin->check_pages( $event );
Check the pages inside the given
L<Statocles::Event::Pages|Statocles::Event::Pages> event.
=cut
sub check_pages {
my ( $self, $event ) = @_;
my %page_paths = ();
my %links = ();
my %empty = (); # Pages with empty links
for my $page ( @{ $event->pages } ) {
$page_paths{ url_unescape $page->path } = 1;
if ( $page->DOES( 'Statocles::Page::Document' ) ) {
my $dom = $page->dom;
for my $attr ( qw( src href ) ) {
for my $el ( $dom->find( "[$attr]" )->each ) {
my $url = $el->attr( $attr );
if ( !$url ) {
push @{ $empty{ $page->path } }, $el;
}
$url =~ s{#.*$}{};
next unless $url; # Skip checking fragment-internal links for now
next if $url =~ m{^(?:[a-z][a-z0-9+.-]*):}i;
next if $url =~ m{^//};
if ( $url !~ m{^/} ) {
my $clone = $page->path->clone;
pop @$clone;
$url = join '/', $clone, $url;
}
# Fix ".." and ".". Path::Tiny->canonpath can't do
# this for us because these paths do not exist on
# the filesystem
$url =~ s{/[^/]+/[.][.]/}{/}g; # Fix ".." to refer to parent
$url =~ s{/[.]/}{/}g; # Fix "." to refer to self
$links{ url_unescape $url }{ url_unescape $page->path }++;
}
}
}
}
my @missing; # Array of arrayrefs of [ link_url, page_path ]
for my $link_url ( keys %links ) {
$link_url .= 'index.html' if $link_url =~ m{/$};
next if $page_paths{ $link_url } || $page_paths{ "$link_url/index.html" };
next if grep { $link_url =~ /^$_/ } @{ $self->ignore };
push @missing, [ $link_url, $_ ] for keys %{ $links{ $link_url } };
}
for my $page_url ( keys %empty ) {
push @missing, map { [ '', $page_url, $_ ] } @{ $empty{ $page_url } };
}
if ( @missing ) {
# Sort by page url and then missing link url
for my $m ( sort { $a->[1] cmp $b->[1] || $a->[0] cmp $b->[0] } @missing ) {
my $msg = $m->[0] ? sprintf( q{'%s' not found}, $m->[0] )
: sprintf( q{Link with text "%s" has no destination}, $m->[2]->text )
;
$event->emitter->log->warn( "URL broken on $m->[1]: $msg" );
}
}
}
=method register
Register this plugin to install its event handlers. Called automatically.
=cut
sub register {
my ( $self, $site ) = @_;
$site->on( build => sub { $self->check_pages( @_ ) } );
}
1;
=head1 SYNOPSIS
# site.yml
site:
class: Statocles::Site
args:
plugins:
link_check:
$class: Statocles::Plugin::LinkCheck
=head1 DESCRIPTION
This plugin checks all of the links and images to ensure they exist. If something
is missing, this plugin will write a warning to the screen.