Difference between revisions of "API:EPrints/Apache/SiteMap"
(Created page with "<!-- Pod2Wiki=_preamble_ This page has been automatically generated from the EPrints 3.2 source. Any wiki changes made between the 'Pod2Wiki=*' and 'Edit below this comment'...") |
|||
Line 41: | Line 41: | ||
$rc = EPrints::Apache::SiteMap::handler( $r ) | $rc = EPrints::Apache::SiteMap::handler( $r ) | ||
− | Handler for managing EPrints requests for dynamically generated sitemap.xml or sitemap-sc.xml (or returning static version if that exists). | + | Handler for managing EPrints requests for dynamically generated sitemap.xml or sitemap-sc.xml (or returning static version if that exists). =cut ###################################################################### |
+ | |||
#
# Apache response handler for sitemap.xml and sitemap-sc.xml.
#
# $r is the request object; only its uri is inspected here (it is also
# passed on to EPrints::Session::get_session_language).
#
# A URI ending in sitemap-sc.xml is always answered with a freshly built
# <urlset> from _new_urlset. Any other request looks for a static
# sitemap.xml in the repository's static directories for the session
# language and, if one is found, extends it; otherwise it falls back to
# _new_urlset as well.
#
# The resulting document is printed to STDOUT as UTF-8 text/xml and DONE
# is returned.
#
sub handler
{
	my( $r ) = @_;

	my $repository = $EPrints::HANDLE->current_repository;
	my $xml = $repository->xml;

	# a direct request for the semantic web extensions never consults the
	# static sitemap
	my $sc_only = ( $r->uri =~ m! sitemap-sc\.xml$ !x );

	my $doc;
	if( !$sc_only )
	{
		# use the first static directory that holds a sitemap.xml
		my $langid = EPrints::Session::get_session_language( $repository, $r );
		foreach my $dir ( $repository->get_static_dirs( $langid ) )
		{
			my $path = "$dir/sitemap.xml";
			if( -e $path )
			{
				$doc = $xml->parse_file( $path ) || EPrints::abort( "Can't parse $path: $!" );
				last;
			}
		}
	}

	if( !defined $doc )
	{
		# sitemap-sc.xml request, or no static sitemap file - create a new document
		$doc = _new_urlset( $repository, $xml );
	}
	else
	{
		my $static_root = $doc->documentElement;
		my $root_name = $static_root->localname;

		if( $root_name eq "urlset" )
		{
			# static <urlset>: append the semantic web extensions to the end
			_insert_semantic_web_extensions( $repository, $xml, $static_root );
		}
		elsif( $root_name eq "sitemapindex" )
		{
			# static <sitemapindex>: add a <sitemap><loc>...</loc></sitemap>
			# entry that points at the semantic web sitemap
			my $entry = $doc->createElement( "sitemap" );
			$static_root->appendChild( $entry );

			my $loc = $doc->createElement( "loc" );
			$entry->appendChild( $loc );
			$loc->appendChild( $doc->createTextNode( $repository->config('base_url')."/sitemap-sc.xml" ) );
		}
		# any other root element is served unchanged
	}

	# adds local sitemap URLs
	my $root = $doc->documentElement;
	if( $root->localname eq "urlset" )
	{
		$repository->run_trigger( EPrints::Const::EP_TRIGGER_LOCAL_SITEMAP_URLS,
			urlset => $root,
		);
	} # TODO: else { call some other trigger, with the sitemapindex element }

	binmode( *STDOUT, ":utf8" );
	$repository->send_http_header( "content_type"=>"text/xml; charset=UTF-8" );
	print $xml->to_string( $doc );

	return DONE;
}
+ | |||
#
# Build a new XML document whose root is a sitemap <urlset> element
# populated by _insert_semantic_web_extensions.
#
# Takes the repository and its XML helper object; returns the document.
#
sub _new_urlset
{
	my( $repository, $xml ) = @_;

	my $doc = $xml->make_document();

	my %root_attr = ( "xmlns" => "http://www.sitemaps.org/schemas/sitemap/0.9" );
	my $root = $xml->create_element( "urlset", %root_attr );

	# fill the element before attaching it to the document
	_insert_semantic_web_extensions( $repository, $xml, $root );
	$doc->appendChild( $root );

	return $doc;
}
+ | |||
#
# Insert the semantic web extensions as children of the element given as the
# third argument to the function. This function contains the body of the main
# handler shipped with EPrints 3.2.x
#
# Parameters:
#   $repository - repository object; supplies 'base_url' and the subject dataset
#   $xml        - XML helper used to create the new elements and text nodes
#   $urlset     - element that receives the xmlns:sc attribute and the new
#                 <sc:dataset> child
#
# The <sc:dataset> element gets one sc:linkedDataPrefix, one sc:datasetURI and
# a series of sc:dataDumpLocation children: the repository URI, the dump URI
# and one per child of the ROOT subject. If the repository has no ROOT subject
# the per-subject locations are skipped, so the rest of the sitemap is still
# served instead of dying on an undefined object.
#
sub _insert_semantic_web_extensions
{
	my( $repository, $xml, $urlset ) = @_;

	$urlset->setAttribute( "xmlns:sc", "http://sw.deri.org/2007/07/sitemapextension/scschema.xsd" );

	# looked up once; every URI below is built from it
	my $base_url = $repository->config( 'base_url' );

	my $sc_dataset = $xml->create_element( "sc:dataset" );
	$urlset->appendChild( $sc_dataset );

	$sc_dataset->appendChild( _create_data( $xml,
		"sc:linkedDataPrefix",
		$base_url."/id/",
		slicing => "subject-object", ) );
	$sc_dataset->appendChild( _create_data( $xml,
		"sc:datasetURI",
		$base_url."/id/repository" ) );

	foreach my $dump_uri ( $base_url."/id/repository", $base_url."/id/dump" )
	{
		$sc_dataset->appendChild( _create_data( $xml,
			"sc:dataDumpLocation",
			$dump_uri ) );
	}

	# dataobj() yields undef when there is no such record, so guard before
	# calling a method on the result
	my $root_subject = $repository->dataset( "subject" )->dataobj( "ROOT" );
	return if !defined $root_subject;

	foreach my $top_subject ( $root_subject->get_children )
	{
		$sc_dataset->appendChild( _create_data( $xml,
			"sc:dataDumpLocation",
			$top_subject->uri ) );
	}

	return;
}
+ | |||
#
# Create an element called $name carrying the attributes in %attr and a
# single text child holding $data. Both nodes are made through $xml.
# Returns the new element.
#
sub _create_data
{
	my( $xml, $name, $data, %attr ) = @_;

	my $element = $xml->create_element( $name, %attr );
	my $text = $xml->create_text_node( $data );
	$element->appendChild( $text );

	return $element;
}
+ | |||
+ | 1; | ||
<div style='background-color: #e8e8f; margin: 0.5em 0em 1em 0em; border: solid 1px #cce; padding: 0em 1em 0em 1em; font-size: 80%; '> | <div style='background-color: #e8e8f; margin: 0.5em 0em 1em 0em; border: solid 1px #cce; padding: 0em 1em 0em 1em; font-size: 80%; '> |
Revision as of 16:26, 14 December 2021
EPrints 3 Reference: Directory Structure - Metadata Fields - Repository Configuration - XML Config Files - XML Export Format - EPrints data structure - Core API - Data Objects
Latest Source Code (3.4, 3.3) | Revision Log | Before editing this page please read Pod2Wiki
Contents
NAME
EPrints::Apache::SiteMap
DESCRIPTION
This handler has been heavily modified in order to support a static sitemap.xml file in addition to the semantic web crawling extensions provided by EPrints. The modified handler inserts the semantic web crawling extensions into the existing sitemap.xml if it exists, or creates a new document if it doesn't. The body of the original handler now lives in the _insert_semantic_web_extensions function below.
If the static sitemap XML is a sitemapindex, this handler inserts a new <sitemap> element into the index, which directs crawlers to a "sitemap-sc.xml" URL that contains the semantic web sitemap generated by _insert_semantic_web_extensions. This handler also implements the sitemap-sc.xml URL.
METHODS
handler
$rc = EPrints::Apache::SiteMap::handler( $r )
Handler for managing EPrints requests for the dynamically generated sitemap.xml or sitemap-sc.xml (or for returning the static version if one exists).
=cut
######################################################################
#
# Apache response handler for sitemap.xml and sitemap-sc.xml.
#
# $r is the request object; only its uri is inspected here (it is also
# passed on to EPrints::Session::get_session_language).
#
# A URI ending in sitemap-sc.xml is always answered with a freshly built
# <urlset> from _new_urlset. Any other request looks for a static
# sitemap.xml in the repository's static directories for the session
# language and, if one is found, extends it; otherwise it falls back to
# _new_urlset as well.
#
# The resulting document is printed to STDOUT as UTF-8 text/xml and DONE
# is returned.
#
sub handler
{
	my( $r ) = @_;

	my $repository = $EPrints::HANDLE->current_repository;
	my $xml = $repository->xml;

	# a direct request for the semantic web extensions never consults the
	# static sitemap
	my $sc_only = ( $r->uri =~ m! sitemap-sc\.xml$ !x );

	my $doc;
	if( !$sc_only )
	{
		# use the first static directory that holds a sitemap.xml
		my $langid = EPrints::Session::get_session_language( $repository, $r );
		foreach my $dir ( $repository->get_static_dirs( $langid ) )
		{
			my $path = "$dir/sitemap.xml";
			if( -e $path )
			{
				$doc = $xml->parse_file( $path ) || EPrints::abort( "Can't parse $path: $!" );
				last;
			}
		}
	}

	if( !defined $doc )
	{
		# sitemap-sc.xml request, or no static sitemap file - create a new document
		$doc = _new_urlset( $repository, $xml );
	}
	else
	{
		my $static_root = $doc->documentElement;
		my $root_name = $static_root->localname;

		if( $root_name eq "urlset" )
		{
			# static <urlset>: append the semantic web extensions to the end
			_insert_semantic_web_extensions( $repository, $xml, $static_root );
		}
		elsif( $root_name eq "sitemapindex" )
		{
			# static <sitemapindex>: add a <sitemap><loc>...</loc></sitemap>
			# entry that points at the semantic web sitemap
			my $entry = $doc->createElement( "sitemap" );
			$static_root->appendChild( $entry );

			my $loc = $doc->createElement( "loc" );
			$entry->appendChild( $loc );
			$loc->appendChild( $doc->createTextNode( $repository->config('base_url')."/sitemap-sc.xml" ) );
		}
		# any other root element is served unchanged
	}

	# adds local sitemap URLs
	my $root = $doc->documentElement;
	if( $root->localname eq "urlset" )
	{
		$repository->run_trigger( EPrints::Const::EP_TRIGGER_LOCAL_SITEMAP_URLS,
			urlset => $root,
		);
	} # TODO: else { call some other trigger, with the sitemapindex element }

	binmode( *STDOUT, ":utf8" );
	$repository->send_http_header( "content_type"=>"text/xml; charset=UTF-8" );
	print $xml->to_string( $doc );

	return DONE;
}
#
# Build a new XML document whose root is a sitemap <urlset> element
# populated by _insert_semantic_web_extensions.
#
# Takes the repository and its XML helper object; returns the document.
#
sub _new_urlset
{
	my( $repository, $xml ) = @_;

	my $doc = $xml->make_document();

	my %root_attr = ( "xmlns" => "http://www.sitemaps.org/schemas/sitemap/0.9" );
	my $root = $xml->create_element( "urlset", %root_attr );

	# fill the element before attaching it to the document
	_insert_semantic_web_extensions( $repository, $xml, $root );
	$doc->appendChild( $root );

	return $doc;
}
#
# Insert the semantic web extensions as children of the element given as the
# third argument to the function. This function contains the body of the main
# handler shipped with EPrints 3.2.x
#
# Parameters:
#   $repository - repository object; supplies 'base_url' and the subject dataset
#   $xml        - XML helper used to create the new elements and text nodes
#   $urlset     - element that receives the xmlns:sc attribute and the new
#                 <sc:dataset> child
#
# The <sc:dataset> element gets one sc:linkedDataPrefix, one sc:datasetURI and
# a series of sc:dataDumpLocation children: the repository URI, the dump URI
# and one per child of the ROOT subject. If the repository has no ROOT subject
# the per-subject locations are skipped, so the rest of the sitemap is still
# served instead of dying on an undefined object.
#
sub _insert_semantic_web_extensions
{
	my( $repository, $xml, $urlset ) = @_;

	$urlset->setAttribute( "xmlns:sc", "http://sw.deri.org/2007/07/sitemapextension/scschema.xsd" );

	# looked up once; every URI below is built from it
	my $base_url = $repository->config( 'base_url' );

	my $sc_dataset = $xml->create_element( "sc:dataset" );
	$urlset->appendChild( $sc_dataset );

	$sc_dataset->appendChild( _create_data( $xml,
		"sc:linkedDataPrefix",
		$base_url."/id/",
		slicing => "subject-object", ) );
	$sc_dataset->appendChild( _create_data( $xml,
		"sc:datasetURI",
		$base_url."/id/repository" ) );

	foreach my $dump_uri ( $base_url."/id/repository", $base_url."/id/dump" )
	{
		$sc_dataset->appendChild( _create_data( $xml,
			"sc:dataDumpLocation",
			$dump_uri ) );
	}

	# dataobj() yields undef when there is no such record, so guard before
	# calling a method on the result
	my $root_subject = $repository->dataset( "subject" )->dataobj( "ROOT" );
	return if !defined $root_subject;

	foreach my $top_subject ( $root_subject->get_children )
	{
		$sc_dataset->appendChild( _create_data( $xml,
			"sc:dataDumpLocation",
			$top_subject->uri ) );
	}

	return;
}
#
# Create an element called $name carrying the attributes in %attr and a
# single text child holding $data. Both nodes are made through $xml.
# Returns the new element.
#
sub _create_data
{
	my( $xml, $name, $data, %attr ) = @_;

	my $element = $xml->create_element( $name, %attr );
	my $text = $xml->create_text_node( $data );
	$element->appendChild( $text );

	return $element;
}
1;
COPYRIGHT