Difference between revisions of "Contribute: Plugins/ExportPluginsZip"
(→Handling DataObjs) |
|||
(25 intermediate revisions by one other user not shown) | |||
Line 1: | Line 1: | ||
+ | [[Category:Contribute]] | ||
+ | [[Category:Plugins]] | ||
= Export Plugin Tutorial 5: Zip = | = Export Plugin Tutorial 5: Zip = | ||
− | In this tutorial we'll look at packaging the results of a search into a Zip file. We'll create a directory for each eprint, and a sub-directory for each document belonging to that eprint. We'll also add | + | In this tutorial we'll look at packaging the results of a search into a Zip file. We'll create a directory for each eprint, and a sub-directory for each document belonging to that eprint. We'll also add an HTML index file to the archive to make it easier to navigate. |
To prepare for this tutorial you should install the [http://search.cpan.org/~miyagawa/Archive-Any-Create-0.02/lib/Archive/Any/Create.pm Archive::Any::Create] module. The following command as root, or using sudo should work. | To prepare for this tutorial you should install the [http://search.cpan.org/~miyagawa/Archive-Any-Create-0.02/lib/Archive/Any/Create.pm Archive::Any::Create] module. The following command as root, or using sudo should work. | ||
Line 9: | Line 11: | ||
= Zip.pm = | = Zip.pm = | ||
+ | The code in the section below should be placed in a file called Zip.pm in the directory created previously, and MyPlugins should be changed to the name of that directory. | ||
<pre> | <pre> | ||
package EPrints::Plugin::Export::MyPlugins::Zip; | package EPrints::Plugin::Export::MyPlugins::Zip; | ||
− | @ISA = ( | + | @ISA = ('EPrints::Plugin::Export'); |
use strict; | use strict; | ||
Line 24: | Line 27: | ||
my $self = $class->SUPER::new(%opts); | my $self = $class->SUPER::new(%opts); | ||
− | $self->{name} = | + | $self->{name} = 'Zip'; |
$self->{accept} = [ 'list/eprint' ]; | $self->{accept} = [ 'list/eprint' ]; | ||
− | $self->{visible} = | + | $self->{visible} = 'all'; |
− | $self->{suffix} = | + | $self->{suffix} = '.zip'; |
− | $self->{mimetype} = | + | $self->{mimetype} = 'application/zip'; |
+ | |||
+ | my $rc = EPrints::Utils::require_if_exists('Archive::Any::Create'); | ||
+ | unless ($rc) | ||
+ | { | ||
+ | $self->{visible} = ''; | ||
+ | $self->{error} = 'Unable to load required module Archive::Any::Create'; | ||
+ | } | ||
return $self; | return $self; | ||
Line 42: | Line 52: | ||
my $zip = Archive::Any::Create->new; | my $zip = Archive::Any::Create->new; | ||
− | my $ | + | my $index = <<END; |
+ | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" | ||
+ | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> | ||
+ | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
+ | <head> | ||
+ | <meta http-equiv="Content-Type" content="text/html;charset=utf-8"/> | ||
+ | <title>EPrints Search Results</title> | ||
+ | </head> | ||
+ | <body> | ||
+ | END | ||
− | my | + | my $session = $plugin->{session}; |
− | + | ||
+ | foreach my $dataobj ($opts{list}->get_records) | ||
{ | { | ||
− | $ | + | my $div = $session->make_element('div'); |
− | + | my $heading = $session->make_element('h2'); | |
+ | $heading->appendChild($session->make_text($dataobj->get_value('title'))); | ||
+ | $div->appendChild($heading); | ||
− | + | my $uldoc = $session->make_element('ul'); | |
+ | $div->appendChild($uldoc); | ||
− | + | my $dirpath = 'eprints-search/'.$dataobj->get_id().'/'; | |
− | |||
− | |||
− | |||
− | my $dirpath = | ||
my $i = 1; | my $i = 1; | ||
Line 63: | Line 82: | ||
my $subdirpath = $dirpath."doc$i/"; | my $subdirpath = $dirpath."doc$i/"; | ||
my %files = $doc->files; | my %files = $doc->files; | ||
+ | |||
+ | my $lidoc = $session->make_element('li'); | ||
+ | $uldoc->appendChild($lidoc); | ||
+ | |||
+ | my $adoc = $session->make_element('a', href=>$dataobj->get_id."/doc$i/".$doc->get_main); | ||
+ | $lidoc->appendChild($adoc); | ||
+ | |||
+ | if ($doc->exists_and_set('formatdesc')) | ||
+ | { | ||
+ | $adoc->appendChild($session->make_text($doc->get_value('formatdesc'))); | ||
+ | } | ||
+ | else | ||
+ | { | ||
+ | $adoc->appendChild($session->make_text($doc->get_main)); | ||
+ | } | ||
foreach my $filename (sort keys %files) | foreach my $filename (sort keys %files) | ||
{ | { | ||
− | + | my $filepath = $subdirpath.$filename; | |
− | my $file = $doc->local_path. | + | my $file = $doc->local_path.'/'.$filename; |
if (-d $file) | if (-d $file) | ||
Line 88: | Line 122: | ||
$i++; | $i++; | ||
} | } | ||
+ | $index .= EPrints::XML::to_string($div); | ||
} | } | ||
− | if (defined $opts{ | + | $index .= '</body></html>'; |
+ | $zip->add_file('eprints-search/index.htm',$index); | ||
+ | |||
+ | if (defined $opts{fh}) | ||
{ | { | ||
− | $zip->write_filehandle($opts{ | + | $zip->write_filehandle($opts{fh},'zip'); |
return undef; | return undef; | ||
} | } | ||
− | $zip->write_filehandle($FH, | + | $zip->write_filehandle($FH,'zip'); |
return $archive; | return $archive; | ||
} | } | ||
1; | 1; | ||
+ | |||
</pre> | </pre> | ||
= In More Detail = | = In More Detail = | ||
− | == | + | == Constructor == |
− | + | For the sake of simplicity this plugin will only deal with lists of eprints. This avoids some code duplication, and it would be fairly easy to modify the plugin to deal with both individual eprints and lists of eprints sensibly. | |
<pre> | <pre> | ||
− | + | $self->{accept} = [ 'list/eprint' ]; | |
</pre> | </pre> | ||
− | + | The file extension and [http://en.wikipedia.org/wiki/MIME MIME] type are set to values appropriate for Zip files. | |
− | |||
<pre> | <pre> | ||
− | $self->{ | + | $self->{suffix} = '.zip'; |
+ | $self->{mimetype} = 'application/zip'; | ||
</pre> | </pre> | ||
− | + | We need to import a module that is not included with EPrints for creating zip files. We use the EPrints::Utils::require_if_exists function to check if the module exists, and load it if it does. We then check the value returned from that function, and make the plugin invisible if it failed. | |
+ | |||
<pre> | <pre> | ||
− | $self->{ | + | my $rc = EPrints::Utils::require_if_exists('Archive::Any::Create'); |
− | + | unless ($rc) | |
+ | { | ||
+ | $self->{visible} = ''; | ||
+ | $self->{error} = 'Unable to load required module Archive::Any::Create'; | ||
+ | } | ||
</pre> | </pre> | ||
Line 132: | Line 176: | ||
</pre> | </pre> | ||
− | === | + | === Navigation === |
− | Here we | + | Here we begin to setup the HTML file that we'll add to our archive for navigation. First we setup a header. |
<pre> | <pre> | ||
− | my $ | + | my $index = <<END; |
+ | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" | ||
+ | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> | ||
+ | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
+ | <head> | ||
+ | <meta http-equiv="Content-Type" content="text/html;charset=utf-8"/> | ||
+ | <title>EPrints Search Results</title> | ||
+ | </head> | ||
+ | <body> | ||
+ | END | ||
</pre> | </pre> | ||
− | + | Now we get the Session object, we'll be using it to manipulate [http://en.wikipedia.org/wiki/Document_Object_Model DOM] objects later. | |
<pre> | <pre> | ||
− | my | + | my $session = $plugin->{session}; |
− | |||
− | |||
− | |||
− | |||
</pre> | </pre> | ||
− | We | + | === Handling DataObjs === |
+ | We loop over the DataObjs as we have done before. | ||
+ | |||
+ | This time we setup some [http://en.wikipedia.org/wiki/Document_Object_Model DOM] objects to be added to our index. Each eprint will have it's title printed out followed by a list of documents. | ||
<pre> | <pre> | ||
− | + | my $div = $session->make_element('div'); | |
+ | my $heading = $session->make_element('h2'); | ||
+ | $heading->appendChild($session->make_text($dataobj->get_value('title'))); | ||
+ | $div->appendChild($heading); | ||
− | + | my $uldoc = $session->make_element('ul'); | |
+ | $div->appendChild($uldoc); | ||
</pre> | </pre> | ||
− | + | We create a directory for each eprint. Note it is not necessary to explicitly create a directory, we simply have to set the appropriate file path. However this means that if you do not add files to a certain directory it will not be created, rather than having an empty directory for a given eprint. | |
+ | |||
<pre> | <pre> | ||
− | + | my $dirpath = 'eprints-search/'.$dataobj->get_id().'/'; | |
− | |||
− | my $dirpath = | ||
</pre> | </pre> | ||
+ | ==== Dealing With Documents ==== | ||
+ | We then loop over all the documents belonging to each DataObj. The get_all_documents method returns an array of Document objects. | ||
<pre> | <pre> | ||
my $i = 1; | my $i = 1; | ||
Line 166: | Line 223: | ||
{ | { | ||
my $subdirpath = $dirpath."doc$i/"; | my $subdirpath = $dirpath."doc$i/"; | ||
+ | </pre> | ||
+ | |||
+ | Here we create a list item for the document containing a link to the main file. | ||
+ | <pre> | ||
+ | my $lidoc = $session->make_element('li'); | ||
+ | $uldoc->appendChild($lidoc); | ||
+ | |||
+ | my $adoc = $session->make_element('a', href=>$dataobj->get_id."/doc$i/".$doc->get_main); | ||
+ | $lidoc->appendChild($adoc); | ||
+ | </pre> | ||
+ | If a description of the main file has been set we use that as the link text, otherwise we use the filename. | ||
+ | <pre> | ||
+ | if ($doc->exists_and_set('formatdesc')) | ||
+ | { | ||
+ | $adoc->appendChild($session->make_text($doc->get_value('formatdesc'))); | ||
+ | } | ||
+ | else | ||
+ | { | ||
+ | $adoc->appendChild($session->make_text($doc->get_main)); | ||
+ | } | ||
+ | </pre> | ||
+ | |||
+ | ==== Dealing With Files ==== | ||
+ | The files method of the Document object returns a hash whose keys are file names and values are file sizes. | ||
+ | <pre> | ||
my %files = $doc->files; | my %files = $doc->files; | ||
</pre> | </pre> | ||
+ | We loop over each file belonging to the document, in most cases there will only be one file. | ||
<pre> | <pre> | ||
foreach my $filename (sort keys %files) | foreach my $filename (sort keys %files) | ||
{ | { | ||
my $filepath = $subdirpath.$filename; | my $filepath = $subdirpath.$filename; | ||
− | my $file = $doc->local_path. | + | my $file = $doc->local_path.'/'.$filename; |
+ | </pre> | ||
+ | We need to read the contents of the file and add it to a file in the zip. First we'll create another in-memory file to hold the contents. | ||
+ | <pre> | ||
my $data = ''; | my $data = ''; | ||
open (my $datafh ,'>', \$data); | open (my $datafh ,'>', \$data); | ||
+ | </pre> | ||
+ | We open our file and print it straight out to our in-memory file. | ||
+ | <pre> | ||
open (INFH, "<$file") or die ("Could not open file $file"); | open (INFH, "<$file") or die ("Could not open file $file"); | ||
while (<INFH>) | while (<INFH>) | ||
Line 184: | Line 273: | ||
} | } | ||
close INFH; | close INFH; | ||
+ | </pre> | ||
+ | Then we add the file data to our file. | ||
+ | <pre> | ||
$zip->add_file($filepath, $data); | $zip->add_file($filepath, $data); | ||
− | |||
− | |||
− | |||
− | |||
</pre> | </pre> | ||
+ | Finally we add the [http://en.wikipedia.org/wiki/Document_Object_Model DOM] object for our eprint to the index. | ||
+ | <pre> | ||
+ | $index .= EPrints::XML::to_string($div); | ||
+ | </pre> | ||
+ | |||
+ | === Finishing Off === | ||
+ | After finishing off our index file we add it to the zip file. | ||
+ | <pre> | ||
+ | $index .= '</body></html>'; | ||
+ | $zip->add_file('eprints-search/index.htm',$index); | ||
+ | </pre> | ||
+ | |||
+ | If a file handle has been provided we write to it, otherwise we write to the scalar file handle created earlier. We then return in the usual fashion. | ||
<pre> | <pre> | ||
− | if (defined $opts{ | + | if (defined $opts{fh}) |
{ | { | ||
− | $zip->write_filehandle($opts{ | + | $zip->write_filehandle($opts{fh},'zip'); |
return undef; | return undef; | ||
} | } | ||
− | $zip->write_filehandle($FH, | + | $zip->write_filehandle($FH,'zip'); |
return $archive; | return $archive; | ||
</pre> | </pre> | ||
= Testing Your Plugin = | = Testing Your Plugin = | ||
− | Restart your webserver and test the plugin as | + | Restart your webserver and test the plugin as [[Contribute:_Plugins/ExportPluginsHello| before]]. |
+ | |||
+ | == Sample Output == | ||
+ | [[Image:Expzipv2.png]] | ||
+ | |||
+ | The accompanying HTML index. | ||
+ | |||
+ | [[Image:Expzip2.png]] |
Latest revision as of 13:51, 8 February 2010
Contents
Export Plugin Tutorial 5: Zip
In this tutorial we'll look at packaging the results of a search into a Zip file. We'll create a directory for each eprint, and a sub-directory for each document belonging to that eprint. We'll also add an HTML index file to the archive to make it easier to navigate.
To prepare for this tutorial you should install the Archive::Any::Create module. Running the following command as root, or using sudo, should work.
cpan Archive::Any::Create
Zip.pm
The code in the section below should be placed in a file called Zip.pm in the directory created previously, and MyPlugins should be changed to the name of that directory.
package EPrints::Plugin::Export::MyPlugins::Zip;

# Export plugin that packages a list of eprints into a single Zip archive.
#
# Archive layout:
#   eprints-search/index.htm                 HTML index linking to each main file
#   eprints-search/<eprintid>/doc<N>/<file>  the files of the N-th document
#
# Archive::Any::Create is deliberately NOT loaded with "use": it is not
# shipped with EPrints, so the constructor loads it at run time and hides
# the plugin when it is unavailable instead of failing at compile time.

use strict;
use warnings;

our @ISA = ('EPrints::Plugin::Export');

# Constructor.
#
# Registers the plugin as accepting lists of eprints and producing
# 'application/zip' output with a '.zip' suffix. If Archive::Any::Create
# cannot be loaded the plugin is made invisible and an error is recorded
# in $self->{error}.
sub new
{
    my ($class, %opts) = @_;

    my $self = $class->SUPER::new(%opts);

    $self->{name}     = 'Zip';
    $self->{accept}   = [ 'list/eprint' ];
    $self->{visible}  = 'all';
    $self->{suffix}   = '.zip';
    $self->{mimetype} = 'application/zip';

    # Load the optional dependency; hide the plugin if it is missing.
    my $rc = EPrints::Utils::require_if_exists('Archive::Any::Create');
    unless ($rc)
    {
        $self->{visible} = '';
        $self->{error}   = 'Unable to load required module Archive::Any::Create';
    }

    return $self;
}

# Build the Zip archive for a list of eprints.
#
# Options (passed as a hash):
#   list - the list object whose get_records method yields the eprints
#   fh   - optional file handle; when defined the archive is written to it
#
# Returns undef when 'fh' was supplied, otherwise the archive as a string.
# Dies if a document file cannot be opened.
sub output_list
{
    my ($plugin, %opts) = @_;

    # In-memory file that receives the archive when no 'fh' is supplied.
    my $archive = '';
    open (my $FH, '>', \$archive) or die("Could not create filehandle: $!");
    binmode $FH;

    my $zip = Archive::Any::Create->new;

    # Header of the HTML index; the per-eprint sections are appended below.
    my $index = <<'END';
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8"/>
<title>EPrints Search Results</title>
</head>
<body>
END

    my $session = $plugin->{session};

    foreach my $dataobj ($opts{list}->get_records)
    {
        # One <div> per eprint: its title followed by a list of documents.
        my $div = $session->make_element('div');
        my $heading = $session->make_element('h2');
        $heading->appendChild($session->make_text($dataobj->get_value('title')));
        $div->appendChild($heading);

        my $uldoc = $session->make_element('ul');
        $div->appendChild($uldoc);

        # Directories are implied by file paths; nothing is created explicitly.
        my $dirpath = 'eprints-search/'.$dataobj->get_id().'/';

        my $i = 1;
        foreach my $doc ($dataobj->get_all_documents)
        {
            my $subdirpath = $dirpath."doc$i/";
            my %files = $doc->files;

            my $lidoc = $session->make_element('li');
            $uldoc->appendChild($lidoc);

            # The link is relative to eprints-search/index.htm.
            my $adoc = $session->make_element('a', href=>$dataobj->get_id."/doc$i/".$doc->get_main);
            $lidoc->appendChild($adoc);

            # Prefer the format description as link text, else the file name.
            if ($doc->exists_and_set('formatdesc'))
            {
                $adoc->appendChild($session->make_text($doc->get_value('formatdesc')));
            }
            else
            {
                $adoc->appendChild($session->make_text($doc->get_main));
            }

            foreach my $filename (sort keys %files)
            {
                my $filepath = $subdirpath.$filename;
                my $file = $doc->local_path.'/'.$filename;

                # Only regular files are archived.
                if (-d $file)
                {
                    next;
                }

                # Copy the file into an in-memory buffer. Both handles are
                # put in binary mode so documents such as PDFs are copied
                # byte for byte, and three-argument open keeps unusual file
                # names from being interpreted as an open mode.
                my $data = '';
                open (my $datafh, '>', \$data) or die("Could not create filehandle: $!");
                binmode $datafh;

                open (my $infh, '<', $file) or die("Could not open file $file: $!");
                binmode $infh;
                while (my $chunk = <$infh>)
                {
                    print {$datafh} $chunk;
                }
                close $infh;
                close $datafh;

                $zip->add_file($filepath, $data);
            }
            $i++;
        }
        $index .= EPrints::XML::to_string($div);
    }

    $index .= '</body></html>';
    $zip->add_file('eprints-search/index.htm',$index);

    if (defined $opts{fh})
    {
        $zip->write_filehandle($opts{fh},'zip');
        return undef;
    }

    $zip->write_filehandle($FH,'zip');
    # Close the in-memory handle so $archive is complete before returning it.
    close $FH;
    return $archive;
}

1;
In More Detail
Constructor
For the sake of simplicity this plugin will only deal with lists of eprints. This avoids some code duplication, and it would be fairly easy to modify the plugin to deal with both individual eprints and lists of eprints sensibly.
$self->{accept} = [ 'list/eprint' ];
The file extension and MIME type are set to values appropriate for Zip files.
$self->{suffix} = '.zip'; $self->{mimetype} = 'application/zip';
We need to import a module that is not included with EPrints for creating zip files. We use the EPrints::Utils::require_if_exists function to check if the module exists, and load it if it does. We then check the value returned from that function, and make the plugin invisible if it failed.
my $rc = EPrints::Utils::require_if_exists('Archive::Any::Create'); unless ($rc) { $self->{visible} = ''; $self->{error} = 'Unable to load required module Archive::Any::Create'; }
List Handling
Setting Up
Here we set up an in-memory file for the Zip, and create an Archive object.
my $archive = ''; open (my $FH, '>', \$archive) or die("Could not create filehandle: $!"); my $zip = Archive::Any::Create->new;
Here we begin to set up the HTML file that we'll add to our archive for navigation. First we set up a header.
my $index = <<END; <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> <head> <meta http-equiv="Content-Type" content="text/html;charset=utf-8"/> <title>EPrints Search Results</title> </head> <body> END
Now we get the Session object; we'll be using it to manipulate DOM objects later.
my $session = $plugin->{session};
Handling DataObjs
We loop over the DataObjs as we have done before.
This time we set up some DOM objects to be added to our index. Each eprint will have its title printed out, followed by a list of documents.
my $div = $session->make_element('div'); my $heading = $session->make_element('h2'); $heading->appendChild($session->make_text($dataobj->get_value('title'))); $div->appendChild($heading); my $uldoc = $session->make_element('ul'); $div->appendChild($uldoc);
We create a directory for each eprint. Note that it is not necessary to explicitly create a directory; we simply have to set the appropriate file path. However, this means that a directory only appears in the archive once a file has been added to it: an eprint with no files will have no directory at all, rather than an empty one.
my $dirpath = 'eprints-search/'.$dataobj->get_id().'/';
Dealing With Documents
We then loop over all the documents belonging to each DataObj. The get_all_documents method returns an array of Document objects.
my $i = 1; foreach my $doc ($dataobj->get_all_documents) { my $subdirpath = $dirpath."doc$i/";
Here we create a list item for the document containing a link to the main file.
my $lidoc = $session->make_element('li'); $uldoc->appendChild($lidoc); my $adoc = $session->make_element('a', href=>$dataobj->get_id."/doc$i/".$doc->get_main); $lidoc->appendChild($adoc);
If a description of the main file has been set we use that as the link text, otherwise we use the filename.
if ($doc->exists_and_set('formatdesc')) { $adoc->appendChild($session->make_text($doc->get_value('formatdesc'))); } else { $adoc->appendChild($session->make_text($doc->get_main)); }
Dealing With Files
The files method of the Document object returns a hash whose keys are file names and values are file sizes.
my %files = $doc->files;
We loop over each file belonging to the document; in most cases there will only be one file.
foreach my $filename (sort keys %files) { my $filepath = $subdirpath.$filename; my $file = $doc->local_path.'/'.$filename;
We need to read the contents of the file and add it to a file in the zip. First we'll create another in-memory file to hold the contents.
my $data = ''; open (my $datafh ,'>', \$data);
We open our file and print it straight out to our in-memory file.
open (INFH, "<$file") or die ("Could not open file $file"); while (<INFH>) { print {$datafh} $_; } close INFH;
Then we add the file data to the zip archive under the path we built earlier.
$zip->add_file($filepath, $data);
Finally we add the DOM object for our eprint to the index.
$index .= EPrints::XML::to_string($div);
Finishing Off
After finishing off our index file we add it to the zip file.
$index .= '</body></html>'; $zip->add_file('eprints-search/index.htm',$index);
If a file handle has been provided we write to it, otherwise we write to the scalar file handle created earlier. We then return in the usual fashion.
if (defined $opts{fh}) { $zip->write_filehandle($opts{fh},'zip'); return undef; } $zip->write_filehandle($FH,'zip'); return $archive;
Testing Your Plugin
Restart your webserver and test the plugin as before.
Sample Output
The accompanying HTML index.