Difference between revisions of "Contribute: Plugins/ExportPluginsExcel"
|  (→In More Detail) | |||
| (14 intermediate revisions by one other user not shown) | |||
| Line 1: | Line 1: | ||
| + | [[Category:Contribute]] | ||
| + | [[Category:Plugins]] | ||
| =  Export Plugin Tutorial 4: Excel = | =  Export Plugin Tutorial 4: Excel = | ||
| In this tutorial and the next one we'll look at exporting files in non-text formats. Here we will explore exporting metadata in Excel format (pre Microsoft Office 2007 which uses an XML based format). | In this tutorial and the next one we'll look at exporting files in non-text formats. Here we will explore exporting metadata in Excel format (pre Microsoft Office 2007 which uses an XML based format). | ||
| − | To prepare for this tutorial you should install the Spreadsheet::Excel module. The following command as root, or using sudo should work. | + | To prepare for this tutorial you should install the [http://search.cpan.org/dist/Spreadsheet-WriteExcel/lib/Spreadsheet/WriteExcel.pm Spreadsheet::Excel] module. The following command as root, or using sudo should work. | 
| <pre> | <pre> | ||
| Line 10: | Line 12: | ||
| = Excel.pm = | = Excel.pm = | ||
| + | The code in the section below should be placed in a file called Excel.pm in the directory created previously, and MyPlugins should be changed to the name of that directory. | ||
| <pre> | <pre> | ||
| package EPrints::Plugin::Export::MyPlugins::Excel; | package EPrints::Plugin::Export::MyPlugins::Excel; | ||
| − | @ISA = ( | + | @ISA = ('EPrints::Plugin::Export'); | 
| use strict; | use strict; | ||
| − | |||
| sub new | sub new | ||
| Line 24: | Line 26: | ||
|    my $self = $class->SUPER::new(%opts); |    my $self = $class->SUPER::new(%opts); | ||
| − |    $self->{name} =  | + |    $self->{name} = 'Excel'; | 
|    $self->{accept} = ['list/eprint']; |    $self->{accept} = ['list/eprint']; | ||
| − |    $self->{visible} =  | + |    $self->{visible} = 'all'; | 
| − |    $self->{suffix} =  | + |    $self->{suffix} = '.xls'; | 
| − |    $self->{mimetype} =  | + |    $self->{mimetype} = 'application/vnd.ms-excel'; | 
| + | |||
| + |   my $rc = EPrints::Utils::require_if_exists('Spreadsheet::WriteExcel'); | ||
| + |   unless ($rc) | ||
| + |   { | ||
| + |     $self->{visible} = ''; | ||
| + |     $self->{error} = 'Unable to load required module Spreadsheet::WriteExcel'; | ||
| + |   } | ||
|    return $self; |    return $self; | ||
| Line 41: | Line 50: | ||
|    open(my $FH,'>',\$output); |    open(my $FH,'>',\$output); | ||
| − |    if (defined $opts{ | + |    if (defined $opts{fh}) | 
|    { |    { | ||
| − |      $workbook = Spreadsheet::WriteExcel->new(\*{$opts{ | + |      $workbook = Spreadsheet::WriteExcel->new(\*{$opts{fh}}); | 
|      die("Unable to create spreadsheet: $!")unless defined $workbook; |      die("Unable to create spreadsheet: $!")unless defined $workbook; | ||
|    } |    } | ||
| Line 56: | Line 65: | ||
|    my $i = 0; |    my $i = 0; | ||
|    my @fields = |    my @fields = | ||
| − |    $plugin->{session}->get_repository->get_dataset( | + |    $plugin->{session}->get_repository->get_dataset('archive')->get_fields; | 
|    foreach my $field (@fields) |    foreach my $field (@fields) | ||
| Line 72: | Line 81: | ||
|        if ($dataobj->exists_and_set($field->get_name)) |        if ($dataobj->exists_and_set($field->get_name)) | ||
|        { |        { | ||
| − |          if ($field->get_property( | + |          if ($field->get_property('multiple')) | 
|          { |          { | ||
| − |            if ($field->{type} eq  | + |            if ($field->{type} eq 'name') | 
|            { |            { | ||
|              my $namelist = ''; |              my $namelist = ''; | ||
|              foreach my $name (@{$dataobj->get_value_raw($field->get_name)}) |              foreach my $name (@{$dataobj->get_value_raw($field->get_name)}) | ||
|              { |              { | ||
| − |                $namelist .= $name->{family} .  | + |                $namelist .= $name->{family} . ',' . $name->{given} . ';'; | 
|              } |              } | ||
|              $worksheet->write($i, $j, $namelist); |              $worksheet->write($i, $j, $namelist); | ||
|            } |            } | ||
| − |            elsif ($field->{type} eq  | + |            elsif ($field->{type} eq 'compound') | 
|            { |            { | ||
| − |              $worksheet->write($i, $j,  | + |              $worksheet->write($i, $j, 'COMPOUND'); | 
|            } |            } | ||
|            else |            else | ||
| Line 104: | Line 113: | ||
|    $workbook->close; |    $workbook->close; | ||
| − |    if (defined $opts{ | + |    if (defined $opts{fh}) | 
|    { |    { | ||
|      return undef; |      return undef; | ||
| Line 117: | Line 126: | ||
| = In More Detail = | = In More Detail = | ||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| == Constructor == | == Constructor == | ||
| + | For the sake of simplicity this plugin will only deal with lists of eprints. This avoids some code duplication, and it would be fairly easy to modify the plugin to deal with both individual eprints and lists of eprints sensibly. | ||
| <pre>    | <pre>    | ||
|    $self->{accept} = ['list/eprint']; |    $self->{accept} = ['list/eprint']; | ||
| </pre> | </pre> | ||
| + | The file extension and [http://en.wikipedia.org/wiki/MIME MIME] type are set to values appropriate for Excel files. | ||
| + | <pre> | ||
| + |   $self->{suffix} = '.xls'; | ||
| + |   $self->{mimetype} = 'application/vnd.ms-excel'; | ||
| + | </pre> | ||
| + | |||
| + | We need to import a module that is not included with EPrints for creating zip files. We use the EPrints::Utils::require_if_exists function to check if the module exists, and load it if it does. We then check the value returned from that function, and make the plugin invisible if it failed. | ||
| <pre> | <pre> | ||
| − |    $self->{ | + |    my $rc = EPrints::Utils::require_if_exists('Spreadsheet::WriteExcel'); | 
| − | + |   unless ($rc) | |
| + |   { | ||
| + |     $self->{visible} = ''; | ||
| + |     $self->{error} = 'Unable to load required module Spreadsheet::WriteExcel'; | ||
| + |   } | ||
| </pre> | </pre> | ||
| == List Handling == | == List Handling == | ||
| + | === Setting Up a Workbook === | ||
| + | Here we create a new Excel workbook. We start by creating a file handle using a scalar rather than a filename, this creates an in-memory file. Then depending on if a file handle has been supplied or not we create a workbook object that will be written to that file handle or the scalar file handle we just created. | ||
| + | |||
| <pre> | <pre> | ||
| − | |||
| − | |||
| − | |||
|    my $workbook; |    my $workbook; | ||
| Line 142: | Line 158: | ||
|    open(my $FH,'>',\$output); |    open(my $FH,'>',\$output); | ||
| − |    if (defined $opts{ | + |    if (defined $opts{fh}) | 
|    { |    { | ||
| − |      $workbook = Spreadsheet::WriteExcel->new(\*{$opts{ | + |      $workbook = Spreadsheet::WriteExcel->new(\*{$opts{fh}}); | 
|      die("Unable to create spreadsheet: $!")unless defined $workbook; |      die("Unable to create spreadsheet: $!")unless defined $workbook; | ||
|    } |    } | ||
| Line 152: | Line 168: | ||
|      die("Unable to create spreadsheet: $!")unless defined $workbook; |      die("Unable to create spreadsheet: $!")unless defined $workbook; | ||
|    } |    } | ||
| + | </pre> | ||
| + | === Handling DataObjs === | ||
| + | To start adding data to the Excel file we have to create a worksheet. | ||
| + | <pre> | ||
|    my $worksheet = $workbook->add_worksheet(); |    my $worksheet = $workbook->add_worksheet(); | ||
| + | </pre> | ||
| − | + | To the first row of our worksheet we add the names of all the metadata fields that can be associated with eprints. We get the session associated with the plugin, and then the repository associated with that session. We then get the DataSet "archive" from that repository and call the get_fields method. That method returns an array of MetaField objects. | |
| + | <pre> | ||
|    my @fields = |    my @fields = | ||
| − |    $plugin->{session}->get_repository->get_dataset( | + |    $plugin->{session}->get_repository->get_dataset('archive')->get_fields; | 
| + | </pre> | ||
| + | Here we loop over each field and write it's name to our worksheet. | ||
| + | <pre> | ||
|    foreach my $field (@fields) |    foreach my $field (@fields) | ||
|    { |    { | ||
| Line 164: | Line 189: | ||
|      $i++; |      $i++; | ||
|    } |    } | ||
| + | </pre> | ||
| − | + | We now loop over each DataObj in our list, and over each MetaField we found earlier. | |
| + | <pre> | ||
|    foreach my $dataobj ($opts{list}->get_records) |    foreach my $dataobj ($opts{list}->get_records) | ||
|    { |    { | ||
| Line 171: | Line 198: | ||
|      foreach my $field (@fields) |      foreach my $field (@fields) | ||
|      { |      { | ||
| + | </pre> | ||
| + | |||
| + | We only write something to the worksheet if the field can apply to the DataObj and is set. Scalar values are simply written to the worksheet. | ||
| + | <pre> | ||
|        if ($dataobj->exists_and_set($field->get_name)) |        if ($dataobj->exists_and_set($field->get_name)) | ||
| − | + | </pre> | |
| − |          if ($field->get_property( | + | |
| − | + | The plugin handles fields which can take multiple values in a number of ways.    | |
| − |            if ($field->{type} eq  | + | <pre> | 
| + |          if ($field->get_property('multiple')) | ||
| + | </pre> | ||
| + | Names are handled specially and are formatted with the family name followed by a comma, the given name or initial and then a semi-colon. | ||
| + | <pre> | ||
| + |            if ($field->{type} eq 'name') | ||
|            { |            { | ||
|              my $namelist = ''; |              my $namelist = ''; | ||
|              foreach my $name (@{$dataobj->get_value_raw($field->get_name)}) |              foreach my $name (@{$dataobj->get_value_raw($field->get_name)}) | ||
|              { |              { | ||
| − |                $namelist .= $name->{family} .  | + |                $namelist .= $name->{family} . ',' . $name->{given} . ';'; | 
|              } |              } | ||
|              $worksheet->write($i, $j, $namelist); |              $worksheet->write($i, $j, $namelist); | ||
|            } |            } | ||
| − |            elsif ($field->{type} eq  | + | </pre> | 
| + | Fields which have a compound type are not handled, and 'COMPOUND' is written to the worksheet. | ||
| + | <pre> | ||
| + |            elsif ($field->{type} eq 'compound') | ||
|            { |            { | ||
| − |              $worksheet->write($i, $j,  | + |              $worksheet->write($i, $j, 'COMPOUND'); | 
|            } |            } | ||
| + | </pre> | ||
| + | For most multiple fields each value is taken and concatenated, separated by semi-colons. | ||
| + | <pre> | ||
|            else |            else | ||
|            { |            { | ||
| Line 193: | Line 235: | ||
|                          join(';',@{$dataobj->get_value($field->get_name)})); |                          join(';',@{$dataobj->get_value($field->get_name)})); | ||
|            } |            } | ||
| − | + | </pre> | |
| − | + | ||
| − | + | === Finishing Up === | |
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| + | We first need to close the workbook to ensure that the data is written to the file handles, and then we can return in the usual fashion.  | ||
| + | <pre> | ||
|    $workbook->close; |    $workbook->close; | ||
| − |    if (defined $opts{ | + |    if (defined $opts{fh}) | 
|    { |    { | ||
|      return undef; |      return undef; | ||
| Line 211: | Line 249: | ||
|    return $output; |    return $output; | ||
| − | |||
| − | |||
| − | |||
| − | |||
| </pre> | </pre> | ||
| = Testing Your Plugin = | = Testing Your Plugin = | ||
| − | Restart your webserver and test the plugin as  | + | Restart your webserver and test the plugin as [[Contribute:_Plugins/ExportPluginsHello| before]]. | 
| + | |||
| + | == Sample Output == | ||
| + | [[Image:Expexcel.png]] | ||
Latest revision as of 13:33, 8 February 2010
Contents
Export Plugin Tutorial 4: Excel
In this tutorial and the next one we'll look at exporting files in non-text formats. Here we will explore exporting metadata in Excel format (pre Microsoft Office 2007 which uses an XML based format).
To prepare for this tutorial you should install the Spreadsheet::Excel module. The following command as root, or using sudo should work.
cpan Spreadsheet::Excel
Excel.pm
The code in the section below should be placed in a file called Excel.pm in the directory created previously, and MyPlugins should be changed to the name of that directory.
package EPrints::Plugin::Export::MyPlugins::Excel;
@ISA = ('EPrints::Plugin::Export');
use strict;
sub new
{
  my ($class, %opts) = @_;
  my $self = $class->SUPER::new(%opts);
  $self->{name} = 'Excel';
  $self->{accept} = ['list/eprint'];
  $self->{visible} = 'all';
  $self->{suffix} = '.xls';
  $self->{mimetype} = 'application/vnd.ms-excel';
  my $rc = EPrints::Utils::require_if_exists('Spreadsheet::WriteExcel');
  unless ($rc)
  {
    $self->{visible} = '';
    $self->{error} = 'Unable to load required module Spreadsheet::WriteExcel';
  }
  return $self;
}
sub output_list
{
  my ($plugin, %opts) = @_;
  my $workbook;
  my $output;
  open(my $FH,'>',\$output);
  if (defined $opts{fh})
  {
    $workbook = Spreadsheet::WriteExcel->new(\*{$opts{fh}});
    die("Unable to create spreadsheet: $!")unless defined $workbook;
  }
  else
  {
    $workbook = Spreadsheet::WriteExcel->new($FH);
    die("Unable to create spreadsheet: $!")unless defined $workbook;
  }
  my $worksheet = $workbook->add_worksheet();
  my $i = 0;
  my @fields =
  $plugin->{session}->get_repository->get_dataset('archive')->get_fields;
  foreach my $field (@fields)
  {
    $worksheet->write(0, $i, $field->get_name);
    $i++;
  }
  $i = 1;
  foreach my $dataobj ($opts{list}->get_records)
  {
    my $j = 0;
    foreach my $field (@fields)
    {
      if ($dataobj->exists_and_set($field->get_name))
      {
        if ($field->get_property('multiple'))
        {
          if ($field->{type} eq 'name')
          {
            my $namelist = '';
            foreach my $name (@{$dataobj->get_value_raw($field->get_name)})
            {
              $namelist .= $name->{family} . ',' . $name->{given} . ';';
            }
            $worksheet->write($i, $j, $namelist);
          }
          elsif ($field->{type} eq 'compound')
          {
            $worksheet->write($i, $j, 'COMPOUND');
          }
          else
          {
            $worksheet->write($i, $j,
                        join(';',@{$dataobj->get_value($field->get_name)}));
          }
        }
        else {
          $worksheet->write($i, $j, $dataobj->get_value($field->get_name));
        }
      }
      $j++;
    }
    $i++;
  }
  $workbook->close;
  if (defined $opts{fh})
  {
    return undef;
  }
  return $output;
}
1;
In More Detail
Constructor
For the sake of simplicity this plugin will only deal with lists of eprints. This avoids some code duplication, and it would be fairly easy to modify the plugin to deal with both individual eprints and lists of eprints sensibly.
  
  $self->{accept} = ['list/eprint'];
The file extension and MIME type are set to values appropriate for Excel files.
  $self->{suffix} = '.xls';
  $self->{mimetype} = 'application/vnd.ms-excel';
We need to import a module that is not included with EPrints for creating zip files. We use the EPrints::Utils::require_if_exists function to check if the module exists, and load it if it does. We then check the value returned from that function, and make the plugin invisible if it failed.
  my $rc = EPrints::Utils::require_if_exists('Spreadsheet::WriteExcel');
  unless ($rc)
  {
    $self->{visible} = '';
    $self->{error} = 'Unable to load required module Spreadsheet::WriteExcel';
  }
List Handling
Setting Up a Workbook
Here we create a new Excel workbook. We start by creating a file handle using a scalar rather than a filename, this creates an in-memory file. Then depending on if a file handle has been supplied or not we create a workbook object that will be written to that file handle or the scalar file handle we just created.
  my $workbook;
  my $output;
  open(my $FH,'>',\$output);
  if (defined $opts{fh})
  {
    $workbook = Spreadsheet::WriteExcel->new(\*{$opts{fh}});
    die("Unable to create spreadsheet: $!")unless defined $workbook;
  }
  else
  {
    $workbook = Spreadsheet::WriteExcel->new($FH);
    die("Unable to create spreadsheet: $!")unless defined $workbook;
  }
Handling DataObjs
To start adding data to the Excel file we have to create a worksheet.
my $worksheet = $workbook->add_worksheet();
To the first row of our worksheet we add the names of all the metadata fields that can be associated with eprints. We get the session associated with the plugin, and then the repository associated with that session. We then get the DataSet "archive" from that repository and call the get_fields method. That method returns an array of MetaField objects.
  my @fields =
  $plugin->{session}->get_repository->get_dataset('archive')->get_fields;
Here we loop over each field and write it's name to our worksheet.
  foreach my $field (@fields)
  {
    $worksheet->write(0, $i, $field->get_name);
    $i++;
  }
We now loop over each DataObj in our list, and over each MetaField we found earlier.
  foreach my $dataobj ($opts{list}->get_records)
  {
    my $j = 0;
    foreach my $field (@fields)
    {
We only write something to the worksheet if the field can apply to the DataObj and is set. Scalar values are simply written to the worksheet.
if ($dataobj->exists_and_set($field->get_name))
The plugin handles fields which can take multiple values in a number of ways.
        if ($field->get_property('multiple'))
Names are handled specially and are formatted with the family name followed by a comma, the given name or initial and then a semi-colon.
          if ($field->{type} eq 'name')
          {
            my $namelist = '';
            foreach my $name (@{$dataobj->get_value_raw($field->get_name)})
            {
              $namelist .= $name->{family} . ',' . $name->{given} . ';';
            }
            $worksheet->write($i, $j, $namelist);
          }
Fields which have a compound type are not handled, and 'COMPOUND' is written to the worksheet.
          elsif ($field->{type} eq 'compound')
          {
            $worksheet->write($i, $j, 'COMPOUND');
          }
For most multiple fields each value is taken and concatenated, separated by semi-colons.
          else
          {
            $worksheet->write($i, $j,
                        join(';',@{$dataobj->get_value($field->get_name)}));
          }
Finishing Up
We first need to close the workbook to ensure that the data is written to the file handles, and then we can return in the usual fashion.
  $workbook->close;
  if (defined $opts{fh})
  {
    return undef;
  }
  return $output;
Testing Your Plugin
Restart your webserver and test the plugin as before.

