Contribute: Plugins/ImportPluginsCSV

From EPrints Documentation
Revision as of 15:36, 14 September 2007 by Tom (talk | contribs) (Import Plugin Tutorial 1: CSV)
Jump to: navigation, search

Import Plugin Tutorial 1: CSV

In this tutorial we will look at creating a relatively simple plugin that imports eprints into our repository by reading files of comma-separated values (CSV). We won't be dealing with documents and files, but will focus on importing eprint metadata.

Import plugins are inherently more complicated than export plugins because of the error checking that must be done; however, in this example error checking has been kept to a minimum for simplicity. In a "real" plugin you should check that the appropriate metadata fields are set for a given type of eprint, and unfortunately there appears to be no quick way to do this.

Before You Start

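It is sensible to separate the plugins you create for EPrints from those included with it. Create a directory for your import plugins in the main import plugin directory (usually /opt/eprints3/perl_lib/EPrints/Plugin/Import), for example /opt/eprints3/perl_lib/EPrints/Plugin/Import/MyPlugins. The directory names must match the package name used in the plugin (EPrints::Plugin::Import::MyPlugins::CSV), including capitalisation.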

To prepare for this tutorial you should install the Text::CSV module. Running the following command as root, or with sudo, should work.

cpan Text::CSV

# Namespace of the CSV import plugin defined in this file.
package EPrints::Plugin::Import::MyPlugins::CSV;

# Load the parent class named in @ISA below.
use EPrints::Plugin::Import::TextFile;
use strict;

# Inherit from the TextFile import plugin; new() below calls SUPER::new on it.
our @ISA = ('EPrints::Plugin::Import::TextFile');

# Constructor for the CSV import plugin.
#
# Arguments:
#   $class  - class name the constructor was invoked on
#   %params - constructor parameters, passed through unchanged to the
#             parent class constructor
#
# Returns the plugin object. If Text::CSV cannot be loaded, the object is
# still returned, but with an empty 'visible' setting and an 'error'
# message describing the missing module.
sub new {
    my( $class, %params ) = @_;

    my $self = $class->SUPER::new( %params );

    $self->{name} = 'CSV';
    $self->{visible} = 'all';
    $self->{produce} = [ 'list/eprint' ];

    # Text::CSV is an optional dependency: load it only if it is installed.
    my $rc = EPrints::Utils::require_if_exists('Text::CSV');
    unless( $rc ) {
        # Without the parser the plugin cannot work, so clear its
        # visibility and record why.
        $self->{visible} = '';
        $self->{error} = 'Failed to load required module Text::CSV';
    }

    return $self;
}

# Import eprints from an open CSV file handle.
#
# Arguments:
#   $plugin - the import plugin object
#   %opts   - named options; this method reads:
#               fh      - file handle to read the CSV text from
#               dataset - dataset passed to epdata_to_dataobj and to the
#                         returned list
#
# The first line is parsed as the header row of field names. Every
# following line is converted with convert_input() and handed to
# epdata_to_dataobj(); rows that cannot be converted are skipped.
#
# Returns an EPrints::List of the ids of the created objects, or undef if
# the header row cannot be parsed.
sub input_fh {
    my( $plugin, %opts ) = @_;
    my @ids;
    my $fh = $opts{fh};
    my $csv = Text::CSV->new();
    my @records = <$fh>;
    my @fields;

    # The first line holds the field names for every following row.
    if ($csv->parse(shift @records)) {
        @fields = $csv->fields();
    }
    else {
        return undef;
    }

    foreach my $row (@records) {
        # convert_input expects the header (re-joined with commas) followed
        # by the raw text of the row.
        my @input_data = (join(',',@fields),$row);

        my $epdata = $plugin->convert_input(\@input_data);
        next unless defined $epdata;

        my $dataobj = $plugin->epdata_to_dataobj($opts{dataset},$epdata);
        if( defined $dataobj ) {
            push @ids, $dataobj->get_id;
        }
    }

    return EPrints::List->new(
            dataset => $opts{dataset},
            session => $plugin->{session},
            ids=>\@ids );
}

# Convert one CSV row into an epdata hash reference.
#
# Arguments:
#   $plugin - the import plugin object
#   (second argument) - array reference holding two strings:
#       [0] the header field names joined with commas
#       [1] the raw CSV text of the row to convert
#
# Columns whose name is not a field of the 'archive' dataset are ignored.
# For fields with the 'multiple' property the cell is split on ';'. If such
# a field has type 'name', each part is further split into
# "family,given[,lineage]"; parts that do not match are dropped.
#
# Returns a hash reference mapping field name to value, or undef if the row
# cannot be parsed or does not have the same number of cells as the header
# (a 'Row length mismatch' warning is issued in the latter case).
sub convert_input {
    my $plugin = shift;
    my @input = @{shift @_};
    my $csv = Text::CSV->new();

    my @record;
    if ($csv->parse($input[1])) {
        @record = $csv->fields();
    }
    else {
        return undef;
    }

    my @fields = split(',',$input[0]);
    # Check length of row
    if (scalar @fields != scalar @record) {
        $plugin->warning('Row length mismatch');
        return undef;
    }

    my %output = ();

    my $dataset = $plugin->{session}->{repository}->get_dataset('archive');

    # Walk header and row in step by index so that a skipped column can
    # never shift the values of the columns after it.
    for my $i (0 .. $#fields) {
        my $field = $fields[$i];

        # Check field exists
        next unless $dataset->has_field($field);

        my $metafield = $dataset->get_field($field);

        # Check for multiple
        if ($metafield->get_property('multiple')) {
            my @values = split(';',$record[$i]);

            # Check for name
            if ($metafield->{type} eq 'name') {
                my @names = ();

                foreach my $value (@values) {
                    # family,given with an optional ,lineage suffix
                    next unless ($value =~ /^(.*?),(.*?)(,(.*?))?$/);
                    push @names, { family => $1, given => $2, lineage => $4 };
                }

                $output{$field} = \@names;
            }
            else {
                $output{$field} = \@values;
            }
        }
        else {
            $output{$field} = $record[$i];
        }
    }

    return \%output;
}


In More Detail

# Namespace of the CSV import plugin defined in this file.
package EPrints::Plugin::Import::MyPlugins::CSV;

# Load the parent class named in @ISA below.
use EPrints::Plugin::Import::TextFile;
use strict;

# Inherit from the TextFile import plugin; new() below calls SUPER::new on it.
our @ISA = ('EPrints::Plugin::Import::TextFile');

# Constructor for the CSV import plugin.
#
# Arguments:
#   $class  - class name the constructor was invoked on
#   %params - constructor parameters, passed through unchanged to the
#             parent class constructor
#
# Returns the plugin object. If Text::CSV cannot be loaded, the object is
# still returned, but with an empty 'visible' setting and an 'error'
# message describing the missing module.
sub new {
    my( $class, %params ) = @_;

    my $self = $class->SUPER::new( %params );

    $self->{name} = 'CSV';
    $self->{visible} = 'all';
    $self->{produce} = [ 'list/eprint' ];

    # Text::CSV is an optional dependency: load it only if it is installed.
    my $rc = EPrints::Utils::require_if_exists('Text::CSV');
    unless( $rc ) {
        # Without the parser the plugin cannot work, so clear its
        # visibility and record why.
        $self->{visible} = '';
        $self->{error} = 'Failed to load required module Text::CSV';
    }

    return $self;
}

# Import eprints from an open CSV file handle.
#
# Arguments:
#   $plugin - the import plugin object
#   %opts   - named options; this method reads:
#               fh      - file handle to read the CSV text from
#               dataset - dataset passed to epdata_to_dataobj and to the
#                         returned list
#
# The first line is parsed as the header row of field names. Every
# following line is converted with convert_input() and handed to
# epdata_to_dataobj(); rows that cannot be converted are skipped.
#
# Returns an EPrints::List of the ids of the created objects, or undef if
# the header row cannot be parsed.
sub input_fh {
    my( $plugin, %opts ) = @_;
    my @ids;
    my $fh = $opts{fh};
    my $csv = Text::CSV->new();
    my @records = <$fh>;
    my @fields;

    # The first line holds the field names for every following row.
    if ($csv->parse(shift @records)) {
        @fields = $csv->fields();
    }
    else {
        return undef;
    }

    foreach my $row (@records) {
        # convert_input expects the header (re-joined with commas) followed
        # by the raw text of the row.
        my @input_data = (join(',',@fields),$row);

        my $epdata = $plugin->convert_input(\@input_data);
        next unless defined $epdata;

        my $dataobj = $plugin->epdata_to_dataobj($opts{dataset},$epdata);
        if( defined $dataobj ) {
            push @ids, $dataobj->get_id;
        }
    }

    return EPrints::List->new(
            dataset => $opts{dataset},
            session => $plugin->{session},
            ids=>\@ids );
}

# Convert one CSV row into an epdata hash reference.
#
# Arguments:
#   $plugin - the import plugin object
#   (second argument) - array reference holding two strings:
#       [0] the header field names joined with commas
#       [1] the raw CSV text of the row to convert
#
# Columns whose name is not a field of the 'archive' dataset are ignored.
# For fields with the 'multiple' property the cell is split on ';'. If such
# a field has type 'name', each part is further split into
# "family,given[,lineage]"; parts that do not match are dropped.
#
# Returns a hash reference mapping field name to value, or undef if the row
# cannot be parsed or does not have the same number of cells as the header
# (a 'Row length mismatch' warning is issued in the latter case).
sub convert_input {
    my $plugin = shift;
    my @input = @{shift @_};
    my $csv = Text::CSV->new();

    my @record;
    if ($csv->parse($input[1])) {
        @record = $csv->fields();
    }
    else {
        return undef;
    }

    my @fields = split(',',$input[0]);
    # Check length of row
    if (scalar @fields != scalar @record) {
        $plugin->warning('Row length mismatch');
        return undef;
    }

    my %output = ();

    my $dataset = $plugin->{session}->{repository}->get_dataset('archive');

    # Walk header and row in step by index so that a skipped column can
    # never shift the values of the columns after it.
    for my $i (0 .. $#fields) {
        my $field = $fields[$i];

        # Check field exists
        next unless $dataset->has_field($field);

        my $metafield = $dataset->get_field($field);

        # Check for multiple
        if ($metafield->get_property('multiple')) {
            my @values = split(';',$record[$i]);

            # Check for name
            if ($metafield->{type} eq 'name') {
                my @names = ();

                foreach my $value (@values) {
                    # family,given with an optional ,lineage suffix
                    next unless ($value =~ /^(.*?),(.*?)(,(.*?))?$/);
                    push @names, { family => $1, given => $2, lineage => $4 };
                }

                $output{$field} = \@names;
            }
            else {
                $output{$field} = \@values;
            }
        }
        else {
            $output{$field} = $record[$i];
        }
    }

    return \%output;
}
