Contribute: Plugins/ImportPluginsCSV
Import Plugin Tutorial 1: CSV
Before You Start
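It is sensible to keep the plugins you create for EPrints separate from those included with it. Create a directory for your import plugins inside the main import plugin directory (usually /opt/eprints3/perl_lib/EPrints/Plugin/Import), for example /opt/eprints3/perl_lib/EPrints/Plugin/Import/MyPlugins. The directory names must match the package name used in the plugin (EPrints::Plugin::Import::MyPlugins::CSV), including capitalisation.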
To prepare for this tutorial you should install the Text::CSV module. Running the following command as root, or with sudo, should work.
cpan Text::CSV
CSV.pm
package EPrints::Plugin::Import::MyPlugins::CSV;
use EPrints::Plugin::Import::TextFile;
use strict;
our @ISA = ('EPrints::Plugin::Import::TextFile');
# Constructor for the CSV import plugin.
#
# Builds the object through the parent class constructor, then records the
# plugin's name, what it produces ('list/eprint') and its visibility.
# If Text::CSV cannot be loaded, 'visible' is set to the empty string and an
# explanatory message is stored in 'error'; otherwise 'visible' is 'all'.
#
# Parameters: the class name followed by a hash of options, which is passed
# through unchanged to the parent constructor.
# Returns: the new plugin object.
sub new
{
    my $class  = shift;
    my %params = @_;

    my $self = $class->SUPER::new( %params );

    $self->{name}    = 'CSV';
    $self->{produce} = [ 'list/eprint' ];

    # The plugin cannot work without Text::CSV, so check for it up front.
    if( EPrints::Utils::require_if_exists('Text::CSV') )
    {
        $self->{visible} = 'all';
    }
    else
    {
        $self->{visible} = '';
        $self->{error}   = 'Failed to load required module Text::CSV';
    }

    return $self;
}
# Read CSV data from a file handle and create one data object per row.
#
# The first line of the input is the header row; its values are used as the
# field names for every following row.  Each remaining line is passed to
# convert_input() together with the header, and the epdata it returns is
# turned into a data object with epdata_to_dataobj().
#
# Options (passed as a hash):
#   fh      - file handle to read the CSV data from
#   dataset - dataset handed to epdata_to_dataobj() and to the returned list
#
# Returns an EPrints::List holding the ids of the objects created, or undef
# (after calling error()) if the header row is missing or cannot be parsed.
sub input_fh
{
    my( $plugin, %opts ) = @_;

    my $fh = $opts{fh};

    # binary => 1 lets Text::CSV accept characters outside printable ASCII
    # (accented names, for example); without it such lines fail to parse.
    my $csv = Text::CSV->new( { binary => 1 } );

    # Read line by line instead of slurping the whole file into memory.
    my $header = <$fh>;
    unless( defined $header )
    {
        $plugin->error( 'CSV input is empty: no header row found' );
        return undef;
    }

    # Drop a leading byte-order mark (decoded, or as raw UTF-8 bytes) so it
    # does not become part of the first field name.
    $header =~ s/^(?:\x{FEFF}|\xEF\xBB\xBF)//;

    unless( $csv->parse( $header ) )
    {
        $plugin->error( $csv->error_input );
        return undef;
    }

    # convert_input() takes the header as one comma-joined string, so field
    # names must not themselves contain commas.
    my $field_names = join( ',', $csv->fields() );

    my @ids;
    while( defined( my $row = <$fh> ) )
    {
        # Blank lines (typically a trailing newline at end of file) are not
        # records; skip them rather than reporting a row length mismatch.
        next if $row =~ /^\s*$/;

        my $epdata = $plugin->convert_input( [ $field_names, $row ] );
        next unless defined $epdata;

        my $dataobj = $plugin->epdata_to_dataobj( $opts{dataset}, $epdata );
        push @ids, $dataobj->get_id if defined $dataobj;
    }

    return EPrints::List->new(
        dataset => $opts{dataset},
        session => $plugin->{session},
        ids     => \@ids );
}
# Convert one CSV row into an epdata hash.
#
# Takes a reference to a two-element array:
#   [0] the header row as a comma-joined string of field names
#   [1] the raw CSV text of the row to convert
#
# Columns whose header is not a field of the 'archive' dataset are ignored.
# Fields with the 'multiple' property are split on ';'.  For fields of type
# 'name' each of those values is read as "family,given[,lineage]", with
# surrounding whitespace removed from each part.
#
# Returns a hash reference of field name => value, or undef if the row cannot
# be parsed (reported via error()) or if its number of columns differs from
# the header (reported via warning()).
sub convert_input
{
    my $plugin = shift;
    my( $header, $row ) = @{ shift @_ };

    # binary => 1 lets Text::CSV accept characters outside printable ASCII
    # (accented names, for example); without it such rows fail to parse.
    my $csv = Text::CSV->new( { binary => 1 } );
    unless( $csv->parse( $row ) )
    {
        $plugin->error( $csv->error_input );
        return undef;
    }
    my @record = $csv->fields();

    # A LIMIT of -1 keeps trailing empty column names; plain split would drop
    # them and make every row of such a file look like a length mismatch.
    my @fields = split( /,/, $header, -1 );

    #Check length of row
    if( scalar @fields != scalar @record )
    {
        $plugin->warning( 'Row length mismatch' );
        return undef;
    }

    my $dataset = $plugin->{session}->{repository}->get_dataset( 'archive' );

    my %output;
    for my $i ( 0 .. $#fields )
    {
        my $field = $fields[$i];

        #Check field exists
        next unless $dataset->has_field( $field );

        my $metafield = $dataset->get_field( $field );

        # Single-valued field: store the cell as it is.
        unless( $metafield->get_property( 'multiple' ) )
        {
            $output{$field} = $record[$i];
            next;
        }

        # Multiple-valued field: individual values are separated by ';'.
        my @values = split( /;/, $record[$i] );

        if( $metafield->{type} ne 'name' )
        {
            $output{$field} = \@values;
            next;
        }

        # Name field: each value is "family,given" with an optional
        # ",lineage"; anything after the second comma belongs to lineage.
        my @names;
        foreach my $value ( @values )
        {
            next if $value =~ /^\s*$/;

            my( $family, $given, $lineage ) = split( /,/, $value, 3 );
            unless( defined $given )
            {
                $plugin->warning( "Ignoring malformed name '$value' in field '$field'" );
                next;
            }

            # "Smith, John" should give 'John', not ' John'.
            foreach my $part ( $family, $given, $lineage )
            {
                next unless defined $part;
                $part =~ s/^\s+//;
                $part =~ s/\s+$//;
            }

            push @names, { family => $family, given => $given, lineage => $lineage };
        }
        $output{$field} = \@names;
    }

    return \%output;
}
1;
In More Detail
package EPrints::Plugin::Import::MyPlugins::CSV;
use EPrints::Plugin::Import::TextFile;
use strict;
our @ISA = ('EPrints::Plugin::Import::TextFile');
# Constructor for the CSV import plugin.
#
# Builds the object through the parent class constructor, then records the
# plugin's name, what it produces ('list/eprint') and its visibility.
# If Text::CSV cannot be loaded, 'visible' is set to the empty string and an
# explanatory message is stored in 'error'; otherwise 'visible' is 'all'.
#
# Parameters: the class name followed by a hash of options, which is passed
# through unchanged to the parent constructor.
# Returns: the new plugin object.
sub new
{
    my $class  = shift;
    my %params = @_;

    my $self = $class->SUPER::new( %params );

    $self->{name}    = 'CSV';
    $self->{produce} = [ 'list/eprint' ];

    # The plugin cannot work without Text::CSV, so check for it up front.
    if( EPrints::Utils::require_if_exists('Text::CSV') )
    {
        $self->{visible} = 'all';
    }
    else
    {
        $self->{visible} = '';
        $self->{error}   = 'Failed to load required module Text::CSV';
    }

    return $self;
}
# Read CSV data from a file handle and create one data object per row.
#
# The first line of the input is the header row; its values are used as the
# field names for every following row.  Each remaining line is passed to
# convert_input() together with the header, and the epdata it returns is
# turned into a data object with epdata_to_dataobj().
#
# Options (passed as a hash):
#   fh      - file handle to read the CSV data from
#   dataset - dataset handed to epdata_to_dataobj() and to the returned list
#
# Returns an EPrints::List holding the ids of the objects created, or undef
# (after calling error()) if the header row is missing or cannot be parsed.
sub input_fh
{
    my( $plugin, %opts ) = @_;

    my $fh = $opts{fh};

    # binary => 1 lets Text::CSV accept characters outside printable ASCII
    # (accented names, for example); without it such lines fail to parse.
    my $csv = Text::CSV->new( { binary => 1 } );

    # Read line by line instead of slurping the whole file into memory.
    my $header = <$fh>;
    unless( defined $header )
    {
        $plugin->error( 'CSV input is empty: no header row found' );
        return undef;
    }

    # Drop a leading byte-order mark (decoded, or as raw UTF-8 bytes) so it
    # does not become part of the first field name.
    $header =~ s/^(?:\x{FEFF}|\xEF\xBB\xBF)//;

    unless( $csv->parse( $header ) )
    {
        $plugin->error( $csv->error_input );
        return undef;
    }

    # convert_input() takes the header as one comma-joined string, so field
    # names must not themselves contain commas.
    my $field_names = join( ',', $csv->fields() );

    my @ids;
    while( defined( my $row = <$fh> ) )
    {
        # Blank lines (typically a trailing newline at end of file) are not
        # records; skip them rather than reporting a row length mismatch.
        next if $row =~ /^\s*$/;

        my $epdata = $plugin->convert_input( [ $field_names, $row ] );
        next unless defined $epdata;

        my $dataobj = $plugin->epdata_to_dataobj( $opts{dataset}, $epdata );
        push @ids, $dataobj->get_id if defined $dataobj;
    }

    return EPrints::List->new(
        dataset => $opts{dataset},
        session => $plugin->{session},
        ids     => \@ids );
}
# Convert one CSV row into an epdata hash.
#
# Takes a reference to a two-element array:
#   [0] the header row as a comma-joined string of field names
#   [1] the raw CSV text of the row to convert
#
# Columns whose header is not a field of the 'archive' dataset are ignored.
# Fields with the 'multiple' property are split on ';'.  For fields of type
# 'name' each of those values is read as "family,given[,lineage]", with
# surrounding whitespace removed from each part.
#
# Returns a hash reference of field name => value, or undef if the row cannot
# be parsed (reported via error()) or if its number of columns differs from
# the header (reported via warning()).
sub convert_input
{
    my $plugin = shift;
    my( $header, $row ) = @{ shift @_ };

    # binary => 1 lets Text::CSV accept characters outside printable ASCII
    # (accented names, for example); without it such rows fail to parse.
    my $csv = Text::CSV->new( { binary => 1 } );
    unless( $csv->parse( $row ) )
    {
        $plugin->error( $csv->error_input );
        return undef;
    }
    my @record = $csv->fields();

    # A LIMIT of -1 keeps trailing empty column names; plain split would drop
    # them and make every row of such a file look like a length mismatch.
    my @fields = split( /,/, $header, -1 );

    #Check length of row
    if( scalar @fields != scalar @record )
    {
        $plugin->warning( 'Row length mismatch' );
        return undef;
    }

    my $dataset = $plugin->{session}->{repository}->get_dataset( 'archive' );

    my %output;
    for my $i ( 0 .. $#fields )
    {
        my $field = $fields[$i];

        #Check field exists
        next unless $dataset->has_field( $field );

        my $metafield = $dataset->get_field( $field );

        # Single-valued field: store the cell as it is.
        unless( $metafield->get_property( 'multiple' ) )
        {
            $output{$field} = $record[$i];
            next;
        }

        # Multiple-valued field: individual values are separated by ';'.
        my @values = split( /;/, $record[$i] );

        if( $metafield->{type} ne 'name' )
        {
            $output{$field} = \@values;
            next;
        }

        # Name field: each value is "family,given" with an optional
        # ",lineage"; anything after the second comma belongs to lineage.
        my @names;
        foreach my $value ( @values )
        {
            next if $value =~ /^\s*$/;

            my( $family, $given, $lineage ) = split( /,/, $value, 3 );
            unless( defined $given )
            {
                $plugin->warning( "Ignoring malformed name '$value' in field '$field'" );
                next;
            }

            # "Smith, John" should give 'John', not ' John'.
            foreach my $part ( $family, $given, $lineage )
            {
                next unless defined $part;
                $part =~ s/^\s+//;
                $part =~ s/\s+$//;
            }

            push @names, { family => $family, given => $given, lineage => $lineage };
        }
        $output{$field} = \@names;
    }

    return \%output;
}
1;