Contribute: Plugins/ImportPluginsAWS
Contents
Import Plugin Tutorial 2: Amazon Web Services
In the last tutorial we created an import plugin that took data which needed very little modification to import into the repository. The column names in the CSV file matched the names of metadata fields present in the repository. In this tutorial we'll look at importing data that needs some modification before it can be imported, needs more error checking, and is obtained in a different way.
We'll be using Amazon's E-Commerce Web Service to import books from their website into our repository, given a list of ASINs (Amazon Standard Identification Numbers).
Before You Start
AWS.pm
package EPrints::Plugin::Import::MyPlugins::AWS;
use EPrints::Plugin::Import::TextFile;
use strict;
use URI::Escape;
our @ISA = ('EPrints::Plugin::Import::TextFile');
# Settings used by convert_input to build the web service request URL.

# Base URL the query string is appended to.
my $endpoint = "http://ecs.amazonaws.co.uk/onca/xml";
# Sent as the AWSAccessKeyId parameter. The value here looks like a
# placeholder - presumably it must be replaced with a real key before use.
my $accesskey = '<YOURAMAZONWSKEY>';
# Sent as the Service parameter.
my $service = "AWSECommerceService";
# Sent as the Operation parameter.
my $operation = "ItemLookup";
# Sent as the Version parameter.
my $version = "2007-07-16";
# Constructor.
#
# Builds the plugin through the parent class constructor, names it 'AWS' and
# declares that it produces 'list/eprint' and 'dataobj/eprint'.
#
# The plugin needs LWP::UserAgent to talk to the web service. If that module
# cannot be loaded, 'visible' is set to the empty string (instead of 'all')
# and an explanatory message is stored in 'error'.
#
# Parameters:
#   $class  - class name the constructor was invoked on
#   %params - passed through unchanged to the parent constructor
#
# Returns the plugin object.
sub new
{
    my( $class, %params ) = @_;

    my $self = $class->SUPER::new( %params );

    $self->{name}    = 'AWS';
    $self->{produce} = [ 'list/eprint' , 'dataobj/eprint'];

    my $lwp_available = EPrints::Utils::require_if_exists('LWP::UserAgent');

    $self->{visible} = $lwp_available ? 'all' : '';
    if( !$lwp_available )
    {
        $self->{error} = 'Module LWP::UserAgent not found.';
    }

    return $self;
}
# Import every record found on a file handle.
#
# Each non-blank line read from $opts{fh} is handed to convert_input. Lines
# that convert successfully are turned into data objects in $opts{dataset}.
#
# Parameters:
#   $plugin - the import plugin object
#   %opts   - must contain 'fh' (the handle to read from) and 'dataset'
#
# Returns an EPrints::List holding the ids of the data objects that were
# created. Lines that could not be converted are skipped.
sub input_fh
{
    my( $plugin, %opts ) = @_;

    my @ids;
    my $fh = $opts{fh};

    # Read one line at a time rather than loading the whole file into an
    # array first.
    while( defined( my $input_data = <$fh> ) )
    {
        # A whitespace-only line has nothing to look up, so don't pass it
        # on to convert_input (which would query the web service for it).
        next unless $input_data =~ /\S/;

        my $epdata = $plugin->convert_input($input_data);
        next unless defined $epdata;

        my $dataobj = $plugin->epdata_to_dataobj($opts{dataset},$epdata);
        if( defined $dataobj )
        {
            push @ids, $dataobj->get_id;
        }
    }

    return EPrints::List->new(
        dataset => $opts{dataset},
        session => $plugin->{session},
        ids=>\@ids );
}
# Look up one item through the web service and convert the reply into a hash
# of eprint field values.
#
# Parameters:
#   $plugin - the import plugin object (used to report errors)
#   $input  - one line of input; the first run of digits in it is sent as
#             the ItemId of the lookup
#
# Returns a reference to a hash with the keys type, refereed and ispublished,
# plus title, official_url, isbn, pages, publisher and date for whichever of
# those the reply contains. Returns undef if the line is blank, or - after
# reporting the problem through $plugin->error - if the line holds no digits,
# the request fails, the reply cannot be parsed or is incomplete, or the item
# is not a book.
sub convert_input
{
    my( $plugin, $input ) = @_;

    # A blank line is not an error, there is simply nothing to look up.
    return undef unless defined $input && $input =~ /\S/;

    # Only read $1 after a successful match: when a match fails $1 keeps
    # whatever an earlier match captured.
    unless( $input =~ m/([0-9]+)/ )
    {
        $plugin->error("No item identifier found in input");
        return undef;
    }
    my $item_id = $1;

    my $request =
        "$endpoint?".
        "Service=$service&".
        "AWSAccessKeyId=$accesskey&".
        "Operation=$operation&".
        "ItemId=$item_id&".
        "Version=$version&".
        "ResponseGroup=Large,EditorialReview&";

    my $ua = LWP::UserAgent->new;
    $ua->timeout(30);
    my $response = $ua->get($request);

    # Don't try to parse the body of a failed request (timeout, HTTP error).
    unless( $response->is_success )
    {
        $plugin->error("AWS request failed: ".$response->status_line);
        return undef;
    }

    # Catch a parser failure here so that one bad reply only costs this one
    # record.
    my $dom = eval { EPrints::XML::parse_xml_string($response->content) };
    if( $@ || !defined $dom )
    {
        $plugin->error("Could not parse AWS response");
        return undef;
    }

    my $items = $dom->getElementsByTagName("Items")->item(0);
    unless( defined $items )
    {
        $plugin->error("No Items element found");
        return undef;
    }

    # The reply is only usable if Items/Request/IsValid says 'True'. A
    # missing element is treated the same as any other value.
    my $rep = $items->getElementsByTagName("Request")->item(0);
    my $valid;
    $valid = $rep->getElementsByTagName("IsValid")->item(0) if defined $rep;
    my $reptext = defined $valid ? EPrints::Utils::tree_to_utf8($valid) : '';
    unless( $reptext eq 'True' )
    {
        $plugin->error("Invalid AWS Request");
        return undef;
    }

    my $item = $items->getElementsByTagName("Item")->item(0);
    unless( defined $item )
    {
        $plugin->error("No Item element found");
        return undef;
    }

    my $attr = $item->getElementsByTagName("ItemAttributes")->item(0);
    unless( defined $attr )
    {
        $plugin->error("No ItemAttributes element found");
        return undef;
    }

    # Only books are imported.
    my $pg_node = $attr->getElementsByTagName("ProductGroup")->item(0);
    my $pg = defined $pg_node ? EPrints::Utils::tree_to_utf8($pg_node) : '';
    unless( $pg eq 'Book' )
    {
        $plugin->error("Product is not a book.");
        return undef;
    }

    my %output = (
        type        => "book",
        refereed    => "FALSE",
        ispublished => "pub",
    );

    # The detail page URL hangs off the Item element, not ItemAttributes.
    my $url = $item->getElementsByTagName("DetailPageURL")->item(0);
    if( defined $url )
    {
        $output{official_url} = uri_unescape(EPrints::Utils::tree_to_utf8($url));
    }

    # eprint field => ItemAttributes child element; a field is only set
    # when its element is present in the reply.
    my @attribute_fields = (
        [ title     => "Title" ],
        [ isbn      => "ISBN" ],
        [ pages     => "NumberOfPages" ],
        [ publisher => "Publisher" ],
        [ date      => "PublicationDate" ],
    );
    foreach my $pair (@attribute_fields)
    {
        my( $field, $tag ) = @{$pair};
        my $node = $attr->getElementsByTagName($tag)->item(0);
        next unless defined $node;
        $output{$field} = EPrints::Utils::tree_to_utf8($node);
    }

    return \%output;
}
1;
In More Detail
use URI::Escape;
my $endpoint = "http://ecs.amazonaws.co.uk/onca/xml"; my $accesskey = '<YOURAMAZONWSKEY>'; my $service = "AWSECommerceService"; my $operation = "ItemLookup"; my $version = "2007-07-16";
Constructor
$self->{produce} = [ 'list/eprint' , 'dataobj/eprint'];
my $rc = EPrints::Utils::require_if_exists('LWP::UserAgent');
unless ($rc)
{
$self->{visible} = '';
$self->{error} = 'Module LWP::UserAgent not found.';
}
Input
input_fh
my @ids;
my $fh = $opts{fh};
my @records = <$fh>;
foreach my $input_data (@records)
{
my $epdata = $plugin->convert_input($input_data);
next unless defined $epdata;
my $dataobj = $plugin->epdata_to_dataobj($opts{dataset},$epdata);
if( defined $dataobj )
{
push @ids, $dataobj->get_id;
}
}
return EPrints::List->new(
dataset => $opts{dataset},
session => $plugin->{session},
ids=>\@ids );
convert_input
$input =~ m/([0-9]+)/;
$input = $1;
#Perform the request
my $request =
"$endpoint?".
"Service=$service&".
"AWSAccessKeyId=$accesskey&".
"Operation=$operation&".
"ItemId=$input&".
"Version=$version&".
"ResponseGroup=Large,EditorialReview&";
#Send the request
my $ua = LWP::UserAgent->new;
$ua->timeout(30);
my $response = $ua->get($request);
#Create domtree
my $dom = EPrints::XML::parse_xml_string($response->content);
#Get and check Amazon Response
my $rep =
$dom->getElementsByTagName("Items")->item(0)->
getElementsByTagName("Request")->item(0);
my $reptext =
EPrints::Utils::tree_to_utf8($rep->getElementsByTagName("IsValid")->item(0));
unless ($reptext eq 'True')
{
$plugin->error("Invalid AWS Request");
return undef;
}
#Get Item Object
my $item =
$dom->getElementsByTagName("Items")->item(0)->
getElementsByTagName("Item")->item(0);
unless (defined $item)
{
$plugin->error("No Item element found");
return undef;
}
#Get Attribute Object
my $attr = $item->getElementsByTagName("ItemAttributes")->item(0);
#Check this is a book
my $pg = EPrints::Utils::tree_to_utf8($attr->getElementsByTagName("ProductGroup")->item(0));
unless ($pg eq 'Book')
{
$plugin->error("Product is not a book.");
return undef;
}
#Populate Hash
$output{type} = "book";
$output{refereed} = "FALSE";
$output{ispublished} = "pub";
#Add Title
my $title = $attr->getElementsByTagName("Title")->item(0);
$output{title} = EPrints::Utils::tree_to_utf8($title);
#Add URL
my $url = $item->getElementsByTagName("DetailPageURL")->item(0);
$output{official_url} = uri_unescape(EPrints::Utils::tree_to_utf8($url));
#Add ISBN
my $isbn = $attr->getElementsByTagName("ISBN")->item(0);
if (defined $isbn)
{
$output{isbn} = EPrints::Utils::tree_to_utf8($isbn);
}
#Add Number of Pages
my $pages = $attr->getElementsByTagName("NumberOfPages")->item(0);
if (defined $pages)
{
$output{pages} = EPrints::Utils::tree_to_utf8($pages);
}
#Add Publisher/Publication Date
my $publisher = $attr->getElementsByTagName("Publisher")->item(0);
if (defined $publisher)
{
$output{publisher} = EPrints::Utils::tree_to_utf8($publisher);
}
my $pubdate = $attr->getElementsByTagName("PublicationDate")->item(0);
if (defined $pubdate)
{
$output{date} = EPrints::Utils::tree_to_utf8($pubdate);
}
return \%output;