Contribute: Plugins/ImportPluginsAWS

From EPrints Documentation
Revision as of 11:49, 26 September 2007 by Tom (talk | contribs) (Before You Start)
Jump to: navigation, search

Import Plugin Tutorial 2: Amazon Web Services

In the last tutorial we created an import plugin for data that needed very little modification before being imported into the repository: the column names in the CSV file matched the names of metadata fields present in the repository. In this tutorial we'll look at importing data that must be modified before it can be imported, that needs more error checking, and that is obtained in a different way.

We'll be using Amazon's E-Commerce Web Service to import books from their website into our repository, given a list of ASINs (Amazon Standard Identification Numbers).

Before You Start

Amazon Web Services

Required Modules

To prepare for this tutorial you should make sure the LWP::UserAgent module is installed. Running the following command as root, or with sudo, should install it.

cpan LWP::UserAgent

AWS.pm

package EPrints::Plugin::Import::MyPlugins::AWS;

use EPrints::Plugin::Import::TextFile;
use strict;
use URI::Escape;

our @ISA = ('EPrints::Plugin::Import::TextFile');

# Settings used by convert_input to build each Amazon lookup request.

# Base URL that the query string is appended to.
my $endpoint = "http://ecs.amazonaws.co.uk/onca/xml";
# Placeholder: replace with your own Amazon Web Services access key.
# Sent as the AWSAccessKeyId request parameter.
my $accesskey = '<YOURAMAZONWSKEY>';
# Sent as the Service request parameter.
my $service = "AWSECommerceService";
# Sent as the Operation request parameter.
my $operation = "ItemLookup";
# Sent as the Version request parameter.
my $version = "2007-07-16";

# Constructor for the AWS import plugin.
#
# Builds the object through the parent class constructor, then sets the
# plugin name and the kinds of object it can produce. The plugin is made
# visible to everyone only when LWP::UserAgent can be loaded; otherwise it
# is hidden and an explanatory error message is stored on the object.
#
# Parameters:
#   $class  - the class name being constructed.
#   %params - passed through unchanged to the parent constructor.
#
# Returns the new plugin object.
sub new
{
        my( $class, %params ) = @_;

        my $self = $class->SUPER::new( %params );

        $self->{name} = 'AWS';
        $self->{produce} = [ 'list/eprint', 'dataobj/eprint' ];

        if( EPrints::Utils::require_if_exists('LWP::UserAgent') )
        {
                $self->{visible} = 'all';
        }
        else
        {
                # Without LWP::UserAgent no request can be made, so hide the
                # plugin and record why.
                $self->{visible} = '';
                $self->{error} = 'Module LWP::UserAgent not found.';
        }

        return $self;
}

# Import every record readable from a file handle.
#
# All lines are read from $opts{fh} up front; each line is handed to
# convert_input, and every line that yields eprint data is turned into a
# data object in $opts{dataset}.
#
# Parameters:
#   $plugin - the import plugin object.
#   %opts   - must contain 'fh' (the handle to read) and 'dataset'.
#
# Returns an EPrints::List holding the ids of the objects that were created.
sub input_fh
{
        my( $plugin, %opts ) = @_;

        my $handle = $opts{fh};
        my @created_ids;

        foreach my $line (readline $handle)
        {
                my $epdata = $plugin->convert_input($line);
                if( defined $epdata )
                {
                        my $dataobj = $plugin->epdata_to_dataobj($opts{dataset},$epdata);
                        push @created_ids, $dataobj->get_id if defined $dataobj;
                }
        }

        return EPrints::List->new(
                        dataset => $opts{dataset},
                        session => $plugin->{session},
                        ids => \@created_ids );
}

# Look up one item through the Amazon E-Commerce Service and convert the
# response into eprint data.
#
# Parameters:
#   $plugin - the import plugin object; problems are reported through
#             $plugin->error.
#   $input  - one line of input text. The first run of digits in it is
#             sent as the ItemId to look up.
#
# Returns a reference to a hash of eprint field values, or undef when the
# line is blank, contains no digits, the request fails, the response cannot
# be parsed, or the item is missing or is not a book.
#
# NOTE(review): only digits are extracted, so an identifier containing
# letters (for example one ending in "X") would be truncated -- confirm
# whether such identifiers need to be supported.
sub convert_input
{
        my ($plugin, $input) = @_;

        # Blank lines (such as a trailing newline in the input) are skipped
        # quietly rather than reported as errors.
        return undef unless defined $input && $input =~ m/\S/;

        # The match must be checked: after a failed match $1 is not reset,
        # so using it unconditionally could send a stale or undefined value
        # as the ItemId.
        unless ($input =~ m/([0-9]+)/)
        {
                $plugin->error("No ASIN found in input");
                return undef;
        }
        my $item_id = $1;

        my $request =
                "$endpoint?".
                "Service=$service&".
                "AWSAccessKeyId=$accesskey&".
                "Operation=$operation&".
                "ItemId=$item_id&".
                "Version=$version&".
                "ResponseGroup=Large,EditorialReview&";

        my $ua = LWP::UserAgent->new;
        $ua->timeout(30);
        my $response = $ua->get($request);

        # A timeout or HTTP error leaves nothing worth parsing.
        unless ($response->is_success)
        {
                $plugin->error("AWS request failed: ".$response->status_line);
                return undef;
        }

        # A malformed body must not abort the whole import, so trap any
        # exception raised by the parser.
        my $dom = eval { EPrints::XML::parse_xml_string($response->content) };
        unless (defined $dom)
        {
                $plugin->error("Could not parse AWS response");
                return undef;
        }

        # Trap errors during extraction so the tree is always released.
        my $epdata = eval { _aws_dom_to_epdata($plugin, $dom) };
        my $failure = $@;

        # Release the DOM tree once the text has been copied out of it.
        EPrints::XML::dispose($dom);

        if ($failure)
        {
                $plugin->error("Could not read AWS response: $failure");
                return undef;
        }

        return $epdata;
}

# Return the first descendant element of $node named $tag, or undef if
# $node is undefined or has no such descendant.
sub _aws_first_element
{
        my ($node, $tag) = @_;

        return undef unless defined $node;
        return $node->getElementsByTagName($tag)->item(0);
}

# Return the text of the first descendant element of $node named $tag, or
# undef if there is no such element.
sub _aws_element_text
{
        my ($node, $tag) = @_;

        my $element = _aws_first_element($node, $tag);
        return undef unless defined $element;
        return EPrints::Utils::tree_to_utf8($element);
}

# Validate a parsed AWS response and build the eprint data hash from it.
# Returns a hash reference, or undef (after calling $plugin->error) when the
# response is not valid, has no Item, or describes something other than a
# book.
sub _aws_dom_to_epdata
{
        my ($plugin, $dom) = @_;

        my $items = _aws_first_element($dom, "Items");

        # Check that Amazon accepted the request.
        my $request_info = _aws_first_element($items, "Request");
        my $is_valid = _aws_element_text($request_info, "IsValid");
        unless (defined $is_valid && $is_valid eq 'True')
        {
                $plugin->error("Invalid AWS Request");
                return undef;
        }

        my $item = _aws_first_element($items, "Item");
        unless (defined $item)
        {
                $plugin->error("No Item element found");
                return undef;
        }

        # Only books are imported.
        my $attr = _aws_first_element($item, "ItemAttributes");
        my $product_group = _aws_element_text($attr, "ProductGroup");
        unless (defined $product_group && $product_group eq 'Book')
        {
                $plugin->error("Product is not a book.");
                return undef;
        }

        my %output = (
                type => "book",
                refereed => "FALSE",
                ispublished => "pub",
        );

        # The URL is a child of Item itself rather than of ItemAttributes.
        my $url = _aws_element_text($item, "DetailPageURL");
        if (defined $url)
        {
                $output{official_url} = uri_unescape($url);
        }

        # Eprint field name => element name under ItemAttributes. A field is
        # only set when the response actually contains the element.
        my %tag_for = (
                title => "Title",
                isbn => "ISBN",
                pages => "NumberOfPages",
                publisher => "Publisher",
                date => "PublicationDate",
        );
        foreach my $field (sort keys %tag_for)
        {
                my $text = _aws_element_text($attr, $tag_for{$field});
                $output{$field} = $text if defined $text;
        }

        return \%output;
}

1;

In More Detail

# Provides uri_unescape, used on the URL taken from the response.
use URI::Escape;
# Base URL that the query string is appended to.
my $endpoint = "http://ecs.amazonaws.co.uk/onca/xml";
# Placeholder: replace with your own Amazon Web Services access key.
my $accesskey = '<YOURAMAZONWSKEY>';
# Sent as the Service, Operation and Version request parameters.
my $service = "AWSECommerceService";
my $operation = "ItemLookup";
my $version = "2007-07-16";

Constructor

        # The kinds of object this plugin can produce.
        $self->{produce} = [ 'list/eprint' , 'dataobj/eprint'];
        # Try to load LWP::UserAgent; $rc is false if it is not available.
        my $rc = EPrints::Utils::require_if_exists('LWP::UserAgent');
        unless ($rc)
        {
                # Hide the plugin and record why it cannot be used.
                $self->{visible} = '';
                $self->{error} = 'Module LWP::UserAgent not found.';
        }

Input

input_fh

        # Ids of the data objects created during this import.
        my @ids;
        my $fh = $opts{fh};

        # Read every line from the handle up front; each line is one record.
        my @records = <$fh>;
        foreach my $input_data (@records)
        {
                # convert_input returns undef for records it cannot import.
                my $epdata = $plugin->convert_input($input_data);
                next unless defined $epdata;

                my $dataobj = $plugin->epdata_to_dataobj($opts{dataset},$epdata);
                if( defined $dataobj )
                {
                        push @ids, $dataobj->get_id;
                }
        }
        # Return the created objects as a list in the target dataset.
        return EPrints::List->new(
                        dataset => $opts{dataset},
                        session => $plugin->{session},
                        ids=>\@ids );

convert_input

        #Take the first run of digits in the line as the item id
        #NOTE(review): the match result is not checked, so on a line with no
        #digits $1 is not set by this match -- consider testing it first
        $input =~ m/([0-9]+)/;
        $input = $1;
        #Build the request URL
        my $request =
                "$endpoint?".
                "Service=$service&".
                "AWSAccessKeyId=$accesskey&".
                "Operation=$operation&".
                "ItemId=$input&".
                "Version=$version&".
                "ResponseGroup=Large,EditorialReview&";
        #Send the request, giving up after a 30 second timeout
        my $ua = LWP::UserAgent->new;
        $ua->timeout(30);
        my $response = $ua->get($request);
        #Create domtree
        #NOTE(review): the response is parsed without checking that the
        #request succeeded -- consider testing $response first
        my $dom = EPrints::XML::parse_xml_string($response->content);
        #Get and check Amazon Response
        my $rep =
                $dom->getElementsByTagName("Items")->item(0)->
                getElementsByTagName("Request")->item(0);

        my $reptext =
                EPrints::Utils::tree_to_utf8($rep->getElementsByTagName("IsValid")->item(0));

        unless ($reptext eq 'True') 
        {
                $plugin->error("Invalid AWS Request");
                return undef;
        }
        #Get Item Object
        my $item =
                $dom->getElementsByTagName("Items")->item(0)->
                getElementsByTagName("Item")->item(0);

        unless (defined $item) 
        {
                $plugin->error("No Item element found");
                return undef;
        }
        #Get Attribute Object
        my $attr = $item->getElementsByTagName("ItemAttributes")->item(0);
        #Check this is a book; anything else is rejected
        my $pg = EPrints::Utils::tree_to_utf8($attr->getElementsByTagName("ProductGroup")->item(0));

        unless ($pg eq 'Book') 
        {
                $plugin->error("Product is not a book.");
                return undef;
        }
        #Populate Hash with the values that are the same for every item
        $output{type} = "book";
        $output{refereed} = "FALSE";
        $output{ispublished} = "pub";
        #Add Title
        my $title = $attr->getElementsByTagName("Title")->item(0);
        $output{title} = EPrints::Utils::tree_to_utf8($title);
        #Add URL (taken from the Item element, then unescaped)
        my $url = $item->getElementsByTagName("DetailPageURL")->item(0);
        $output{official_url} = uri_unescape(EPrints::Utils::tree_to_utf8($url));
        #Add ISBN, if present
        my $isbn = $attr->getElementsByTagName("ISBN")->item(0);
        if (defined $isbn)
        {
                $output{isbn} = EPrints::Utils::tree_to_utf8($isbn);
        }
        #Add Number of Pages, if present
        my $pages = $attr->getElementsByTagName("NumberOfPages")->item(0);
        if (defined $pages)
        {
                $output{pages} = EPrints::Utils::tree_to_utf8($pages);
        }
        #Add Publisher/Publication Date, if present
        my $publisher = $attr->getElementsByTagName("Publisher")->item(0);
        if (defined $publisher)
        {
                $output{publisher} = EPrints::Utils::tree_to_utf8($publisher);
        }
        my $pubdate = $attr->getElementsByTagName("PublicationDate")->item(0);
        if (defined $pubdate)
        {
                $output{date} = EPrints::Utils::tree_to_utf8($pubdate);
        }

        #Hand back a reference to the populated hash
        return \%output;

Testing Your Plugin