Difference between revisions of "Contribute: Plugins/ImportPluginsAWS"

From EPrints Documentation
Jump to: navigation, search
(input_fh)
m (convert_input)
Line 232: Line 232:
 
                 "Version=$version&".
 
                 "Version=$version&".
 
                 "ResponseGroup=Large,EditorialReview&";
 
                 "ResponseGroup=Large,EditorialReview&";
 +
</pre>
  
 
+
<pre>
 
         #Send the request
 
         #Send the request
 
         my $ua = LWP::UserAgent->new;
 
         my $ua = LWP::UserAgent->new;
 
         $ua->timeout(30);
 
         $ua->timeout(30);
 
         my $response = $ua->get($request);
 
         my $response = $ua->get($request);
 +
</pre>
  
 +
<pre>
 
         #Create domtree
 
         #Create domtree
 
         my $dom = EPrints::XML::parse_xml_string($response->content);
 
         my $dom = EPrints::XML::parse_xml_string($response->content);
 +
</pre>
  
 +
<pre>
 
         #Get and check Amazon Response
 
         #Get and check Amazon Response
 
         my $rep =
 
         my $rep =
Line 255: Line 260:
 
                 return undef;
 
                 return undef;
 
         }
 
         }
 +
</pre>
  
 +
<pre>
 
         #Get Item Object
 
         #Get Item Object
 
         my $item =
 
         my $item =
Line 266: Line 273:
 
                 return undef;
 
                 return undef;
 
         }
 
         }
 +
</pre>
  
 +
<pre>
 
         #Get Attribute Object
 
         #Get Attribute Object
 
         my $attr = $item->getElementsByTagName("ItemAttributes")->item(0);
 
         my $attr = $item->getElementsByTagName("ItemAttributes")->item(0);
 +
</pre>
  
 +
<pre>
 
         #Check this is a book
 
         #Check this is a book
 
         my $pg = EPrints::Utils::tree_to_utf8($attr->getElementsByTagName("ProductGroup")->item(0));
 
         my $pg = EPrints::Utils::tree_to_utf8($attr->getElementsByTagName("ProductGroup")->item(0));
Line 278: Line 289:
 
                 return undef;
 
                 return undef;
 
         }
 
         }
 +
</pre>
  
 +
<pre>
 
         #Populate Hash
 
         #Populate Hash
 
         $output{type} = "book";
 
         $output{type} = "book";
 
         $output{refereed} = "FALSE";
 
         $output{refereed} = "FALSE";
 
         $output{ispublished} = "pub";
 
         $output{ispublished} = "pub";
 +
</pre>
  
 +
<pre>
 
         #Add Title
 
         #Add Title
 
         my $title = $attr->getElementsByTagName("Title")->item(0);
 
         my $title = $attr->getElementsByTagName("Title")->item(0);
 
         $output{title} = EPrints::Utils::tree_to_utf8($title);
 
         $output{title} = EPrints::Utils::tree_to_utf8($title);
 +
</pre>
  
 +
<pre>
 
         #Add URL
 
         #Add URL
 
         my $url = $item->getElementsByTagName("DetailPageURL")->item(0);
 
         my $url = $item->getElementsByTagName("DetailPageURL")->item(0);
 
         $output{official_url} = uri_unescape(EPrints::Utils::tree_to_utf8($url));
 
         $output{official_url} = uri_unescape(EPrints::Utils::tree_to_utf8($url));
 +
</pre>
  
 +
<pre>
 
         #Add ISBN
 
         #Add ISBN
 
         my $isbn = $attr->getElementsByTagName("ISBN")->item(0);
 
         my $isbn = $attr->getElementsByTagName("ISBN")->item(0);
Line 298: Line 317:
 
                 $output{isbn} = EPrints::Utils::tree_to_utf8($isbn);
 
                 $output{isbn} = EPrints::Utils::tree_to_utf8($isbn);
 
         }
 
         }
 +
</pre>
  
 +
<pre>
 
         #Add Number of Pages
 
         #Add Number of Pages
 
         my $pages = $attr->getElementsByTagName("NumberOfPages")->item(0);
 
         my $pages = $attr->getElementsByTagName("NumberOfPages")->item(0);
Line 305: Line 326:
 
                 $output{pages} = EPrints::Utils::tree_to_utf8($pages);
 
                 $output{pages} = EPrints::Utils::tree_to_utf8($pages);
 
         }
 
         }
 +
</pre>
  
 +
<pre>
 
         #Add Publisher/Publication Date
 
         #Add Publisher/Publication Date
 
         my $publisher = $attr->getElementsByTagName("Publisher")->item(0);
 
         my $publisher = $attr->getElementsByTagName("Publisher")->item(0);
Line 312: Line 335:
 
                 $output{publisher} = EPrints::Utils::tree_to_utf8($publisher);
 
                 $output{publisher} = EPrints::Utils::tree_to_utf8($publisher);
 
         }
 
         }
 +
</pre>
  
 +
<pre>
 
         my $pubdate = $attr->getElementsByTagName("PublicationDate")->item(0);
 
         my $pubdate = $attr->getElementsByTagName("PublicationDate")->item(0);
 
         if (defined $pubdate)
 
         if (defined $pubdate)
Line 320: Line 345:
  
 
         return \%output;
 
         return \%output;
}
 
 
1;
 
 
</pre>
 
</pre>
  
 
= Testing Your Plugin =
 
= Testing Your Plugin =

Revision as of 16:07, 25 September 2007

Import Plugin Tutorial 2: Amazon Web Services

Before You Start

AWS.pm

package EPrints::Plugin::Import::MyPlugins::AWS;

use EPrints::Plugin::Import::TextFile;
use strict;
use URI::Escape;

our @ISA = ('EPrints::Plugin::Import::TextFile');

# Settings interpolated into the request URL built in convert_input.

# Base URL of the web service; the query string is appended after a "?".
my $endpoint = "http://ecs.amazonaws.co.uk/onca/xml";
# Value sent as the AWSAccessKeyId parameter. The literal here is a
# placeholder - presumably it has to be replaced with your own key.
my $accesskey = '<YOURAMAZONWSKEY>';
# Value sent as the Service parameter.
my $service = "AWSECommerceService";
# Value sent as the Operation parameter.
my $operation = "ItemLookup";
# Value sent as the Version parameter.
my $version = "2007-07-16";

# Constructor.
#
# Builds the object through the parent class constructor, then records
# the plugin's name, its visibility and the kinds of object it produces.
# If LWP::UserAgent cannot be loaded the visibility is blanked and an
# error message is stored on the object instead.
#
# Parameters:
#   $class  - class name the constructor is invoked on
#   %params - passed through unchanged to the parent constructor
#
# Returns the plugin object.
sub new
{
        my( $class, %params ) = @_;

        my $self = $class->SUPER::new( %params );

        @{$self}{ 'name', 'visible' } = ( 'AWS', 'all' );
        $self->{produce} = [ 'list/eprint' , 'dataobj/eprint'];

        # convert_input needs LWP::UserAgent to send its request
        if( !EPrints::Utils::require_if_exists('LWP::UserAgent') )
        {
                $self->{visible} = '';
                $self->{error} = 'Module LWP::UserAgent not found.';
        }

        return $self;
}

# Read identifiers from a filehandle, one per line, and import each one.
#
# Every non-blank line is handed to convert_input; when that yields data
# the data is turned into a data object with epdata_to_dataobj and the
# object's id is collected.
#
# Parameters:
#   $plugin - the plugin object
#   %opts   - named options; this method reads:
#               fh      - the filehandle to read lines from
#               dataset - passed to epdata_to_dataobj and EPrints::List->new
#
# Returns an EPrints::List built from the ids of the objects created.
sub input_fh
{
        my( $plugin, %opts ) = @_;

        my $fh = $opts{fh};
        my @ids;

        # Read one line at a time rather than slurping the whole handle
        # into memory first.
        while( defined( my $input_data = <$fh> ) )
        {
                # A whitespace-only line (e.g. a trailing blank line) holds
                # no identifier, so do not spend a remote lookup on it.
                next unless $input_data =~ m/\S/;

                my $epdata = $plugin->convert_input( $input_data );
                next unless defined $epdata;

                my $dataobj = $plugin->epdata_to_dataobj( $opts{dataset}, $epdata );
                next unless defined $dataobj;

                push @ids, $dataobj->get_id;
        }

        return EPrints::List->new(
                        dataset => $opts{dataset},
                        session => $plugin->{session},
                        ids=>\@ids );
}

# Look up one item through the web service and map the response onto
# eprint fields.
#
# Parameters:
#   $plugin - the plugin object; failures are reported via $plugin->error
#   $input  - a line of text; the first run of digits found in it is
#             sent as the ItemId of the lookup
#
# Returns a reference to a hash holding type, refereed and ispublished
# plus, for each element present in the response, title, official_url,
# isbn, pages, publisher and date.  Returns undef (after reporting an
# error) when the input holds no identifier, the request fails, the
# response cannot be parsed or is flagged invalid, no item is returned,
# or the item is not in the 'Book' product group.
sub convert_input
{
        my ($plugin, $input) = @_;
        my %output = ();

        # Only trust $1 after a successful match; otherwise it is unset
        # or left over from an earlier match.
        # NOTE(review): only digits are captured, so an identifier that
        # ends in a letter is truncated - confirm that is acceptable.
        unless (defined $input && $input =~ m/([0-9]+)/)
        {
                $plugin->error("No item identifier found in input");
                return undef;
        }
        my $item_id = $1;

        #Perform the request
        my $request =
                "$endpoint?".
                "Service=$service&".
                "AWSAccessKeyId=$accesskey&".
                "Operation=$operation&".
                "ItemId=$item_id&".
                "Version=$version&".
                "ResponseGroup=Large,EditorialReview&";

        #Send the request
        my $ua = LWP::UserAgent->new;
        $ua->timeout(30);
        my $response = $ua->get($request);

        # A failed request (timeout, HTTP error) has no usable body
        unless ($response->is_success)
        {
                $plugin->error("AWS request failed: ".$response->status_line);
                return undef;
        }

        #Create domtree
        # eval guards against the parser throwing on malformed content
        my $dom = eval { EPrints::XML::parse_xml_string($response->content) };
        unless (defined $dom)
        {
                $plugin->error("Could not parse AWS response");
                return undef;
        }

        #Get and check Amazon Response
        # Each lookup may come back undefined, so test every step before
        # calling a method on the result.
        my $items = $dom->getElementsByTagName("Items")->item(0);
        my $rep = defined $items
                ? $items->getElementsByTagName("Request")->item(0)
                : undef;
        my $valid = defined $rep
                ? $rep->getElementsByTagName("IsValid")->item(0)
                : undef;

        unless (defined $valid && EPrints::Utils::tree_to_utf8($valid) eq 'True')
        {
                $plugin->error("Invalid AWS Request");
                return undef;
        }

        #Get Item Object
        my $item = $items->getElementsByTagName("Item")->item(0);

        unless (defined $item)
        {
                $plugin->error("No Item element found");
                return undef;
        }

        #Get Attribute Object
        my $attr = $item->getElementsByTagName("ItemAttributes")->item(0);

        unless (defined $attr)
        {
                $plugin->error("No ItemAttributes element found");
                return undef;
        }

        #Check this is a book
        my $pg = $attr->getElementsByTagName("ProductGroup")->item(0);

        unless (defined $pg && EPrints::Utils::tree_to_utf8($pg) eq 'Book')
        {
                $plugin->error("Product is not a book.");
                return undef;
        }

        #Populate Hash
        $output{type} = "book";
        $output{refereed} = "FALSE";
        $output{ispublished} = "pub";

        #Add URL
        my $url = $item->getElementsByTagName("DetailPageURL")->item(0);
        if (defined $url)
        {
                $output{official_url} = uri_unescape(EPrints::Utils::tree_to_utf8($url));
        }

        #Add Title, ISBN, Number of Pages, Publisher and Publication Date
        # Response element name => eprint field it is copied into
        my %field_for = (
                Title           => 'title',
                ISBN            => 'isbn',
                NumberOfPages   => 'pages',
                Publisher       => 'publisher',
                PublicationDate => 'date',
        );

        foreach my $tag (sort keys %field_for)
        {
                my $node = $attr->getElementsByTagName($tag)->item(0);
                next unless defined $node;

                $output{$field_for{$tag}} = EPrints::Utils::tree_to_utf8($node);
        }

        return \%output;
}

1;

In More Detail

use URI::Escape;
my $endpoint = "http://ecs.amazonaws.co.uk/onca/xml";
my $accesskey = '<YOURAMAZONWSKEY>';
my $service = "AWSECommerceService";
my $operation = "ItemLookup";
my $version = "2007-07-16";

Constructor

        $self->{produce} = [ 'list/eprint' , 'dataobj/eprint'];
        my $rc = EPrints::Utils::require_if_exists('LWP::UserAgent');
        unless ($rc)
        {
                $self->{visible} = '';
                $self->{error} = 'Module LWP::UserAgent not found.';
        }

Input

input_fh

        my @ids;
        my $fh = $opts{fh};

        my @records = <$fh>;
        foreach my $input_data (@records)
        {
                my $epdata = $plugin->convert_input($input_data);
                next unless defined $epdata;

                my $dataobj = $plugin->epdata_to_dataobj($opts{dataset},$epdata);
                if( defined $dataobj )
                {
                        push @ids, $dataobj->get_id;
                }
        }
        return EPrints::List->new(
                        dataset => $opts{dataset},
                        session => $plugin->{session},
                        ids=>\@ids );

convert_input

        $input =~ m/([0-9]+)/;
        $input = $1;
        #Perform the request
        my $request =
                "$endpoint?".
                "Service=$service&".
                "AWSAccessKeyId=$accesskey&".
                "Operation=$operation&".
                "ItemId=$input&".
                "Version=$version&".
                "ResponseGroup=Large,EditorialReview&";
        #Send the request
        my $ua = LWP::UserAgent->new;
        $ua->timeout(30);
        my $response = $ua->get($request);
        #Create domtree
        my $dom = EPrints::XML::parse_xml_string($response->content);
        #Get and check Amazon Response
        my $rep =
                $dom->getElementsByTagName("Items")->item(0)->
                getElementsByTagName("Request")->item(0);

        my $reptext =
                EPrints::Utils::tree_to_utf8($rep->getElementsByTagName("IsValid")->item(0));

        unless ($reptext eq 'True') 
        {
                $plugin->error("Invalid AWS Request");
                return undef;
        }
        #Get Item Object
        my $item =
                $dom->getElementsByTagName("Items")->item(0)->
                getElementsByTagName("Item")->item(0);

        unless (defined $item) 
        {
                $plugin->error("No Item element found");
                return undef;
        }
        #Get Attribute Object
        my $attr = $item->getElementsByTagName("ItemAttributes")->item(0);
        #Check this is a book
        my $pg = EPrints::Utils::tree_to_utf8($attr->getElementsByTagName("ProductGroup")->item(0));

        unless ($pg eq 'Book') 
        {
                $plugin->error("Product is not a book.");
                return undef;
        }
        #Populate Hash
        $output{type} = "book";
        $output{refereed} = "FALSE";
        $output{ispublished} = "pub";
        #Add Title
        my $title = $attr->getElementsByTagName("Title")->item(0);
        $output{title} = EPrints::Utils::tree_to_utf8($title);
        #Add URL
        my $url = $item->getElementsByTagName("DetailPageURL")->item(0);
        $output{official_url} = uri_unescape(EPrints::Utils::tree_to_utf8($url));
        #Add ISBN
        my $isbn = $attr->getElementsByTagName("ISBN")->item(0);
        if (defined $isbn)
        {
                $output{isbn} = EPrints::Utils::tree_to_utf8($isbn);
        }
        #Add Number of Pages
        my $pages = $attr->getElementsByTagName("NumberOfPages")->item(0);
        if (defined $pages)
        {
                $output{pages} = EPrints::Utils::tree_to_utf8($pages);
        }
        #Add Publisher/Publication Date
        my $publisher = $attr->getElementsByTagName("Publisher")->item(0);
        if (defined $publisher)
        {
                $output{publisher} = EPrints::Utils::tree_to_utf8($publisher);
        }
        my $pubdate = $attr->getElementsByTagName("PublicationDate")->item(0);
        if (defined $pubdate)
        {
                $output{date} = EPrints::Utils::tree_to_utf8($pubdate);
        }

        return \%output;

Testing Your Plugin