Difference between revisions of "Perl 101 for EPrints"
| m (create further reading section) | |||
| (37 intermediate revisions by 2 users not shown) | |||
| Line 1: | Line 1: | ||
| [[category:Documentation_Needed]] | [[category:Documentation_Needed]] | ||
| + | [[Category: Work in Progress]] | ||
| + | [[Category: Contribute]] | ||
| + | [[Category: Howto]] | ||
| − | === | + | ===Where to start=== | 
| − | + | Learning Perl: Helping you get started with Perl 5.... | |
| + | http://learn.perl.org/ | ||
| + | Perl Tutorials  | ||
| + | http://perldoc.perl.org/perl.html#Tutorials | ||
| + | |||
| + | ===Recommended books for learning Perl=== | ||
| + | |||
| + | Learning Perl, 6th Edition  | ||
| + | by Randal Schwartz, brian d foy, Tom Phoenix | ||
| + | http://www.amazon.com/Learning-Perl-Randal-L-Schwartz/dp/1449303587 | ||
| + | |||
| + | Programming Perl: Unmatched power for text processing and scripting Fourth Edition Edition | ||
| + | by Tom Christiansen, brian d foy, Larry Wall, Jon Orwant | ||
| + | http://www.amazon.com/Programming-Perl-Unmatched-processing-scripting/dp/0596004923/ | ||
| + | |||
| + | Intermediate Perl Second Edition Edition | ||
| + | by Randal L. Schwartz, brian d foy, Tom Phoenix | ||
| + | http://www.amazon.com/Intermediate-Perl-Randal-L-Schwartz/dp/1449393098/ | ||
| + | |||
| + | ==Using Perl in EPrints== | ||
| + | |||
| + | ===Retrieving data from a config file=== | ||
| + | |||
| + | Example of a data config file under /cfg/cfg.d/example.pl | ||
| + | |||
| + | The data structure is an array of hashes (http://perldoc.perl.org/perldsc.html#ARRAYS-OF-HASHES) | ||
| + | |||
| + | <source lang="perl"> | ||
| $c->{etd_ms}->{fields} = [ | $c->{etd_ms}->{fields} = [ | ||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| 	{ tagname => 'etd_ms:type', type=> 'constant', value => "Electronic Thesis or Dissertation" }, | 	{ tagname => 'etd_ms:type', type=> 'constant', value => "Electronic Thesis or Dissertation" }, | ||
| − | |||
| − | |||
| 	{ tagname => 'etd_ms:degree', type => 'compound', parts => [ | 	{ tagname => 'etd_ms:degree', type => 'compound', parts => [ | ||
| 		{ tagname => 'etd_ms:name', type => 'simple_text', eprint_fieldname => 'thesis_degree_name' }, | 		{ tagname => 'etd_ms:name', type => 'simple_text', eprint_fieldname => 'thesis_degree_name' }, | ||
| Line 23: | Line 44: | ||
| 	] }, | 	] }, | ||
| ]; | ]; | ||
| + | </source> | ||
| − | Retrieving values from this  | + | Retrieving values from this config file in an "Example" export plugin under /plugins/EPrints/Plugin/Export/example.pm | 
| + | <source lang="perl"> | ||
| + |                package EPrints::Plugin::Export::Example; | ||
| + |                use EPrints::Plugin::Export; | ||
| + |                @ISA = ( "EPrints::Plugin::Export" ); | ||
| + |                [...] | ||
|                 my $fields = $session->get_conf('etd_ms','fields');					 |                 my $fields = $session->get_conf('etd_ms','fields');					 | ||
|                 foreach my $field_conf (@{$fields}) |                 foreach my $field_conf (@{$fields}) | ||
| Line 35: | Line 62: | ||
| 		   } | 		   } | ||
| 		} | 		} | ||
| + | </source> | ||
| + | |||
| + | Supporting function that returns an arrayref | ||
| + | |||
| + | <source lang="perl"> | ||
| + | sub generate_tag { | ||
| + | 	my ($plugin, $eprint, $field_conf) = @_; | ||
| + | |||
| + | 	#Constant value | ||
| + | 	if ($field_conf->{type} eq 'constant') | ||
| + | 	{ | ||
| + | 		return $plugin->generate_constant_tag($eprint, $field_conf); | ||
| + | 	} | ||
| + | } | ||
| + | </source> | ||
| + | |||
| + | Supporting function that generates a tag | ||
| + | |||
| + | <source lang="perl"> | ||
| + | sub generate_constant_tag{ | ||
| + | 	my ($plugin, $eprint, $field_conf) = @_; | ||
| + | |||
| + | 	my $tag = $plugin->_simple_tag($field_conf, $field_conf->{value}); | ||
| + | 	return [ $tag ]; | ||
| + | } | ||
| + | </source> | ||
| + | |||
| + | ===Function parameters=== | ||
| + | In perl, the following: <source lang="perl">$obj->function($arg1, $arg2)</source> is equivalent to: <source lang="perl">Class::function($obj, $arg1,$arg2)</source>  | ||
| + | In the example above, when we call: | ||
| + | <source lang="perl"> | ||
| + | my $tags = $plugin->generate_tag($eprint, $field_conf); | ||
| + | </source> | ||
| + | It is equivalent to: | ||
| + | <source lang="perl"> | ||
| + | my $tags = Example::generate_tag($plugin, $eprint, $field_conf); | ||
| + | </source> | ||
| + | This explains the three parameters listed in the top of the generate_tag function definition: | ||
| + | <source lang="perl"> | ||
| + | sub generate_tag { | ||
| + | my ($plugin, $eprint, $field_conf) = @_; | ||
| + | </source> | ||
| + | |||
| + | === Tips and tricks === | ||
| + | |||
| + | ==== Creating a session ==== | ||
| + | Creating a session with eprints is now done via EPrints->new() however many scripts (and examples still use the legacy EPrints::Session->new() interface. | ||
| + | |||
| + | See "perl_lib/EPrints/BackCompatibility.pm" line 424 for details of the compatibility wrapper and "perl_lib/EPrints.pm" for full usage of Eprints methods. | ||
| + | |||
| + | Old style: | ||
| + | <source lang="perl"> | ||
| + | # When using this interface both values are required - 1 (to indicate this is a script not CGI) and repo id (second val). Attempting to run without the repo ID results in the following errors: | ||
| + | # Use of uninitialized value $repository_id in string eq at /usr/share/eprints/perl_lib/EPrints.pm line 551. | ||
| + | # Can't call method "get_database" on an undefined value at /usr/share/eprints/perl_lib/EPrints/DataObj.pm line 132. | ||
| + | |||
| + | my $session = EPrints::Session->new( 1, 'REPOID'); | ||
| + | </source> | ||
| + | |||
| + | New style: | ||
| + | <source lang="perl"> | ||
| + | # Note specifying CGI is not required via this interface. | ||
| + | my $ep = EPrints->new( cleanup=>0 ); | ||
| + | my $session = $ep->repository( 'epprod'); | ||
| + | </source> | ||
| + | |||
| + | ==== Usage of get_config ==== | ||
| + | |||
| + | EPrints::Repository::get_config (accessed via $session->get_repository->get_conf()) can be used with two or three parameters. | ||
| + | |||
| + | With only two (key and value) returns a hash  | ||
| + | $session->get_repository->get_conf( "datacitedoi", "typesallowed") | ||
| + | |||
| + | With three values (key and two values) will return 1 (true) if the last item is found in the list of the second entry - config_entry below | ||
| + | $session->get_repository->get_conf( "datacitedoi", "config_entry", 'Thesis') | ||
| + | |||
| + | |||
| + | get_config is a thin wrapper around config() | ||
| + | |||
| + | See also "perl_lib/EPrints/Repository.pm" line 1611 | ||
| + | |||
| + | == A basic EPrint extracting script == | ||
| + | |||
| + | The simplest of examples, use eprints to register a session in the repository then read an (arbitrary) eprint; accessing two bits of information about it. The attributes accessed come from [[EPrint Object]] and [[API:EPrints/DataObj]]. | ||
| + | <source lang="perl"> | ||
| + | |||
| + | #!/usr/bin/perl -w                                                                                                                                         | ||
| + | |||
| + | use EPrints;                                                                                                                                               | ||
| + | |||
| + | use strict;                                                                                                                                                | ||
| + | |||
| + | # Create new session for interacting with eprints                                                                                                          | ||
| + | my $ep = EPrints->new( cleanup=>0 ); | ||
| + | my $session = $ep->repository( 'REPOID'); | ||
| + | my $id = 23743;                                                                                                                                            | ||
| + | |||
| + | # Create new object referency thing for our desired eprint (23743)                                                                                         | ||
| + | $::pulled_eprint = EPrints::DataObj::EPrint->new( $session, $id );                                                                                         | ||
| + | |||
| + | print $::pulled_eprint->get_dataset_id . "\n";                                                                                                             | ||
| + | print $::pulled_eprint->get_type . "\n";                                                                                                                   | ||
| + | |||
| + | |||
| + | $session->terminate();    | ||
| + | |||
| + | </source> | ||
| + | |||
| + | This example can be expanded to test work in progress functions as required. | ||
| + | |||
| + | == Further reading == | ||
| + | |||
| + | [[Core_API]] describes many possible activities. | ||
Latest revision as of 02:03, 20 September 2018
Contents
Where to start
Learning Perl: Helping you get started with Perl 5.... http://learn.perl.org/
Perl Tutorials http://perldoc.perl.org/perl.html#Tutorials
Recommended books for learning Perl
Learning Perl, 6th Edition by Randal Schwartz, brian d foy, Tom Phoenix http://www.amazon.com/Learning-Perl-Randal-L-Schwartz/dp/1449303587
Programming Perl: Unmatched power for text processing and scripting, Fourth Edition, by Tom Christiansen, brian d foy, Larry Wall, Jon Orwant http://www.amazon.com/Programming-Perl-Unmatched-processing-scripting/dp/0596004923/
Intermediate Perl, Second Edition, by Randal L. Schwartz, brian d foy, Tom Phoenix http://www.amazon.com/Intermediate-Perl-Randal-L-Schwartz/dp/1449393098/
Using Perl in EPrints
Retrieving data from a config file
Example of a data config file under /cfg/cfg.d/example.pl
The data structure is an array of hashes (http://perldoc.perl.org/perldsc.html#ARRAYS-OF-HASHES)
# Field definitions for the etd_ms export, stored as an array of hashes.
# Every entry carries a 'tagname' and a 'type'; the remaining keys differ
# per type, as shown by the entries below.
$c->{etd_ms}->{fields} = [
	# 'constant' entry: the fixed text is given in 'value'.
	{ tagname => 'etd_ms:type', type=> 'constant', value => "Electronic Thesis or Dissertation" },
	# 'compound' entry: 'parts' holds a nested array of further field hashes.
	{ tagname => 'etd_ms:degree', type => 'compound', parts => [
		# 'simple_text' entries each give an 'eprint_fieldname'
		# (presumably the eprint field the value is read from - confirm).
		{ tagname => 'etd_ms:name', type => 'simple_text', eprint_fieldname => 'thesis_degree_name' },
		{ tagname => 'etd_ms:level', type => 'simple_text', eprint_fieldname => 'thesis_type' },
		{ tagname => 'etd_ms:discipline', type => 'simple_text', eprint_fieldname => 'department' },
		{ tagname => 'etd_ms:grantor', type => 'simple_text', eprint_fieldname => 'institution' },
	] },
];
Retrieving values from this config file in an "Example" export plugin under /plugins/EPrints/Plugin/Export/Example.pm (the file name must match the last part of the package name, including its capitalisation)
               # Export plugin fragment: reads the etd_ms field configuration and
               # collects the tags generated for each configured field.
               package EPrints::Plugin::Export::Example;
               use EPrints::Plugin::Export;
               # NOTE(review): under "use strict" this needs to be declared as "our @ISA".
               @ISA = ( "EPrints::Plugin::Export" );
               # The "[...]" line below is a placeholder for code omitted from this
               # example; $session, $plugin, $eprint and @dcdata are presumably set up there.
               [...]
               # Fetch the array of field hashes defined under etd_ms -> fields.
               my $fields = $session->get_conf('etd_ms','fields');					
               foreach my $field_conf (@{$fields})
		{
		   # generate_tag is dereferenced as an array below, so it is
		   # expected to return an arrayref.
		   my $tags = $plugin->generate_tag($eprint, $field_conf);			
		   foreach my $tag (@{$tags})
		   {
					# Keep only tags that evaluate as true.
					push @dcdata, ($tag) if $tag; 
		   }
		}
Supporting function that returns an arrayref
# Build the tag(s) for one configured field.
#
# Parameters:
#   $plugin     - the export plugin object; the type-specific generator
#                 method is called on it.
#   $eprint     - the eprint being exported; passed through unchanged.
#   $field_conf - hashref describing the field; its 'type' key selects
#                 the generator.
#
# Returns an arrayref of tags. A field type with no generator in this sub
# (or a missing 'type') yields an empty arrayref, so the result can always
# be dereferenced with @{...}.
sub generate_tag {
	my ($plugin, $eprint, $field_conf) = @_;

	# Treat a missing type as "no match" instead of comparing undef.
	my $type = defined $field_conf->{type} ? $field_conf->{type} : '';

	#Constant value
	if ($type eq 'constant')
	{
		return $plugin->generate_constant_tag($eprint, $field_conf);
	}

	# Without an explicit return the sub would fall off the end and hand
	# back a value that is not an array reference.
	return [];
}
Supporting function that generates a tag
# Produce the tag for a 'constant' field.
#
# Parameters:
#   $plugin     - the export plugin object; its _simple_tag method builds the tag.
#   $eprint     - the eprint being exported; not used in this sub.
#   $field_conf - hashref for the field; its 'value' entry is the tag content.
#
# Returns an arrayref holding the single generated tag.
sub generate_constant_tag{
	my $plugin     = shift;
	my $eprint     = shift;
	my $field_conf = shift;

	# scalar() keeps the call in scalar context, so exactly one element
	# ends up in the returned array.
	return [ scalar( $plugin->_simple_tag($field_conf, $field_conf->{value}) ) ];
}
Function parameters
In perl, the following:
$obj->function($arg1, $arg2)
is equivalent to:
Class::function($obj, $arg1,$arg2)
In the example above, when we call:
my $tags = $plugin->generate_tag($eprint, $field_conf);
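It is equivalent to the following (Example is shorthand here for the plugin's full package name, EPrints::Plugin::Export::Example):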
my $tags = Example::generate_tag($plugin, $eprint, $field_conf);
This explains the three parameters listed in the top of the generate_tag function definition:
sub generate_tag {
my ($plugin, $eprint, $field_conf) = @_;
Tips and tricks
Creating a session
Creating a session with EPrints is now done via EPrints->new(); however, many scripts (and examples) still use the legacy EPrints::Session->new() interface.
See "perl_lib/EPrints/BackCompatibility.pm" line 424 for details of the compatibility wrapper and "perl_lib/EPrints.pm" for full usage of EPrints methods.
Old style:
# Both arguments are required with this interface: 1 (to indicate this is a
# script, not CGI) followed by the repository ID. Attempting to run without
# the repository ID results in the following errors:
#   Use of uninitialized value $repository_id in string eq at /usr/share/eprints/perl_lib/EPrints.pm line 551.
#   Can't call method "get_database" on an undefined value at /usr/share/eprints/perl_lib/EPrints/DataObj.pm line 132.
my $session = EPrints::Session->new( 1, 'REPOID');
New style:
# Note specifying CGI is not required via this interface.
my $ep = EPrints->new( cleanup=>0 );
# Replace REPOID with the ID of your repository.
my $session = $ep->repository( 'REPOID');
Usage of get_config
EPrints::Repository::get_config (accessed via $session->get_repository->get_conf()) can be used with two or three parameters.
With only two arguments (key and value) it returns a hash: $session->get_repository->get_conf( "datacitedoi", "typesallowed")
With three arguments (key and two values) it returns 1 (true) if the last item is found in the list stored under the second entry (config_entry in this example): $session->get_repository->get_conf( "datacitedoi", "config_entry", 'Thesis')
get_config is a thin wrapper around config()
See also "perl_lib/EPrints/Repository.pm" line 1611
A basic EPrint extracting script
The simplest of examples: use EPrints to create a session for the repository, then read an (arbitrary) eprint and access two pieces of information about it. The attributes accessed come from EPrint Object and API:EPrints/DataObj.
#!/usr/bin/perl -w

# Minimal example: open a repository session, load one eprint by ID and
# print two of its attributes (its dataset ID and its type).

use strict;

use EPrints;

# Create new session for interacting with eprints.
# Replace REPOID with the ID of your repository.
my $ep = EPrints->new( cleanup=>0 );
my $session = $ep->repository( 'REPOID');

# Stop with a clear message rather than failing later on a method call
# against an undefined value.
die "Could not open repository 'REPOID'\n" unless defined $session;

my $id = 23743;

# Load the eprint object for the desired ID (23743). A lexical variable is
# used because nothing outside this script needs to see it.
my $pulled_eprint = EPrints::DataObj::EPrint->new( $session, $id );
if( !defined $pulled_eprint )
{
	# No object came back for this ID; close the session before exiting.
	$session->terminate();
	die "EPrint $id could not be loaded\n";
}

print $pulled_eprint->get_dataset_id . "\n";
print $pulled_eprint->get_type . "\n";

$session->terminate();
This example can be expanded to test work in progress functions as required.
Further reading
Core_API describes many possible activities.
