PDF/A-Erzeugung: die XMP-Metadaten selber erzeugen
authorMoritz Bunkus <m.bunkus@linet-services.de>
Tue, 5 Nov 2019 15:53:11 +0000 (16:53 +0100)
committerMoritz Bunkus <m.bunkus@linet-services.de>
Fri, 28 Feb 2020 14:01:42 +0000 (15:01 +0100)
SL/DB/Helper/PDF_A.pm
SL/Template/LaTeX.pm
templates/pdf/pdf_a_metadata.xmp [new file with mode: 0644]

index 4f52844..f3388a3 100644 (file)
@@ -5,6 +5,27 @@ use strict;
 use parent qw(Exporter);
 our @EXPORT = qw(create_pdf_a_print_options);
 
+use Carp;
+use Template;
+
+sub _create_xmp_data {
+  my ($self, %params) = @_;
+
+        use Cwd;
+  my $template = Template->new({
+    INTERPOLATE  => 0,
+    EVAL_PERL    => 0,
+    ABSOLUTE     => 1,
+    PLUGIN_BASE  => 'SL::Template::Plugin',
+    ENCODING     => 'utf8',
+  }) || croak;
+
+  my $output = '';
+  $template->process(SL::System::Process::exe_dir() . '/templates/pdf/pdf_a_metadata.xmp', \%params, \$output) || croak $template->error;
+
+  return $output;
+}
+
 sub create_pdf_a_print_options {
   my ($self) = @_;
 
@@ -20,13 +41,23 @@ sub create_pdf_a_print_options {
     $::instance_conf->get_company
   };
 
+  my $timestamp =  DateTime->now_local->strftime('%Y-%m-%dT%H:%M:%S%z');
+  $timestamp    =~ s{(..)$}{:$1};
+
   return {
-    version   => '3b',
-    meta_data => {
-      title    => $self->displayable_name,
-      author   => $author,
-      language => $pdf_language,
-    },
+    version => '3b',
+    xmp     => _create_xmp_data(
+      $self,
+      pdf_a_version     => '3',
+      pdf_a_conformance => 'B',
+      producer          => 'pdfTeX',
+      timestamp         => $timestamp, # 2019-11-05T15:26:20+01:00
+      meta_data => {
+        title    => $self->displayable_name,
+        author   => $author,
+        language => $pdf_language,
+      },
+    ),
   };
 }
 
index 8b4c4e8..303fb7f 100644 (file)
@@ -420,7 +420,7 @@ sub _force_mandatory_packages {
   my ($self, @lines) = @_;
   my @new_lines;
 
-  my (%used_packages, $at_beginning_of_document);
+  my %used_packages;
   my @required_packages = qw(textcomp ulem);
   push @required_packages, 'embedfile' if $self->{pdf_a};
 
@@ -430,15 +430,27 @@ sub _force_mandatory_packages {
 
     } elsif (($line =~ m/\\documentclass/) && $self->{pdf_a}) {
       my $version = $self->{pdf_a}->{version}   // '3a';
-      my $meta    = $self->{pdf_a}->{meta_data} // {};
+
+      if ($self->{pdf_a}->{xmp}) {
+        my $xmp_file_name = $self->{userspath} . "/pdfa.xmp";
+        my $out           = IO::File->new($xmp_file_name, ">:encoding(utf-8)") || croak "Error creating ${xmp_file_name}: $!";
+        $out->print(Encode::encode('utf-8', $self->{pdf_a}->{xmp}));
+        $out->close;
+
+      } else {
+        my $meta = $self->{pdf_a}->{meta_data} // {};
+
+        push @new_lines, (
+          "\\RequirePackage{filecontents}\n",
+          "\\begin{filecontents*}{\\jobname.xmpdata}\n",
+          ($meta->{title}    ? sprintf("\\Title{%s}\n",    $meta->{title})    : ""),
+          ($meta->{author}   ? sprintf("\\Author{%s}\n",   $meta->{author})   : ""),
+          ($meta->{language} ? sprintf("\\Language{%s}\n", $meta->{language}) : ""),
+          "\\end{filecontents*}\n",
+        );
+      }
 
       push @new_lines, (
-        "\\RequirePackage{filecontents}\n",
-        "\\begin{filecontents*}{\\jobname.xmpdata}\n",
-        ($meta->{title}    ? sprintf("\\Title{%s}\n",    $meta->{title})    : ""),
-        ($meta->{author}   ? sprintf("\\Author{%s}\n",   $meta->{author})   : ""),
-        ($meta->{language} ? sprintf("\\Language{%s}\n", $meta->{language}) : ""),
-        "\\end{filecontents*}\n",
         $line,
         "\\usepackage[a-${version},mathxmp]{pdfx}[2018/12/22]\n",
         "\\usepackage[genericmode]{tagpdf}\n",
@@ -449,17 +461,14 @@ sub _force_mandatory_packages {
       next;
 
     } elsif ($line =~ m/\\begin\{document\}/) {
-      $at_beginning_of_document = 1;
       push @new_lines, map { "\\usepackage{$_}\n" } grep { !$used_packages{$_} } @required_packages;
+      push @new_lines, $line;
+      push @new_lines, map { $self->_embed_file_directive($_) } @{ $self->{pdf_attachments} // [] };
+
+      next;
     }
 
     push @new_lines, $line;
-
-    if ($at_beginning_of_document) {
-      $at_beginning_of_document = 0;
-
-      push @new_lines, map { $self->_embed_file_directive($_) } @{ $self->{pdf_attachments} // [] };
-    }
   }
 
   return @new_lines;
diff --git a/templates/pdf/pdf_a_metadata.xmp b/templates/pdf/pdf_a_metadata.xmp
new file mode 100644 (file)
index 0000000..a26157b
--- /dev/null
@@ -0,0 +1,92 @@
+<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d' ?>
+
+<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="Adobe XMP Core 4.0-c316 44.253921, Sun Oct 01 2006 17:14:39">
+ <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+  <rdf:Description rdf:about=""
+                   xmlns:pdfaExtension="http://www.aiim.org/pdfa/ns/extension/"
+                   xmlns:pdfaSchema="http://www.aiim.org/pdfa/ns/schema#"
+                   xmlns:pdfaProperty="http://www.aiim.org/pdfa/ns/property#"
+                   >
+   <pdfaExtension:schemas>
+    <rdf:Bag>
+     <rdf:li rdf:parseType="Resource">
+      <pdfaSchema:namespaceURI>http://ns.adobe.com/pdfx/1.3/</pdfaSchema:namespaceURI>
+      <pdfaSchema:prefix>pdfx</pdfaSchema:prefix>
+      <pdfaSchema:schema>PDF/X Schema</pdfaSchema:schema>
+      <pdfaSchema:property><rdf:Seq>
+       <rdf:li rdf:parseType="Resource">
+        <pdfaProperty:category>external</pdfaProperty:category>
+        <pdfaProperty:description>URL to an online version or preprint</pdfaProperty:description>
+        <pdfaProperty:name>AuthoritativeDomain</pdfaProperty:name>
+        <pdfaProperty:valueType>Text</pdfaProperty:valueType>
+       </rdf:li></rdf:Seq>
+      </pdfaSchema:property>
+     </rdf:li>
+     <rdf:li rdf:parseType="Resource">
+      <pdfaSchema:namespaceURI>http://www.aiim.org/pdfua/ns/id/</pdfaSchema:namespaceURI>
+      <pdfaSchema:prefix>pdfuaid</pdfaSchema:prefix>
+      <pdfaSchema:schema>PDF/UA ID Schema</pdfaSchema:schema>
+      <pdfaSchema:property><rdf:Seq>
+       <rdf:li rdf:parseType="Resource">
+        <pdfaProperty:category>internal</pdfaProperty:category>
+        <pdfaProperty:description>Part of PDF/UA standard</pdfaProperty:description>
+        <pdfaProperty:name>part</pdfaProperty:name>
+        <pdfaProperty:valueType>Integer</pdfaProperty:valueType>
+       </rdf:li></rdf:Seq>
+      </pdfaSchema:property>
+     </rdf:li>
+     <rdf:li rdf:parseType="Resource">
+      <pdfaSchema:schema>PRISM metadata</pdfaSchema:schema>
+      <pdfaSchema:namespaceURI>http://prismstandard.org/namespaces/basic/2.2/</pdfaSchema:namespaceURI>
+      <pdfaSchema:prefix>prism</pdfaSchema:prefix>
+      <pdfaSchema:property><rdf:Seq>
+       <rdf:li rdf:parseType="Resource">
+        <pdfaProperty:name>aggregationType</pdfaProperty:name>
+        <pdfaProperty:valueType>Text</pdfaProperty:valueType>
+        <pdfaProperty:category>external</pdfaProperty:category>
+        <pdfaProperty:description>The type of publication. If defined, must be one of book, catalog, feed, journal, magazine, manual, newsletter, pamphlet.</pdfaProperty:description>
+       </rdf:li>
+       <rdf:li rdf:parseType="Resource">
+        <pdfaProperty:name>url</pdfaProperty:name>
+        <pdfaProperty:valueType>URL</pdfaProperty:valueType>
+        <pdfaProperty:category>external</pdfaProperty:category>
+        <pdfaProperty:description>URL for the article or unit of content</pdfaProperty:description>
+       </rdf:li>
+      </rdf:Seq></pdfaSchema:property>
+     </rdf:li>
+    </rdf:Bag>
+   </pdfaExtension:schemas>
+  </rdf:Description>
+  <rdf:Description rdf:about="" xmlns:pdf="http://ns.adobe.com/pdf/1.3/">
+   <pdf:Producer>[% producer | xml %]</pdf:Producer>
+  </rdf:Description>
+  <rdf:Description rdf:about="" xmlns:dc="http://purl.org/dc/elements/1.1/">
+   <dc:format>application/pdf</dc:format>
+[% IF meta_data.title %]
+   <dc:title><rdf:Alt><rdf:li xml:lang="x-default">[% meta_data.title | xml %]</rdf:li></rdf:Alt></dc:title>
+[% END %]
+   <dc:creator><rdf:Seq><rdf:li>v3</rdf:li></rdf:Seq></dc:creator>
+[% IF meta_data.language %]
+   <dc:language><rdf:Bag><rdf:li>[% meta_data.language | xml %]</rdf:li></rdf:Bag></dc:language>
+[% END %]
+  </rdf:Description>
+  <rdf:Description rdf:about="" xmlns:prism="http://prismstandard.org/namespaces/basic/2.2/">
+  </rdf:Description>
+  <rdf:Description rdf:about="" xmlns:pdfx="http://ns.adobe.com/pdfx/1.3/">
+  </rdf:Description>
+  <rdf:Description rdf:about="" xmlns:pdfaid="http://www.aiim.org/pdfa/ns/id/">
+   <pdfaid:part>[% pdf_a_version | xml %]</pdfaid:part>
+   <pdfaid:conformance>[% pdf_a_conformance | xml %]</pdfaid:conformance>
+  </rdf:Description>
+  <rdf:Description rdf:about="" xmlns:xmp="http://ns.adobe.com/xap/1.0/">
+   <xmp:CreatorTool>[% producer | xml %]</xmp:CreatorTool>
+   <xmp:ModifyDate>[% timestamp | xml %]</xmp:ModifyDate>
+   <xmp:CreateDate>[% timestamp | xml %]</xmp:CreateDate>
+   <xmp:MetadataDate>[% timestamp | xml %]</xmp:MetadataDate>
+  </rdf:Description>
+  <rdf:Description rdf:about="" xmlns:xmpRights = "http://ns.adobe.com/xap/1.0/rights/">
+  </rdf:Description>
+ </rdf:RDF>
+</x:xmpmeta>
+
+<?xpacket end='w'?>