Perl - rename a file according to its PDF title
I am trying to write a file-rename Perl script to reduce manual effort. At the moment I manually open each PDF file, copy the title, and rename the file according to that title.
I have written the code below to rename a PDF according to its title. E.g. if SPE-180024-MS is the title, the PDF should be renamed to that.
According to my logic it should rename the file, but the output is not correct.
#!/usr/bin/perl
# Cross-checks the XML and PDF files found in every "*-ms" folder under the
# current working directory:
#   * reports XML/PDF files whose name stem differs from the folder name;
#   * compares the <page-count count="N"/> value in each XML file with the
#     real page count of the matching PDF and rewrites the XML on mismatch.
# Every finding is written to ref.txt in the current directory.
#
# NOTE: this script does not rename anything and never reads the PDF title;
# it only reconciles names and page counts.
use strict;
use warnings;
use Cwd;
use File::Basename;
#use File::Copy;
use File::Find;
use PDF::API2;
use CAM::PDF;

my $base_dir = getcwd;

open( my $report, '>', 'ref.txt' ) or die "cannot open ref.txt: $!";

# Candidate folders: directory entries whose name ends in "-ms".  Matching is
# case-insensitive so that upper-case names (e.g. SPE-180024-MS) qualify too.
opendir( my $base_dh, $base_dir ) or die "can't open $base_dir: $!";
my @folders = grep { /-ms$/i && -d $_ } readdir($base_dh);
closedir($base_dh);

for my $folder (@folders) {
    for my $xml_path ( glob("$folder/*.xml") ) {
        open( my $xml_in, '<', $xml_path )
            or die "cannot open file: $xml_path: $!";
        my $xml = join( "", <$xml_in> );
        close $xml_in;

        # Only XML files that declare a page count are of interest.
        next unless $xml =~ m{<page-count count="(\d+)"/>};
        my $xml_pages = $1;

        # Stem of the XML file name, e.g. "spe-1-ms" from "spe-1-ms/spe-1-ms.xml".
        next unless $xml_path =~ m{-ms/(.*?-ms)}is;
        my $xml_stem = $1;

        if ( $folder ne $xml_stem ) {
            print {$report} $folder . "\t" . $xml_stem
                . "\tdifferent xml file present\n";
        }

        for my $pdf_path ( glob("$folder/*.pdf") ) {

            # A PDF is binary and must be parsed; PDF::API2 gives the real
            # page count (open() dies by itself if the file is unreadable).
            my $pdf       = PDF::API2->open($pdf_path);
            my $pdf_pages = $pdf->pages;

            next unless $pdf_path =~ m{-ms/(.*?-ms)}i;
            my $pdf_stem = $1;

            if ( $folder ne $pdf_stem ) {
                print {$report} $folder . "\t" . $pdf_stem
                    . "\tdifferent pdf file present\n";
            }

            # Only compare an XML file with the PDF that shares its stem.
            # index() is a literal substring test, so characters such as "."
            # in a file name are not treated as regex metacharacters.
            next unless index( $xml_stem, $pdf_stem ) >= 0;

            # Numeric comparison: a regex match would treat 12 and 2 as equal.
            next unless $xml_pages != $pdf_pages;

            print {$report} $xml_path . "\t" . $xml_pages . "\t"
                . $pdf_path . "\t" . $pdf_pages . "\n";

            # Correct the page count inside the XML and write the file back.
            $xml =~ s{<page-count count="\Q$xml_pages\E"/>}{<page-count count="$pdf_pages"/>}g;
            open( my $xml_out, '>', $xml_path )
                or die "cannot open file: $xml_path: $!";
            print {$xml_out} $xml . "\n";
            close $xml_out or die "cannot write file: $xml_path: $!";
        }
    }
}

close $report or die "cannot write ref.txt: $!";
This is the document. I have marked the heading that I want.
You cannot just open a PDF file and operate on it. It is different from a text file and has to be parsed. You can use CAM::PDF to convert the PDF to text, which can later be analysed for the title.
The links provided above cover enough to get the job done. I am reproducing the relevant part here:
use CAM::PDF;

# Parse the PDF; new() returns a false value on failure, with the reason
# in $CAM::PDF::errstr.
my $pdf = CAM::PDF->new('test1.pdf') or die "$CAM::PDF::errstr\n";

# Fetch the content of a single page.
# NOTE(review): getPageContent returns the page's content stream; if plain
# text is wanted, CAM::PDF's getPageText may be the better call - confirm.
my $pagenum = 1;
my $page1   = $pdf->getPageContent($pagenum);
The variable `$page1` will have the contents of the page specified by the `$pagenum` variable. The rest is a matter of extracting the required information.
If you find yourself converting the entire PDF to text, you can use getpdftext.pl, which is part of CAM::PDF, but that is inefficient compared to reading a single page.
Comments
Post a Comment