| @@ -11,13 +11,32 @@ | |||
| # And: https://gist.github.com/MichaelJCole/86e4968dbfc13256228a | |||
| VERSION="1.3.5" | |||
| ################################################### | |||
| # PAGESIZE LOGIC | |||
| # 1- Try to get Mediabox with CAT/GREP | |||
| # Remove /BBox search as it is unreliable | |||
| # 2- MacOS => try to use mdls | |||
| # Linux => try to use pdfinfo | |||
| # 3- Try to use identify (imagemagick) | |||
| # 4- Fail | |||
| # Remove postscript method, | |||
| # may have licensing problems | |||
| ################################################### | |||
| VERSION="1.4.1" | |||
| SCALE="0.95" # scaling factor (0.95 = 95%, e.g.) | |||
| VERBOSE=0 # verbosity Level | |||
| BASENAME="$(basename $0)" # simplified name of this script | |||
| GSBIN="" # Set with which after we check dependencies | |||
| BCBIN="" # Set with which after we check dependencies | |||
| IDBIN="" # Set with which after we check dependencies | |||
| # Set with which after we check dependencies | |||
| GSBIN="" # GhostScript Binaries | |||
| BCBIN="" # BC Math binary | |||
| IDBIN="" # Identify Binary | |||
| PDFINFOBIN="" # PDF Info Binary | |||
| MDLSBIN="" # MacOS mdls binary | |||
| OSNAME="$(uname 2>/dev/null)" # Check were we are running | |||
| LC_MEASUREMENT="C" # To make sure our numbers have .decimals | |||
| LC_ALL="C" # Some languages use , as decimal token | |||
| @@ -27,10 +46,15 @@ LC_NUMERIC="C" | |||
| TRUE=0 # Silly stuff | |||
| FALSE=1 | |||
| ADAPTIVEMODE=$TRUE # Automatically try to guess best mode | |||
| MODE="" | |||
| USEIMGMGK=$FALSE # ImageMagick Flag, will use identify if true | |||
| USECATGREP=$FALSE # Use old cat + grep method | |||
| # Prints version | |||
| printVersion() { | |||
| if [[ $1 -eq 2 ]]; then | |||
| @@ -139,428 +163,37 @@ getPageSizeImagemagick() { | |||
| } | |||
| # Gets page size using toolbin_pdfinfo.ps | |||
| getPageSizeGS() { | |||
| local PDFINFOGS='' | |||
| read -r -d '' PDFINFOGS <<'EOF' | |||
| %!PS | |||
| % Copyright (C) 2001-2012 Artifex Software, Inc. | |||
| % All Rights Reserved. | |||
| % | |||
| % This software is provided AS-IS with no warranty, either express or | |||
| % implied. | |||
| % | |||
| % This software is distributed under license and may not be copied, | |||
| % modified or distributed except as expressly authorized under the terms | |||
| % of the license contained in the file LICENSE in this distribution. | |||
| % | |||
| % Refer to licensing information at http://www.artifex.com or contact | |||
| % Artifex Software, Inc., 7 Mt. Lassen Drive - Suite A-134, San Rafael, | |||
| % CA 94903, U.S.A., +1(415)492-9861, for further information. | |||
| % | |||
| % | |||
| % $Id: pdf_info.ps 6300 2005-12-28 19:56:24Z alexcher $ | |||
| % Dump some info from a PDF file | |||
| % usage: gs -dNODISPLAY -q -sFile=____.pdf [-dDumpMediaSizes=false] [-dDumpFontsNeeded=false] [-dDumpXML] | |||
| % [-dDumpFontsUsed [-dShowEmbeddedFonts] ] toolbin/pdf_info.ps | |||
| 128 dict begin | |||
| /QUIET true def % in case they forgot | |||
| /showoptions { | |||
| ( where "options" are:) = | |||
| ( -dDumpMediaSizes=false (default true) MediaBox and CropBox for each page) = | |||
| ( -dDumpFontsNeeded=false (default true)Fonts used, but not embedded) = | |||
| ( -dDumpXML print the XML Metadata from the PDF, if present) = | |||
| ( -dDumpFontsUsed List all fonts used) = | |||
| ( -dShowEmbeddedFonts only meaningful with -dDumpFontsUsed) = | |||
| (\n If no options are given, the default is -dDumpMediaSizes -dDumpFontsNeeded) = | |||
| () = | |||
| flush | |||
| } bind def | |||
| /DumpMediaSizes where { pop } { /DumpMediaSizes true def } ifelse | |||
| /DumpFontsNeeded where { pop } { /DumpFontsNeeded true def } ifelse | |||
| [ shellarguments | |||
| { counttomark 1 eq { | |||
| dup 0 get (-) 0 get ne { | |||
| % File specified on the command line using: -- toolbin/pdf_info.ps infile.pdf | |||
| /File exch def | |||
| false % dont show usage | |||
| } { | |||
| true % show usage and quit | |||
| } ifelse | |||
| } { true } ifelse | |||
| { | |||
| (\n*** Usage: gs [options] -- toolbin/pdf_info.ps infile.pdf ***\n\n) print | |||
| showoptions | |||
| quit | |||
| } if | |||
| } if | |||
| /File where not { | |||
| (\n *** Missing input file name \(use -sFile=____.pdf\)\n) = | |||
| ( usage: gs -dNODISPLAY -q -sFile=____.pdf [ options ] toolbin/pdf_info.ps\n) = | |||
| showoptions | |||
| quit | |||
| } if | |||
| cleartomark % discard the dict from --where-- | |||
| % ---- No more executable code on the top level after this line ----- | |||
| % ---- except 2 lines at the very end ----- | |||
| /printXML { % <string> printXML - | |||
| % print non-blank lines without trailing spaces | |||
| dup dup length 1 sub -1 0 { | |||
| 1 index 1 index get 32 eq { | |||
| 0 exch getinterval exch | |||
| } { | |||
| exch = exit % non-blank on this line | |||
| } | |||
| ifelse | |||
| } for | |||
| pop pop % clean up | |||
| } bind def | |||
| /dump-pdf-info { % (fname) -> - | |||
| () = ( ) print print ( has ) print | |||
| PDFPageCount dup =print 10 mod 1 eq { ( page.\n) } { ( pages\n) } ifelse = flush | |||
| /DumpXML where { | |||
| pop | |||
| Trailer /Root oget /Metadata knownoget { | |||
| //false resolvestream | |||
| { dup 256 string readline exch printXML not { exit } if } loop | |||
| pop % done with the stream | |||
| (_____________________________________________________________) = | |||
| flush | |||
| } if | |||
| } if | |||
| % Print out the "Info" dictionary if present | |||
| Trailer /Info knownoget { | |||
| dup /Title knownoget { (Title: ) print = flush } if | |||
| dup /Author knownoget { (Author: ) print = flush } if | |||
| dup /Subject knownoget { (Subject: ) print = flush } if | |||
| dup /Keywords knownoget { (Keywords: ) print = flush } if | |||
| dup /Creator knownoget { (Creator: ) print = flush } if | |||
| dup /Producer knownoget { (Producer: ) print = flush } if | |||
| dup /CreationDate knownoget { (CreationDate: ) print = flush } if | |||
| dup /ModDate knownoget { (ModDate: ) print = flush } if | |||
| dup /Trapped knownoget { (Trapped: ) print = flush } if | |||
| pop | |||
| } if | |||
| } bind def | |||
| % <page index> <page dict> dump-media-sizes - | |||
| /dump-media-sizes { | |||
| DumpMediaSizes { | |||
| () = | |||
| % Print out the Page Size info for each page. | |||
| (Page ) print =print | |||
| dup /UserUnit pget { | |||
| ( UserUnit: ) print =print | |||
| } if | |||
| dup /MediaBox pget { | |||
| ( MediaBox: ) print oforce_array ==only | |||
| } if | |||
| dup /CropBox pget { | |||
| ( CropBox: ) print oforce_array ==only | |||
| } if | |||
| dup /BleedBox pget { | |||
| ( BleedBox: ) print oforce_array ==only | |||
| } if | |||
| dup /TrimBox pget { | |||
| ( TrimBox: ) print oforce_array ==only | |||
| } if | |||
| dup /ArtBox pget { | |||
| ( ArtBox: ) print oforce_array ==only | |||
| } if | |||
| dup /Rotate pget { | |||
| ( Rotate = ) print =print | |||
| } if | |||
| dup /Annots pget { | |||
| pop | |||
| ( Page contains Annotations) print | |||
| } if | |||
| pageusestransparency { | |||
| ( Page uses transparency features) print | |||
| } if | |||
| () = flush | |||
| } | |||
| { | |||
| pop pop | |||
| } ifelse | |||
| } bind def | |||
| % List of standard font names for use when we are showing the FontsNeeded | |||
| /StdFontNames [ | |||
| /Times-Roman /Helvetica /Courier /Symbol | |||
| /Times-Bold /Helvetica-Bold /Courier-Bold /ZapfDingbats | |||
| /Times-Italic /Helvetica-Oblique /Courier-Oblique | |||
| /Times-BoldItalic /Helvetica-BoldOblique /Courier-BoldOblique | |||
| ] def | |||
| /res-type-dict 10 dict begin | |||
| /Font { | |||
| { | |||
| exch pop oforce | |||
| dup //null ne { | |||
| dup /DescendantFonts knownoget { | |||
| exch pop 0 get oforce | |||
| } if | |||
| dup /FontDescriptor knownoget { | |||
| dup /FontFile known 1 index /FontFile2 known or exch /FontFile3 known or | |||
| /ShowEmbeddedFonts where { pop pop //false } if { | |||
| pop % skip embedded fonts | |||
| } { | |||
| /BaseFont knownoget { % not embedded | |||
| 2 index exch //null put | |||
| } if | |||
| } ifelse | |||
| } { | |||
| /BaseFont knownoget { % no FontDescriptor, not embedded | |||
| 2 index exch //null put | |||
| } if | |||
| } ifelse | |||
| } { | |||
| pop | |||
| } ifelse | |||
| } forall % traverse the dictionary | |||
| } bind def | |||
| /XObject { | |||
| { | |||
| exch pop oforce | |||
| dup //null ne { | |||
| dup /Subtype knownoget { | |||
| /Form eq { | |||
| /Resources knownoget { | |||
| get-fonts-from-res | |||
| } if | |||
| } { | |||
| pop | |||
| } ifelse | |||
| } { | |||
| pop | |||
| } ifelse | |||
| } { | |||
| pop | |||
| } ifelse | |||
| } forall | |||
| } bind def | |||
| /Pattern { | |||
| { | |||
| exch pop oforce | |||
| dup //null ne { | |||
| /Resources knownoget { | |||
| get-fonts-from-res | |||
| } if | |||
| } { | |||
| pop | |||
| } ifelse | |||
| } forall | |||
| } bind def | |||
| currentdict end readonly def | |||
| % <dict for fonts> <<res-dict>> get-fonts-from-res - | |||
| /get-fonts-from-res { | |||
| oforce | |||
| dup //null ne { | |||
| { | |||
| oforce | |||
| dup //null ne { | |||
| //res-type-dict 3 -1 roll | |||
| .knownget { | |||
| exec | |||
| } { | |||
| pop | |||
| } ifelse | |||
| } { | |||
| pop pop | |||
| } ifelse | |||
| } forall | |||
| } { | |||
| pop | |||
| } ifelse | |||
| } bind def | |||
| currentdict /res-type-dict undef | |||
| /getPDFfonts { % <dict for fonts> <page dict> getPDFfonts - | |||
| dup /Resources pget { get-fonts-from-res } if | |||
| /Annots knownoget { | |||
| { oforce | |||
| dup //null ne { | |||
| /AP knownoget { | |||
| { exch pop oforce | |||
| dup //null ne { | |||
| dup /Resources knownoget { | |||
| get-fonts-from-res | |||
| } if | |||
| { exch pop oforce | |||
| dup type /dicttype eq { | |||
| /Resources knownoget { | |||
| get-fonts-from-res | |||
| } if | |||
| } { | |||
| pop | |||
| } ifelse | |||
| } forall | |||
| } { | |||
| pop | |||
| } ifelse | |||
| } forall | |||
| } if | |||
| } { | |||
| pop | |||
| } ifelse | |||
| } forall | |||
| } if | |||
| pop | |||
| } bind def | |||
| /dump-fonts-used { % <dict for fonts> dump-fonts-used - | |||
| % If DumpFontsUsed is not true, then remove the "standard" fonts from the list | |||
| systemdict /DumpFontsUsed known not { | |||
| StdFontNames { | |||
| 1 index 1 index known { 1 index 1 index undef } if | |||
| pop | |||
| } forall | |||
| } if | |||
| % Now dump the FontsUsed dict into an array so we can sort it. | |||
| [ 1 index { pop } forall ] | |||
| { 100 string cvs exch 100 string cvs exch lt } .sort | |||
| systemdict /DumpFontsUsed known | |||
| { | |||
| (\nFont or CIDFont resources used:) = | |||
| { = } forall | |||
| } { | |||
| DumpFontsNeeded { | |||
| dup length 0 gt { | |||
| (\nFonts Needed that are not embedded \(system fonts required\):) = | |||
| { ( ) print = } forall | |||
| } { | |||
| pop | |||
| (\nNo system fonts are needed.) = | |||
| } ifelse | |||
| } { | |||
| pop | |||
| } ifelse | |||
| } ifelse | |||
| pop | |||
| } bind def | |||
| % Copy selected subfiles to temporary files and return the file names | |||
| % as a PostScript names to protect them from restore. | |||
| % Currently, all PDF files in the Portfolio are extracted and returned. | |||
| % | |||
| % - pdf_collection_files [ /temp_file_name ... /temp_file_name | |||
| /pdf_collection_files { | |||
| mark | |||
| Trailer /Root oget | |||
| dup /Collection oknown { | |||
| /Names knownoget { | |||
| /EmbeddedFiles knownoget { | |||
| pdf_collection_names | |||
| } if | |||
| } if | |||
| } { | |||
| pop | |||
| } ifelse | |||
| } bind def | |||
| % Output all the info about the file | |||
| /dump { % (title) -> - | |||
| /PDFPageCount pdfpagecount def | |||
| dump-pdf-info | |||
| % dict will be populated with fonts through a call to "getPDFfonts" | |||
| % per page, then the contents dumped out in "dump-fonts-used" | |||
| 1000 dict | |||
| 1 1 PDFPageCount | |||
| { | |||
| dup pdfgetpage dup 3 -1 roll | |||
| dump-media-sizes | |||
| 1 index exch getPDFfonts | |||
| } for | |||
| dump-fonts-used | |||
| } bind def | |||
| % Choose between collection vs plain file. | |||
| % Enumerate collections and apply the dump procedure. | |||
| /enum-pdfs { % - -> - | |||
| File (r) file runpdfbegin | |||
| pdf_collection_files | |||
| dup mark eq { | |||
| pop | |||
| File dump | |||
| runpdfend | |||
| } { | |||
| runpdfend | |||
| ] 0 1 2 index length 1 sub { | |||
| 2 copy get exch % [file ... ] file i | |||
| 1 add (0123456789) cvs % [file ... ] file (i+1) | |||
| File exch ( part ) exch concatstrings concatstrings | |||
| exch % [file ... ] (fname part i+1) file | |||
| dup type /filetype eq { | |||
| runpdfbegin | |||
| dump | |||
| runpdfend | |||
| closefile | |||
| } { | |||
| .namestring | |||
| dup (r) file | |||
| runpdfbegin | |||
| exch dump | |||
| runpdfend | |||
| deletefile | |||
| } ifelse | |||
| } for | |||
| pop | |||
| } ifelse | |||
| } bind def | |||
| enum-pdfs | |||
| end | |||
| quit | |||
| EOF | |||
| # get data from gs script | |||
| local identify="$("$GSBIN" -dNODISPLAY -q -sFile="$INFILEPDF" -dDumpMediaSizes -dDumpFontsNeeded=false -c "$PDFINFOGS" 2>/dev/null | grep MediaBox | head -n1)" | |||
| identify="${identify##*MediaBox:}" # get page size only for 1st page | |||
| # remove chars [ and ] | |||
| identify="${identify//[}" | |||
| identify="${identify//]}" | |||
| # Gets page size using Mac Quarts mdls | |||
| getPageSizeMdls() { | |||
| [[ ! $OSNAME = "Darwin" ]] && return | |||
| # get data from image magick | |||
| local identify="$("$MDLSBIN" -mdls -name kMDItemPageHeight -name kMDItemPageWidth "$INFILEPDF" 2>/dev/null)" | |||
| identify=($identify) # make it an array | |||
| echo " - ${identify[0]} - ${identify[1] - ${identify[3]} - ${identify[4]}}" | |||
| PGWIDTH=$(printf '%.0f' "${identify[1]}") # assign | |||
| PGHEIGHT=$(printf '%.0f' "${identify[3]}") # assign | |||
| } | |||
| # sanity | |||
| if [[ ${#identify[@]} -lt 4 ]]; then | |||
| echo "Error when reading the page size!" | |||
| echo "The page size information is invalid!" | |||
| exit 16 | |||
| fi | |||
| PGWIDTH=$(printf '%.0f' "${identify[2]}") # assign | |||
| # Gets page size using Mac Quarts mdls | |||
| getPageSizePdfInfo() { | |||
| # get data from image magick | |||
| local identify="$("$MDLSBIN" -mdls -name kMDItemPageHeight -name kMDItemPageWidth "$INFILEPDF" 2>/dev/null)" | |||
| identify=($identify) # make it an array | |||
| echo " - ${identify[0]} - ${identify[1] - ${identify[3]} - ${identify[4]}}" | |||
| PGWIDTH=$(printf '%.0f' "${identify[1]}") # assign | |||
| PGHEIGHT=$(printf '%.0f' "${identify[3]}") # assign | |||
| } | |||
| # Gets page size using cat and grep | |||
| getPageSize() { | |||
| getPageSizeCatGrep() { | |||
| # get MediaBox info from PDF file using cat and grep, these are all possible | |||
| # /MediaBox [0 0 595 841] | |||
| # /MediaBox [ 0 0 595.28 841.89] | |||
| @@ -570,19 +203,15 @@ getPageSize() { | |||
| local mediaBox="$(cat "$INFILEPDF" | grep -a '/MediaBox' | head -n1)" | |||
| mediaBox="${mediaBox##*/MediaBox}" | |||
| # If no MediaBox, try BBox | |||
| if [[ -z $mediaBox ]]; then | |||
| mediaBox="$(cat "$INFILEPDF" | grep -a '/BBox' | head -n1)" | |||
| mediaBox="${mediaBox##*/BBox}" | |||
| fi | |||
| # No page size data available | |||
| if [[ -z $mediaBox ]]; then | |||
| if [[ -z $mediaBox && $ADAPTIVEMODE = $FALSE ]]; then | |||
| echo "Error when reading input file!" | |||
| echo "Could not determine the page size!" | |||
| echo "There is no MediaBox or BBox in the pdf document!" | |||
| echo "Aborting..." | |||
| echo "There is no MediaBox in the pdf document!" | |||
| echo "Aborting! You may want to try the adaptive mode." | |||
| exit 15 | |||
| elif [[ -z $mediaBox && $ADAPTIVEMODE = $TRUE ]]; then | |||
| return $FALSE | |||
| fi | |||
| # remove chars [ and ] | |||
| @@ -602,6 +231,34 @@ getPageSize() { | |||
| # we are done | |||
| PGWIDTH=$(printf '%.0f' "${mediaBox[2]}") # Get Round Width | |||
| PGHEIGHT=$(printf '%.0f' "${mediaBox[3]}") # Get Round Height | |||
| return $TRUE | |||
| } | |||
| getPageSize() { | |||
| vprint "Detecting page size with cat+grep method" | |||
| getPageSizeCatGrep | |||
| if [[ -z $PGWIDTH && -z $PGHEIGHT ]]; then | |||
| vprint " -> method failed!" | |||
| if [[ $OSNAME = "Darwin" ]]; then | |||
| vprint "Detecting page size with Mac Quartz mdls" | |||
| getPageSizeMdls | |||
| else | |||
| vprint "Detecting page size with Linux pdfinfo" | |||
| getPageSizePdfInfo | |||
| fi | |||
| fi | |||
| if [[ -z $PGWIDTH && -z $PGHEIGHT ]]; then | |||
| vprint " -> method failed!" | |||
| vprint " Detecting page size with ImageMagick's identify" | |||
| getPageSizeImagemagick | |||
| fi | |||
| if [[ -z $PGWIDTH && -z $PGHEIGHT ]]; then | |||
| echo "Error when detecting PDF paper size!" | |||
| echo "All methods of detection failed" | |||
| exit 17 | |||
| fi | |||
| } | |||
| @@ -657,9 +314,13 @@ fi | |||
| # Get dependency binaries | |||
| GSBIN=$(which gs 2>/dev/null) | |||
| BCBIN=$(which bc 2>/dev/null) | |||
| GSBIN="$(which gs 2>/dev/null)" | |||
| BCBIN="$(which bc 2>/dev/null)" | |||
| if [[ $OSNAME = "Darwin" ]]; then | |||
| MDLSBIN="$(which mdls 2>/dev/null)" | |||
| else | |||
| PDFINFOBIN="$(which pdfinfo 2>/dev/null)" | |||
| fi | |||
| # Verbose scale info | |||
| vprint " Scale factor: $SCALE" | |||
| @@ -681,15 +342,16 @@ else | |||
| fi | |||
| vprint " Output file: $OUTFILEPDF" | |||
| getPageSizeMdls | |||
| # Set PGWIDTH and PGHEIGHT | |||
| if [[ $USEIMGMGK -eq $TRUE ]]; then | |||
| getPageSizeImagemagick | |||
| elif [[ $USECATGREP -eq $TRUE ]]; then | |||
| getPageSize | |||
| else | |||
| getPageSizeGS | |||
| fi | |||
| #if [[ $USEIMGMGK -eq $TRUE ]]; then | |||
| # getPageSizeImagemagick | |||
| #elif [[ $USECATGREP -eq $TRUE ]]; then | |||
| # getPageSize | |||
| #else | |||
| # getPageSizeGS | |||
| #fi | |||
| vprint " Width: $PGWIDTH postscript-points" | |||
| vprint " Height: $PGHEIGHT postscript-points" | |||