From 16fae939a1b9416dc58c5eb5ee66561e263e74b6 Mon Sep 17 00:00:00 2001 From: Gustavo Neves Date: Tue, 21 Feb 2017 10:03:57 -0300 Subject: [PATCH] v1.3.1 Now using a Postscript tool to get page size using ghostscript; Added option -c to let people use the cat+grep method; PS script is embedded; updated readme --- README.md | 6 +- pdfScale.sh | 442 +++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 440 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 5298e86..3e1d261 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ You will need to install imagemagick and have `identify` available on your `$PAT ## Help info ``` $ pdfscale -h -pdfscale v1.2.10 +pdfscale v1.3.1 Usage: pdfscale [-v] [-s ] [-i] [outfile.pdf] pdfscale -h @@ -39,7 +39,9 @@ Parameters: -h Print this help to screen and exits -V Prints version to screen and exits -i Use imagemagick to get page size, - instead of cat + grep method + instead of postscript method + -c Use cat + grep to get page size, + instead of postscript method -s Changes the scaling factor, defaults to 0.95 MUST be a number bigger than zero. Eg. -s 0.8 for 80% of the original size diff --git a/pdfScale.sh b/pdfScale.sh index ec8666b..a829cdf 100755 --- a/pdfScale.sh +++ b/pdfScale.sh @@ -11,7 +11,7 @@ # And: https://gist.github.com/MichaelJCole/86e4968dbfc13256228a -VERSION="1.2.10" +VERSION="1.3.1" SCALE="0.95" # scaling factor (0.95 = 95%, e.g.) VERBOSE=0 # verbosity Level BASENAME="$(basename $0)" # simplified name of this script @@ -28,6 +28,7 @@ TRUE=0 # Silly stuff FALSE=1 USEIMGMGK=$FALSE # ImageMagick Flag, will use identify if true +USECATGREP=$FALSE # Use old cat + grep method # Prints version @@ -54,7 +55,9 @@ Parameters: -h Print this help to screen and exits -V Prints version to screen and exits -i Use imagemagick to get page size, - instead of cat + grep method + instead of postscript method + -c Use cat + grep to get page size, + instead of postscript method -s Changes the scaling factor, defaults to 0.95 MUST be a number bigger than zero. Eg. -s 0.8 for 80% of the original size @@ -131,8 +134,428 @@ getPageSizeImagemagick() { identify="${identify%%BREAKME*}" # get page size only for 1st page identify=($identify) # make it an array - PGWIDTH=${identify[0]} # assign - PGHEIGHT=${identify[1]} + PGWIDTH=$(printf '%.0f' "${identify[0]}") # assign + PGHEIGHT=$(printf '%.0f' "${identify[1]}") # assign +} + + + +# Gets page size using toolbin_pdfinfo.ps +getPageSizeGS() { + local PDFINFOGS='' + read -r -d '' PDFINFOGS <<'EOF' +%!PS +% Copyright (C) 2001-2012 Artifex Software, Inc. +% All Rights Reserved. +% +% This software is provided AS-IS with no warranty, either express or +% implied. +% +% This software is distributed under license and may not be copied, +% modified or distributed except as expressly authorized under the terms +% of the license contained in the file LICENSE in this distribution. +% +% Refer to licensing information at http://www.artifex.com or contact +% Artifex Software, Inc., 7 Mt. Lassen Drive - Suite A-134, San Rafael, +% CA 94903, U.S.A., +1(415)492-9861, for further information. +% +% +% $Id: pdf_info.ps 6300 2005-12-28 19:56:24Z alexcher $ + +% Dump some info from a PDF file + +% usage: gs -dNODISPLAY -q -sFile=____.pdf [-dDumpMediaSizes=false] [-dDumpFontsNeeded=false] [-dDumpXML] +% [-dDumpFontsUsed [-dShowEmbeddedFonts] ] toolbin/pdf_info.ps + +128 dict begin + +/QUIET true def % in case they forgot + +/showoptions { + ( where "options" are:) = + ( -dDumpMediaSizes=false (default true) MediaBox and CropBox for each page) = + ( -dDumpFontsNeeded=false (default true)Fonts used, but not embedded) = + ( -dDumpXML print the XML Metadata from the PDF, if present) = + ( -dDumpFontsUsed List all fonts used) = + ( -dShowEmbeddedFonts only meaningful with -dDumpFontsUsed) = + (\n If no options are given, the default is -dDumpMediaSizes -dDumpFontsNeeded) = + () = + flush +} bind def + +/DumpMediaSizes where { pop } { /DumpMediaSizes true def } ifelse +/DumpFontsNeeded where { pop } { /DumpFontsNeeded true def } ifelse + +[ shellarguments + { counttomark 1 eq { + dup 0 get (-) 0 get ne { + % File specified on the command line using: -- toolbin/pdf_info.ps infile.pdf + /File exch def + false % dont show usage + } { + true % show usage and quit + } ifelse + } { true } ifelse + { + (\n*** Usage: gs [options] -- toolbin/pdf_info.ps infile.pdf ***\n\n) print + showoptions + quit + } if + } if + +/File where not { + (\n *** Missing input file name \(use -sFile=____.pdf\)\n) = + ( usage: gs -dNODISPLAY -q -sFile=____.pdf [ options ] toolbin/pdf_info.ps\n) = + showoptions + quit +} if +cleartomark % discard the dict from --where-- + +% ---- No more executable code on the top level after this line ----- +% ---- except 2 lines at the very end ----- + +/printXML { % printXML - + % print non-blank lines without trailing spaces + dup dup length 1 sub -1 0 { + 1 index 1 index get 32 eq { + 0 exch getinterval exch + } { + exch = exit % non-blank on this line + } + ifelse + } for + pop pop % clean up +} bind def + +/dump-pdf-info { % (fname) -> - + () = ( ) print print ( has ) print + PDFPageCount dup =print 10 mod 1 eq { ( page.\n) } { ( pages\n) } ifelse = flush + + /DumpXML where { + pop + Trailer /Root oget /Metadata knownoget { + //false resolvestream + { dup 256 string readline exch printXML not { exit } if } loop + pop % done with the stream + (_____________________________________________________________) = + flush + } if + } if + + % Print out the "Info" dictionary if present + Trailer /Info knownoget { + dup /Title knownoget { (Title: ) print = flush } if + dup /Author knownoget { (Author: ) print = flush } if + dup /Subject knownoget { (Subject: ) print = flush } if + dup /Keywords knownoget { (Keywords: ) print = flush } if + dup /Creator knownoget { (Creator: ) print = flush } if + dup /Producer knownoget { (Producer: ) print = flush } if + dup /CreationDate knownoget { (CreationDate: ) print = flush } if + dup /ModDate knownoget { (ModDate: ) print = flush } if + dup /Trapped knownoget { (Trapped: ) print = flush } if + pop + } if +} bind def + +% dump-media-sizes - +/dump-media-sizes { + DumpMediaSizes { + () = + % Print out the Page Size info for each page. + (Page ) print =print + dup /UserUnit pget { + ( UserUnit: ) print =print + } if + dup /MediaBox pget { + ( MediaBox: ) print oforce_array ==only + } if + dup /CropBox pget { + ( CropBox: ) print oforce_array ==only + } if + dup /BleedBox pget { + ( BleedBox: ) print oforce_array ==only + } if + dup /TrimBox pget { + ( TrimBox: ) print oforce_array ==only + } if + dup /ArtBox pget { + ( ArtBox: ) print oforce_array ==only + } if + dup /Rotate pget { + ( Rotate = ) print =print + } if + dup /Annots pget { + pop + ( Page contains Annotations) print + } if + pageusestransparency { + ( Page uses transparency features) print + } if + () = flush + } + { + pop pop + } ifelse +} bind def + +% List of standard font names for use when we are showing the FontsNeeded +/StdFontNames [ + /Times-Roman /Helvetica /Courier /Symbol + /Times-Bold /Helvetica-Bold /Courier-Bold /ZapfDingbats + /Times-Italic /Helvetica-Oblique /Courier-Oblique + /Times-BoldItalic /Helvetica-BoldOblique /Courier-BoldOblique +] def + +/res-type-dict 10 dict begin + /Font { + { + exch pop oforce + dup //null ne { + dup /DescendantFonts knownoget { + exch pop 0 get oforce + } if + dup /FontDescriptor knownoget { + dup /FontFile known 1 index /FontFile2 known or exch /FontFile3 known or + /ShowEmbeddedFonts where { pop pop //false } if { + pop % skip embedded fonts + } { + /BaseFont knownoget { % not embedded + 2 index exch //null put + } if + } ifelse + } { + /BaseFont knownoget { % no FontDescriptor, not embedded + 2 index exch //null put + } if + } ifelse + } { + pop + } ifelse + } forall % traverse the dictionary + } bind def + + /XObject { + { + exch pop oforce + dup //null ne { + dup /Subtype knownoget { + /Form eq { + /Resources knownoget { + get-fonts-from-res + } if + } { + pop + } ifelse + } { + pop + } ifelse + } { + pop + } ifelse + } forall + } bind def + + /Pattern { + { + exch pop oforce + dup //null ne { + /Resources knownoget { + get-fonts-from-res + } if + } { + pop + } ifelse + } forall + } bind def +currentdict end readonly def + +% <> get-fonts-from-res - +/get-fonts-from-res { + oforce + dup //null ne { + { + oforce + dup //null ne { + //res-type-dict 3 -1 roll + .knownget { + exec + } { + pop + } ifelse + } { + pop pop + } ifelse + } forall + } { + pop + } ifelse +} bind def + +currentdict /res-type-dict undef + +/getPDFfonts { % getPDFfonts - + dup /Resources pget { get-fonts-from-res } if + /Annots knownoget { + { oforce + dup //null ne { + /AP knownoget { + { exch pop oforce + dup //null ne { + dup /Resources knownoget { + get-fonts-from-res + } if + { exch pop oforce + dup type /dicttype eq { + /Resources knownoget { + get-fonts-from-res + } if + } { + pop + } ifelse + } forall + } { + pop + } ifelse + } forall + } if + } { + pop + } ifelse + } forall + } if + pop +} bind def + +/dump-fonts-used { % dump-fonts-used - + % If DumpFontsUsed is not true, then remove the "standard" fonts from the list + systemdict /DumpFontsUsed known not { + StdFontNames { + 1 index 1 index known { 1 index 1 index undef } if + pop + } forall + } if + + % Now dump the FontsUsed dict into an array so we can sort it. + [ 1 index { pop } forall ] + { 100 string cvs exch 100 string cvs exch lt } .sort + + systemdict /DumpFontsUsed known + { + (\nFont or CIDFont resources used:) = + { = } forall + } { + DumpFontsNeeded { + dup length 0 gt { + (\nFonts Needed that are not embedded \(system fonts required\):) = + { ( ) print = } forall + } { + pop + (\nNo system fonts are needed.) = + } ifelse + } { + pop + } ifelse + } ifelse + pop +} bind def + +% Copy selected subfiles to temporary files and return the file names +% as a PostScript names to protect them from restore. +% Currently, all PDF files in the Portfolio are extracted and returned. +% +% - pdf_collection_files [ /temp_file_name ... /temp_file_name +/pdf_collection_files { + mark + Trailer /Root oget + dup /Collection oknown { + /Names knownoget { + /EmbeddedFiles knownoget { + pdf_collection_names + } if + } if + } { + pop + } ifelse +} bind def + +% Output all the info about the file +/dump { % (title) -> - + /PDFPageCount pdfpagecount def + dump-pdf-info + % dict will be populated with fonts through a call to "getPDFfonts" + % per page, then the contents dumped out in "dump-fonts-used" + 1000 dict + + 1 1 PDFPageCount + { + dup pdfgetpage dup 3 -1 roll + dump-media-sizes + 1 index exch getPDFfonts + } for + + dump-fonts-used + +} bind def + +% Choose between collection vs plain file. +% Enumerate collections and apply the dump procedure. +/enum-pdfs { % - -> - + File (r) file runpdfbegin + pdf_collection_files + dup mark eq { + pop + File dump + runpdfend + } { + runpdfend + ] 0 1 2 index length 1 sub { + 2 copy get exch % [file ... ] file i + 1 add (0123456789) cvs % [file ... ] file (i+1) + File exch ( part ) exch concatstrings concatstrings + exch % [file ... ] (fname part i+1) file + dup type /filetype eq { + runpdfbegin + dump + runpdfend + closefile + } { + .namestring + dup (r) file + runpdfbegin + exch dump + runpdfend + deletefile + } ifelse + } for + pop + } ifelse +} bind def + +enum-pdfs +end +quit + +EOF + # get data from gs script + local identify="$("$GSBIN" -dNODISPLAY -q -sFile=../input-nup.pdf -dDumpMediaSizes -dDumpFontsNeeded=false -c "$PDFINFOGS" 2>/dev/null | grep MediaBox | head -n1)" + + identify="${identify##*MediaBox:}" # get page size only for 1st page + + # remove chars [ and ] + identify="${identify//[}" + identify="${identify//]}" + + identify=($identify) # make it an array + + # sanity + if [[ ${#identify[@]} -lt 4 ]]; then + echo "Error when reading the page size!" + echo "The page size information is invalid!" + exit 16 + fi + + PGWIDTH=$(printf '%.0f' "${identify[2]}") # assign + PGHEIGHT=$(printf '%.0f' "${identify[3]}") # assign } @@ -183,7 +606,7 @@ getPageSize() { # Parse options -while getopts ":vihVs:" o; do +while getopts ":vichVs:" o; do case "${o}" in v) ((VERBOSE++)) @@ -201,6 +624,11 @@ while getopts ":vihVs:" o; do ;; i) USEIMGMGK=$TRUE + USECATGREP=$FALSE + ;; + c) + USECATGREP=$TRUE + USEIMGMGK=$FALSE ;; *) usage @@ -256,8 +684,10 @@ vprint " Output file: $OUTFILEPDF" # Set PGWIDTH and PGHEIGHT if [[ $USEIMGMGK -eq $TRUE ]]; then getPageSizeImagemagick +elif [[ $USECATGREP -eq $TRUE ]]; then + getPageSize else - getPageSize + getPageSizeGS fi vprint " Width: $PGWIDTH postscript-points" vprint " Height: $PGHEIGHT postscript-points"