|
- #!/usr/bin/env bash
-
- # pdfScale.sh
- #
- # Scale PDF to specified percentage of original size.
- #
- # Gustavo Arnosti Neves - 2016 / 07 / 10
- #
- # This script: https://github.com/tavinus/pdfScale
- # Based on: http://ma.juii.net/blog/scale-page-content-of-pdf-files
- # And: https://gist.github.com/MichaelJCole/86e4968dbfc13256228a
-
-
# ---------------------------------------------------------------------------
# Global configuration and state shared by every function in this script.
# ---------------------------------------------------------------------------
VERSION="1.3.3"
SCALE="0.95"                      # scaling factor (0.95 = 95%, e.g.)
VERBOSE=0                         # verbosity level, raised once per -v option
BASENAME="$(basename -- "$0")"    # simplified name of this script (quoted: path may contain spaces)
GSBIN=""                          # path to ghostscript, set after the dependency check
BCBIN=""                          # path to bc, set after the dependency check
IDBIN=""                          # path to imagemagick's identify, set after the dependency check

# Force the C locale so numbers always use '.' as the decimal separator.
# The variables are exported so that child processes inherit the setting
# too; a plain assignment is only guaranteed to affect this shell.
export LC_MEASUREMENT="C"
export LC_ALL="C"
export LC_CTYPE="C"
export LC_NUMERIC="C"

# Boolean flags follow the shell exit-status convention (0 = true).
TRUE=0
FALSE=1

USEIMGMGK=$FALSE                  # ImageMagick flag, will use identify if true
USECATGREP=$FALSE                 # use the old cat + grep method if true
-
# Prints "<script name> v<version>".
#   $1 - when numerically equal to 2 the banner goes to stderr,
#        otherwise (including when omitted) it goes to stdout.
printVersion() {
  local banner="$BASENAME v$VERSION"
  if [[ $1 -eq 2 ]]; then
    echo >&2 "$banner"
    return
  fi
  echo "$banner"
}
-
-
# Prints the version banner followed by the full help text on stdout.
# The help text is framed by one empty line before and one after it.
printHelp() {
  printVersion
  cat <<EOF

Usage: $BASENAME [-v] [-s <factor>] [-i|-c] <inFile.pdf> [outfile.pdf]
$BASENAME -h
$BASENAME -V

Parameters:
-v Verbose mode, prints extra information
Use twice for even more information
-h Print this help to screen and exits
-V Prints version to screen and exits
-i Use imagemagick to get page size,
instead of postscript method
-c Use cat + grep to get page size,
instead of postscript method
-s <factor> Changes the scaling factor, defaults to 0.95
MUST be a number bigger than zero.
Eg. -s 0.8 for 80% of the original size

Notes:
- Options must be passed before the file names to be parsed
- The output filename is optional. If no file name is passed
the output file will have the same name/destination of the
input file, with .SCALED.pdf at the end (instead of just .pdf)
- Having the extension .pdf on the output file name is optional,
it will be added if not present
- Should handle file names with spaces without problems
- The scaling is centered and using a scale bigger than 1 may
result on cropping parts of the pdf.

Examples:
$BASENAME myPdfFile.pdf
$BASENAME myPdfFile.pdf myScaledPdf
$BASENAME -v -v myPdfFile.pdf
$BASENAME -s 0.85 myPdfFile.pdf myScaledPdf.pdf
$BASENAME -i -s 0.80 -v myPdfFile.pdf
$BASENAME -v -v -s 0.7 myPdfFile.pdf
$BASENAME -h

EOF
}
-
-
# Prints the version banner and a short usage summary on stderr, then
# terminates the script with exit status 1 (this function never returns).
# The synopsis lists every option accepted by the option parser,
# including the mutually exclusive page-size method switches -i and -c.
usage() {
  printVersion 2
  {
    echo "Usage: $BASENAME [-v] [-s <factor>] [-i|-c] <inFile.pdf> [outfile.pdf]"
    echo "Try: $BASENAME -h # for help"
  } >&2
  exit 1
}
-
-
# Prints a verbose-mode message on stdout.
#   $1 - the message
# Nothing is printed while VERBOSE is 0. When VERBOSE is greater than 1
# the message is prefixed with a "YYYY-MM-DD:HH:MM:SS | " timestamp.
vprint() {
  [[ $VERBOSE -eq 0 ]] && return 0
  # local, so callers' variables named "timestamp" are left untouched
  local timestamp=""
  [[ $VERBOSE -gt 1 ]] && timestamp="$(date +%Y-%m-%d:%H:%M:%S) | "
  # printf instead of echo: a message such as "-n" must be printed verbatim
  printf '%s\n' "$timestamp$1"
}
-
-
# Reports a missing dependency on stderr and aborts the script.
#   $1 - name of the package the user has to install
# Prints the version banner, the error and install hints for apt-get,
# yum and homebrew, then exits with status 3 (never returns).
printDependency() {
  local pkg="$1"
  printVersion 2
  {
    printf '\n%s\n\n' "ERROR! You need to install the package '$pkg'"
    printf '%s\n' \
      "Linux apt-get.: sudo apt-get install $pkg" \
      "Linux yum.....: sudo yum install $pkg" \
      "MacOS homebrew: brew install $pkg"
    printf '\n%s\n' "Aborting..."
  } >&2
  exit 3
}
-
-
# Parses and validates the scaling factor, storing it in the global SCALE.
#   $1 - candidate factor: an unsigned decimal number such as 2, 0.8 or .5
# On invalid input an explanation is printed on stderr and the script
# exits with status 2.
parseScale() {
  # Optional integer part, optional fraction that needs at least one digit.
  local number_re='^[0-9]*([.][0-9]+)?$'
  # "Greater than zero" is checked textually: an unsigned decimal is
  # positive exactly when it contains a non-zero digit. A '>' inside
  # [[ ]] would compare strings, not numbers, which accepts "0.0" and
  # rejects ".5".
  if ! [[ -n "$1" && "$1" =~ $number_re && "$1" =~ [1-9] ]]; then
    echo >&2 "Invalid factor: $1"
    echo >&2 "The factor must be a floating point number greater than 0"
    echo >&2 "Example: for 80% use 0.8"
    exit 2
  fi
  SCALE=$1
}
-
-
# Gets the page size using imagemagick's identify.
# Reads:  IDBIN (identify executable), INFILEPDF (input file)
# Writes: PGWIDTH and PGHEIGHT, rounded to whole numbers
# Exits with status 16 when identify yields no usable width/height pair.
getPageSizeImagemagick() {
  local identify
  local -a dims=()

  # identify prints one "width height" record per page; the BREAKME
  # marker ends each record so the first page can be cut out below.
  identify="$("$IDBIN" -format '%[fx:w] %[fx:h]BREAKME' "$INFILEPDF" 2>/dev/null)"
  identify="${identify%%BREAKME*}"      # keep the 1st page only

  # Split into words without exposing the data to pathname expansion.
  read -r -a dims <<<"$identify"

  # sanity: without this a failed identify would silently yield 0 x 0
  if [[ ${#dims[@]} -lt 2 ]]; then
    echo >&2 "Error when reading the page size!"
    echo >&2 "The page size information is invalid!"
    exit 16
  fi

  PGWIDTH=$(printf '%.0f' "${dims[0]}")
  PGHEIGHT=$(printf '%.0f' "${dims[1]}")
}
-
-
-
# Gets the page size using Ghostscript and an embedded copy of its
# toolbin pdf_info.ps program.
# Reads:  GSBIN (ghostscript executable), INFILEPDF (input file)
# Writes: PGWIDTH and PGHEIGHT, rounded to whole numbers, taken from the
#         3rd and 4th value of the first MediaBox line that is reported
# Exits with status 16 when no usable MediaBox line is produced.
getPageSizeGS() {
  local PDFINFOGS=''
  # Load the PostScript program into a variable. read hits end-of-input
  # before finding a NUL delimiter, so it stores the whole heredoc.
  read -r -d '' PDFINFOGS <<'EOF'
%!PS
% Copyright (C) 2001-2012 Artifex Software, Inc.
% All Rights Reserved.
%
% This software is provided AS-IS with no warranty, either express or
% implied.
%
% This software is distributed under license and may not be copied,
% modified or distributed except as expressly authorized under the terms
% of the license contained in the file LICENSE in this distribution.
%
% Refer to licensing information at http://www.artifex.com or contact
% Artifex Software, Inc., 7 Mt. Lassen Drive - Suite A-134, San Rafael,
% CA 94903, U.S.A., +1(415)492-9861, for further information.
%
%
% $Id: pdf_info.ps 6300 2005-12-28 19:56:24Z alexcher $

% Dump some info from a PDF file

% usage: gs -dNODISPLAY -q -sFile=____.pdf [-dDumpMediaSizes=false] [-dDumpFontsNeeded=false] [-dDumpXML]
% [-dDumpFontsUsed [-dShowEmbeddedFonts] ] toolbin/pdf_info.ps

128 dict begin

/QUIET true def % in case they forgot

/showoptions {
( where "options" are:) =
( -dDumpMediaSizes=false (default true) MediaBox and CropBox for each page) =
( -dDumpFontsNeeded=false (default true)Fonts used, but not embedded) =
( -dDumpXML print the XML Metadata from the PDF, if present) =
( -dDumpFontsUsed List all fonts used) =
( -dShowEmbeddedFonts only meaningful with -dDumpFontsUsed) =
(\n If no options are given, the default is -dDumpMediaSizes -dDumpFontsNeeded) =
() =
flush
} bind def

/DumpMediaSizes where { pop } { /DumpMediaSizes true def } ifelse
/DumpFontsNeeded where { pop } { /DumpFontsNeeded true def } ifelse

[ shellarguments
{ counttomark 1 eq {
dup 0 get (-) 0 get ne {
% File specified on the command line using: -- toolbin/pdf_info.ps infile.pdf
/File exch def
false % dont show usage
} {
true % show usage and quit
} ifelse
} { true } ifelse
{
(\n*** Usage: gs [options] -- toolbin/pdf_info.ps infile.pdf ***\n\n) print
showoptions
quit
} if
} if

/File where not {
(\n *** Missing input file name \(use -sFile=____.pdf\)\n) =
( usage: gs -dNODISPLAY -q -sFile=____.pdf [ options ] toolbin/pdf_info.ps\n) =
showoptions
quit
} if
cleartomark % discard the dict from --where--

% ---- No more executable code on the top level after this line -----
% ---- except 2 lines at the very end -----

/printXML { % <string> printXML -
% print non-blank lines without trailing spaces
dup dup length 1 sub -1 0 {
1 index 1 index get 32 eq {
0 exch getinterval exch
} {
exch = exit % non-blank on this line
}
ifelse
} for
pop pop % clean up
} bind def

/dump-pdf-info { % (fname) -> -
() = ( ) print print ( has ) print
PDFPageCount dup =print 10 mod 1 eq { ( page.\n) } { ( pages\n) } ifelse = flush

/DumpXML where {
pop
Trailer /Root oget /Metadata knownoget {
//false resolvestream
{ dup 256 string readline exch printXML not { exit } if } loop
pop % done with the stream
(_____________________________________________________________) =
flush
} if
} if

% Print out the "Info" dictionary if present
Trailer /Info knownoget {
dup /Title knownoget { (Title: ) print = flush } if
dup /Author knownoget { (Author: ) print = flush } if
dup /Subject knownoget { (Subject: ) print = flush } if
dup /Keywords knownoget { (Keywords: ) print = flush } if
dup /Creator knownoget { (Creator: ) print = flush } if
dup /Producer knownoget { (Producer: ) print = flush } if
dup /CreationDate knownoget { (CreationDate: ) print = flush } if
dup /ModDate knownoget { (ModDate: ) print = flush } if
dup /Trapped knownoget { (Trapped: ) print = flush } if
pop
} if
} bind def

% <page index> <page dict> dump-media-sizes -
/dump-media-sizes {
DumpMediaSizes {
() =
% Print out the Page Size info for each page.
(Page ) print =print
dup /UserUnit pget {
( UserUnit: ) print =print
} if
dup /MediaBox pget {
( MediaBox: ) print oforce_array ==only
} if
dup /CropBox pget {
( CropBox: ) print oforce_array ==only
} if
dup /BleedBox pget {
( BleedBox: ) print oforce_array ==only
} if
dup /TrimBox pget {
( TrimBox: ) print oforce_array ==only
} if
dup /ArtBox pget {
( ArtBox: ) print oforce_array ==only
} if
dup /Rotate pget {
( Rotate = ) print =print
} if
dup /Annots pget {
pop
( Page contains Annotations) print
} if
pageusestransparency {
( Page uses transparency features) print
} if
() = flush
}
{
pop pop
} ifelse
} bind def

% List of standard font names for use when we are showing the FontsNeeded
/StdFontNames [
/Times-Roman /Helvetica /Courier /Symbol
/Times-Bold /Helvetica-Bold /Courier-Bold /ZapfDingbats
/Times-Italic /Helvetica-Oblique /Courier-Oblique
/Times-BoldItalic /Helvetica-BoldOblique /Courier-BoldOblique
] def

/res-type-dict 10 dict begin
/Font {
{
exch pop oforce
dup //null ne {
dup /DescendantFonts knownoget {
exch pop 0 get oforce
} if
dup /FontDescriptor knownoget {
dup /FontFile known 1 index /FontFile2 known or exch /FontFile3 known or
/ShowEmbeddedFonts where { pop pop //false } if {
pop % skip embedded fonts
} {
/BaseFont knownoget { % not embedded
2 index exch //null put
} if
} ifelse
} {
/BaseFont knownoget { % no FontDescriptor, not embedded
2 index exch //null put
} if
} ifelse
} {
pop
} ifelse
} forall % traverse the dictionary
} bind def

/XObject {
{
exch pop oforce
dup //null ne {
dup /Subtype knownoget {
/Form eq {
/Resources knownoget {
get-fonts-from-res
} if
} {
pop
} ifelse
} {
pop
} ifelse
} {
pop
} ifelse
} forall
} bind def

/Pattern {
{
exch pop oforce
dup //null ne {
/Resources knownoget {
get-fonts-from-res
} if
} {
pop
} ifelse
} forall
} bind def
currentdict end readonly def

% <dict for fonts> <<res-dict>> get-fonts-from-res -
/get-fonts-from-res {
oforce
dup //null ne {
{
oforce
dup //null ne {
//res-type-dict 3 -1 roll
.knownget {
exec
} {
pop
} ifelse
} {
pop pop
} ifelse
} forall
} {
pop
} ifelse
} bind def

currentdict /res-type-dict undef

/getPDFfonts { % <dict for fonts> <page dict> getPDFfonts -
dup /Resources pget { get-fonts-from-res } if
/Annots knownoget {
{ oforce
dup //null ne {
/AP knownoget {
{ exch pop oforce
dup //null ne {
dup /Resources knownoget {
get-fonts-from-res
} if
{ exch pop oforce
dup type /dicttype eq {
/Resources knownoget {
get-fonts-from-res
} if
} {
pop
} ifelse
} forall
} {
pop
} ifelse
} forall
} if
} {
pop
} ifelse
} forall
} if
pop
} bind def

/dump-fonts-used { % <dict for fonts> dump-fonts-used -
% If DumpFontsUsed is not true, then remove the "standard" fonts from the list
systemdict /DumpFontsUsed known not {
StdFontNames {
1 index 1 index known { 1 index 1 index undef } if
pop
} forall
} if

% Now dump the FontsUsed dict into an array so we can sort it.
[ 1 index { pop } forall ]
{ 100 string cvs exch 100 string cvs exch lt } .sort

systemdict /DumpFontsUsed known
{
(\nFont or CIDFont resources used:) =
{ = } forall
} {
DumpFontsNeeded {
dup length 0 gt {
(\nFonts Needed that are not embedded \(system fonts required\):) =
{ ( ) print = } forall
} {
pop
(\nNo system fonts are needed.) =
} ifelse
} {
pop
} ifelse
} ifelse
pop
} bind def

% Copy selected subfiles to temporary files and return the file names
% as a PostScript names to protect them from restore.
% Currently, all PDF files in the Portfolio are extracted and returned.
%
% - pdf_collection_files [ /temp_file_name ... /temp_file_name
/pdf_collection_files {
mark
Trailer /Root oget
dup /Collection oknown {
/Names knownoget {
/EmbeddedFiles knownoget {
pdf_collection_names
} if
} if
} {
pop
} ifelse
} bind def

% Output all the info about the file
/dump { % (title) -> -
/PDFPageCount pdfpagecount def
dump-pdf-info
% dict will be populated with fonts through a call to "getPDFfonts"
% per page, then the contents dumped out in "dump-fonts-used"
1000 dict

1 1 PDFPageCount
{
dup pdfgetpage dup 3 -1 roll
dump-media-sizes
1 index exch getPDFfonts
} for

dump-fonts-used

} bind def

% Choose between collection vs plain file.
% Enumerate collections and apply the dump procedure.
/enum-pdfs { % - -> -
File (r) file runpdfbegin
pdf_collection_files
dup mark eq {
pop
File dump
runpdfend
} {
runpdfend
] 0 1 2 index length 1 sub {
2 copy get exch % [file ... ] file i
1 add (0123456789) cvs % [file ... ] file (i+1)
File exch ( part ) exch concatstrings concatstrings
exch % [file ... ] (fname part i+1) file
dup type /filetype eq {
runpdfbegin
dump
runpdfend
closefile
} {
.namestring
dup (r) file
runpdfbegin
exch dump
runpdfend
deletefile
} ifelse
} for
pop
} ifelse
} bind def

enum-pdfs
end
quit

EOF

  local identify
  local -a box=()

  # Run the program against the file the user asked for (this used to be
  # a hard-coded "../input-nup.pdf") and keep the first MediaBox line.
  identify="$("$GSBIN" -dNODISPLAY -q -sFile="$INFILEPDF" -dDumpMediaSizes -dDumpFontsNeeded=false -c "$PDFINFOGS" 2>/dev/null | grep MediaBox | head -n1)"

  identify="${identify##*MediaBox:}"    # drop everything up to the box values

  # Turn the brackets into separators so that "[0" or "842]" become
  # plain numbers once the line is split into words.
  identify="${identify//\[/ }"
  identify="${identify//\]/ }"

  # Split into words without exposing the data to pathname expansion.
  read -r -a box <<<"$identify"

  # sanity: a MediaBox holds four values, width and height are the last two
  if [[ ${#box[@]} -lt 4 ]]; then
    echo >&2 "Error when reading the page size!"
    echo >&2 "The page size information is invalid!"
    exit 16
  fi

  PGWIDTH=$(printf '%.0f' "${box[2]}")
  PGHEIGHT=$(printf '%.0f' "${box[3]}")
}
-
-
# Gets the page size by grepping the raw PDF file.
# Reads:  INFILEPDF (input file)
# Writes: PGWIDTH and PGHEIGHT, rounded to whole numbers
# The first line containing /MediaBox is used; when there is none the
# first line containing /BBox is used instead. These are all possible:
#   /MediaBox [0 0 595 841]
#   /MediaBox [ 0 0 595.28 841.89]
#   /MediaBox[ 0 0 595.28 841.89 ]
# Exits with status 15 when neither entry is found and with status 16
# when the entry holds fewer than four values.
getPageSize() {
  local mediaBox
  local -a values=()

  # Get MediaBox data if possible (-a: the PDF is mostly binary data)
  mediaBox="$(grep -a '/MediaBox' "$INFILEPDF" | head -n1)"
  mediaBox="${mediaBox##*/MediaBox}"

  # If no MediaBox, try BBox
  if [[ -z $mediaBox ]]; then
    mediaBox="$(grep -a '/BBox' "$INFILEPDF" | head -n1)"
    mediaBox="${mediaBox##*/BBox}"
  fi

  # No page size data available
  if [[ -z $mediaBox ]]; then
    {
      echo "Error when reading input file!"
      echo "Could not determine the page size!"
      echo "There is no MediaBox or BBox in the pdf document!"
      echo "Aborting..."
    } >&2
    exit 15
  fi

  # Turn the brackets into separators, so that a token following the
  # box without a space (e.g. "841]/Parent") is not glued to the number.
  mediaBox="${mediaBox//\[/ }"
  mediaBox="${mediaBox//\]/ }"

  # Split into words without exposing the data to pathname expansion.
  read -r -a values <<<"$mediaBox"

  # sanity: a box holds four values, width and height are the last two
  if [[ ${#values[@]} -lt 4 ]]; then
    echo >&2 "Error when reading the page size!"
    echo >&2 "The page size information is invalid!"
    exit 16
  fi

  # we are done
  PGWIDTH=$(printf '%.0f' "${values[2]}")     # rounded width
  PGHEIGHT=$(printf '%.0f' "${values[3]}")    # rounded height
}
-
-
# Parse options
#
# parseOptions consumes the leading command line options:
#   -v           raise VERBOSE by one (may be repeated)
#   -h           print the help and exit 0
#   -V           print the version and exit 0
#   -s <factor>  hand the factor to parseScale
#   -i           select the imagemagick page size method
#   -c           select the cat + grep page size method
# -i and -c are mutually exclusive, the one given last wins. Any other
# option, or -s without a value, ends in usage.
# getopts leaves the index of the first non-option argument in the
# global OPTIND, which the caller uses to shift the options away.
parseOptions() {
  local o
  while getopts ":vichVs:" o; do
    case "${o}" in
      v)
        VERBOSE=$((VERBOSE + 1))
        ;;
      h)
        printHelp
        exit 0
        ;;
      V)
        printVersion
        exit 0
        ;;
      s)
        # quoted: the value must reach parseScale as one unmodified word
        parseScale "${OPTARG}"
        ;;
      i)
        USEIMGMGK=$TRUE
        USECATGREP=$FALSE
        ;;
      c)
        USECATGREP=$TRUE
        USEIMGMGK=$FALSE
        ;;
      *)
        usage
        ;;
    esac
  done
}

parseOptions "$@"
shift $((OPTIND-1))
-
-
-
######### START EXECUTION

# Intro message
vprint "$BASENAME v$VERSION - Verbose execution"


# Dependencies (printDependency aborts the script when a tool is missing)
vprint "Checking for ghostscript and bcmath"
command -v gs >/dev/null 2>&1 || printDependency 'ghostscript'
command -v bc >/dev/null 2>&1 || printDependency 'bc'
if [[ $USEIMGMGK -eq $TRUE ]]; then
  vprint "Checking for imagemagick's identify"
  command -v identify >/dev/null 2>&1 || printDependency 'imagemagick'
  IDBIN=$(command -v identify)
fi


# Get dependency binaries (command -v is a shell builtin, unlike which)
GSBIN=$(command -v gs)
BCBIN=$(command -v bc)


# Verbose scale info
vprint " Scale factor: $SCALE"


# Validate args (usage prints the synopsis and exits with status 1)
[[ $# -lt 1 ]] && usage
INFILEPDF="$1"
[[ "$INFILEPDF" =~ ^..*\.pdf$ ]] || usage
if [[ ! -f "$INFILEPDF" ]]; then
  echo >&2 "Error! File not found: $INFILEPDF"
  usage
fi
vprint " Input file: $INFILEPDF"


# Parse output filename; the .pdf extension is added when it is missing
if [[ -z $2 ]]; then
  OUTFILEPDF="${INFILEPDF%.pdf}.SCALED.pdf"
else
  OUTFILEPDF="${2%.pdf}.pdf"
fi
# Ghostscript reads the input while it writes the output, so writing to
# the input file itself would destroy it.
if [[ "$OUTFILEPDF" == "$INFILEPDF" || "$OUTFILEPDF" -ef "$INFILEPDF" ]]; then
  echo >&2 "Error! The output file must not be the input file: $OUTFILEPDF"
  exit 2
fi
vprint " Output file: $OUTFILEPDF"


# Set PGWIDTH and PGHEIGHT
if [[ $USEIMGMGK -eq $TRUE ]]; then
  getPageSizeImagemagick
elif [[ $USECATGREP -eq $TRUE ]]; then
  getPageSize
else
  getPageSizeGS
fi
# Both values feed the translation maths below; refuse anything that is
# not a positive whole number.
if ! [[ $PGWIDTH =~ ^[1-9][0-9]*$ && $PGHEIGHT =~ ^[1-9][0-9]*$ ]]; then
  echo >&2 "Error when reading the page size!"
  echo >&2 "The page size information is invalid!"
  exit 16
fi
vprint " Width: $PGWIDTH postscript-points"
vprint " Height: $PGHEIGHT postscript-points"


# Compute translation factors (to center the page).
XTRANS=$("$BCBIN" <<<"scale=6; 0.5*(1.0-$SCALE)/$SCALE*$PGWIDTH")
YTRANS=$("$BCBIN" <<<"scale=6; 0.5*(1.0-$SCALE)/$SCALE*$PGHEIGHT")
vprint " Translation X: $XTRANS"
vprint " Translation Y: $YTRANS"


# Do it.
# The page size is handed over with the *POINTS switches because PGWIDTH
# and PGHEIGHT are in points; -dDEVICEWIDTH/-dDEVICEHEIGHT expect pixels.
# This stays the last command so the script exits with ghostscript's status.
"$GSBIN" \
  -q -dNOPAUSE -dBATCH -sDEVICE=pdfwrite -dSAFER \
  -dCompatibilityLevel="1.5" -dPDFSETTINGS="/printer" \
  -dColorConversionStrategy=/LeaveColorUnchanged \
  -dSubsetFonts=true -dEmbedAllFonts=true \
  -dDEVICEWIDTHPOINTS="$PGWIDTH" -dDEVICEHEIGHTPOINTS="$PGHEIGHT" \
  -sOutputFile="$OUTFILEPDF" \
  -c "<</BeginPage{$SCALE $SCALE scale $XTRANS $YTRANS translate}>> setpagedevice" \
  -f "$INFILEPDF"
|