Bash Script to scale and/or resize PDFs from the command line.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

506 lines
16 KiB

  1. #!/usr/bin/env bash
  2. # pdfScale.sh
  3. #
  4. # Scale PDF to specified percentage of original size.
  5. #
  6. # Gustavo Arnosti Neves - 2016 / 07 / 10
  7. #
  8. # This script: https://github.com/tavinus/pdfScale
  9. # Based on: http://ma.juii.net/blog/scale-page-content-of-pdf-files
  10. # And: https://gist.github.com/MichaelJCole/86e4968dbfc13256228a
  11. ###################################################
  12. # PAGESIZE LOGIC
  13. # 1- Try to get Mediabox with CAT/GREP
  14. # Remove /BBox search as it is unreliable
  15. # 2- MacOS => try to use mdls
  16. # Linux => try to use pdfinfo
  17. # 3- Try to use identify (imagemagick)
  18. # 4- Fail
  19. # Remove postscript method,
  20. # may have licensing problems
  21. ###################################################
  # ---------------------------------------------------------------------------
  # Global configuration and state shared by every function in this script.
  # ---------------------------------------------------------------------------
  VERSION="1.4.5"
  SCALE="0.95"                    # scaling factor (0.95 = 95%, e.g.)
  VERBOSE=0                       # verbosity level (0 = quiet, 1 = info, 2 = timestamps)
  BASENAME="${0##*/}"             # simplified name of this script (safe with spaces in $0)

  # Tool paths; filled in after the dependency check
  GSBIN=""                        # GhostScript binary
  BCBIN=""                        # BC math binary
  IDBIN=""                        # ImageMagick identify binary
  PDFINFOBIN=""                   # pdfinfo binary
  MDLSBIN=""                      # MacOS mdls binary

  OSNAME="$(uname 2>/dev/null)"   # Check where we are running

  # Force the C locale so numbers always use '.' as the decimal separator.
  # Exported so that the external tools we spawn see the same settings.
  export LC_MEASUREMENT="C"
  export LC_ALL="C"
  export LC_CTYPE="C"
  export LC_NUMERIC="C"

  # Shell-style booleans: 0 is success/true, 1 is failure/false
  TRUE=0
  FALSE=1

  ADAPTIVEMODE=$TRUE              # Automatically try to guess best mode
  MODE=""                         # Forced detection mode (empty while adaptive)
  USEIMGMGK=$FALSE                # ImageMagick flag, will use identify if true
  USECATGREP=$FALSE               # Use old cat + grep method
  # Prints the "<script name> v<version>" banner.
  # Globals:   BASENAME, VERSION (read)
  # Arguments: $1 - when equal to 2 the banner goes to stderr,
  #                 otherwise it goes to stdout
  printVersion() {
    local banner="$BASENAME v$VERSION"
    if [[ $1 -eq 2 ]]; then
      printf '%s\n' "$banner" >&2
    else
      printf '%s\n' "$banner"
    fi
  }
  # Prints the full help text (version banner, usage, parameters, modes,
  # notes and examples) to stdout.
  # The synopsis and parameter list only mention options that the getopts
  # string of this script (":vhVs:m:") actually accepts.
  # Globals: BASENAME (read)
  # Calls:   printVersion
  printHelp() {
    printVersion
    printf '%s\n' \
      "" \
      "Usage: $BASENAME [-v] [-s <factor>] [-m <mode>] <inFile.pdf> [outfile.pdf]" \
      "       $BASENAME -h" \
      "       $BASENAME -V" \
      "" \
      "Parameters:" \
      " -v            Verbose mode, prints extra information" \
      "               Use twice for even more information" \
      " -h            Print this help to screen and exits" \
      " -V            Prints version to screen and exits" \
      " -m <mode>     Force a mode of page size detection." \
      "               Will disable the Adaptive Mode." \
      " -s <factor>   Changes the scaling factor, defaults to 0.95" \
      "               MUST be a number bigger than zero." \
      "               Eg. -s 0.8 for 80% of the original size" \
      "" \
      "Modes:" \
      " a, adaptive   Default mode, tries all the methods below" \
      " c, cat+grep   Forces the use of the cat + grep method" \
      " m, mdls       Forces the use of MacOS Quartz mdls" \
      " p, pdfinfo    Forces the use of Linux PdfInfo" \
      " i, identify   Forces the use of ImageMagick's Identify" \
      "" \
      "Notes:" \
      " - Page size detection will try different modes until it gets" \
      "   a page size, or you can force a mode with -m 'mode'" \
      " - Options must be passed before the file names to be parsed" \
      " - The output filename is optional. If no file name is passed" \
      "   the output file will have the same name/destination of the" \
      "   input file, with .SCALED.pdf at the end (instead of just .pdf)" \
      " - Having the extension .pdf on the output file name is optional," \
      "   it will be added if not present" \
      " - Should handle file names with spaces without problems" \
      " - The scaling is centered and using a scale bigger than 1 may" \
      "   result on cropping parts of the pdf." \
      "" \
      "Examples:" \
      " $BASENAME myPdfFile.pdf" \
      " $BASENAME myPdfFile.pdf myScaledPdf" \
      " $BASENAME -v -v myPdfFile.pdf" \
      " $BASENAME -s 0.85 myPdfFile.pdf myScaledPdf.pdf" \
      " $BASENAME -m pdfinfo -s 0.80 -v myPdfFile.pdf" \
      " $BASENAME -v -v -s 0.7 myPdfFile.pdf" \
      " $BASENAME -h" \
      ""
  }
  # Prints a short usage reminder to stderr and terminates the script.
  # The synopsis lists -m <mode>, which is what the option parser accepts
  # (the previously advertised -i / -c flags are rejected by getopts).
  # Globals: BASENAME (read)
  # Calls:   printVersion
  # Exits:   always, with status 1
  usage() {
    printVersion 2
    echo >&2 "Usage: $BASENAME [-v] [-s <factor>] [-m <mode>] <inFile.pdf> [outfile.pdf]"
    echo >&2 "Try: $BASENAME -h # for help"
    exit 1
  }
  # Prints verbose information to stdout.
  # Globals:   VERBOSE (read) - 0 prints nothing, 1 prints the message,
  #                             2 or more prefixes it with a timestamp
  # Arguments: $1 - message to print
  # Returns:   0 when verbosity is off, otherwise the status of printf
  vprint() {
    [[ $VERBOSE -eq 0 ]] && return 0
    # local, so the helper does not leak a 'timestamp' global
    local timestamp=""
    [[ $VERBOSE -gt 1 ]] && timestamp="$(date +%Y-%m-%d:%H:%M:%S) | "
    # printf instead of echo: a message such as "-n" is printed verbatim
    printf '%s\n' "$timestamp$1"
  }
  # Reports a missing package on stderr, together with install hints for
  # apt-get, yum and homebrew, then aborts the script.
  # Arguments: $1 - name of the package that must be installed
  # Calls:     printVersion
  # Exits:     always, with status 3
  printDependency() {
    local pkg=$1
    printVersion 2
    {
      printf '\n%s\n\n' "ERROR! You need to install the package '$pkg'"
      printf '%s\n' "Linux apt-get.: sudo apt-get install $pkg"
      printf '%s\n' "Linux yum.....: sudo yum install $pkg"
      printf '%s\n' "MacOS homebrew: brew install $pkg"
      printf '\n%s\n' "Aborting..."
    } >&2
    exit 3
  }
  # Parses and validates the scaling factor.
  # A valid factor is an unsigned decimal number (e.g. 2, 0.8, .5) that is
  # strictly greater than zero.
  # Globals:   SCALE (written on success)
  # Arguments: $1 - the factor as typed by the user
  # Exits:     with status 2 (message on stderr) when the factor is invalid
  parseScale() {
    # First test: digits with an optional fractional part, no sign.
    # Second test: at least one non-zero digit, which rules out "0", "0.0",
    # "00" and the empty string. Those would otherwise reach bc as a zero
    # divisor. (A '>' inside [[ ]] compares strings, so it cannot be used
    # for this check.)
    if ! [[ $1 =~ ^[0-9]*([.][0-9]+)?$ && $1 =~ [1-9] ]]; then
      echo >&2 "Invalid factor: $1"
      echo >&2 "The factor must be a floating point number greater than 0"
      echo >&2 "Example: for 80% use 0.8"
      exit 2
    fi
    SCALE=$1
  }
  # Parses a forced mode of operation given on the command line.
  # Globals:   ADAPTIVEMODE, MODE (written); TRUE, FALSE (read)
  # Arguments: $1 - mode name or one-letter alias (matching is case sensitive,
  #                 only the spellings listed below are accepted)
  # Outputs:   a two-line notice on stdout when the mode is empty or unknown
  # Returns:   $TRUE when the mode was recognised; $FALSE otherwise, in which
  #            case the script falls back to adaptive mode
  parseMode() {
    local forced=""
    case "$1" in
      '')
        echo "Mode is empty, please specify the desired mode"
        echo "Falling back to adaptive mode!"
        ADAPTIVEMODE=$TRUE
        MODE=""
        return $FALSE
        ;;
      c | catgrep | 'cat+grep' | CatGrep | C | CATGREP)
        forced="CATGREP"
        ;;
      i | imagemagick | identify | ImageMagick | Identify | I | IDENTIFY)
        forced="IDENTIFY"
        ;;
      m | mdls | MDLS | quartz | mac | M)
        forced="MDLS"
        ;;
      p | pdfinfo | PDFINFO | PdfInfo | P)
        forced="PDFINFO"
        ;;
      a | adaptive | automatic | A | ADAPTIVE | AUTOMATIC)
        ADAPTIVEMODE=$TRUE
        MODE=""
        return $TRUE
        ;;
      *)
        echo "Invalid mode: $1"
        echo "Falling back to adaptive mode!"
        ADAPTIVEMODE=$TRUE
        MODE=""
        return $FALSE
        ;;
    esac
    # Only the four forced detection methods reach this point
    ADAPTIVEMODE=$FALSE
    MODE="$forced"
    return $TRUE
  }
  # Gets the page size using ImageMagick's identify.
  # Globals:   IDBIN, INFILEPDF, ADAPTIVEMODE, TRUE, FALSE (read)
  #            PGWIDTH, PGHEIGHT (written, rounded to whole numbers)
  # Returns:   $FALSE in adaptive mode when identify is missing or silent
  # Exits:     with status 15 in forced mode for the same failures
  getPageSizeImagemagick() {
    # Sanity: the identify binary has to exist
    if [[ ! -f $IDBIN ]]; then
      if [[ $ADAPTIVEMODE = $FALSE ]]; then
        echo "Error! ImageMagick's Identify was not found!"
        echo "Make sure you installed ImageMagick and have identify on your \$PATH"
        echo "Aborting! You may want to try the adaptive mode."
        exit 15
      elif [[ $ADAPTIVEMODE = $TRUE ]]; then
        return $FALSE
      fi
    fi

    # Ask identify for "<width> <height>BREAKME" once per page
    local rawSize="$("$IDBIN" -format '%[fx:w] %[fx:h]BREAKME' "$INFILEPDF" 2>/dev/null)"

    # No page size data available
    if [[ -z $rawSize ]]; then
      if [[ $ADAPTIVEMODE = $FALSE ]]; then
        echo "Error when reading input file!"
        echo "Could not determine the page size!"
        echo "ImageMagicks's Identify returned an empty string!"
        echo "Aborting! You may want to try the adaptive mode."
        exit 15
      elif [[ $ADAPTIVEMODE = $TRUE ]]; then
        return $FALSE
      fi
    fi

    rawSize="${rawSize%%BREAKME*}"  # keep the first page only
    local dims=($rawSize)           # split into (width height)
    PGWIDTH=$(printf '%.0f' "${dims[0]}")
    PGHEIGHT=$(printf '%.0f' "${dims[1]}")
  }
  # Gets the page size using Mac Quartz mdls.
  # The two attributes are picked out of the mdls output by key name, so
  # width and height cannot be mixed up whatever order mdls prints them in,
  # and a "(null)" value (no metadata for the file) counts as a failure
  # instead of becoming a page size of 0.
  # Globals:   MDLSBIN, INFILEPDF, ADAPTIVEMODE, TRUE, FALSE (read)
  #            PGWIDTH, PGHEIGHT (written, rounded to whole numbers)
  # Returns:   $TRUE on success; $FALSE in adaptive mode when mdls is
  #            missing, silent or reports no usable size
  # Exits:     with status 15 in forced mode for the same failures
  getPageSizeMdls() {
    # Sanity: the mdls binary has to exist
    if [[ ! -f $MDLSBIN ]]; then
      if [[ $ADAPTIVEMODE = "$FALSE" ]]; then
        echo "Error! Mac Quartz mdls was not found!"
        echo "Are you even trying this on a Mac?"
        echo "Aborting! You may want to try the adaptive mode."
        exit 15
      fi
      return $FALSE
    fi

    # get data from mdls
    # NOTE(review): the leading -mdls argument is kept from the original
    # invocation; confirm against the mdls man page whether it is needed.
    local mdlsOut
    mdlsOut="$("$MDLSBIN" -mdls -name kMDItemPageHeight -name kMDItemPageWidth "$INFILEPDF" 2>/dev/null)"
    if [[ -z $mdlsOut ]]; then
      if [[ $ADAPTIVEMODE = "$FALSE" ]]; then
        echo "Error when reading input file!"
        echo "Could not determine the page size!"
        echo "Mac Quartz mdls returned an empty string!"
        echo "Aborting! You may want to try the adaptive mode."
        exit 15
      fi
      return $FALSE
    fi

    # Each line looks like "kMDItemPageWidth = 595" (spaces or tabs)
    local key value width="" height=""
    while read -r key _ value; do
      case "$key" in
        kMDItemPageWidth) width=$value ;;
        kMDItemPageHeight) height=$value ;;
      esac
    done <<< "$mdlsOut"

    # Both values must be plain numbers; "(null)" or a missing key is a failure
    local number='^[0-9]+([.][0-9]+)?$'
    if ! [[ $width =~ $number && $height =~ $number ]]; then
      if [[ $ADAPTIVEMODE = "$FALSE" ]]; then
        echo "Error when reading input file!"
        echo "Could not determine the page size!"
        echo "Mac Quartz mdls returned no usable page size!"
        echo "Aborting! You may want to try the adaptive mode."
        exit 15
      fi
      return $FALSE
    fi

    PGWIDTH=$(printf '%.0f' "$width")
    PGHEIGHT=$(printf '%.0f' "$height")
    return $TRUE
  }
  # Gets the page size using pdfinfo.
  # Globals:   PDFINFOBIN, INFILEPDF, ADAPTIVEMODE, TRUE, FALSE (read)
  #            PGWIDTH, PGHEIGHT (written, rounded to whole numbers)
  # Returns:   $FALSE in adaptive mode when pdfinfo is missing or prints
  #            no "Page size:" line
  # Exits:     with status 15 in forced mode for the same failures
  getPageSizePdfInfo() {
    # Sanity: the pdfinfo binary has to exist
    if [[ ! -f $PDFINFOBIN ]]; then
      if [[ $ADAPTIVEMODE = $FALSE ]]; then
        echo "Error! Linux pdfinfo was not found!"
        echo "Do you have pdfinfo installed and available on your \$PATH?"
        echo "Aborting! You may want to try the adaptive mode."
        exit 15
      elif [[ $ADAPTIVEMODE = $TRUE ]]; then
        return $FALSE
      fi
    fi

    # get the "Page size:" line from pdfinfo
    local sizeLine="$("$PDFINFOBIN" "$INFILEPDF" 2>/dev/null | grep -i 'Page size:' )"
    if [[ -z $sizeLine ]]; then
      if [[ $ADAPTIVEMODE = $FALSE ]]; then
        echo "Error when reading input file!"
        echo "Could not determine the page size!"
        echo "Linux PdfInfo returned an empty string!"
        echo "Aborting! You may want to try the adaptive mode."
        exit 15
      elif [[ $ADAPTIVEMODE = $TRUE ]]; then
        return $FALSE
      fi
    fi

    sizeLine="${sizeLine##*Page size:}"  # drop the label, keep "<w> x <h> ..."
    local fields=($sizeLine)             # split into words
    PGWIDTH=$(printf '%.0f' "${fields[0]}")   # first word is the width
    PGHEIGHT=$(printf '%.0f' "${fields[2]}")  # third word (after "x") is the height
  }
  # Gets the page size by grepping the first /MediaBox entry out of the PDF.
  # These are all possible spellings:
  #   /MediaBox [0 0 595 841]
  #   /MediaBox [ 0 0 595.28 841.89]
  #   /MediaBox[ 0 0 595.28 841.89 ]
  # Only the text between the brackets is parsed, so whatever follows the
  # closing bracket on the same line (other keys, a trailing CR) cannot end
  # up inside the height value.
  # Globals:   INFILEPDF, ADAPTIVEMODE, TRUE, FALSE (read)
  #            PGWIDTH, PGHEIGHT (written, rounded to whole numbers)
  # Returns:   $TRUE on success; $FALSE in adaptive mode when there is no
  #            usable MediaBox, so the next detection method can be tried
  # Exits:     in forced mode, with status 15 when no MediaBox is found and
  #            16 when the MediaBox is malformed
  getPageSizeCatGrep() {
    # First line that mentions /MediaBox; -a because PDFs are binary files
    local mediaBox
    mediaBox="$(grep -a -- '/MediaBox' "$INFILEPDF" | head -n1)"
    mediaBox="${mediaBox##*/MediaBox}"

    # No page size data available
    if [[ -z $mediaBox ]]; then
      if [[ $ADAPTIVEMODE = "$FALSE" ]]; then
        echo "Error when reading input file!"
        echo "Could not determine the page size!"
        echo "There is no MediaBox in the pdf document!"
        echo "Aborting! You may want to try the adaptive mode."
        exit 15
      fi
      return $FALSE
    fi

    mediaBox="${mediaBox%%]*}"    # cut at the closing bracket
    mediaBox="${mediaBox//\[/ }"  # drop the opening bracket

    # read -a splits on whitespace without glob-expanding the words
    local -a box=()
    read -r -a box <<< "$mediaBox"

    # sanity: four entries are needed, and the two we use must be numbers
    local number='^[+-]?([0-9]+[.]?[0-9]*|[.][0-9]+)$'
    if (( ${#box[@]} < 4 )) || ! [[ ${box[2]} =~ $number && ${box[3]} =~ $number ]]; then
      [[ $ADAPTIVEMODE = "$TRUE" ]] && return $FALSE
      echo "Error when reading the page size!"
      echo "The page size information is invalid!"
      exit 16
    fi

    # we are done
    PGWIDTH=$(printf '%.0f' "${box[2]}")   # Get Round Width
    PGHEIGHT=$(printf '%.0f' "${box[3]}")  # Get Round Height
    return $TRUE
  }
  # Detects the operation mode and runs the matching page size detection;
  # in adaptive mode the methods are tried one after another.
  # Globals:   ADAPTIVEMODE, MODE, OSNAME, TRUE, FALSE (read)
  #            PGWIDTH, PGHEIGHT (written by the detection helpers)
  # Exits:     with status 20 when a forced mode is unknown and with status
  #            17 when every adaptive method failed
  getPageSize() {
    # Forced mode: run exactly the requested method
    if [[ $ADAPTIVEMODE = $FALSE ]]; then
      vprint " Adaptive mode: Disabled"
      case "$MODE" in
        CATGREP)
          vprint " Method: Cat + Grep"
          getPageSizeCatGrep
          ;;
        MDLS)
          vprint " Method: Mac Quartz mdls"
          getPageSizeMdls
          ;;
        PDFINFO)
          vprint " Method: Linux PdfInfo"
          getPageSizePdfInfo
          ;;
        IDENTIFY)
          vprint " Method: ImageMagick's Identify"
          getPageSizeImagemagick
          ;;
        *)
          echo "Error! Invalid Mode: $MODE"
          echo "Aborting execution..."
          exit 20
          ;;
      esac
      return $TRUE
    fi

    # Adaptive mode, step 1: MediaBox via cat + grep
    vprint " Adaptive mode: Enabled"
    vprint " Method: Cat + Grep"
    getPageSizeCatGrep
    [[ -z $PGWIDTH && -z $PGHEIGHT ]] || return 0

    # Step 2: the tool that fits the operating system
    vprint " Failed"
    if [[ $OSNAME = "Darwin" ]]; then
      vprint " Method: Mac Quartz mdls"
      getPageSizeMdls
    else
      vprint " Method: Linux PdfInfo"
      getPageSizePdfInfo
    fi
    [[ -z $PGWIDTH && -z $PGHEIGHT ]] || return 0

    # Step 3: ImageMagick
    vprint " Failed"
    vprint " Method: ImageMagick's Identify"
    getPageSizeImagemagick
    [[ -z $PGWIDTH && -z $PGHEIGHT ]] || return 0

    # Nothing worked
    vprint " Failed"
    echo "Error when detecting PDF paper size!"
    echo "All methods of detection failed"
    echo "You may want to install pdfinfo or imagemagick"
    exit 17
  }
  # Parse options.
  # The leading ':' in the option string puts getopts in silent mode, so an
  # unknown option or a missing argument lands in the catch-all branch.
  while getopts ":vhVs:m:" o; do
    case "${o}" in
      v)
        ((VERBOSE++))
        ;;
      h)
        printHelp
        exit 0
        ;;
      V)
        printVersion
        exit 0
        ;;
      s)
        # Quoted: the argument reaches the validator as one word, unsplit
        # and unglobbed, so input like "0.5 junk" is rejected, not truncated
        parseScale "${OPTARG}"
        ;;
      m)
        parseMode "${OPTARG}"
        ;;
      *)
        usage
        ;;
    esac
  done
  # Drop the parsed options; "$1" is now the input file
  shift $((OPTIND - 1))
  ######### START EXECUTION

  # Intro message
  vprint "${0##*/} v$VERSION - Verbose execution"

  # Dependencies: ghostscript and bc are always needed, the page size tools
  # are only mandatory when their mode was forced with -m
  vprint "Checking for ghostscript and bcmath"
  command -v gs >/dev/null 2>&1 || printDependency 'ghostscript'
  command -v bc >/dev/null 2>&1 || printDependency 'bc'
  if [[ $MODE = "IDENTIFY" ]]; then
    vprint "Checking for imagemagick's identify"
    command -v identify >/dev/null 2>&1 || printDependency 'imagemagick'
  fi
  if [[ $MODE = "PDFINFO" ]]; then
    vprint "Checking for pdfinfo"
    command -v pdfinfo >/dev/null 2>&1 || printDependency 'pdfinfo'
  fi

  # Get dependency binaries.
  # mdls and pdfinfo are both looked up on every OS: a forced "-m pdfinfo"
  # must also work on macOS when pdfinfo is installed there. A tool that is
  # absent simply leaves its variable empty.
  GSBIN="$(command -v gs 2>/dev/null)"
  BCBIN="$(command -v bc 2>/dev/null)"
  IDBIN="$(command -v identify 2>/dev/null)"
  MDLSBIN="$(command -v mdls 2>/dev/null)"
  PDFINFOBIN="$(command -v pdfinfo 2>/dev/null)"

  # Verbose scale info
  vprint " Scale factor: $SCALE"

  # Validate args (usage prints the synopsis and exits with status 1)
  if (( $# < 1 )); then
    usage
  fi
  INFILEPDF="$1"
  [[ "$INFILEPDF" =~ ^..*\.pdf$ ]] || usage
  [[ -f "$INFILEPDF" ]] || { echo "Error! File not found: $INFILEPDF"; exit 3; }
  vprint " Input file: $INFILEPDF"

  # Parse output filename
  if [[ -z $2 ]]; then
    OUTFILEPDF="${INFILEPDF%.pdf}.SCALED.pdf"
  else
    OUTFILEPDF="${2%.pdf}.pdf"
  fi
  # Ghostscript would be writing the very file it is reading from if both
  # names pointed to the same file, so refuse to run in that case
  if [[ -e "$OUTFILEPDF" && "$OUTFILEPDF" -ef "$INFILEPDF" ]]; then
    echo "Error! Input and output files are the same: $INFILEPDF"
    exit 4
  fi
  vprint " Output file: $OUTFILEPDF"

  getPageSize
  vprint " Width: $PGWIDTH postscript-points"
  vprint " Height: $PGHEIGHT postscript-points"

  # Compute translation factors (to center the page).
  XTRANS=$("$BCBIN" <<< "scale=6; 0.5*(1.0-$SCALE)/$SCALE*$PGWIDTH")
  YTRANS=$("$BCBIN" <<< "scale=6; 0.5*(1.0-$SCALE)/$SCALE*$PGHEIGHT")
  vprint " Translation X: $XTRANS"
  vprint " Translation Y: $YTRANS"

  # Do it. The BeginPage hook scales and re-centers every page; the exit
  # status of ghostscript becomes the exit status of the script.
  "$GSBIN" \
    -q -dNOPAUSE -dBATCH -sDEVICE=pdfwrite -dSAFER \
    -dCompatibilityLevel="1.5" -dPDFSETTINGS="/printer" \
    -dColorConversionStrategy=/LeaveColorUnchanged \
    -dSubsetFonts=true -dEmbedAllFonts=true \
    -dDEVICEWIDTH="$PGWIDTH" -dDEVICEHEIGHT="$PGHEIGHT" \
    -sOutputFile="$OUTFILEPDF" \
    -c "<</BeginPage{$SCALE $SCALE scale $XTRANS $YTRANS translate}>> setpagedevice" \
    -f "$INFILEPDF"