Bash Script to scale and/or resize PDFs from the command line.
25'ten fazla konu seçemezsiniz Konular bir harf veya rakamla başlamalı, kısa çizgiler ('-') içerebilir ve en fazla 35 karakter uzunluğunda olabilir.
 
 

499 satır
16 KiB

  1. #!/usr/bin/env bash
  2. # pdfScale.sh
  3. #
  4. # Scale PDF to specified percentage of original size.
  5. #
  6. # Gustavo Arnosti Neves - 2016 / 07 / 10
  7. #
  8. # This script: https://github.com/tavinus/pdfScale
  9. # Based on: http://ma.juii.net/blog/scale-page-content-of-pdf-files
  10. # And: https://gist.github.com/MichaelJCole/86e4968dbfc13256228a
  11. ###################################################
  12. # PAGESIZE LOGIC
  13. # 1- Try to get Mediabox with CAT/GREP
  14. # Remove /BBox search as it is unreliable
  15. # 2- MacOS => try to use mdls
  16. # Linux => try to use pdfinfo
  17. # 3- Try to use identify (imagemagick)
  18. # 4- Fail
  19. # Remove postscript method,
  20. # may have licensing problems
  21. ###################################################
  # ---------------------------------------------------------------------------
  # Global configuration and state
  # ---------------------------------------------------------------------------
  VERSION="1.4.8"
  SCALE="0.95"                    # scaling factor (0.95 = 95%, e.g.)
  VERBOSE=0                       # verbosity level, raised once per -v
  BASENAME="$(basename -- "$0")"  # simplified name of this script

  # Binary paths; filled in after the dependency check
  GSBIN=""                        # GhostScript Binary
  BCBIN=""                        # BC Math Binary
  IDBIN=""                        # Identify Binary
  PDFINFOBIN=""                   # PDF Info Binary
  MDLSBIN=""                      # MacOS mdls Binary

  OSNAME="$(uname 2>/dev/null)"   # Check where we are running

  # Force the C locale so numbers use '.' as the decimal separator.
  # Exported so that external tools started by this script see it as well;
  # a plain assignment would only affect the shell itself.
  export LC_MEASUREMENT="C"
  export LC_ALL="C"
  export LC_CTYPE="C"
  export LC_NUMERIC="C"

  # Shell-style booleans: these are exit statuses, so 0 means success/true
  TRUE=0
  FALSE=1

  ADAPTIVEMODE=$TRUE              # Automatically try to guess best mode
  MODE=""                         # Forced detection mode (empty = adaptive)
  # Prints the "<script name> v<version>" banner.
  # Globals:   BASENAME, VERSION (read)
  # Arguments: $1 - 2 sends the banner to stderr; anything else uses stdout
  printVersion() {
    local banner="$BASENAME v$VERSION"
    if [[ $1 -eq 2 ]]; then
      echo >&2 "$banner"
      return
    fi
    echo "$banner"
  }
  # Prints the version banner followed by the complete help text on stdout.
  # Globals: BASENAME (read)
  printHelp() {
    printVersion
    # The text is one literal string; its lines must stay at column 0.
    printf '%s\n' "
Usage: $BASENAME [-v] [-s <factor>] [-m <mode>] <inFile.pdf> [outfile.pdf]
$BASENAME -h
$BASENAME -V
Parameters:
-v Verbose mode, prints extra information
Use twice for even more information
-h Print this help to screen and exits
-V Prints version to screen and exits
-m <mode> Force a mode of page size detection.
May disable the Adaptive Mode.
-s <factor> Changes the scaling factor, defaults to 0.95
MUST be a number bigger than zero.
Eg. -s 0.8 for 80% of the original size
Modes:
a, adaptive Default mode, tries all the methods below
c, cat+grep Forces the use of the cat + grep method
m, mdls Forces the use of MacOS Quartz mdls
p, pdfinfo Forces the use of Linux PdfInfo
i, identify Forces the use of ImageMagick's Identify
Notes:
- Adaptive Page size detection will try different modes until
it gets a page size. You can force a mode with -m 'mode'
- Options must be passed before the file names to be parsed
- The output filename is optional. If no file name is passed
the output file will have the same name/destination of the
input file, with .SCALED.pdf at the end (instead of just .pdf)
- Having the extension .pdf on the output file name is optional,
it will be added if not present
- Should handle file names with spaces without problems
- The scaling is centered and using a scale bigger than 1 may
result on cropping parts of the pdf.
Examples:
$BASENAME myPdfFile.pdf
$BASENAME myPdfFile.pdf myScaledPdf
$BASENAME -v -v myPdfFile.pdf
$BASENAME -s 0.85 myPdfFile.pdf myScaledPdf.pdf
$BASENAME -m pdfinfo -s 0.80 -v myPdfFile.pdf
$BASENAME -v -v -m i -s 0.7 myPdfFile.pdf
$BASENAME -h
"
  }
  # Writes the version banner and a short usage reminder to stderr,
  # then terminates the script with status 1.
  # Globals: BASENAME (read)
  usage() {
    printVersion 2
    {
      echo "Usage: $BASENAME [-v] [-s <factor>] [-m <mode>] <inFile.pdf> [outfile.pdf]"
      echo "Try: $BASENAME -h # for help"
    } >&2
    exit 1
  }
  # Prints a message on stdout when verbose mode is active.
  # Globals:   VERBOSE (read) - 0 prints nothing, 1 prints the message,
  #            2 or more prefixes it with a timestamp
  # Arguments: $1 - message to print
  # Returns:   0
  vprint() {
    [[ $VERBOSE -eq 0 ]] && return 0
    # local, so the caller's variables are never clobbered
    local timestamp=""
    [[ $VERBOSE -gt 1 ]] && timestamp="$(date +%Y-%m-%d:%H:%M:%S) | "
    # printf instead of echo: a message such as "-n" is printed literally
    printf '%s\n' "$timestamp$1"
  }
  # Reports a missing package on stderr, with install hints for apt-get,
  # yum and homebrew, then aborts the script with status 3.
  # Arguments: $1 - name of the package to install
  printDependency() {
    local pkg="$1"
    printVersion 2
    {
      echo $'\n'"ERROR! You need to install the package '$pkg'"$'\n'
      echo "Linux apt-get.: sudo apt-get install $pkg"
      echo "Linux yum.....: sudo yum install $pkg"
      echo "MacOS homebrew: brew install $pkg"
      echo $'\n'"Aborting..."
    } >&2
    exit 3
  }
  # Parses and validates the scaling factor, storing it in SCALE.
  # Globals:   SCALE (written)
  # Arguments: $1 - factor; an unsigned decimal number greater than zero,
  #                 e.g. 2, 0.8 or .8
  # Exits:     2 (after an explanation on stderr) when the factor is invalid
  parseScale() {
    # Digits with an optional fractional part; no sign, no exponent
    local number_re='^[0-9]*([.][0-9]+)?$'

    # "Greater than zero" is checked by requiring one non-zero digit.
    # A '>' inside [[ ]] compares strings, so it cannot do this job:
    # it would let 0.0 and 00 through and turn away .5
    if ! [[ -n "$1" && "$1" =~ $number_re && "$1" =~ [1-9] ]]; then
      echo >&2 "Invalid factor: $1"
      echo >&2 "The factor must be a floating point number greater than 0"
      echo >&2 "Example: for 80% use 0.8"
      exit 2
    fi
    SCALE=$1
  }
  # Selects the page size detection mode requested with -m.
  # Globals:   TRUE, FALSE (read); ADAPTIVEMODE, MODE (written)
  # Arguments: $1 - mode name or one of its aliases
  # Returns:   $TRUE when the mode was recognised; $FALSE when it was empty
  #            or unknown, in which case adaptive mode is selected instead
  parseMode() {
    case "$1" in
      '')
        echo "Mode is empty, please specify the desired mode"
        echo "Falling back to adaptive mode!"
        ADAPTIVEMODE=$TRUE
        MODE=""
        return $FALSE
        ;;
      c|catgrep|'cat+grep'|CatGrep|C|CATGREP)
        ADAPTIVEMODE=$FALSE
        MODE="CATGREP"
        ;;
      i|imagemagick|identify|ImageMagick|Identify|I|IDENTIFY)
        ADAPTIVEMODE=$FALSE
        MODE="IDENTIFY"
        ;;
      m|mdls|MDLS|quartz|mac|M)
        ADAPTIVEMODE=$FALSE
        MODE="MDLS"
        ;;
      p|pdfinfo|PDFINFO|PdfInfo|P)
        ADAPTIVEMODE=$FALSE
        MODE="PDFINFO"
        ;;
      a|adaptive|automatic|A|ADAPTIVE|AUTOMATIC)
        ADAPTIVEMODE=$TRUE
        MODE=""
        ;;
      *)
        echo "Invalid mode: $1"
        echo "Falling back to adaptive mode!"
        ADAPTIVEMODE=$TRUE
        MODE=""
        return $FALSE
        ;;
    esac
    # Every recognised mode ends up here
    return $TRUE
  }
  # Reads the size of the first page with ImageMagick's identify.
  # Globals:   IDBIN, INFILEPDF, ADAPTIVEMODE, TRUE, FALSE (read)
  #            PGWIDTH, PGHEIGHT (written, rounded to whole numbers)
  # Returns:   $FALSE in adaptive mode when identify is missing or silent
  # Exits:     15 in forced mode for those same two conditions
  getPageSizeImagemagick() {
    # Sanity: the identify binary has to be there
    if [[ ! -f $IDBIN ]]; then
      if [[ $ADAPTIVEMODE = $FALSE ]]; then
        echo "Error! ImageMagick's Identify was not found!"
        echo "Make sure you installed ImageMagick and have identify on your \$PATH"
        echo "Aborting! You may want to try the adaptive mode."
        exit 15
      elif [[ $ADAPTIVEMODE = $TRUE ]]; then
        return $FALSE
      fi
    fi

    # One "<width> <height>BREAKME" record is printed per page
    local rawSize="$("$IDBIN" -format '%[fx:w] %[fx:h]BREAKME' "$INFILEPDF" 2>/dev/null)"

    # No page size data available
    if [[ -z $rawSize ]]; then
      if [[ $ADAPTIVEMODE = $FALSE ]]; then
        echo "Error when reading input file!"
        echo "Could not determine the page size!"
        echo "ImageMagicks's Identify returned an empty string!"
        echo "Aborting! You may want to try the adaptive mode."
        exit 15
      elif [[ $ADAPTIVEMODE = $TRUE ]]; then
        return $FALSE
      fi
    fi

    rawSize="${rawSize%%BREAKME*}"   # keep the record of the 1st page only
    local dims=($rawSize)            # split into (width height)
    PGWIDTH=$(printf '%.0f' "${dims[0]}")
    PGHEIGHT=$(printf '%.0f' "${dims[1]}")
  }
  # Reads the page size from the metadata reported by Mac Quartz mdls.
  # Globals:   MDLSBIN, INFILEPDF, ADAPTIVEMODE, TRUE, FALSE (read)
  #            PGWIDTH, PGHEIGHT (written, rounded to whole numbers)
  # Returns:   $TRUE on success; $FALSE in adaptive mode when mdls is
  #            missing, prints nothing, or reports no numeric page size
  # Exits:     15 in forced mode for those same conditions
  getPageSizeMdls() {
    # Sanity: the mdls binary has to be there
    if [[ ! -f $MDLSBIN ]]; then
      if [[ $ADAPTIVEMODE = $FALSE ]]; then
        echo "Error! Mac Quartz mdls was not found!"
        echo "Are you even trying this on a Mac?"
        echo "Aborting! You may want to try the adaptive mode."
        exit 15
      elif [[ $ADAPTIVEMODE = $TRUE ]]; then
        return $FALSE
      fi
    fi

    # get data from mdls
    # NOTE(review): the '-mdls' argument is kept from the original call;
    # confirm that mdls accepts it.
    local mdlsOut
    mdlsOut="$("$MDLSBIN" -mdls -name kMDItemPageHeight -name kMDItemPageWidth "$INFILEPDF" 2>/dev/null)"
    if [[ -z $mdlsOut ]]; then
      if [[ $ADAPTIVEMODE = $FALSE ]]; then
        echo "Error when reading input file!"
        echo "Could not determine the page size!"
        echo "Mac Quartz mdls returned an empty string!"
        echo "Aborting! You may want to try the adaptive mode."
        exit 15
      elif [[ $ADAPTIVEMODE = $TRUE ]]; then
        return $FALSE
      fi
    fi

    # Parse "name = value" lines by attribute name. Picking values by
    # position mixes up width and height, because the height line is
    # requested (and listed) first.
    local line key sep value rest
    local width="" height=""
    while IFS= read -r line || [[ -n $line ]]; do
      line="${line//$'\t'/ }"   # change tab to space
      read -r key sep value rest <<< "$line"
      case "$key" in
        kMDItemPageWidth)  width="$value" ;;
        kMDItemPageHeight) height="$value" ;;
      esac
    done <<< "$mdlsOut"

    # Anything that is not a plain number (a missing attribute, for example)
    # counts as a failed detection instead of silently becoming 0
    local number_re='^[0-9]+([.][0-9]+)?$'
    if ! [[ $width =~ $number_re && $height =~ $number_re ]]; then
      if [[ $ADAPTIVEMODE = $TRUE ]]; then
        return $FALSE
      fi
      echo "Error when reading input file!"
      echo "Could not determine the page size!"
      echo "Mac Quartz mdls returned no usable page size!"
      echo "Aborting! You may want to try the adaptive mode."
      exit 15
    fi

    PGWIDTH=$(printf '%.0f' "$width")
    PGHEIGHT=$(printf '%.0f' "$height")
    return $TRUE
  }
  # Reads the page size from the "Page size:" line printed by pdfinfo.
  # Globals:   PDFINFOBIN, INFILEPDF, ADAPTIVEMODE, TRUE, FALSE (read)
  #            PGWIDTH, PGHEIGHT (written, rounded to whole numbers)
  # Returns:   $FALSE in adaptive mode when pdfinfo is missing or prints
  #            no "Page size:" line
  # Exits:     15 in forced mode for those same two conditions
  getPageSizePdfInfo() {
    # Sanity: the pdfinfo binary has to be there
    if [[ ! -f $PDFINFOBIN ]]; then
      if [[ $ADAPTIVEMODE = $FALSE ]]; then
        echo "Error! Linux pdfinfo was not found!"
        echo "Do you have pdfinfo installed and available on your \$PATH?"
        echo "Aborting! You may want to try the adaptive mode."
        exit 15
      elif [[ $ADAPTIVEMODE = $TRUE ]]; then
        return $FALSE
      fi
    fi

    # get data from pdfinfo, keeping only the page size line
    local sizeLine="$("$PDFINFOBIN" "$INFILEPDF" 2>/dev/null | grep -i 'Page size:' )"
    if [[ -z $sizeLine ]]; then
      if [[ $ADAPTIVEMODE = $FALSE ]]; then
        echo "Error when reading input file!"
        echo "Could not determine the page size!"
        echo "Linux PdfInfo returned an empty string!"
        echo "Aborting! You may want to try the adaptive mode."
        exit 15
      elif [[ $ADAPTIVEMODE = $TRUE ]]; then
        return $FALSE
      fi
    fi

    sizeLine="${sizeLine##*Page size:}"   # drop the label, keep "<w> x <h> ..."
    local fields=($sizeLine)              # fields: width, "x", height, ...
    PGWIDTH=$(printf '%.0f' "${fields[0]}")
    PGHEIGHT=$(printf '%.0f' "${fields[2]}")
  }
  # Reads the page size from the first /MediaBox entry found in the PDF.
  # Layouts seen in PDF files include:
  #   /MediaBox [0 0 595 841]
  #   /MediaBox [ 0 0 595.28 841.89]
  #   /MediaBox[ 0 0 595.28 841.89 ]
  # Globals:   INFILEPDF, ADAPTIVEMODE, TRUE, FALSE (read)
  #            PGWIDTH, PGHEIGHT (written, rounded to whole numbers)
  # Returns:   $TRUE on success; $FALSE in adaptive mode when no MediaBox
  #            data is found
  # Exits:     15 in forced mode when no MediaBox data is found;
  #            16 when the MediaBox holds fewer than four values
  getPageSizeCatGrep() {
    # -a treats the binary PDF as text, -m1 stops at the first matching line
    local mediaBox
    mediaBox="$(grep -a -m1 -- '/MediaBox' "$INFILEPDF")"
    mediaBox="${mediaBox##*/MediaBox}"

    # No page size data available
    if [[ -z $mediaBox ]]; then
      if [[ $ADAPTIVEMODE = $FALSE ]]; then
        echo "Error when reading input file!"
        echo "Could not determine the page size!"
        echo "There is no MediaBox in the pdf document!"
        echo "Aborting! You may want to try the adaptive mode."
        exit 15
      elif [[ $ADAPTIVEMODE = $TRUE ]]; then
        return $FALSE
      fi
    fi

    # remove chars [ and ]
    mediaBox="${mediaBox//\[/}"
    mediaBox="${mediaBox//\]/}"
    # a carriage return (CRLF files) would otherwise stick to the last number
    mediaBox="${mediaBox//$'\r'/ }"

    # Split on whitespace; read -a does not expand glob characters
    local -a box=()
    read -r -a box <<< "$mediaBox"

    # sanity: four values are needed to reach the 3rd and 4th
    if [[ ${#box[@]} -lt 4 ]]; then
      echo "Error when reading the page size!"
      echo "The page size information is invalid!"
      exit 16
    fi

    # we are done; the 3rd and 4th values are used as width and height
    PGWIDTH=$(printf '%.0f' "${box[2]}")    # Get Round Width
    PGHEIGHT=$(printf '%.0f' "${box[3]}")   # Get Round Height
    return $TRUE
  }
  # Fills PGWIDTH and PGHEIGHT, either with the forced method or by trying
  # the available methods one after the other (adaptive mode).
  # Globals:   ADAPTIVEMODE, MODE, OSNAME, TRUE, FALSE (read)
  #            PGWIDTH, PGHEIGHT (written by the detection functions)
  # Returns:   $TRUE after a forced method has run
  # Exits:     20 when MODE is unknown; 17 when every adaptive method fails
  getPageSize() {
    if [[ $ADAPTIVEMODE = $FALSE ]]; then
      vprint " Adaptive mode: Disabled"
      case "$MODE" in
        CATGREP)
          vprint " Method: Cat + Grep"
          getPageSizeCatGrep
          ;;
        MDLS)
          vprint " Method: Mac Quartz mdls"
          getPageSizeMdls
          ;;
        PDFINFO)
          vprint " Method: Linux PdfInfo"
          getPageSizePdfInfo
          ;;
        IDENTIFY)
          vprint " Method: ImageMagick's Identify"
          getPageSizeImagemagick
          ;;
        *)
          echo "Error! Invalid Mode: $MODE"
          echo "Aborting execution..."
          exit 20
          ;;
      esac
      return $TRUE
    fi

    vprint " Adaptive mode: Enabled"

    # 1st attempt: MediaBox from the file itself
    vprint " Method: Cat + Grep"
    getPageSizeCatGrep

    # 2nd attempt: the tool that belongs to the platform
    if [[ -z $PGWIDTH$PGHEIGHT ]]; then
      vprint " Failed"
      if [[ $OSNAME = "Darwin" ]]; then
        vprint " Method: Mac Quartz mdls"
        getPageSizeMdls
      else
        vprint " Method: Linux PdfInfo"
        getPageSizePdfInfo
      fi
    fi

    # 3rd attempt: ImageMagick
    if [[ -z $PGWIDTH$PGHEIGHT ]]; then
      vprint " Failed"
      vprint " Method: ImageMagick's Identify"
      getPageSizeImagemagick
    fi

    # Nothing worked
    if [[ -z $PGWIDTH$PGHEIGHT ]]; then
      vprint " Failed"
      echo "Error when detecting PDF paper size!"
      echo "All methods of detection failed"
      echo "You may want to install pdfinfo or imagemagick"
      exit 17
    fi
  }
  # Parse options.
  # The leading ':' in the option string silences getopts' own messages;
  # unknown options and missing arguments end up in the default branch.
  while getopts ":vhVs:m:" o; do
    case "${o}" in
      v)
        ((VERBOSE++))
        ;;
      h)
        printHelp
        exit 0
        ;;
      V)
        printVersion
        exit 0
        ;;
      s)
        # Quoted, so the argument reaches parseScale as one untouched word
        parseScale "${OPTARG}"
        ;;
      m)
        parseMode "${OPTARG}"
        ;;
      *)
        usage
        ;;
    esac
  done
  # Drop the parsed options; what is left are the file names
  shift $((OPTIND-1))
  ######### START EXECUTION
  # Intro message
  vprint "$(basename -- "$0") v$VERSION - Verbose execution"

  # Dependencies: ghostscript and bc are always required
  vprint "Checking for ghostscript and bcmath"
  command -v gs >/dev/null 2>&1 || printDependency 'ghostscript'
  command -v bc >/dev/null 2>&1 || printDependency 'bc'

  # Tools behind a forced mode are required as well
  if [[ $MODE = "IDENTIFY" ]]; then
    vprint "Checking for imagemagick's identify"
    command -v identify >/dev/null 2>&1 || printDependency 'imagemagick'
  fi
  if [[ $MODE = "PDFINFO" ]]; then
    vprint "Checking for pdfinfo"
    # NOTE(review): 'pdfinfo' is printed as the package to install; the
    # package that ships it may have a different name - confirm.
    command -v pdfinfo >/dev/null 2>&1 || printDependency 'pdfinfo'
  fi

  # Get dependency binaries ('command -v' prints nothing when not found)
  GSBIN="$(command -v gs 2>/dev/null)"
  BCBIN="$(command -v bc 2>/dev/null)"
  IDBIN="$(command -v identify 2>/dev/null)"
  # Look for both tools on every platform, so that a forced mode
  # (-m mdls / -m pdfinfo) works wherever the tool is installed.
  # Adaptive mode still chooses between them by OSNAME.
  MDLSBIN="$(command -v mdls 2>/dev/null)"
  PDFINFOBIN="$(command -v pdfinfo 2>/dev/null)"
  # Verbose scale info
  vprint " Scale factor: $SCALE"

  # Validate args: an input file is required and must be named <name>.pdf
  [[ $# -lt 1 ]] && { usage; exit 1; }
  INFILEPDF="$1"
  [[ "$INFILEPDF" =~ ^..*\.pdf$ ]] || { usage; exit 2; }
  [[ -f "$INFILEPDF" ]] || { echo "Error! File not found: $INFILEPDF"; exit 3; }
  vprint " Input file: $INFILEPDF"

  # Parse output filename; the .pdf extension is added when missing
  if [[ -z $2 ]]; then
    OUTFILEPDF="${INFILEPDF%.pdf}.SCALED.pdf"
  else
    OUTFILEPDF="${2%.pdf}.pdf"
  fi
  vprint " Output file: $OUTFILEPDF"

  # Never write on top of the file being read: ghostscript would replace
  # the input while still processing it. -ef is true when both paths
  # refer to the same existing file.
  if [[ "$OUTFILEPDF" -ef "$INFILEPDF" ]]; then
    echo "Error! The output file is the input file: $OUTFILEPDF"
    echo "Aborting! Please choose a different output file name."
    exit 5
  fi

  getPageSize
  vprint " Width: $PGWIDTH postscript-points"
  vprint " Height: $PGHEIGHT postscript-points"

  # Compute translation factors (to center the page):
  # half of the free space left by the scaling, in scaled coordinates
  XTRANS=$(echo "scale=6; 0.5*(1.0-$SCALE)/$SCALE*$PGWIDTH" | "$BCBIN")
  YTRANS=$(echo "scale=6; 0.5*(1.0-$SCALE)/$SCALE*$PGHEIGHT" | "$BCBIN")
  vprint " Translation X: $XTRANS"
  vprint " Translation Y: $YTRANS"

  # Do it: write a PDF with the original page size in which every page
  # is scaled and then translated by the values computed above
  "$GSBIN" \
    -q -dNOPAUSE -dBATCH -sDEVICE=pdfwrite -dSAFER \
    -dCompatibilityLevel="1.5" -dPDFSETTINGS="/printer" \
    -dColorConversionStrategy=/LeaveColorUnchanged \
    -dSubsetFonts=true -dEmbedAllFonts=true \
    -dDEVICEWIDTH="$PGWIDTH" -dDEVICEHEIGHT="$PGHEIGHT" \
    -sOutputFile="$OUTFILEPDF" \
    -c "<</BeginPage{$SCALE $SCALE scale $XTRANS $YTRANS translate}>> setpagedevice" \
    -f "$INFILEPDF"