Bash Script to scale and/or resize PDFs from the command line.
選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

514 行
15 KiB

  1. #!/usr/bin/env bash
  2. # pdfScale.sh
  3. #
  4. # Scale PDF to specified percentage of original size.
  5. #
  6. # Gustavo Arnosti Neves - 2016 / 07 / 10
  7. #
  8. # This script: https://github.com/tavinus/pdfScale
  9. # Based on: http://ma.juii.net/blog/scale-page-content-of-pdf-files
  10. # And: https://gist.github.com/MichaelJCole/86e4968dbfc13256228a
  11. ###################################################
  12. # PAGESIZE LOGIC
  13. # 1- Try to get Mediabox with CAT/GREP
  14. # Remove /BBox search as it is unreliable
  15. # 2- MacOS => try to use mdls
  16. # Linux => try to use pdfinfo
  17. # 3- Try to use identify (imagemagick)
  18. # 4- Fail
  19. # Remove postscript method,
  20. # may have licensing problems
  21. ###################################################
  22. VERSION="1.4.3"
  23. SCALE="0.95" # scaling factor (0.95 = 95%, e.g.)
  24. VERBOSE=0 # verbosity Level
  25. BASENAME="$(basename $0)" # simplified name of this script
  26. # Set with which after we check dependencies
  27. GSBIN="" # GhostScript Binaries
  28. BCBIN="" # BC Math binary
  29. IDBIN="" # Identify Binary
  30. PDFINFOBIN="" # PDF Info Binary
  31. MDLSBIN="" # MacOS mdls binary
  32. OSNAME="$(uname 2>/dev/null)" # Check where we are running
  33. LC_MEASUREMENT="C" # To make sure our numbers have .decimals
  34. LC_ALL="C" # Some languages use , as decimal token
  35. LC_CTYPE="C"
  36. LC_NUMERIC="C"
  37. TRUE=0 # Silly stuff
  38. FALSE=1
  39. ADAPTIVEMODE=$TRUE # Automatically try to guess best mode
  40. MODE=""
  41. USEIMGMGK=$FALSE # ImageMagick Flag, will use identify if true
  42. USECATGREP=$FALSE # Use old cat + grep method
  43. # Prints version
  44. printVersion() {
  45. if [[ $1 -eq 2 ]]; then
  46. echo >&2 "$BASENAME v$VERSION"
  47. else
  48. echo "$BASENAME v$VERSION"
  49. fi
  50. }
  51. # Prints help info
  52. printHelp() {
  53. printVersion
  54. echo "
  55. Usage: $BASENAME [-v] [-s <factor>] [-i|-c] <inFile.pdf> [outfile.pdf]
  56. $BASENAME -h
  57. $BASENAME -V
  58. Parameters:
  59. -v Verbose mode, prints extra information
  60. Use twice for even more information
  61. -h Print this help to screen and exits
  62. -V Prints version to screen and exits
  63. -m <mode> Force a mode of page size detection.
  64. Will disable the Adaptive Mode.
  65. -c Use cat + grep to get page size,
  66. instead of postscript method
  67. -s <factor> Changes the scaling factor, defaults to 0.95
  68. MUST be a number bigger than zero.
  69. Eg. -s 0.8 for 80% of the original size
  70. Modes:
  71. c, cat+grep Forces the use of the cat + grep method
  72. m, mdls Forces the use of MacOS Quartz mdls
  73. p, pdfinfo Forces the use of Linux PdfInfo
  74. i, identify Forces the use of ImageMagick's Identify
  75. Notes:
  76. - Page size detection will try different modes until it gets
  77. a page size, or you can force a mode with -m 'mode'
  78. - Options must be passed before the file names to be parsed
  79. - The output filename is optional. If no file name is passed
  80. the output file will have the same name/destination of the
  81. input file, with .SCALED.pdf at the end (instead of just .pdf)
  82. - Having the extension .pdf on the output file name is optional,
  83. it will be added if not present
  84. - Should handle file names with spaces without problems
  85. - The scaling is centered and using a scale bigger than 1 may
  86. result on cropping parts of the pdf.
  87. Examples:
  88. $BASENAME myPdfFile.pdf
  89. $BASENAME myPdfFile.pdf myScaledPdf
  90. $BASENAME -v -v myPdfFile.pdf
  91. $BASENAME -s 0.85 myPdfFile.pdf myScaledPdf.pdf
  92. $BASENAME -m pdfinfo -s 0.80 -v myPdfFile.pdf
  93. $BASENAME -v -v -s 0.7 myPdfFile.pdf
  94. $BASENAME -h
  95. "
  96. }
  97. # Prints usage info
  98. usage() {
  99. printVersion 2
  100. echo >&2 "Usage: $BASENAME [-v] [-s <factor>] [-i|-c] <inFile.pdf> [outfile.pdf]"
  101. echo >&2 "Try: $BASENAME -h # for help"
  102. exit 1
  103. }
  104. # Prints Verbose information
  105. vprint() {
  106. [[ $VERBOSE -eq 0 ]] && return 0
  107. timestamp=""
  108. [[ $VERBOSE -gt 1 ]] && timestamp="$(date +%Y-%m-%d:%H:%M:%S) | "
  109. echo "$timestamp$1"
  110. }
  111. # Prints dependency information and aborts execution
  112. printDependency() {
  113. printVersion 2
  114. echo >&2 $'\n'"ERROR! You need to install the package '$1'"$'\n'
  115. echo >&2 "Linux apt-get.: sudo apt-get install $1"
  116. echo >&2 "Linux yum.....: sudo yum install $1"
  117. echo >&2 "MacOS homebrew: brew install $1"
  118. echo >&2 $'\n'"Aborting..."
  119. exit 3
  120. }
  121. # Parses and validates the scaling factor
  122. parseScale() {
  123. if ! [[ -n "$1" && "$1" =~ ^-?[0-9]*([.][0-9]+)?$ && (($1 > 0 )) ]] ; then
  124. echo >&2 "Invalid factor: $1"
  125. echo >&2 "The factor must be a floating point number greater than 0"
  126. echo >&2 "Example: for 80% use 0.8"
  127. exit 2
  128. fi
  129. SCALE=$1
  130. }
  131. # Parse a forced mode of operation
  132. parseMode() {
  133. if [[ -z $1 ]]; then
  134. echo "Mode is empty, please specify the desired mode"
  135. echo "Falling back to adaptive mode!"
  136. ADAPTIVEMODE=$TRUE
  137. MODE=""
  138. return $FALSE
  139. fi
  140. if [[ $1 = 'c' || $1 = 'catgrep' || $1 = 'cat+grep' || $1 = 'CatGrep' || $1 = 'C' ]]; then
  141. ADAPTIVEMODE=$FALSE
  142. MODE="CATGREP"
  143. return $TRUE
  144. elif [[ $1 = 'i' || $1 = 'imagemagick' || $1 = 'identify' || $1 = 'ImageMagick' || $1 = 'Identify' || $1 = 'I' ]]; then
  145. ADAPTIVEMODE=$FALSE
  146. MODE="IDENTIFY"
  147. return $TRUE
  148. elif [[ $1 = 'm' || $1 = 'mdls' || $1 = 'MDLS' || $1 = 'quartz' || $1 = 'mac' || $1 = 'M' ]]; then
  149. ADAPTIVEMODE=$FALSE
  150. MODE="MDLS"
  151. return $TRUE
  152. elif [[ $1 = 'p' || $1 = 'pdfinfo' || $1 = 'PDFINFO' || $1 = 'PdfInfo' || $1 = 'P' ]]; then
  153. ADAPTIVEMODE=$FALSE
  154. MODE="PDFINFO"
  155. return $TRUE
  156. else
  157. echo "Invalid mode: $1"
  158. echo "Falling back to adaptive mode!"
  159. ADAPTIVEMODE=$TRUE
  160. MODE=""
  161. return $FALSE
  162. fi
  163. return $FALSE
  164. }
  165. # Gets page size using imagemagick's identify
  166. getPageSizeImagemagick() {
  167. # Sanity
  168. if [[ ! -f $IDBIN && $ADAPTIVEMODE = $FALSE ]]; then
  169. echo "Error! ImageMagick's Identify was not found!"
  170. echo "Make sure you installed ImageMagick and have identify on your \$PATH"
  171. echo "Aborting! You may want to try the adaptive mode."
  172. exit 15
  173. elif [[ ! -f $IDBIN && $ADAPTIVEMODE = $TRUE ]]; then
  174. return $FALSE
  175. fi
  176. # get data from image magick
  177. local identify="$("$IDBIN" -format '%[fx:w] %[fx:h]BREAKME' "$INFILEPDF" 2>/dev/null)"
  178. # No page size data available
  179. if [[ -z $identify && $ADAPTIVEMODE = $FALSE ]]; then
  180. echo "Error when reading input file!"
  181. echo "Could not determine the page size!"
  182. echo "ImageMagicks's Identify returned an empty string!"
  183. echo "Aborting! You may want to try the adaptive mode."
  184. exit 15
  185. elif [[ -z $identify && $ADAPTIVEMODE = $TRUE ]]; then
  186. return $FALSE
  187. fi
  188. identify="${identify%%BREAKME*}" # get page size only for 1st page
  189. identify=($identify) # make it an array
  190. PGWIDTH=$(printf '%.0f' "${identify[0]}") # assign
  191. PGHEIGHT=$(printf '%.0f' "${identify[1]}") # assign
  192. }
  193. # Gets page size using Mac Quarts mdls
  194. getPageSizeMdls() {
  195. # Sanity
  196. if [[ ! -f $MDLSBIN && $ADAPTIVEMODE = $FALSE ]]; then
  197. echo "Error! Mac Quartz mdls was not found!"
  198. echo "Are you even trying this on a Mac?"
  199. echo "Aborting! You may want to try the adaptive mode."
  200. exit 15
  201. elif [[ ! -f $MDLSBIN && $ADAPTIVEMODE = $TRUE ]]; then
  202. return $FALSE
  203. fi
  204. # get data from mdls
  205. local identify="$("$MDLSBIN" -mdls -name kMDItemPageHeight -name kMDItemPageWidth "$INFILEPDF" 2>/dev/null)"
  206. if [[ -z $identify && $ADAPTIVEMODE = $FALSE ]]; then
  207. echo "Error when reading input file!"
  208. echo "Could not determine the page size!"
  209. echo "Mac Quartz mdls returned an empty string!"
  210. echo "Aborting! You may want to try the adaptive mode."
  211. exit 15
  212. elif [[ -z $identify && $ADAPTIVEMODE = $TRUE ]]; then
  213. return $FALSE
  214. fi
  215. identify=${identify//$'\t'/ } # change tab to space
  216. identify=($identify) # make it an array
  217. PGWIDTH=$(printf '%.0f' "${identify[2]}") # assign
  218. PGHEIGHT=$(printf '%.0f' "${identify[5]}") # assign
  219. }
  220. # Gets page size using Linux PdfInfo
  221. getPageSizePdfInfo() {
  222. # Sanity
  223. if [[ ! -f $PDFINFOBIN && $ADAPTIVEMODE = $FALSE ]]; then
  224. echo "Error! Linux pdfinfo was not found!"
  225. echo "Do you have pdfinfo installed and available on your \$PATH?"
  226. echo "Aborting! You may want to try the adaptive mode."
  227. exit 15
  228. elif [[ ! -f $PDFINFOBIN && $ADAPTIVEMODE = $TRUE ]]; then
  229. return $FALSE
  230. fi
  231. # get data from image magick
  232. local identify="$("$PDFINFOBIN" "$INFILEPDF" 2>/dev/null | grep -i 'Page size:' )"
  233. if [[ -z $identify && $ADAPTIVEMODE = $FALSE ]]; then
  234. echo "Error when reading input file!"
  235. echo "Could not determine the page size!"
  236. echo "Linux PdfInfo returned an empty string!"
  237. echo "Aborting! You may want to try the adaptive mode."
  238. exit 15
  239. elif [[ -z $identify && $ADAPTIVEMODE = $TRUE ]]; then
  240. return $FALSE
  241. fi
  242. identify="${identify##*Page size:}"
  243. echo "-->$identify"
  244. identify=($identify) # make it an array
  245. echo " - ${identify[0]}"
  246. echo " - ${identify[1]}"
  247. echo " - ${identify[2]}"
  248. echo " - ${identify[3]}"
  249. echo " - ${identify[4]}"
  250. PGWIDTH=$(printf '%.0f' "${identify[0]}") # assign
  251. PGHEIGHT=$(printf '%.0f' "${identify[2]}") # assign
  252. }
  253. # Gets page size using cat and grep
  254. getPageSizeCatGrep() {
  255. # get MediaBox info from PDF file using cat and grep, these are all possible
  256. # /MediaBox [0 0 595 841]
  257. # /MediaBox [ 0 0 595.28 841.89]
  258. # /MediaBox[ 0 0 595.28 841.89 ]
  259. # Get MediaBox data if possible
  260. local mediaBox="$(cat "$INFILEPDF" | grep -a '/MediaBox' | head -n1)"
  261. mediaBox="${mediaBox##*/MediaBox}"
  262. # No page size data available
  263. if [[ -z $mediaBox && $ADAPTIVEMODE = $FALSE ]]; then
  264. echo "Error when reading input file!"
  265. echo "Could not determine the page size!"
  266. echo "There is no MediaBox in the pdf document!"
  267. echo "Aborting! You may want to try the adaptive mode."
  268. exit 15
  269. elif [[ -z $mediaBox && $ADAPTIVEMODE = $TRUE ]]; then
  270. return $FALSE
  271. fi
  272. # remove chars [ and ]
  273. mediaBox="${mediaBox//[}"
  274. mediaBox="${mediaBox//]}"
  275. mediaBox=($mediaBox) # make it an array
  276. mbCount=${#mediaBox[@]} # array size
  277. # sanity
  278. if [[ $mbCount -lt 4 ]]; then
  279. echo "Error when reading the page size!"
  280. echo "The page size information is invalid!"
  281. exit 16
  282. fi
  283. # we are done
  284. PGWIDTH=$(printf '%.0f' "${mediaBox[2]}") # Get Round Width
  285. PGHEIGHT=$(printf '%.0f' "${mediaBox[3]}") # Get Round Height
  286. return $TRUE
  287. }
  288. getPageSize() {
  289. if [[ $ADAPTIVEMODE = $FALSE ]]; then
  290. vprint " Adaptive mode: Disabled"
  291. if [[ $MODE = "CATGREP" ]]; then
  292. vprint " Method: Cat + Grep"
  293. getPageSizeCatGrep
  294. elif [[ $MODE = "MDLS" ]]; then
  295. vprint " Method: Mac Quartz mdls"
  296. getPageSizeMdls
  297. elif [[ $MODE = "PDFINFO" ]]; then
  298. vprint " Method: Linux PdfInfo"
  299. getPageSizePdfInfo
  300. elif [[ $MODE = "IDENTIFY" ]]; then
  301. vprint " Method: ImageMagick's Identify"
  302. getPageSizeImagemagick
  303. else
  304. echo "Error! Invalid Mode: $MODE"
  305. echo "Aborting execution..."
  306. exit 20
  307. fi
  308. return $TRUE
  309. fi
  310. vprint " Adaptive mode: Enabled"
  311. vprint " Method: Cat + Grep"
  312. getPageSizeCatGrep
  313. if [[ -z $PGWIDTH && -z $PGHEIGHT ]]; then
  314. vprint " Failed"
  315. if [[ $OSNAME = "Darwin" ]]; then
  316. vprint " Method: Mac Quartz mdls"
  317. getPageSizeMdls
  318. else
  319. vprint " Method: Linux PdfInfo"
  320. getPageSizePdfInfo
  321. fi
  322. fi
  323. if [[ -z $PGWIDTH && -z $PGHEIGHT ]]; then
  324. vprint " Failed"
  325. vprint " Method: ImageMagick's Identify"
  326. getPageSizeImagemagick
  327. fi
  328. if [[ -z $PGWIDTH && -z $PGHEIGHT ]]; then
  329. vprint " Failed"
  330. echo "Error when detecting PDF paper size!"
  331. echo "All methods of detection failed"
  332. exit 17
  333. fi
  334. }
  335. # Parse options
  336. while getopts ":vhVs:m:" o; do
  337. case "${o}" in
  338. v)
  339. ((VERBOSE++))
  340. ;;
  341. h)
  342. printHelp
  343. exit 0
  344. ;;
  345. V)
  346. printVersion
  347. exit 0
  348. ;;
  349. s)
  350. parseScale ${OPTARG}
  351. ;;
  352. m)
  353. parseMode ${OPTARG}
  354. ;;
  355. *)
  356. usage
  357. ;;
  358. esac
  359. done
  360. shift $((OPTIND-1))
  361. ######### START EXECUTION
  362. #Intro message
  363. vprint "$(basename $0) v$VERSION - Verbose execution"
  364. # Dependencies
  365. vprint "Checking for ghostscript and bcmath"
  366. command -v gs >/dev/null 2>&1 || printDependency 'ghostscript'
  367. command -v bc >/dev/null 2>&1 || printDependency 'bc'
  368. if [[ $MODE = "IDENTIFY" ]]; then
  369. vprint "Checking for imagemagick's identify"
  370. command -v identify >/dev/null 2>&1 || printDependency 'imagemagick'
  371. fi
  372. if [[ $MODE = "PDFINFO" ]]; then
  373. vprint "Checking for pdfinfo"
  374. command -v pdfinfo >/dev/null 2>&1 || printDependency 'pdfinfo'
  375. fi
  376. # Get dependency binaries
  377. GSBIN="$(which gs 2>/dev/null)"
  378. BCBIN="$(which bc 2>/dev/null)"
  379. IDBIN=$(which identify 2>/dev/null)
  380. if [[ $OSNAME = "Darwin" ]]; then
  381. MDLSBIN="$(which mdls 2>/dev/null)"
  382. else
  383. PDFINFOBIN="$(which pdfinfo 2>/dev/null)"
  384. fi
  385. # Verbose scale info
  386. vprint " Scale factor: $SCALE"
  387. # Validate args
  388. [[ $# -lt 1 ]] && { usage; exit 1; }
  389. INFILEPDF="$1"
  390. [[ "$INFILEPDF" =~ ^..*\.pdf$ ]] || { usage; exit 2; }
  391. [[ -f "$INFILEPDF" ]] || { echo "Error! File not found: $INFILEPDF"; exit 3; }
  392. vprint " Input file: $INFILEPDF"
  393. # Parse output filename
  394. if [[ -z $2 ]]; then
  395. OUTFILEPDF="${INFILEPDF%.pdf}.SCALED.pdf"
  396. else
  397. OUTFILEPDF="${2%.pdf}.pdf"
  398. fi
  399. vprint " Output file: $OUTFILEPDF"
  400. getPageSize
  401. # Set PGWIDTH and PGHEIGHT
  402. #if [[ $USEIMGMGK -eq $TRUE ]]; then
  403. # getPageSizeImagemagick
  404. #elif [[ $USECATGREP -eq $TRUE ]]; then
  405. # getPageSize
  406. #else
  407. # getPageSizeGS
  408. #fi
  409. vprint " Width: $PGWIDTH postscript-points"
  410. vprint " Height: $PGHEIGHT postscript-points"
  411. # Compute translation factors (to center page.
  412. XTRANS=$(echo "scale=6; 0.5*(1.0-$SCALE)/$SCALE*$PGWIDTH" | "$BCBIN")
  413. YTRANS=$(echo "scale=6; 0.5*(1.0-$SCALE)/$SCALE*$PGHEIGHT" | "$BCBIN")
  414. vprint " Translation X: $XTRANS"
  415. vprint " Translation Y: $YTRANS"
  416. # Do it.
  417. "$GSBIN" \
  418. -q -dNOPAUSE -dBATCH -sDEVICE=pdfwrite -dSAFER \
  419. -dCompatibilityLevel="1.5" -dPDFSETTINGS="/printer" \
  420. -dColorConversionStrategy=/LeaveColorUnchanged \
  421. -dSubsetFonts=true -dEmbedAllFonts=true \
  422. -dDEVICEWIDTH=$PGWIDTH -dDEVICEHEIGHT=$PGHEIGHT \
  423. -sOutputFile="$OUTFILEPDF" \
  424. -c "<</BeginPage{$SCALE $SCALE scale $XTRANS $YTRANS translate}>> setpagedevice" \
  425. -f "$INFILEPDF"