Bash Script to scale and/or resize PDFs from the command line.
Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.
 
 

376 řádky
11 KiB

  1. #!/usr/bin/env bash
  2. # pdfScale.sh
  3. #
  4. # Scale PDF to specified percentage of original size.
  5. #
  6. # Gustavo Arnosti Neves - 2016 / 07 / 10
  7. #
  8. # This script: https://github.com/tavinus/pdfScale
  9. # Based on: http://ma.juii.net/blog/scale-page-content-of-pdf-files
  10. # And: https://gist.github.com/MichaelJCole/86e4968dbfc13256228a
  11. ###################################################
  12. # PAGESIZE LOGIC
  13. # 1- Try to get Mediabox with CAT/GREP
  14. # Remove /BBox search as it is unreliable
  15. # 2- MacOS => try to use mdls
  16. # Linux => try to use pdfinfo
  17. # 3- Try to use identify (imagemagick)
  18. # 4- Fail
  19. # Remove postscript method,
  20. # may have licensing problems
  21. ###################################################
  # ---------------------------------------------------------------------
  # Global defaults and state shared by the whole script.
  # ---------------------------------------------------------------------
  VERSION="1.4.1"
  SCALE="0.95"                     # scaling factor (0.95 = 95%, e.g.)
  VERBOSE=0                        # verbosity level, raised by each -v
  BASENAME="$(basename -- "$0")"   # simplified name of this script

  # Paths of the external tools; filled in once dependencies are checked
  GSBIN=""                         # GhostScript binary
  BCBIN=""                         # bc math binary
  IDBIN=""                         # ImageMagick identify binary
  PDFINFOBIN=""                    # pdfinfo binary
  MDLSBIN=""                       # MacOS mdls binary
  OSNAME="$(uname 2>/dev/null)"    # Check where we are running

  # Force the C locale so numbers always use "." as the decimal separator.
  # The variables are exported so that child processes (bc, gs, identify,
  # ...) use the same locale as the shell builtins do.
  export LC_MEASUREMENT="C"
  export LC_ALL="C"
  export LC_CTYPE="C"
  export LC_NUMERIC="C"

  # Shell-style booleans: 0 means true/success, 1 means false/failure
  TRUE=0
  FALSE=1

  ADAPTIVEMODE=$TRUE               # Automatically try to guess best mode
  MODE=""
  USEIMGMGK=$FALSE                 # ImageMagick flag, use identify if true
  USECATGREP=$FALSE                # Use old cat + grep method
  # Prints "<script name> v<version>".
  #   $1 - optional stream selector: 2 sends the line to stderr,
  #        anything else (or nothing) sends it to stdout.
  # Reads the globals BASENAME and VERSION.
  printVersion() {
    local banner="$BASENAME v$VERSION"
    if [[ $1 -eq 2 ]]; then
      echo "$banner" >&2
      return
    fi
    echo "$banner"
  }
  # Prints the version banner followed by the full help text on stdout.
  # The text is framed by one empty line before and one after.
  # Reads the global BASENAME; calls printVersion.
  printHelp() {
    printVersion
    printf '%s\n' \
      '' \
      "Usage: $BASENAME [-v] [-s <factor>] [-i|-c] <inFile.pdf> [outfile.pdf]" \
      "$BASENAME -h" \
      "$BASENAME -V" \
      'Parameters:' \
      '-v Verbose mode, prints extra information' \
      'Use twice for even more information' \
      '-h Print this help to screen and exits' \
      '-V Prints version to screen and exits' \
      '-i Use imagemagick to get page size,' \
      'instead of postscript method' \
      '-c Use cat + grep to get page size,' \
      'instead of postscript method' \
      '-s <factor> Changes the scaling factor, defaults to 0.95' \
      'MUST be a number bigger than zero.' \
      'Eg. -s 0.8 for 80% of the original size' \
      'Notes:' \
      '- Options must be passed before the file names to be parsed' \
      '- The output filename is optional. If no file name is passed' \
      'the output file will have the same name/destination of the' \
      'input file, with .SCALED.pdf at the end (instead of just .pdf)' \
      '- Having the extension .pdf on the output file name is optional,' \
      'it will be added if not present' \
      '- Should handle file names with spaces without problems' \
      '- The scaling is centered and using a scale bigger than 1 may' \
      'result on cropping parts of the pdf.' \
      'Examples:' \
      "$BASENAME myPdfFile.pdf" \
      "$BASENAME myPdfFile.pdf myScaledPdf" \
      "$BASENAME -v -v myPdfFile.pdf" \
      "$BASENAME -s 0.85 myPdfFile.pdf myScaledPdf.pdf" \
      "$BASENAME -i -s 0.80 -v myPdfFile.pdf" \
      "$BASENAME -v -v -s 0.7 myPdfFile.pdf" \
      "$BASENAME -h" \
      ''
  }
  # Prints a short usage reminder on stderr and terminates the script
  # with exit status 1. Reads the global BASENAME; calls printVersion.
  usage() {
    {
      printVersion 2
      echo "Usage: $BASENAME [-v] [-s <factor>] [-i|-c] <inFile.pdf> [outfile.pdf]"
      echo "Try: $BASENAME -h # for help"
    } >&2
    exit 1
  }
  # Prints a message on stdout, but only in verbose mode.
  #   $1 - the message to print
  # Reads the global VERBOSE:
  #   0  -> prints nothing
  #   1  -> prints the message
  #   >1 -> prints the message prefixed with "YYYY-MM-DD:HH:MM:SS | "
  # Always returns 0.
  vprint() {
    [[ $VERBOSE -eq 0 ]] && return 0
    # Kept local so a caller's own "timestamp" variable is not clobbered
    local prefix=""
    [[ $VERBOSE -gt 1 ]] && prefix="$(date +%Y-%m-%d:%H:%M:%S) | "
    # printf instead of echo: a message such as "-n" must be printed as-is
    printf '%s\n' "$prefix$1"
  }
  # Reports a missing package on stderr, with install hints for apt-get,
  # yum and homebrew, then terminates the script with exit status 3.
  #   $1 - name of the package that has to be installed
  # Calls printVersion.
  printDependency() {
    local pkg="$1"
    printVersion 2
    {
      printf '\n%s\n\n' "ERROR! You need to install the package '$pkg'"
      printf '%s\n' \
        "Linux apt-get.: sudo apt-get install $pkg" \
        "Linux yum.....: sudo yum install $pkg" \
        "MacOS homebrew: brew install $pkg"
      printf '\n%s\n' "Aborting..."
    } >&2
    exit 3
  }
  # Parses and validates the scaling factor.
  #   $1 - candidate factor; must be a plain decimal number greater than
  #        zero, e.g. "2", "0.8" or ".5"
  # On success stores the value in the global SCALE.
  # On failure prints an explanation on stderr and exits with status 2.
  parseScale() {
    # Digits with an optional fractional part; no sign, no exponent
    local number_re='^[0-9]*([.][0-9]+)?$'
    # A positive number needs at least one non-zero digit. This replaces a
    # string comparison against "0" that accepted "0.0"/"00" (leading to a
    # division by zero later on) and rejected valid input such as ".5".
    if [[ -z "$1" || ! "$1" =~ $number_re || ! "$1" =~ [1-9] ]]; then
      echo >&2 "Invalid factor: $1"
      echo >&2 "The factor must be a floating point number greater than 0"
      echo >&2 "Example: for 80% use 0.8"
      exit 2
    fi
    SCALE=$1
  }
  # Gets the page size of the first page using ImageMagick's identify.
  # Reads the globals IDBIN (command to run) and INFILEPDF (file to probe).
  # On success sets the globals PGWIDTH and PGHEIGHT to the rounded width
  # and height and returns 0.
  # On failure returns 1 and leaves PGWIDTH and PGHEIGHT untouched, so a
  # failed probe is never mistaken for a 0 x 0 page.
  getPageSizeImagemagick() {
    [[ -n $IDBIN ]] || return 1

    local raw
    local -a dims=()
    local number_re='^[0-9]+([.][0-9]+)?$'

    # identify prints one "<w> <h>BREAKME" record per page
    raw="$("$IDBIN" -format '%[fx:w] %[fx:h]BREAKME' "$INFILEPDF" 2>/dev/null)"
    raw="${raw%%BREAKME*}"          # keep the record of the 1st page only
    read -r -a dims <<< "$raw"      # split on blanks without globbing

    [[ ${dims[0]} =~ $number_re && ${dims[1]} =~ $number_re ]] || return 1

    PGWIDTH=$(printf '%.0f' "${dims[0]}")
    PGHEIGHT=$(printf '%.0f' "${dims[1]}")
    return 0
  }
  133. # Gets page size using Mac Quarts mdls
  134. getPageSizeMdls() {
  135. [[ ! $OSNAME = "Darwin" ]] && return
  136. # get data from image magick
  137. local identify="$("$MDLSBIN" -mdls -name kMDItemPageHeight -name kMDItemPageWidth "$INFILEPDF" 2>/dev/null)"
  138. identify=($identify) # make it an array
  139. echo " - ${identify[0]} - ${identify[1] - ${identify[3]} - ${identify[4]}}"
  140. PGWIDTH=$(printf '%.0f' "${identify[1]}") # assign
  141. PGHEIGHT=$(printf '%.0f' "${identify[3]}") # assign
  142. }
  # Gets the page size using pdfinfo.
  # Reads the globals PDFINFOBIN (command to run) and INFILEPDF.
  # Looks for the first line of the form
  #   Page size:      595.276 x 841.89 pts (A4)
  # On success sets the globals PGWIDTH and PGHEIGHT (rounded) and
  # returns 0.
  # Returns 1 without touching them when pdfinfo is unavailable or prints
  # no such line.
  getPageSizePdfInfo() {
    [[ -n $PDFINFOBIN ]] || return 1

    local line width="" height=""
    # Group 1 is the width, group 3 the height (2 and 4 are the fractions)
    local size_re='^Page size:[[:space:]]*([0-9]+([.][0-9]+)?)[[:space:]]+x[[:space:]]+([0-9]+([.][0-9]+)?)'

    while IFS= read -r line; do
      if [[ $line =~ $size_re ]]; then
        width="${BASH_REMATCH[1]}"
        height="${BASH_REMATCH[3]}"
        break
      fi
    done < <("$PDFINFOBIN" "$INFILEPDF" 2>/dev/null)

    [[ -n $width && -n $height ]] || return 1

    PGWIDTH=$(printf '%.0f' "$width")
    PGHEIGHT=$(printf '%.0f' "$height")
    return 0
  }
  # Gets the page size from the first /MediaBox entry found in the PDF.
  # Reads the globals INFILEPDF, ADAPTIVEMODE, TRUE and FALSE.
  # Recognised forms include:
  #   /MediaBox [0 0 595 841]
  #   /MediaBox [ 0 0 595.28 841.89]
  #   /MediaBox[ 0 0 595.28 841.89 ]
  # and the same followed by more content on the line, for example
  #   /MediaBox [0 0 595 841]/Parent 2 0 R
  # On success sets the globals PGWIDTH and PGHEIGHT to the rounded 3rd
  # and 4th MediaBox numbers and returns $TRUE.
  # In adaptive mode any failure returns $FALSE, so the caller can try
  # another method. Otherwise the script exits with status 15 (no
  # MediaBox at all) or 16 (MediaBox present but unusable), with the
  # explanation on stderr.
  getPageSizeCatGrep() {
    local line
    local num='([-+]?[0-9]+[.]?[0-9]*|[-+]?[.][0-9]+)'
    local ws='[[:space:]]'
    local box_re="/MediaBox${ws}*\[${ws}*${num}${ws}+${num}${ws}+${num}${ws}+${num}${ws}*\]"

    # -a: treat the (binary) PDF as text, -m 1: stop at the first hit
    line="$(grep -a -m 1 '/MediaBox' -- "$INFILEPDF")"

    # No page size data available
    if [[ -z $line ]]; then
      [[ $ADAPTIVEMODE = "$TRUE" ]] && return "$FALSE"
      {
        echo "Error when reading input file!"
        echo "Could not determine the page size!"
        echo "There is no MediaBox in the pdf document!"
        echo "Aborting! You may want to try the adaptive mode."
      } >&2
      exit 15
    fi

    # sanity: four numbers between the brackets are required
    if ! [[ $line =~ $box_re ]]; then
      [[ $ADAPTIVEMODE = "$TRUE" ]] && return "$FALSE"
      {
        echo "Error when reading the page size!"
        echo "The page size information is invalid!"
      } >&2
      exit 16
    fi

    # we are done
    PGWIDTH=$(printf '%.0f' "${BASH_REMATCH[3]}")    # Get Round Width
    PGHEIGHT=$(printf '%.0f' "${BASH_REMATCH[4]}")   # Get Round Height
    return "$TRUE"
  }
  # Detects the page size by trying the available methods in order:
  #   1. cat+grep on the /MediaBox entry
  #   2. mdls on MacOS, pdfinfo everywhere else
  #   3. ImageMagick's identify
  # A method counts as successful only when BOTH globals PGWIDTH and
  # PGHEIGHT are set afterwards; a half-detected size is discarded and the
  # next method is tried.
  # Exits with status 17 (message on stderr) when every method failed.
  getPageSize() {
    vprint "Detecting page size with cat+grep method"
    getPageSizeCatGrep

    if [[ -z $PGWIDTH || -z $PGHEIGHT ]]; then
      PGWIDTH=""; PGHEIGHT=""
      vprint " -> method failed!"
      if [[ $OSNAME = "Darwin" ]]; then
        vprint "Detecting page size with Mac Quartz mdls"
        getPageSizeMdls
      else
        vprint "Detecting page size with Linux pdfinfo"
        getPageSizePdfInfo
      fi
    fi

    if [[ -z $PGWIDTH || -z $PGHEIGHT ]]; then
      PGWIDTH=""; PGHEIGHT=""
      vprint " -> method failed!"
      vprint " Detecting page size with ImageMagick's identify"
      getPageSizeImagemagick
    fi

    if [[ -z $PGWIDTH || -z $PGHEIGHT ]]; then
      echo >&2 "Error when detecting PDF paper size!"
      echo >&2 "All methods of detection failed"
      exit 17
    fi
  }
  # Parse options.
  # The leading ":" in the option string selects silent error reporting:
  # unknown options and a missing argument for -s both end up in the "*"
  # arm, which prints the usage text and exits.
  while getopts ":vichVs:" o; do
    case "${o}" in
      v)
        VERBOSE=$((VERBOSE + 1))
        ;;
      h)
        printHelp
        exit 0
        ;;
      V)
        printVersion
        exit 0
        ;;
      s)
        # Quoted so the value reaches the validator exactly as typed,
        # without word splitting or pathname expansion
        parseScale "${OPTARG}"
        ;;
      i)
        USEIMGMGK=$TRUE
        USECATGREP=$FALSE
        ;;
      c)
        USECATGREP=$TRUE
        USEIMGMGK=$FALSE
        ;;
      *)
        usage
        ;;
    esac
  done
  # Drop the parsed options; only the file names remain in "$@"
  shift $((OPTIND - 1))
  ######### START EXECUTION

  # Intro message
  vprint "$BASENAME v$VERSION - Verbose execution"

  # Dependencies: ghostscript and bc are mandatory. "command -v" is a
  # shell builtin, so it both checks for and locates each tool without
  # relying on the external "which" program.
  vprint "Checking for ghostscript and bcmath"
  GSBIN="$(command -v gs 2>/dev/null)" || printDependency 'ghostscript'
  BCBIN="$(command -v bc 2>/dev/null)" || printDependency 'bc'

  # identify is mandatory only when it was requested with -i; it is still
  # located otherwise, because page size detection may fall back to it
  IDBIN="$(command -v identify 2>/dev/null)"
  if [[ $USEIMGMGK -eq $TRUE ]]; then
    vprint "Checking for imagemagick's identify"
    [[ -n $IDBIN ]] || printDependency 'imagemagick'
  fi

  # Optional platform specific helper for page size detection
  if [[ $OSNAME = "Darwin" ]]; then
    MDLSBIN="$(command -v mdls 2>/dev/null)"
  else
    PDFINFOBIN="$(command -v pdfinfo 2>/dev/null)"
  fi

  # Verbose scale info
  vprint " Scale factor: $SCALE"

  # Validate args (usage prints the usage text and exits with status 1)
  [[ $# -lt 1 ]] && usage
  INFILEPDF="$1"
  [[ "$INFILEPDF" =~ ^..*\.pdf$ ]] || usage
  if [[ ! -f "$INFILEPDF" ]]; then
    echo >&2 "Error! File not found: $INFILEPDF"
    exit 3
  fi
  vprint " Input file: $INFILEPDF"

  # Parse output filename
  if [[ -z $2 ]]; then
    OUTFILEPDF="${INFILEPDF%.pdf}.SCALED.pdf"
  else
    OUTFILEPDF="${2%.pdf}.pdf"
  fi
  # Writing the result over the file that is still being read would
  # destroy the input
  if [[ "$OUTFILEPDF" = "$INFILEPDF" ]]; then
    echo >&2 "Error! Output file must be different from input file: $INFILEPDF"
    exit 4
  fi
  vprint " Output file: $OUTFILEPDF"
  # Set PGWIDTH and PGHEIGHT according to the selected mode:
  #   -i       -> ImageMagick's identify only
  #   -c       -> cat + grep only (adaptive fallback switched off)
  #   default  -> adaptive detection, trying every available method
  if [[ $USEIMGMGK -eq $TRUE ]]; then
    getPageSizeImagemagick
  elif [[ $USECATGREP -eq $TRUE ]]; then
    ADAPTIVEMODE=$FALSE
    getPageSizeCatGrep
  else
    getPageSize
  fi

  # Refuse to continue without a usable size: the values are fed into bc
  # and ghostscript below and must be positive integers
  if ! [[ $PGWIDTH =~ ^[1-9][0-9]*$ && $PGHEIGHT =~ ^[1-9][0-9]*$ ]]; then
    echo >&2 "Error when detecting PDF paper size!"
    exit 17
  fi
  vprint " Width: $PGWIDTH postscript-points"
  vprint " Height: $PGHEIGHT postscript-points"

  # Compute translation factors (to center the page).
  XTRANS=$("$BCBIN" <<< "scale=6; 0.5*(1.0-$SCALE)/$SCALE*$PGWIDTH")
  YTRANS=$("$BCBIN" <<< "scale=6; 0.5*(1.0-$SCALE)/$SCALE*$PGHEIGHT")
  vprint " Translation X: $XTRANS"
  vprint " Translation Y: $YTRANS"

  # Do it. The BeginPage hook scales every page and shifts it so the
  # scaled content stays centered; the exit status of ghostscript becomes
  # the exit status of the script.
  "$GSBIN" \
    -q -dNOPAUSE -dBATCH -sDEVICE=pdfwrite -dSAFER \
    -dCompatibilityLevel="1.5" -dPDFSETTINGS="/printer" \
    -dColorConversionStrategy=/LeaveColorUnchanged \
    -dSubsetFonts=true -dEmbedAllFonts=true \
    -dDEVICEWIDTH="$PGWIDTH" -dDEVICEHEIGHT="$PGHEIGHT" \
    -sOutputFile="$OUTFILEPDF" \
    -c "<</BeginPage{$SCALE $SCALE scale $XTRANS $YTRANS translate}>> setpagedevice" \
    -f "$INFILEPDF"