Bash Script to scale and/or resize PDFs from the command line.
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
 
 

714 行
19 KiB

  1. #!/usr/bin/env bash
  2. # pdfScale.sh
  3. #
  4. # Scale PDF to specified percentage of original size.
  5. #
  6. # Gustavo Arnosti Neves - 2016 / 07 / 10
  7. #
  8. # This script: https://github.com/tavinus/pdfScale
  9. # Based on: http://ma.juii.net/blog/scale-page-content-of-pdf-files
  10. # And: https://gist.github.com/MichaelJCole/86e4968dbfc13256228a
  11. VERSION="1.3.3"
  12. SCALE="0.95" # scaling factor (0.95 = 95%, e.g.)
  13. VERBOSE=0 # verbosity Level
  14. BASENAME="$(basename $0)" # simplified name of this script
  15. GSBIN="" # Set with which after we check dependencies
  16. BCBIN="" # Set with which after we check dependencies
  17. IDBIN="" # Set with which after we check dependencies
  18. LC_MEASUREMENT="C" # To make sure our numbers have .decimals
  19. LC_ALL="C" # Some languages use , as decimal token
  20. LC_CTYPE="C"
  21. LC_NUMERIC="C"
  22. TRUE=0 # Silly stuff
  23. FALSE=1
  24. USEIMGMGK=$FALSE # ImageMagick Flag, will use identify if true
  25. USECATGREP=$FALSE # Use old cat + grep method
  26. # Prints version
  27. printVersion() {
  28. if [[ $1 -eq 2 ]]; then
  29. echo >&2 "$BASENAME v$VERSION"
  30. else
  31. echo "$BASENAME v$VERSION"
  32. fi
  33. }
  34. # Prints help info
  35. printHelp() {
  36. printVersion
  37. echo "
  38. Usage: $BASENAME [-v] [-s <factor>] [-i|-c] <inFile.pdf> [outfile.pdf]
  39. $BASENAME -h
  40. $BASENAME -V
  41. Parameters:
  42. -v Verbose mode, prints extra information
  43. Use twice for even more information
  44. -h Print this help to screen and exits
  45. -V Prints version to screen and exits
  46. -i Use imagemagick to get page size,
  47. instead of postscript method
  48. -c Use cat + grep to get page size,
  49. instead of postscript method
  50. -s <factor> Changes the scaling factor, defaults to 0.95
  51. MUST be a number bigger than zero.
  52. Eg. -s 0.8 for 80% of the original size
  53. Notes:
  54. - Options must be passed before the file names to be parsed
  55. - The output filename is optional. If no file name is passed
  56. the output file will have the same name/destination of the
  57. input file, with .SCALED.pdf at the end (instead of just .pdf)
  58. - Having the extension .pdf on the output file name is optional,
  59. it will be added if not present
  60. - Should handle file names with spaces without problems
  61. - The scaling is centered and using a scale bigger than 1 may
  62. result on cropping parts of the pdf.
  63. Examples:
  64. $BASENAME myPdfFile.pdf
  65. $BASENAME myPdfFile.pdf myScaledPdf
  66. $BASENAME -v -v myPdfFile.pdf
  67. $BASENAME -s 0.85 myPdfFile.pdf myScaledPdf.pdf
  68. $BASENAME -i -s 0.80 -v myPdfFile.pdf
  69. $BASENAME -v -v -s 0.7 myPdfFile.pdf
  70. $BASENAME -h
  71. "
  72. }
  73. # Prints usage info
  74. usage() {
  75. printVersion 2
  76. echo >&2 "Usage: $BASENAME [-v] [-s <factor>] <inFile.pdf> [outfile.pdf]"
  77. echo >&2 "Try: $BASENAME -h # for help"
  78. exit 1
  79. }
  80. # Prints Verbose information
  81. vprint() {
  82. [[ $VERBOSE -eq 0 ]] && return 0
  83. timestamp=""
  84. [[ $VERBOSE -gt 1 ]] && timestamp="$(date +%Y-%m-%d:%H:%M:%S) | "
  85. echo "$timestamp$1"
  86. }
  87. # Prints dependency information and aborts execution
  88. printDependency() {
  89. printVersion 2
  90. echo >&2 $'\n'"ERROR! You need to install the package '$1'"$'\n'
  91. echo >&2 "Linux apt-get.: sudo apt-get install $1"
  92. echo >&2 "Linux yum.....: sudo yum install $1"
  93. echo >&2 "MacOS homebrew: brew install $1"
  94. echo >&2 $'\n'"Aborting..."
  95. exit 3
  96. }
  97. # Parses and validates the scaling factor
  98. parseScale() {
  99. if ! [[ -n "$1" && "$1" =~ ^-?[0-9]*([.][0-9]+)?$ && (($1 > 0 )) ]] ; then
  100. echo >&2 "Invalid factor: $1"
  101. echo >&2 "The factor must be a floating point number greater than 0"
  102. echo >&2 "Example: for 80% use 0.8"
  103. exit 2
  104. fi
  105. SCALE=$1
  106. }
  107. # Gets page size using imagemagick's identify
  108. getPageSizeImagemagick() {
  109. # get data from image magick
  110. local identify="$("$IDBIN" -format '%[fx:w] %[fx:h]BREAKME' "$INFILEPDF" 2>/dev/null)"
  111. identify="${identify%%BREAKME*}" # get page size only for 1st page
  112. identify=($identify) # make it an array
  113. PGWIDTH=$(printf '%.0f' "${identify[0]}") # assign
  114. PGHEIGHT=$(printf '%.0f' "${identify[1]}") # assign
  115. }
  116. # Gets page size using toolbin_pdfinfo.ps
  117. getPageSizeGS() {
  118. local PDFINFOGS=''
  119. read -r -d '' PDFINFOGS <<'EOF'
  120. %!PS
  121. % Copyright (C) 2001-2012 Artifex Software, Inc.
  122. % All Rights Reserved.
  123. %
  124. % This software is provided AS-IS with no warranty, either express or
  125. % implied.
  126. %
  127. % This software is distributed under license and may not be copied,
  128. % modified or distributed except as expressly authorized under the terms
  129. % of the license contained in the file LICENSE in this distribution.
  130. %
  131. % Refer to licensing information at http://www.artifex.com or contact
  132. % Artifex Software, Inc., 7 Mt. Lassen Drive - Suite A-134, San Rafael,
  133. % CA 94903, U.S.A., +1(415)492-9861, for further information.
  134. %
  135. %
  136. % $Id: pdf_info.ps 6300 2005-12-28 19:56:24Z alexcher $
  137. % Dump some info from a PDF file
  138. % usage: gs -dNODISPLAY -q -sFile=____.pdf [-dDumpMediaSizes=false] [-dDumpFontsNeeded=false] [-dDumpXML]
  139. % [-dDumpFontsUsed [-dShowEmbeddedFonts] ] toolbin/pdf_info.ps
  140. 128 dict begin
  141. /QUIET true def % in case they forgot
  142. /showoptions {
  143. ( where "options" are:) =
  144. ( -dDumpMediaSizes=false (default true) MediaBox and CropBox for each page) =
  145. ( -dDumpFontsNeeded=false (default true)Fonts used, but not embedded) =
  146. ( -dDumpXML print the XML Metadata from the PDF, if present) =
  147. ( -dDumpFontsUsed List all fonts used) =
  148. ( -dShowEmbeddedFonts only meaningful with -dDumpFontsUsed) =
  149. (\n If no options are given, the default is -dDumpMediaSizes -dDumpFontsNeeded) =
  150. () =
  151. flush
  152. } bind def
  153. /DumpMediaSizes where { pop } { /DumpMediaSizes true def } ifelse
  154. /DumpFontsNeeded where { pop } { /DumpFontsNeeded true def } ifelse
  155. [ shellarguments
  156. { counttomark 1 eq {
  157. dup 0 get (-) 0 get ne {
  158. % File specified on the command line using: -- toolbin/pdf_info.ps infile.pdf
  159. /File exch def
  160. false % dont show usage
  161. } {
  162. true % show usage and quit
  163. } ifelse
  164. } { true } ifelse
  165. {
  166. (\n*** Usage: gs [options] -- toolbin/pdf_info.ps infile.pdf ***\n\n) print
  167. showoptions
  168. quit
  169. } if
  170. } if
  171. /File where not {
  172. (\n *** Missing input file name \(use -sFile=____.pdf\)\n) =
  173. ( usage: gs -dNODISPLAY -q -sFile=____.pdf [ options ] toolbin/pdf_info.ps\n) =
  174. showoptions
  175. quit
  176. } if
  177. cleartomark % discard the dict from --where--
  178. % ---- No more executable code on the top level after this line -----
  179. % ---- except 2 lines at the very end -----
  180. /printXML { % <string> printXML -
  181. % print non-blank lines without trailing spaces
  182. dup dup length 1 sub -1 0 {
  183. 1 index 1 index get 32 eq {
  184. 0 exch getinterval exch
  185. } {
  186. exch = exit % non-blank on this line
  187. }
  188. ifelse
  189. } for
  190. pop pop % clean up
  191. } bind def
  192. /dump-pdf-info { % (fname) -> -
  193. () = ( ) print print ( has ) print
  194. PDFPageCount dup =print 10 mod 1 eq { ( page.\n) } { ( pages\n) } ifelse = flush
  195. /DumpXML where {
  196. pop
  197. Trailer /Root oget /Metadata knownoget {
  198. //false resolvestream
  199. { dup 256 string readline exch printXML not { exit } if } loop
  200. pop % done with the stream
  201. (_____________________________________________________________) =
  202. flush
  203. } if
  204. } if
  205. % Print out the "Info" dictionary if present
  206. Trailer /Info knownoget {
  207. dup /Title knownoget { (Title: ) print = flush } if
  208. dup /Author knownoget { (Author: ) print = flush } if
  209. dup /Subject knownoget { (Subject: ) print = flush } if
  210. dup /Keywords knownoget { (Keywords: ) print = flush } if
  211. dup /Creator knownoget { (Creator: ) print = flush } if
  212. dup /Producer knownoget { (Producer: ) print = flush } if
  213. dup /CreationDate knownoget { (CreationDate: ) print = flush } if
  214. dup /ModDate knownoget { (ModDate: ) print = flush } if
  215. dup /Trapped knownoget { (Trapped: ) print = flush } if
  216. pop
  217. } if
  218. } bind def
  219. % <page index> <page dict> dump-media-sizes -
  220. /dump-media-sizes {
  221. DumpMediaSizes {
  222. () =
  223. % Print out the Page Size info for each page.
  224. (Page ) print =print
  225. dup /UserUnit pget {
  226. ( UserUnit: ) print =print
  227. } if
  228. dup /MediaBox pget {
  229. ( MediaBox: ) print oforce_array ==only
  230. } if
  231. dup /CropBox pget {
  232. ( CropBox: ) print oforce_array ==only
  233. } if
  234. dup /BleedBox pget {
  235. ( BleedBox: ) print oforce_array ==only
  236. } if
  237. dup /TrimBox pget {
  238. ( TrimBox: ) print oforce_array ==only
  239. } if
  240. dup /ArtBox pget {
  241. ( ArtBox: ) print oforce_array ==only
  242. } if
  243. dup /Rotate pget {
  244. ( Rotate = ) print =print
  245. } if
  246. dup /Annots pget {
  247. pop
  248. ( Page contains Annotations) print
  249. } if
  250. pageusestransparency {
  251. ( Page uses transparency features) print
  252. } if
  253. () = flush
  254. }
  255. {
  256. pop pop
  257. } ifelse
  258. } bind def
  259. % List of standard font names for use when we are showing the FontsNeeded
  260. /StdFontNames [
  261. /Times-Roman /Helvetica /Courier /Symbol
  262. /Times-Bold /Helvetica-Bold /Courier-Bold /ZapfDingbats
  263. /Times-Italic /Helvetica-Oblique /Courier-Oblique
  264. /Times-BoldItalic /Helvetica-BoldOblique /Courier-BoldOblique
  265. ] def
  266. /res-type-dict 10 dict begin
  267. /Font {
  268. {
  269. exch pop oforce
  270. dup //null ne {
  271. dup /DescendantFonts knownoget {
  272. exch pop 0 get oforce
  273. } if
  274. dup /FontDescriptor knownoget {
  275. dup /FontFile known 1 index /FontFile2 known or exch /FontFile3 known or
  276. /ShowEmbeddedFonts where { pop pop //false } if {
  277. pop % skip embedded fonts
  278. } {
  279. /BaseFont knownoget { % not embedded
  280. 2 index exch //null put
  281. } if
  282. } ifelse
  283. } {
  284. /BaseFont knownoget { % no FontDescriptor, not embedded
  285. 2 index exch //null put
  286. } if
  287. } ifelse
  288. } {
  289. pop
  290. } ifelse
  291. } forall % traverse the dictionary
  292. } bind def
  293. /XObject {
  294. {
  295. exch pop oforce
  296. dup //null ne {
  297. dup /Subtype knownoget {
  298. /Form eq {
  299. /Resources knownoget {
  300. get-fonts-from-res
  301. } if
  302. } {
  303. pop
  304. } ifelse
  305. } {
  306. pop
  307. } ifelse
  308. } {
  309. pop
  310. } ifelse
  311. } forall
  312. } bind def
  313. /Pattern {
  314. {
  315. exch pop oforce
  316. dup //null ne {
  317. /Resources knownoget {
  318. get-fonts-from-res
  319. } if
  320. } {
  321. pop
  322. } ifelse
  323. } forall
  324. } bind def
  325. currentdict end readonly def
  326. % <dict for fonts> <<res-dict>> get-fonts-from-res -
  327. /get-fonts-from-res {
  328. oforce
  329. dup //null ne {
  330. {
  331. oforce
  332. dup //null ne {
  333. //res-type-dict 3 -1 roll
  334. .knownget {
  335. exec
  336. } {
  337. pop
  338. } ifelse
  339. } {
  340. pop pop
  341. } ifelse
  342. } forall
  343. } {
  344. pop
  345. } ifelse
  346. } bind def
  347. currentdict /res-type-dict undef
  348. /getPDFfonts { % <dict for fonts> <page dict> getPDFfonts -
  349. dup /Resources pget { get-fonts-from-res } if
  350. /Annots knownoget {
  351. { oforce
  352. dup //null ne {
  353. /AP knownoget {
  354. { exch pop oforce
  355. dup //null ne {
  356. dup /Resources knownoget {
  357. get-fonts-from-res
  358. } if
  359. { exch pop oforce
  360. dup type /dicttype eq {
  361. /Resources knownoget {
  362. get-fonts-from-res
  363. } if
  364. } {
  365. pop
  366. } ifelse
  367. } forall
  368. } {
  369. pop
  370. } ifelse
  371. } forall
  372. } if
  373. } {
  374. pop
  375. } ifelse
  376. } forall
  377. } if
  378. pop
  379. } bind def
  380. /dump-fonts-used { % <dict for fonts> dump-fonts-used -
  381. % If DumpFontsUsed is not true, then remove the "standard" fonts from the list
  382. systemdict /DumpFontsUsed known not {
  383. StdFontNames {
  384. 1 index 1 index known { 1 index 1 index undef } if
  385. pop
  386. } forall
  387. } if
  388. % Now dump the FontsUsed dict into an array so we can sort it.
  389. [ 1 index { pop } forall ]
  390. { 100 string cvs exch 100 string cvs exch lt } .sort
  391. systemdict /DumpFontsUsed known
  392. {
  393. (\nFont or CIDFont resources used:) =
  394. { = } forall
  395. } {
  396. DumpFontsNeeded {
  397. dup length 0 gt {
  398. (\nFonts Needed that are not embedded \(system fonts required\):) =
  399. { ( ) print = } forall
  400. } {
  401. pop
  402. (\nNo system fonts are needed.) =
  403. } ifelse
  404. } {
  405. pop
  406. } ifelse
  407. } ifelse
  408. pop
  409. } bind def
  410. % Copy selected subfiles to temporary files and return the file names
  411. % as a PostScript names to protect them from restore.
  412. % Currently, all PDF files in the Portfolio are extracted and returned.
  413. %
  414. % - pdf_collection_files [ /temp_file_name ... /temp_file_name
  415. /pdf_collection_files {
  416. mark
  417. Trailer /Root oget
  418. dup /Collection oknown {
  419. /Names knownoget {
  420. /EmbeddedFiles knownoget {
  421. pdf_collection_names
  422. } if
  423. } if
  424. } {
  425. pop
  426. } ifelse
  427. } bind def
  428. % Output all the info about the file
  429. /dump { % (title) -> -
  430. /PDFPageCount pdfpagecount def
  431. dump-pdf-info
  432. % dict will be populated with fonts through a call to "getPDFfonts"
  433. % per page, then the contents dumped out in "dump-fonts-used"
  434. 1000 dict
  435. 1 1 PDFPageCount
  436. {
  437. dup pdfgetpage dup 3 -1 roll
  438. dump-media-sizes
  439. 1 index exch getPDFfonts
  440. } for
  441. dump-fonts-used
  442. } bind def
  443. % Choose between collection vs plain file.
  444. % Enumerate collections and apply the dump procedure.
  445. /enum-pdfs { % - -> -
  446. File (r) file runpdfbegin
  447. pdf_collection_files
  448. dup mark eq {
  449. pop
  450. File dump
  451. runpdfend
  452. } {
  453. runpdfend
  454. ] 0 1 2 index length 1 sub {
  455. 2 copy get exch % [file ... ] file i
  456. 1 add (0123456789) cvs % [file ... ] file (i+1)
  457. File exch ( part ) exch concatstrings concatstrings
  458. exch % [file ... ] (fname part i+1) file
  459. dup type /filetype eq {
  460. runpdfbegin
  461. dump
  462. runpdfend
  463. closefile
  464. } {
  465. .namestring
  466. dup (r) file
  467. runpdfbegin
  468. exch dump
  469. runpdfend
  470. deletefile
  471. } ifelse
  472. } for
  473. pop
  474. } ifelse
  475. } bind def
  476. enum-pdfs
  477. end
  478. quit
  479. EOF
  480. # get data from gs script
  481. local identify="$("$GSBIN" -dNODISPLAY -q -sFile=../input-nup.pdf -dDumpMediaSizes -dDumpFontsNeeded=false -c "$PDFINFOGS" 2>/dev/null | grep MediaBox | head -n1)"
  482. identify="${identify##*MediaBox:}" # get page size only for 1st page
  483. # remove chars [ and ]
  484. identify="${identify//[}"
  485. identify="${identify//]}"
  486. identify=($identify) # make it an array
  487. # sanity
  488. if [[ ${#identify[@]} -lt 4 ]]; then
  489. echo "Error when reading the page size!"
  490. echo "The page size information is invalid!"
  491. exit 16
  492. fi
  493. PGWIDTH=$(printf '%.0f' "${identify[2]}") # assign
  494. PGHEIGHT=$(printf '%.0f' "${identify[3]}") # assign
  495. }
  496. # Gets page size using cat and grep
  497. getPageSize() {
  498. # get MediaBox info from PDF file using cat and grep, these are all possible
  499. # /MediaBox [0 0 595 841]
  500. # /MediaBox [ 0 0 595.28 841.89]
  501. # /MediaBox[ 0 0 595.28 841.89 ]
  502. # Get MediaBox data if possible
  503. local mediaBox="$(cat "$INFILEPDF" | grep -a '/MediaBox' | head -n1)"
  504. mediaBox="${mediaBox##*/MediaBox}"
  505. # If no MediaBox, try BBox
  506. if [[ -z $mediaBox ]]; then
  507. mediaBox="$(cat "$INFILEPDF" | grep -a '/BBox' | head -n1)"
  508. mediaBox="${mediaBox##*/BBox}"
  509. fi
  510. # No page size data available
  511. if [[ -z $mediaBox ]]; then
  512. echo "Error when reading input file!"
  513. echo "Could not determine the page size!"
  514. echo "There is no MediaBox or BBox in the pdf document!"
  515. echo "Aborting..."
  516. exit 15
  517. fi
  518. # remove chars [ and ]
  519. mediaBox="${mediaBox//[}"
  520. mediaBox="${mediaBox//]}"
  521. mediaBox=($mediaBox) # make it an array
  522. mbCount=${#mediaBox[@]} # array size
  523. # sanity
  524. if [[ $mbCount -lt 4 ]]; then
  525. echo "Error when reading the page size!"
  526. echo "The page size information is invalid!"
  527. exit 16
  528. fi
  529. # we are done
  530. PGWIDTH=$(printf '%.0f' "${mediaBox[2]}") # Get Round Width
  531. PGHEIGHT=$(printf '%.0f' "${mediaBox[3]}") # Get Round Height
  532. }
  533. # Parse options
  534. while getopts ":vichVs:" o; do
  535. case "${o}" in
  536. v)
  537. ((VERBOSE++))
  538. ;;
  539. h)
  540. printHelp
  541. exit 0
  542. ;;
  543. V)
  544. printVersion
  545. exit 0
  546. ;;
  547. s)
  548. parseScale ${OPTARG}
  549. ;;
  550. i)
  551. USEIMGMGK=$TRUE
  552. USECATGREP=$FALSE
  553. ;;
  554. c)
  555. USECATGREP=$TRUE
  556. USEIMGMGK=$FALSE
  557. ;;
  558. *)
  559. usage
  560. ;;
  561. esac
  562. done
  563. shift $((OPTIND-1))
  564. ######### START EXECUTION
  565. #Intro message
  566. vprint "$(basename $0) v$VERSION - Verbose execution"
  567. # Dependencies
  568. vprint "Checking for ghostscript and bcmath"
  569. command -v gs >/dev/null 2>&1 || printDependency 'ghostscript'
  570. command -v bc >/dev/null 2>&1 || printDependency 'bc'
  571. if [[ $USEIMGMGK -eq $TRUE ]]; then
  572. vprint "Checking for imagemagick's identify"
  573. command -v identify >/dev/null 2>&1 || printDependency 'imagemagick'
  574. IDBIN=$(which identify 2>/dev/null)
  575. fi
  576. # Get dependency binaries
  577. GSBIN=$(which gs 2>/dev/null)
  578. BCBIN=$(which bc 2>/dev/null)
  579. # Verbose scale info
  580. vprint " Scale factor: $SCALE"
  581. # Validate args
  582. [[ $# -lt 1 ]] && { usage; exit 1; }
  583. INFILEPDF="$1"
  584. [[ "$INFILEPDF" =~ ^..*\.pdf$ ]] || { usage; exit 2; }
  585. [[ -f "$INFILEPDF" ]] || { echo "Error! File not found: $INFILEPDF"; usage; exit 2; }
  586. vprint " Input file: $INFILEPDF"
  587. # Parse output filename
  588. if [[ -z $2 ]]; then
  589. OUTFILEPDF="${INFILEPDF%.pdf}.SCALED.pdf"
  590. else
  591. OUTFILEPDF="${2%.pdf}.pdf"
  592. fi
  593. vprint " Output file: $OUTFILEPDF"
  594. # Set PGWIDTH and PGHEIGHT
  595. if [[ $USEIMGMGK -eq $TRUE ]]; then
  596. getPageSizeImagemagick
  597. elif [[ $USECATGREP -eq $TRUE ]]; then
  598. getPageSize
  599. else
  600. getPageSizeGS
  601. fi
  602. vprint " Width: $PGWIDTH postscript-points"
  603. vprint " Height: $PGHEIGHT postscript-points"
  604. # Compute translation factors (to center page.
  605. XTRANS=$(echo "scale=6; 0.5*(1.0-$SCALE)/$SCALE*$PGWIDTH" | "$BCBIN")
  606. YTRANS=$(echo "scale=6; 0.5*(1.0-$SCALE)/$SCALE*$PGHEIGHT" | "$BCBIN")
  607. vprint " Translation X: $XTRANS"
  608. vprint " Translation Y: $YTRANS"
  609. # Do it.
  610. "$GSBIN" \
  611. -q -dNOPAUSE -dBATCH -sDEVICE=pdfwrite -dSAFER \
  612. -dCompatibilityLevel="1.5" -dPDFSETTINGS="/printer" \
  613. -dColorConversionStrategy=/LeaveColorUnchanged \
  614. -dSubsetFonts=true -dEmbedAllFonts=true \
  615. -dDEVICEWIDTH=$PGWIDTH -dDEVICEHEIGHT=$PGHEIGHT \
  616. -sOutputFile="$OUTFILEPDF" \
  617. -c "<</BeginPage{$SCALE $SCALE scale $XTRANS $YTRANS translate}>> setpagedevice" \
  618. -f "$INFILEPDF"