Coverage for pdfrw/pdfrw/pagemerge.py: 21%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# A part of pdfrw (https://github.com/pmaupin/pdfrw)
2# Copyright (C) 2015 Patrick Maupin, Austin, Texas
3# MIT license -- See LICENSE.txt for details
5'''
6This module contains code to edit pages. Sort of a canvas, I
7suppose, but I wouldn't want to call it that and get people all
8excited or anything.
10No, this is just for doing basic things like merging/splitting
11apart pages, watermarking, etc. All it does is allow converting
12pages (or parts of pages) into Form XObject rectangles, and then
13plopping those down on new or pre-existing pages.
14'''
16from .objects import PdfDict, PdfArray, PdfName
17from .buildxobj import pagexobj, ViewInfo
# Shared default ViewInfo.  RectXObj.__init__ uses it as the default for
# its ``viewinfo`` parameter and compares against it by identity to tell
# whether the caller supplied a ViewInfo of their own.
NullInfo = ViewInfo()
class RectXObj(PdfDict):
    ''' This class facilitates doing positioning (moving and scaling)
        of Form XObjects within their containing page, by modifying
        the Form XObject's transformation matrix.

        By default, this class keeps the aspect ratio locked.  For
        example, if your object is foo, you can write 'foo.w = 200',
        and it will scale in both the x and y directions.

        To unlock the aspect ratio, you have to do a tiny bit of math
        and call the scale function.
    '''
    def __init__(self, page, viewinfo=NullInfo, **kw):
        ''' The page is a page returned by PdfReader.  It will be
            turned into a cached Form XObject (so that multiple
            rectangles can be extracted from it if desired), and then
            another Form XObject will be built using it and the viewinfo
            (which should be a ViewInfo class).  The viewinfo includes
            source coordinates (from the top/left) and rotation information.

            Instead of a ViewInfo, keyword arguments may be given; they
            are passed to ViewInfo() to build one.  Supplying both a
            ViewInfo and keyword arguments raises ValueError.

            Once the object has been built, its destination coordinates
            may be examined and manipulated by using x, y, w, h, and
            scale.  The destination coordinates are in the normal
            PDF programmatic system (starting at bottom left).
        '''
        if kw:
            if viewinfo is not NullInfo:
                raise ValueError("Cannot modify preexisting ViewInfo")
            viewinfo = ViewInfo(**kw)
        # NOTE(review): this also sets the flag on the shared NullInfo
        # default (always to the same value) -- presumably so pagexobj
        # hands back an object this instance is free to modify; confirm
        # against buildxobj.
        viewinfo.cacheable = False
        base = pagexobj(page, viewinfo)
        self.update(base)
        self.indirect = True
        self.stream = base.stream
        private = self.private
        # Destination rectangle as [x, y, w, h]; mutated in place by the
        # x/y/w/h setters and by scale().
        private._rect = [base.x, base.y, base.w, base.h]
        matrix = self.Matrix
        if matrix is None:
            matrix = self.Matrix = PdfArray((1, 0, 0, 1, 0, 0))
        private._matrix = matrix  # Lookup optimization
        # Default to lower-left corner
        self.x = 0
        self.y = 0

    @property
    def x(self):
        ''' X location (from left) of object in points
        '''
        return self._rect[0]

    @property
    def y(self):
        ''' Y location (from bottom) of object in points
        '''
        return self._rect[1]

    @property
    def w(self):
        ''' Width of object in points
        '''
        return self._rect[2]

    @property
    def h(self):
        ''' Height of object in points
        '''
        return self._rect[3]

    def __setattr__(self, name, value, next=PdfDict.__setattr__,
                    mine=set('x y w h'.split())):
        ''' The underlying __setitem__ won't let us use a property
            setter, so we have to fake one.

            Assigning x or y moves the object: the rectangle entry is
            replaced and the matching translation entry of the matrix
            (index 4 for x, 5 for y) is shifted by the same delta.

            Assigning w or h rescales the object uniformly (both axes)
            by the ratio of the new value to the current width or
            height respectively.

            Any other attribute name is delegated to PdfDict.
        '''
        if name not in mine:
            return next(self, name, value)
        if name in 'xy':
            r_index, m_index = (0, 4) if name == 'x' else (1, 5)
            self._rect[r_index], old = value, self._rect[r_index]
            self._matrix[m_index] += value - old
        else:
            # Select the rectangle slot from the attribute *name*
            # (2 -> width, 3 -> height).  Testing ``value`` here would
            # always pick the width, so 'obj.h = N' would scale against
            # the wrong dimension.
            index = 2 + (name == 'h')
            self.scale(value / self._rect[index])

    def scale(self, x_scale, y_scale=None):
        ''' Scale the object by x_scale horizontally and y_scale
            vertically (y_scale defaults to x_scale, which keeps the
            aspect ratio).

            Current scaling deals properly with things that
            have been rotated in 90 degree increments
            (via the ViewInfo object given when instantiating).
        '''
        if y_scale is None:
            y_scale = x_scale
        x, y, w, h = rect = self._rect
        ao, bo, co, do, eo, fo = matrix = self._matrix
        an = ao * x_scale
        bn = bo * y_scale
        cn = co * x_scale
        dn = do * y_scale
        # (an + cn) / (ao + co) reduces to x_scale (likewise y_scale
        # below), so the translation's offset from the rectangle origin
        # is scaled by the same factor as the rectangle itself.  The
        # 1.0 forces true division for integer operands.
        en = x + (eo - x) * 1.0 * (an + cn) / (ao + co)
        fn = y + (fo - y) * 1.0 * (bn + dn) / (bo + do)
        # Update in place so self.Matrix and the cached _matrix / _rect
        # references keep pointing at the same objects.
        matrix[:] = an, bn, cn, dn, en, fn
        rect[:] = x, y, w * x_scale, h * y_scale

    @property
    def box(self):
        ''' Return the bounding box for the object
            as a PdfArray [x, y, x + w, y + h]
        '''
        x, y, w, h = self._rect
        return PdfArray([x, y, x + w, y + h])
class PageMerge(list):
    ''' A PageMerge object can have 0 or 1 underlying pages
        (that get edited with the results of the merge)
        and 0-n RectXObjs that can be applied before or
        after the underlying page.

        The list itself holds the RectXObjs in drawing order.  When an
        underlying page has been set, a single None entry marks where
        that page's own contents fall in the sequence.
    '''
    page = None
    mbox = None
    cbox = None
    resources = None
    rotate = None
    contents = None

    def __init__(self, page=None):
        ''' Optionally start from an existing page (see setpage).
        '''
        if page is not None:
            self.setpage(page)

    def setpage(self, page):
        ''' Use page as the underlying page that render() will edit.

            Appends the None placeholder to the list and records the
            page's MediaBox, CropBox, Resources and Rotate (read through
            page.inheritable) along with its Contents.

            Raises TypeError if page.Type is not /Page.
        '''
        if page.Type != PdfName.Page:
            raise TypeError("Expected page")
        self.append(None)  # Placeholder
        self.page = page
        inheritable = page.inheritable
        self.mbox = inheritable.MediaBox
        self.cbox = inheritable.CropBox
        self.resources = inheritable.Resources
        self.rotate = inheritable.Rotate
        self.contents = page.Contents

    def __add__(self, other):
        ''' 'merge + obj' or 'merge + [obj, ...]': add each object via
            add().  Note that this modifies and returns self rather
            than building a new list.
        '''
        if isinstance(other, dict):
            # A single page / XObject (these are dict subclasses).
            other = [other]
        for item in other:
            self.add(item)
        return self

    def add(self, obj, prepend=False, **kw):
        ''' Add obj to the merge and return self.

            If keyword arguments are given, obj is wrapped as
            RectXObj(obj, **kw); otherwise, if obj.Type is /Page it is
            wrapped as RectXObj(obj).  Anything else is stored as-is.

            With prepend true the object is inserted at the front of
            the list instead of appended at the end.
        '''
        if kw:
            obj = RectXObj(obj, **kw)
        elif obj.Type == PdfName.Page:
            obj = RectXObj(obj)
        if prepend:
            self.insert(0, obj)
        else:
            self.append(obj)
        return self

    def render(self):
        ''' Build the merged page and return it.

            Each XObject in the list is registered in the page's
            /Resources /XObject dictionary under a fresh /pdfrw_<n> key
            and drawn with a 'Do' operator.  Objects listed before the
            None placeholder are drawn before the original contents,
            the rest after.  If there is no underlying page a new page
            dictionary is created, and if there is no MediaBox one is
            derived from xobj_box.

            Raises KeyError if a generated XObject key is already taken.
        '''
        def do_xobjs(xobj_list, restore_first=False):
            # Register every object as an XObject resource and return
            # a content stream that draws them in order.
            content = ['Q'] if restore_first else []
            for obj in xobj_list:
                index = PdfName('pdfrw_%d' % (key_offset + len(xobjs)))
                if xobjs.setdefault(index, obj) is not obj:
                    raise KeyError("XObj key %s already in use" % index)
                content.append('%s Do' % index)
            return PdfDict(indirect=True, stream='\n'.join(content))

        mbox = self.mbox
        cbox = self.cbox
        page = self.page
        old_contents = self.contents
        resources = self.resources or PdfDict()

        key_offset = 0
        xobjs = resources.XObject
        if xobjs is None:
            xobjs = resources.XObject = PdfDict()
        else:
            allkeys = xobjs.keys()
            if allkeys:
                # Continue numbering after the highest existing
                # /pdfrw_<n> key.  do_xobjs adds len(xobjs) to the
                # offset, so the current key count is subtracted here.
                keys = (x for x in allkeys if x.startswith('/pdfrw_'))
                keys = (x for x in keys if x[7:].isdigit())
                keys = sorted(keys, key=lambda x: int(x[7:]))
                key_offset = (int(keys[-1][7:]) + 1) if keys else 0
                key_offset -= len(allkeys)

        if old_contents is None:
            # The list may still hold the None placeholder (a page was
            # set but it has no Contents).  It is not an XObject, so it
            # must not be registered as a resource or drawn.
            new_contents = do_xobjs([x for x in self if x is not None])
        else:
            isdict = isinstance(old_contents, PdfDict)
            old_contents = [old_contents] if isdict else old_contents
            new_contents = PdfArray()
            index = self.index(None)
            if index:
                new_contents.append(do_xobjs(self[:index]))

            index += 1
            if index < len(self):
                # There are elements to add after the original page contents,
                # so push the graphics state to the stack. Restored below.
                new_contents.append(PdfDict(indirect=True, stream='q'))

            new_contents.extend(old_contents)

            if index < len(self):
                # Restore graphics state and add other elements.
                new_contents.append(do_xobjs(self[index:], restore_first=True))

        if mbox is None:
            # No MediaBox to reuse: enclose every XObject, extending
            # the box to the origin when the objects lie beyond it.
            cbox = None
            mbox = self.xobj_box
            mbox[0] = min(0, mbox[0])
            mbox[1] = min(0, mbox[1])

        page = PdfDict(indirect=True) if page is None else page
        page.Type = PdfName.Page
        page.Resources = resources
        page.MediaBox = mbox
        page.CropBox = cbox
        page.Rotate = self.rotate
        page.Contents = new_contents
        return page

    @property
    def xobj_box(self):
        ''' Return the smallest box that encloses every object
            in the list.

            The None placeholder for the underlying page is skipped.
            Raises ValueError if there is no object to measure.
        '''
        boxes = [xobj.box for xobj in self if xobj is not None]
        if not boxes:
            raise ValueError("No XObjects to compute a bounding box from")
        a, b, c, d = zip(*boxes)
        return PdfArray((min(a), min(b), max(c), max(d)))