Coverage for pdfrw/pdfrw/pagemerge.py: 93%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

153 statements  

1# A part of pdfrw (https://github.com/pmaupin/pdfrw) 

2# Copyright (C) 2015 Patrick Maupin, Austin, Texas 

3# MIT license -- See LICENSE.txt for details 

4 

5''' 

6This module contains code to edit pages. Sort of a canvas, I 

7suppose, but I wouldn't want to call it that and get people all 

8excited or anything. 

9 

10No, this is just for doing basic things like merging/splitting 

11apart pages, watermarking, etc. All it does is allow converting 

12pages (or parts of pages) into Form XObject rectangles, and then 

13plopping those down on new or pre-existing pages. 

14''' 

15 

16from .objects import PdfDict, PdfArray, PdfName 

17from .buildxobj import pagexobj, ViewInfo 

18 

# Shared default ViewInfo.  RectXObj.__init__ uses it as its default
# ``viewinfo`` argument and compares against it by identity to detect
# that the caller did not pass a ViewInfo of their own.
NullInfo = ViewInfo()

20 

21 

class RectXObj(PdfDict):
    ''' This class facilitates doing positioning (moving and scaling)
        of Form XObjects within their containing page, by modifying
        the Form XObject's transformation matrix.

        By default, this class keeps the aspect ratio locked.  For
        example, if your object is foo, you can write 'foo.w = 200',
        and it will scale in both the x and y directions.

        To unlock the aspect ratio, you have to do a tiny bit of math
        and call the scale function.
    '''
    def __init__(self, page, viewinfo=NullInfo, **kw):
        ''' The page is a page returned by PdfReader.  It will be
            turned into a cached Form XObject (so that multiple
            rectangles can be extracted from it if desired), and then
            another Form XObject will be built using it and the viewinfo
            (which should be a ViewInfo class).  The viewinfo includes
            source coordinates (from the top/left) and rotation information.

            Instead of a viewinfo, keyword arguments may be given; they
            are passed straight to ViewInfo().  Supplying both raises
            ValueError.

            Once the object has been built, its destination coordinates
            may be examined and manipulated by using x, y, w, h, and
            scale.  The destination coordinates are in the normal
            PDF programmatic system (starting at bottom left).
        '''
        if kw:
            if viewinfo is not NullInfo:
                raise ValueError("Cannot modify preexisting ViewInfo")
            viewinfo = ViewInfo(**kw)
        # This object edits the Matrix array it shares with ``base`` (see
        # below), so ask for an xobj that is not shared through a cache.
        # NOTE(review): the effect of ``cacheable`` lives in buildxobj --
        # confirm there.
        viewinfo.cacheable = False
        base = pagexobj(page, viewinfo)
        self.update(base)
        self.indirect = True
        self.stream = base.stream
        private = self.private
        # Destination rectangle as a mutable [x, y, w, h] list.
        private._rect = [base.x, base.y, base.w, base.h]
        matrix = self.Matrix
        if matrix is None:
            # No matrix supplied: start from the identity transform.
            matrix = self.Matrix = PdfArray((1, 0, 0, 1, 0, 0))
        private._matrix = matrix  # Lookup optimization
        # Default to lower-left corner
        self.x = 0
        self.y = 0

    @property
    def x(self):
        ''' X location (from left) of object in points
        '''
        return self._rect[0]

    @property
    def y(self):
        ''' Y location (from bottom) of object in points
        '''
        return self._rect[1]

    @property
    def w(self):
        ''' Width of object in points
        '''
        return self._rect[2]

    @property
    def h(self):
        ''' Height of object in points
        '''
        return self._rect[3]

    def __setattr__(self, name, value, next=PdfDict.__setattr__,
                    mine=set('x y w h'.split())):
        ''' The underlying __setitem__ won't let us use a property
            setter, so we have to fake one.

            Assigning x or y moves the object: the rectangle origin and
            the matching translation entry of the matrix shift by the
            same amount.  Assigning w or h rescales the object uniformly
            (aspect ratio locked) so that the named dimension takes the
            requested value.  Any other attribute name is handed to
            PdfDict.__setattr__.
        '''
        if name not in mine:
            return next(self, name, value)
        if name in 'xy':
            r_index, m_index = (0, 4) if name == 'x' else (1, 5)
            old = self._rect[r_index]
            self._rect[r_index] = value
            self._matrix[m_index] += value - old
        else:
            # Choose the width (2) or height (3) slot from the attribute
            # *name*.  Testing ``value`` here, as the code once did, is
            # always false for a number and made 'foo.h = n' scale
            # against the width.
            index = 2 + (name == 'h')
            # The 1.0 forces float division on Python 2, as in scale().
            self.scale(1.0 * value / self._rect[index])

    def scale(self, x_scale, y_scale=None):
        ''' Multiply the object's size by x_scale horizontally and
            y_scale vertically.  If y_scale is omitted, x_scale is
            used for both directions.

            Current scaling deals properly with things that
            have been rotated in 90 degree increments
            (via the ViewInfo object given when instantiating).
        '''
        if y_scale is None:
            y_scale = x_scale
        x, y, w, h = rect = self._rect
        ao, bo, co, do, eo, fo = matrix = self._matrix
        an = ao * x_scale
        bn = bo * y_scale
        cn = co * x_scale
        dn = do * y_scale
        # Rescale the translation's distance from the rectangle origin by
        # the same ratio as the matrix entries, so (x, y) stays put.
        # NOTE(review): divides by (ao + co) and (bo + do); these are
        # nonzero for unrotated and 90-degree-rotated matrices, other
        # matrices are not guarded against.
        en = x + (eo - x) * 1.0 * (an + cn) / (ao + co)
        fn = y + (fo - y) * 1.0 * (bn + dn) / (bo + do)
        # Slice-assign so the shared matrix and rect objects are updated
        # in place rather than replaced.
        matrix[:] = an, bn, cn, dn, en, fn
        rect[:] = x, y, w * x_scale, h * y_scale

    @property
    def box(self):
        ''' Return the bounding box for the object
            as a PdfArray [left, bottom, right, top].
        '''
        x, y, w, h = self._rect
        return PdfArray([x, y, x + w, y + h])

129 

130 

class PageMerge(list):
    ''' A PageMerge object can have 0 or 1 underlying pages
        (that get edited with the results of the merge)
        and 0-n RectXObjs that can be applied before or
        after the underlying page.

        The list itself holds the RectXObjs in drawing order.  When
        an underlying page has been set, a None entry marks where the
        page's own contents fall in that order.
    '''
    # Defaults used when no underlying page has been set.
    page = None
    mbox = None
    cbox = None
    resources = None
    rotate = None
    contents = None

    def __init__(self, page=None):
        ''' Optionally start from an existing page (see setpage).
        '''
        if page is not None:
            self.setpage(page)

    def setpage(self, page):
        ''' Make page the underlying page of the merge.

            Appends the None placeholder for the page contents and
            records the page's boxes, resources, rotation and contents.
            Raises TypeError if page.Type is not /Page.
        '''
        if page.Type != PdfName.Page:
            raise TypeError("Expected page")
        self.append(None)  # Placeholder
        self.page = page
        # NOTE(review): ``inheritable`` presumably resolves attributes
        # inherited from parent page-tree nodes -- confirm in PdfDict.
        inheritable = page.inheritable
        self.mbox = inheritable.MediaBox
        self.cbox = inheritable.CropBox
        self.resources = inheritable.Resources
        self.rotate = inheritable.Rotate
        self.contents = page.Contents

    def __add__(self, other):
        ''' Add one object (any dict) or an iterable of objects via
            add().  Unlike list.__add__, this modifies self in place
            and returns self.
        '''
        items = [other] if isinstance(other, dict) else other
        for item in items:
            self.add(item)
        return self

    def add(self, obj, prepend=False, **kw):
        ''' Add obj to the merge and return self.

            If keyword arguments are given, or obj is a page, obj is
            first wrapped in a RectXObj (the keywords are passed on to
            it).  With prepend true the object goes to the front of
            the list instead of the end.
        '''
        if kw:
            obj = RectXObj(obj, **kw)
        elif obj.Type == PdfName.Page:
            obj = RectXObj(obj)
        if prepend:
            self.insert(0, obj)
        else:
            self.append(obj)
        return self

    def render(self):
        ''' Build the merged page and return it.

            Every RectXObj is registered in the page resources under a
            fresh /pdfrw_<n> name and drawn with a 'Do' operator, before
            or after the original page contents according to its
            position relative to the None placeholder.  If there is no
            underlying page, a new page dictionary is created.  If
            there is no MediaBox, one is computed from the XObjects.
        '''
        def do_xobjs(xobj_list, restore_first=False):
            # Build one content stream that draws each xobj in turn.
            content = ['Q'] if restore_first else []
            for obj in xobj_list:
                if obj is None:
                    # Page placeholder: reached when the page has no
                    # Contents and the whole list is passed in.  It is
                    # not an XObject, so it must not be named or drawn.
                    continue
                index = PdfName('pdfrw_%d' % (key_offset + len(xobjs)))
                if xobjs.setdefault(index, obj) is not obj:
                    raise KeyError("XObj key %s already in use" % index)
                content.append('%s Do' % index)
            return PdfDict(indirect=True, stream='\n'.join(content))

        mbox = self.mbox
        cbox = self.cbox
        page = self.page
        old_contents = self.contents
        resources = self.resources or PdfDict()

        # New names are 'pdfrw_<key_offset + len(xobjs)>'.  key_offset is
        # chosen so the first new number is one past the highest
        # /pdfrw_<n> already present (or 0 if there is none).
        key_offset = 0
        xobjs = resources.XObject
        if xobjs is None:
            xobjs = resources.XObject = PdfDict()
        else:
            allkeys = xobjs.keys()
            if allkeys:
                keys = (x for x in allkeys if x.startswith('/pdfrw_'))
                keys = (x for x in keys if x[7:].isdigit())
                keys = sorted(keys, key=lambda x: int(x[7:]))
                key_offset = (int(keys[-1][7:]) + 1) if keys else 0
                key_offset -= len(allkeys)

        if old_contents is None:
            new_contents = do_xobjs(self)
        else:
            isdict = isinstance(old_contents, PdfDict)
            old_contents = [old_contents] if isdict else old_contents
            new_contents = PdfArray()
            index = self.index(None)
            if index:
                new_contents.append(do_xobjs(self[:index]))

            index += 1
            if index < len(self):
                # There are elements to add after the original page contents,
                # so push the graphics state to the stack. Restored below.
                new_contents.append(PdfDict(indirect=True, stream='q'))

            new_contents.extend(old_contents)

            if index < len(self):
                # Restore graphics state and add other elements.
                new_contents.append(do_xobjs(self[index:], restore_first=True))

        if mbox is None:
            # No MediaBox known: enclose all XObjects, and extend the box
            # to include the origin on the left and bottom.
            cbox = None
            mbox = self.xobj_box
            mbox[0] = min(0, mbox[0])
            mbox[1] = min(0, mbox[1])

        page = PdfDict(indirect=True) if page is None else page
        page.Type = PdfName.Page
        page.Resources = resources
        page.MediaBox = mbox
        page.CropBox = cbox
        page.Rotate = self.rotate
        page.Contents = new_contents
        return page

    @property
    def xobj_box(self):
        ''' Return the smallest box that encloses every object
            in the list, as a PdfArray [left, bottom, right, top].

            The None placeholder for the underlying page has no box
            and is skipped.  Raises ValueError if the list holds no
            objects to measure.
        '''
        boxes = [xobj.box for xobj in self if xobj is not None]
        if not boxes:
            raise ValueError(
                "Cannot compute a bounding box without any XObjects")
        lefts, bottoms, rights, tops = zip(*boxes)
        return PdfArray((min(lefts), min(bottoms), max(rights), max(tops)))