Coverage for pdfrw/pdfrw/objects/pdfdict.py: 83%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

110 statements  

1# A part of pdfrw (https://github.com/pmaupin/pdfrw) 

2# Copyright (C) 2006-2015 Patrick Maupin, Austin, Texas 

3# MIT license -- See LICENSE.txt for details 

4 

5from .pdfname import PdfName, BasePdfName 

6from .pdfindirect import PdfIndirect 

7from .pdfobject import PdfObject 

8from ..py23_diffs import iteritems 

9from ..errors import PdfParseError 

10 

11 

class _DictSearch(object):
    ''' Used to search for inheritable attributes.

        Wraps a dictionary and, when a key is looked up, walks up
        the chain of Parent entries until a non-None value is found.
    '''

    def __init__(self, basedict):
        ''' basedict is the dictionary where every search starts.
        '''
        self.basedict = basedict

    def __getattr__(self, name, PdfName=PdfName):
        ''' Attribute access is a shortcut for item access, e.g.
            search.Rotate is the same as search[PdfName('Rotate')].
        '''
        return self[PdfName(name)]

    def __getitem__(self, name, set=set, getattr=getattr, id=id):
        ''' Return the first non-None value stored under name in the
            starting dictionary or in one of its ancestors (reached
            through the Parent attribute).

            Returns None if the top of the chain is reached without
            finding a value.

            Raises AssertionError if the Parent chain loops back onto
            a dictionary that was already searched.
        '''
        visited = set()
        mydict = self.basedict
        while True:
            value = mydict[name]
            if value is not None:
                return value
            myid = id(mydict)
            # This used to be an assert statement.  Asserts are removed
            # when Python runs with -O, which would turn a circular
            # Parent chain into an endless loop, so the check is now an
            # explicit raise of the same exception type.
            if myid in visited:
                raise AssertionError(
                    'Circular Parent chain while searching for %s' %
                    repr(name))
            visited.add(myid)
            mydict = mydict.Parent
            if mydict is None:
                return None

35 

36 

class _Private(object):
    ''' Write-only proxy used to store private attributes
        (not output to PDF files) on PdfDict classes.

        Any attribute assigned on the proxy is written directly into
        the attribute dictionary of the wrapped object.
    '''

    def __init__(self, pdfdict):
        # Our own __setattr__ redirects every write to the wrapped
        # object, so the reference to that object has to be stored
        # through the base class implementation instead.
        object.__setattr__(self, 'pdfdict', pdfdict)

    def __setattr__(self, name, value):
        # Write straight into the target's attribute dictionary,
        # without going through the target's own __setattr__.
        target_attrs = vars(self.pdfdict)
        target_attrs[name] = value

47 

48 

class PdfDict(dict):
    ''' PdfDict objects are subclassed dictionaries
        with the following features:

        - Every key in the dictionary starts with "/"

        - A dictionary item can be deleted by assigning it to None

        - Looking up a key that is not present returns None
          instead of raising KeyError

        - Keys that (after the initial "/") conform to Python
          naming conventions can also be accessed (set and retrieved)
          as attributes of the dictionary.  E.g.  mydict.Page is the
          same thing as mydict['/Page']

        - Private attributes (not in the PDF space) can be set
          on the dictionary object attribute dictionary by using
          the private attribute:

                mydict.private.foo = 3
                mydict.foo = 5
                x = mydict.foo       # x will now contain 3
                y = mydict['/foo']   # y will now contain 5

          Most standard adobe dictionary keys start with an upper case
          letter, so to avoid conflicts, it is best to start private
          attributes with lower case letters.

        - PdfDicts have the following read-only properties:

            - private -- as discussed above, provides write access to
                         dictionary's attributes
            - inheritable -- this creates and returns a "view" attribute
                         that will search through the object hierarchy
                         for any desired attribute, such as /Rotate or
                         /MediaBox

        - PdfDicts also have the following special attributes:
            - indirect is not stored in the PDF dictionary, but in the
              object's attribute dictionary
            - stream is also stored in the object's attribute dictionary
              and will also update the stream length.
            - _stream will store in the object's attribute dictionary
              without updating the stream length.

            It is possible, for example, to have a PDF name such as
            "/indirect" or "/stream", but you cannot access such a name
            as an attribute:

                mydict.indirect -- accesses object's attribute dictionary
                mydict["/indirect"] -- accesses actual PDF dictionary
    '''
    indirect = False
    stream = None

    # Maps each special attribute name to a pair: the name actually
    # used in the object's attribute dictionary, and whether setting
    # it must also update the Length entry.
    _special = dict(indirect=('indirect', False),
                    stream=('stream', True),
                    _stream=('stream', False),
                    )

    def __setitem__(self, name, value, setter=dict.__setitem__,
                    BasePdfName=BasePdfName, isinstance=isinstance):
        ''' Store value under name; a value of None deletes the
            entry if it exists.

            Raises PdfParseError if name is not a BasePdfName.
        '''
        if not isinstance(name, BasePdfName):
            raise PdfParseError('Dict key %s is not a PdfName' % repr(name))
        if value is not None:
            setter(self, name, value)
        elif name in self:
            del self[name]

    def __init__(self, *args, **kw):
        ''' Positional arguments are handed to dict.update() (a single
            argument is passed as is).  When that single argument is a
            PdfDict, its indirect flag and stream are copied as well.

            Keyword arguments are set as attributes, so they end up in
            the PDF dictionary unless they are special attributes.
        '''
        if args:
            if len(args) == 1:
                args = args[0]
            self.update(args)
            if isinstance(args, PdfDict):
                self.indirect = args.indirect
                # _stream copies the stream without touching Length,
                # which update() already took from the source.
                self._stream = args.stream
        for key, value in iteritems(kw):
            setattr(self, key, value)

    def __getattr__(self, name, PdfName=PdfName):
        ''' If the attribute doesn't exist on the dictionary object,
            try to slap a '/' in front of it and get it out
            of the actual dictionary itself.
        '''
        return self.get(PdfName(name))

    def get(self, key, dictget=dict.get, isinstance=isinstance,
            PdfIndirect=PdfIndirect):
        ''' Get a value out of the dictionary,
            after resolving any indirect objects.

            Returns None if the key is not present.  A resolved value
            replaces the indirect object in the dictionary; if it
            resolves to None the entry is removed.
        '''
        value = dictget(self, key)
        if isinstance(value, PdfIndirect):
            # We used to use self[key] here, but that does an
            # unwanted check on the type of the key (github issue #98).
            # Python will keep the old key object in the dictionary,
            # so that check is not necessary.
            value = value.real_value()
            if value is not None:
                dict.__setitem__(self, key, value)
            else:
                del self[key]
        return value

    def __getitem__(self, key):
        ''' Same as get(): missing keys give None, not KeyError.
        '''
        return self.get(key)

    def __setattr__(self, name, value, special=_special.get,
                    PdfName=PdfName, vars=vars):
        ''' Set an attribute on the dictionary.  Handle the keywords
            indirect, stream, and _stream specially (for content objects)
        '''
        info = special(name)
        if info is None:
            self[PdfName(name)] = value
        else:
            name, setlen = info
            vars(self)[name] = value
            if setlen:
                # Keep Length in step with the stream; a stream of
                # None removes the Length entry.
                self.Length = (PdfObject(len(value))
                               if value is not None else None)

    def iteritems(self, dictiter=iteritems,
                  isinstance=isinstance, PdfIndirect=PdfIndirect,
                  BasePdfName=BasePdfName):
        ''' Iterate over the dictionary, resolving any unresolved objects

            Entries whose value resolves to None are dropped and not
            yielded.  Raises PdfParseError for a key that is not
            a BasePdfName.
        '''
        # Iterate over a snapshot, because resolving values
        # modifies the dictionary.
        for key, value in list(dictiter(self)):
            if isinstance(value, PdfIndirect):
                self[key] = value = value.real_value()
            if value is not None:
                if not isinstance(key, BasePdfName):
                    raise PdfParseError('Dict key %s is not a PdfName' %
                                        repr(key))
                yield key, value

    def items(self):
        ''' Return a list of (key, value) pairs with resolved values.
        '''
        return list(self.iteritems())

    def itervalues(self):
        ''' Iterate over the resolved values.
        '''
        for key, value in self.iteritems():
            yield value

    def values(self):
        ''' Return a list of the resolved values.
        '''
        return [value for key, value in self.iteritems()]

    def keys(self):
        ''' Return a list of the keys (values are resolved on the way).
        '''
        return [key for key, value in self.iteritems()]

    def __iter__(self):
        ''' Iterate over the keys (values are resolved on the way).
        '''
        for key, value in self.iteritems():
            yield key

    def iterkeys(self):
        ''' Same as iterating over the dictionary itself.
        '''
        return iter(self)

    def copy(self):
        ''' Return a shallow copy that has the same class.
        '''
        return type(self)(self)

    def pop(self, key, *default):
        ''' Remove key and return its value, after resolving
            any indirect object.

            As with dict.pop(), an optional second argument is
            returned when the key is not present; without it,
            a missing key raises KeyError.
        '''
        if len(default) > 1:
            raise TypeError('pop expected at most 2 arguments, got %d' %
                            (len(default) + 1))
        # get() removes the entry by itself if the value resolves
        # to None, so presence is checked after the lookup.
        value = self.get(key)
        if key in self:
            del self[key]
            return value
        if default:
            return default[0]
        raise KeyError(key)

    def popitem(self):
        ''' Remove an entry and return it as a (key, value) pair,
            after resolving any indirect object.

            Raises KeyError if the dictionary is empty.
        '''
        # This used to call dict.pop(self), which always failed with
        # TypeError because dict.pop() requires a key.
        key, value = dict.popitem(self)
        if isinstance(value, PdfIndirect):
            value = value.real_value()
        return key, value

    @property
    def inheritable(self):
        ''' Search through ancestors as needed for inheritable
            dictionary items.
            NOTE:  You might think it would be a good idea
            to cache this class, but then you'd have to worry
            about it pointing to the wrong dictionary if you
            made a copy of the object...
        '''
        return _DictSearch(self)

    @property
    def private(self):
        ''' Allows setting private metadata for use in
            processing (not sent to PDF file).
            A new helper object is created on every access, for
            the reason given in the note on inheritable.
        '''
        return _Private(self)

234 

235 

class IndirectPdfDict(PdfDict):
    ''' Convenience subclass of PdfDict whose indirect
        attribute defaults to True.

        Creating an IndirectPdfDict gives the same result as
        creating a direct PdfDict and then setting indirect = True
        on it.
    '''
    # Class-level default; PdfDict itself defaults to False.
    indirect = True