Coverage for pdfrw/pdfrw/objects/pdfdict.py: 85%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# A part of pdfrw (https://github.com/pmaupin/pdfrw)
2# Copyright (C) 2006-2015 Patrick Maupin, Austin, Texas
3# MIT license -- See LICENSE.txt for details
5from .pdfname import PdfName, BasePdfName
6from .pdfindirect import PdfIndirect
7from .pdfobject import PdfObject
8from ..py23_diffs import iteritems
9from ..errors import PdfParseError
class _DictSearch(object):
    ''' Used to search for inheritable attributes.

        Wraps a dictionary and looks a key up on it; when the key
        is missing (the lookup yields None), the search continues
        on the dictionary's Parent, then on that one's Parent, and
        so on until a value is found or there is no Parent left.
    '''

    def __init__(self, basedict):
        # The dictionary at which every search starts.
        self.basedict = basedict

    def __getattr__(self, name, PdfName=PdfName):
        ''' Attribute access is a shorthand for item access:
            the attribute name is converted with PdfName first.
        '''
        return self[PdfName(name)]

    def __getitem__(self, name, set=set, getattr=getattr, id=id):
        ''' Return the first non-None value stored under name in
            the base dictionary or one of its ancestors, or None
            if the Parent chain ends without finding one.

            Raises AssertionError if the Parent chain loops back
            onto a dictionary that was already searched.
        '''
        visited = set()
        mydict = self.basedict
        while True:
            value = mydict[name]
            if value is not None:
                return value
            myid = id(mydict)
            # Raise explicitly rather than using an assert statement:
            # asserts are stripped under "python -O", which would turn
            # a cyclic Parent chain into an endless loop.
            if myid in visited:
                raise AssertionError(
                    'Cycle detected in Parent chain while searching '
                    'for %s' % repr(name))
            visited.add(myid)
            mydict = mydict.Parent
            if mydict is None:
                return None
class _Private(object):
    ''' Write-only proxy used to store private attributes
        (not output to PDF files) on PdfDict classes.

        Any attribute assigned on the proxy is placed directly
        into the attribute dictionary of the wrapped object.
    '''

    def __init__(self, pdfdict):
        # Go straight to the instance dictionary: a normal
        # assignment would be redirected by __setattr__ below.
        self.__dict__['pdfdict'] = pdfdict

    def __setattr__(self, name, value):
        ''' Store value under name in the wrapped object's
            attribute dictionary.
        '''
        target = self.pdfdict
        target.__dict__[name] = value
class PdfDict(dict):
    ''' PdfDict objects are subclassed dictionaries
        with the following features:

        - Every key in the dictionary starts with "/"

        - A dictionary item can be deleted by assigning it to None

        - Keys that (after the initial "/") conform to Python
          naming conventions can also be accessed (set and retrieved)
          as attributes of the dictionary.  E.g.  mydict.Page is the
          same thing as mydict['/Page']

        - Private attributes (not in the PDF space) can be set
          on the dictionary object attribute dictionary by using
          the private attribute:

                mydict.private.foo = 3
                mydict.foo = 5
                x = mydict.foo       # x will now contain 3
                y = mydict['/foo']   # y will now contain 5

          Most standard adobe dictionary keys start with an upper case letter,
          so to avoid conflicts, it is best to start private attributes with
          lower case letters.

        - PdfDicts have the following read-only properties:

            - private -- as discussed above, provides write access to
                         dictionary's attributes
            - inheritable -- this creates and returns a "view" attribute
                         that will search through the object hierarchy for
                         any desired attribute, such as /Rotate or /MediaBox

        - PdfDicts also have the following special attributes:
            - indirect is not stored in the PDF dictionary, but in the object's
              attribute dictionary
            - stream is also stored in the object's attribute dictionary
              and will also update the stream length.
            - _stream will store in the object's attribute dictionary without
              updating the stream length.

            It is possible, for example, to have a PDF name such as "/indirect"
            or "/stream", but you cannot access such a name as an attribute:

                mydict.indirect -- accesses object's attribute dictionary
                mydict["/indirect"] -- accesses actual PDF dictionary
    '''
    # Class-level defaults for the special attributes; instances
    # override them through their own attribute dictionary.
    indirect = False
    stream = None

    # Maps a special attribute name to a tuple of
    # (name used in the attribute dictionary, update /Length?).
    _special = dict(indirect=('indirect', False),
                    stream=('stream', True),
                    _stream=('stream', False),
                    )

    def __setitem__(self, name, value, setter=dict.__setitem__,
                    BasePdfName=BasePdfName, isinstance=isinstance):
        ''' Store value under name.  Assigning None removes the key
            if it is present.

            Raises PdfParseError if name is not a BasePdfName.
        '''
        if not isinstance(name, BasePdfName):
            raise PdfParseError('Dict key %s is not a PdfName' % repr(name))
        if value is not None:
            setter(self, name, value)
        elif name in self:
            del self[name]

    def __init__(self, *args, **kw):
        ''' Build the dictionary from optional positional data and
            keyword arguments.

            A single positional argument is unwrapped and handed to
            update(); if it is itself a PdfDict, its indirect flag
            and stream are copied as well.  Keyword arguments are
            applied with setattr, so they follow the rules of
            __setattr__.
        '''
        if args:
            if len(args) == 1:
                args = args[0]
            self.update(args)
            if isinstance(args, PdfDict):
                self.indirect = args.indirect
                # _stream stores the stream without touching /Length,
                # which update() has already copied from the source.
                self._stream = args.stream
        for key, value in iteritems(kw):
            setattr(self, key, value)

    def __getattr__(self, name, PdfName=PdfName):
        ''' If the attribute doesn't exist on the dictionary object,
            try to slap a '/' in front of it and get it out
            of the actual dictionary itself.
        '''
        return self.get(PdfName(name))

    def get(self, key, dictget=dict.get, isinstance=isinstance,
            PdfIndirect=PdfIndirect):
        ''' Get a value out of the dictionary,
            after resolving any indirect objects.

            Returns None when the key is missing.  A resolved
            indirect value replaces the stored reference; if it
            resolves to None the key is removed instead.
        '''
        value = dictget(self, key)
        if isinstance(value, PdfIndirect):
            # We used to use self[key] here, but that does an
            # unwanted check on the type of the key (github issue #98).
            # Python will keep the old key object in the dictionary,
            # so that check is not necessary.
            value = value.real_value()
            if value is not None:
                dict.__setitem__(self, key, value)
            else:
                del self[key]
        return value

    def __getitem__(self, key):
        ''' Item access behaves like get(): a missing key yields
            None rather than raising KeyError.
        '''
        return self.get(key)

    def __setattr__(self, name, value, special=_special.get,
                    PdfName=PdfName, vars=vars):
        ''' Set an attribute on the dictionary.  Handle the keywords
            indirect, stream, and _stream specially (for content objects)
        '''
        info = special(name)
        if info is None:
            # Ordinary attribute: store it in the PDF dictionary.
            self[PdfName(name)] = value
        else:
            # Special attribute: store it in the attribute dictionary.
            name, setlen = info
            vars(self)[name] = value
            if setlen:
                # Keep Length in step with the stream; a None
                # stream removes Length from the dictionary.
                notnone = value is not None
                self.Length = notnone and PdfObject(len(value)) or None

    def iteritems(self, dictiter=iteritems,
                  isinstance=isinstance, PdfIndirect=PdfIndirect,
                  BasePdfName=BasePdfName):
        ''' Iterate over the dictionary, resolving any unresolved objects

            Entries whose value resolves to None are dropped and
            not yielded.  Raises PdfParseError for a key that is
            not a BasePdfName.
        '''
        # Iterate over a snapshot: storing resolved values below
        # may add or delete entries while we are looping.
        for key, value in list(dictiter(self)):
            if isinstance(value, PdfIndirect):
                self[key] = value = value.real_value()
            if value is not None:
                if not isinstance(key, BasePdfName):
                    raise PdfParseError('Dict key %s is not a PdfName' %
                                        repr(key))
                yield key, value

    def items(self):
        ''' Return a list of the (key, value) pairs from iteritems().
        '''
        return list(self.iteritems())

    def itervalues(self):
        ''' Iterate over the resolved values.
        '''
        for key, value in self.iteritems():
            yield value

    def values(self):
        ''' Return a list of the resolved values.
        '''
        return [value for key, value in self.iteritems()]

    def keys(self):
        ''' Return a list of the keys that have a non-None value.
        '''
        return [key for key, value in self.iteritems()]

    def __iter__(self):
        ''' Iterate over the keys, resolving values along the way.
        '''
        for key, value in self.iteritems():
            yield key

    def iterkeys(self):
        ''' Same as iterating over the dictionary itself.
        '''
        return iter(self)

    def copy(self):
        ''' Return a new object of the same class built from this one.
        '''
        return type(self)(self)

    def pop(self, key):
        ''' Remove key and return its resolved value.

            Raises KeyError if the key is not present.
        '''
        value = self.get(key)
        del self[key]
        return value

    def popitem(self):
        ''' Remove an entry and return it as a (key, value) pair,
            with the value resolved if it was an indirect object.

            Entries whose indirect value resolves to None are
            discarded and the next entry is tried, in line with
            how get() and iteritems() treat such values.

            Raises KeyError if the dictionary is empty.
        '''
        while True:
            # dict.popitem, not dict.pop: dict.pop needs a key
            # argument and raises TypeError when called without one.
            key, value = dict.popitem(self)
            if isinstance(value, PdfIndirect):
                value = value.real_value()
                if value is None:
                    continue
            return key, value

    def inheritable(self):
        ''' Search through ancestors as needed for inheritable
            dictionary items.
            NOTE:  You might think it would be a good idea
            to cache this class, but then you'd have to worry
            about it pointing to the wrong dictionary if you
            made a copy of the object...
        '''
        return _DictSearch(self)
    inheritable = property(inheritable)

    def private(self):
        ''' Allows setting private metadata for use in
            processing (not sent to PDF file).
            See note on inheritable
        '''
        return _Private(self)
    private = property(private)
class IndirectPdfDict(PdfDict):
    ''' Convenience subclass of PdfDict whose indirect
        attribute defaults to True.

        Creating an IndirectPdfDict gives the same result as
        creating a direct PdfDict and then setting
        indirect = True on it.
    '''
    indirect = True