Coverage for pdfrw/pdfrw/objects/pdfname.py: 79%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# A part of pdfrw (https://github.com/pmaupin/pdfrw)
2# Copyright (C) 2006-2015 Patrick Maupin, Austin, Texas
3# MIT license -- See LICENSE.txt for details
5import re
7from ..errors import log
9warn = log.warning
class BasePdfName(str):
    ''' A PdfName is an identifier that starts with
        a slash.

        If a PdfName has illegal space or delimiter characters,
        or a literal number sign, then it will be decorated with
        an "encoded" attribute that has those characters properly
        escaped as #<hex><hex>

        The "encoded" attribute is what is sent out to a PDF file,
        the non-encoded main object is what is compared for equality
        in a PDF dictionary.
    '''

    indirect = False
    # Stays None (the class default) unless the escaped spelling
    # differs from the plain string value.
    encoded = None

    whitespace = '\x00 \t\f\r\n'
    delimiters = '()<>{}[]/%'
    # Regex alternatives for every character that must be escaped on
    # output.  The number sign is included because it introduces a
    # #<hex><hex> escape: written literally, a name such as "/A#20B"
    # would be decoded as "/A B" when the file is read back.
    forbidden = (list(whitespace) + list('\\' + x for x in delimiters) +
                 ['\\#'])
    # Maps each character that must be escaped to its #<hex><hex> form.
    remap = dict((x, '#%02X' % ord(x))
                 for x in (whitespace + delimiters + '#'))
    # The capturing group makes split() keep the matched characters,
    # so they land on the odd indices of the resulting list.
    split_to_encode = re.compile('(%s)' % '|'.join(forbidden)).split
    split_to_decode = re.compile(r'\#([0-9A-Fa-f]{2})').split

    def __new__(cls, name, pre_encoded=True, remap=remap,
                join=''.join, new=str.__new__, chr=chr, int=int,
                split_to_encode=split_to_encode,
                split_to_decode=split_to_decode,
                ):
        ''' We can build a PdfName from scratch, or from
            a pre-encoded name (e.g. coming in from a file).

            name is the full name; the encoding step assumes it
            starts with the leading slash.

            If pre_encoded is true, name is taken to be the escaped
            spelling and any #<hex><hex> sequences are decoded to
            obtain the string value.  If it is false, name is the
            plain value and the escaped spelling is computed from it.

            The remaining parameters are defaults bound at class
            creation time; callers are not expected to pass them.
        '''
        # Optimization for normal case
        if name[1:].isalnum():
            return new(cls, name)
        encoded = name
        if pre_encoded:
            if '#' in name:
                # Odd indices hold the captured hex digit pairs.
                substrs = split_to_decode(name)
                substrs[1::2] = (chr(int(x, 16)) for x in substrs[1::2])
                name = join(substrs)
        else:
            encoded = split_to_encode(encoded)
            # Index 1 is the leading slash, which must stay as it is,
            # so replacement starts with the next captured character.
            encoded[3::2] = (remap[x] for x in encoded[3::2])
            encoded = join(encoded)
        self = new(cls, name)
        if encoded != name:
            self.encoded = encoded
        return self
# A metaclass would also work here; instead the class is replaced by
# a single instance of itself below, as has been done historically.

class PdfName(object):
    ''' Factory for PDF names.  There are two equivalent spellings:

            x = PdfName.FooBar
            x = PdfName('FooBar')

        Both give back "/FooBar".  The text supplied is treated
        as a plain (not pre-encoded) name.
    '''

    # Passed as the second argument to BasePdfName.
    _pre_encoded = False

    def __getattr__(self, name, BasePdfName=BasePdfName):
        # Only reached for attributes that normal lookup cannot find.
        slashed = '/' + name
        return BasePdfName(slashed, self._pre_encoded)

    def __call__(self, name, BasePdfName=BasePdfName):
        slashed = '/' + name
        return BasePdfName(slashed, self._pre_encoded)


# From here on the module-level name refers to the factory instance.
PdfName = PdfName()