Coverage for pdfrw/pdfrw/objects/pdfname.py: 88%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

34 statements  

1# A part of pdfrw (https://github.com/pmaupin/pdfrw) 

2# Copyright (C) 2006-2015 Patrick Maupin, Austin, Texas 

3# MIT license -- See LICENSE.txt for details 

4 

5import re 

6 

7from ..errors import log 

8 

# Module-level shorthand for log.warning (log is imported from ..errors).
# Nothing in the visible part of this module calls it.
9warn = log.warning 

10 

11 

class BasePdfName(str):
    ''' A PdfName is an identifier that starts with
        a slash.

        If a PdfName has illegal space or delimiter characters,
        or a literal '#', then it will be decorated with an
        "encoded" attribute that has those characters properly
        escaped as #<hex><hex>

        The "encoded" attribute is what is sent out to a PDF file,
        the non-encoded main object is what is compared for equality
        in a PDF dictionary.
    '''

    indirect = False
    # Escaped spelling of the name.  Stays None (the class default)
    # whenever the escaped and unescaped spellings are identical.
    encoded = None

    whitespace = '\x00 \t\f\r\n'
    delimiters = '()<>{}[]/%'
    # '#' introduces a #<hex><hex> escape.  A literal '#' must therefore
    # be escaped too; otherwise a name such as '/A#42' would be written
    # unchanged and decoded as '/AB' when it is read back.
    escape_char = '#'
    # Regex alternatives: whitespace is used as-is, punctuation is
    # backslash-escaped so that it is matched literally.
    forbidden = (list(whitespace) +
                 list('\\' + x for x in delimiters + escape_char))
    # Maps each character that needs escaping to its #<hex><hex> form.
    remap = dict((x, '#%02X' % ord(x))
                 for x in (whitespace + delimiters + escape_char))
    # The capturing group makes split() return the matched characters
    # at the odd indexes, between the pieces of ordinary text.
    split_to_encode = re.compile('(%s)' % '|'.join(forbidden)).split
    split_to_decode = re.compile(r'\#([0-9A-Fa-f]{2})').split

    def __new__(cls, name, pre_encoded=True, remap=remap,
                join=''.join, new=str.__new__, chr=chr, int=int,
                split_to_encode=split_to_encode,
                split_to_decode=split_to_decode,
                ):
        ''' We can build a PdfName from scratch, or from
            a pre-encoded name (e.g. coming in from a file).

            name is the full name including the leading slash.
            If pre_encoded is true, name is taken to be in escaped
            form and any #<hex><hex> sequences are decoded to get
            the value of the string.  If pre_encoded is false, name
            is the plain value and the escaped form is computed
            from it.

            The remaining parameters are default-argument bindings
            of helpers and are not meant to be passed by callers.

            Returns the new string object.  Its "encoded" attribute
            is set on the instance only when the escaped form
            differs from the plain value.
        '''
        # Optimization for normal case
        if name[1:].isalnum():
            return new(cls, name)
        encoded = name
        if pre_encoded:
            if '#' in name:
                # Odd indexes hold the captured hex digit pairs.
                substrs = split_to_decode(name)
                substrs[1::2] = (chr(int(x, 16)) for x in substrs[1::2])
                name = join(substrs)
        else:
            encoded = split_to_encode(encoded)
            # Index 1 is expected to be the leading slash, which has
            # to stay literal, so escaping starts at index 3.
            encoded[3::2] = (remap[x] for x in encoded[3::2])
            encoded = join(encoded)
        self = new(cls, name)
        if encoded != name:
            self.encoded = encoded
        return self

60 

61 

62# We could have used a metaclass, but this matches what 

63# we were doing historically. 

64 

class PdfName(object):
    ''' Two simple ways to get a PDF name from a string:

            x = PdfName.FooBar
            x = PdfName('FooBar')

        Either technique will return "/FooBar"

        Attribute names of the form __xxx__ are not turned into
        PDF names: they raise AttributeError, so that protocol
        probes such as getattr(obj, '__deepcopy__', None) behave
        normally.  The call form still accepts any string.
    '''

    def __getattr__(self, name, BasePdfName=BasePdfName):
        # __getattr__ is only consulted for attributes that normal
        # lookup did not find.  Answering dunder probes with a string
        # makes hasattr() true for every protocol hook, and callers
        # that then try to call the "hook" fail.
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)
        return BasePdfName('/' + name, False)

    def __call__(self, name, BasePdfName=BasePdfName):
        # name is the plain (not yet escaped) text without the slash.
        return BasePdfName('/' + name, False)


# Rebind the name to a single instance, so that both PdfName.Foo and
# PdfName('Foo') work on the module-level object.
PdfName = PdfName()