Coverage for casanova/casanova/utils.py: 95%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

42 statements  

# =============================================================================
# Casanova Utils
# =============================================================================
#
# Miscellaneous utility functions.
#

import csv
import gzip
import re
from io import StringIO

11 

12 

def encoding_fingerprint(encoding):
    """
    Return a normalized form of an encoding name: lowercased, with every
    hyphen removed (e.g. 'UTF-8' becomes 'utf8').

    Underscores are left untouched, so 'utf_8' stays 'utf_8'.
    """
    return encoding.lower().replace('-', '')

15 

16 

def ensure_open(p, encoding='utf-8', mode='r'):
    """
    Return an open file handle for the given target.

    Args:
        p: Either a path given as a string, or anything else (typically an
            already opened file-like object), which is returned untouched.
        encoding: Text encoding used when opening the file in text mode.
            Ignored in binary mode.
        mode: Opening mode, as understood by `open` and `gzip.open`.

    Returns:
        `p` itself if it is not a string, else a handle on the file at that
        path. Paths ending with '.gz' are opened through `gzip`.
    """
    if not isinstance(p, str):
        return p

    binary = 'b' in mode

    if p.endswith('.gz'):
        if binary:
            return gzip.open(p, mode=mode)

        # gzip.open defaults to binary, so text mode must be asked explicitly
        text_mode = mode if 't' in mode else mode + 't'
        return gzip.open(p, encoding=encoding, mode=text_mode)

    # An encoding cannot be combined with binary mode in the built-in `open`
    if binary:
        return open(p, mode=mode)

    # Always forward the encoding so decoding never depends on the platform's
    # locale; the built-in `open` handles every codec by itself.
    return open(p, encoding=encoding, mode=mode)

32 

33 

# Matches a byte order mark (U+FEFF) sitting at the very start of a string
BOM_RE = re.compile(r'^\ufeff')


def suppress_BOM(string):
    """
    Return the given string without its leading byte order mark (U+FEFF),
    if any. A U+FEFF character found anywhere else is kept.
    """
    return BOM_RE.sub('', string)

39 

40 

def size_of_row_in_memory(row):
    """
    Returns the approximate amount of bytes needed to represent the given row
    in the python program's memory.

    The magic numbers are based on `sys.getsizeof`: 64 bytes of overhead for
    the array plus 8 bytes per slot, and 49 bytes of overhead per string plus
    one byte per character.
    """
    array_size = 64 + 8 * len(row)
    strings_size = sum(49 + len(cell) for cell in row)

    return array_size + strings_size

52 

53 

def size_of_row_in_file(row):
    """
    Returns the approximate amount of bytes originally used to represent the
    given row in its CSV file. It assumes the delimiter uses only one byte.

    It also ignores the quotes (2 bytes) around escaped cells if they were
    originally present.

    Cells are measured in characters, not encoded bytes, so any character
    taking more than one byte in the file is undercounted.
    """
    # One delimiter between each pair of consecutive cells
    delimiters = max(0, len(row) - 1)
    characters = sum(len(cell) for cell in row)

    return delimiters + characters

68 

69 

def CsvCellIO(column, value):
    """
    Return an in-memory text buffer containing a two-line CSV file having a
    single column: a header line with `column`, then a line with `value`.

    Lines end with '\\n' and cells are only quoted when necessary. The buffer
    is rewound so it can be read from its beginning right away.
    """
    buf = StringIO()
    writer = csv.writer(buf, dialect=csv.unix_dialect, quoting=csv.QUOTE_MINIMAL)
    writer.writerows([[column], [value]])

    buf.seek(0)

    return buf

79 

80 

def CsvRowIO(columns, row):
    """
    Return an in-memory text buffer containing a two-line CSV file: a header
    line made of `columns`, then a single data line made of `row`.

    Lines end with '\\n' and cells are only quoted when necessary. The buffer
    is rewound so it can be read from its beginning right away.
    """
    buf = StringIO()
    writer = csv.writer(buf, dialect=csv.unix_dialect, quoting=csv.QUOTE_MINIMAL)
    writer.writerows([columns, row])

    buf.seek(0)

    return buf