Coverage for casanova/casanova/utils.py: 95%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# =============================================================================
2# Casanova Utils
3# =============================================================================
4#
5# Miscellaneous utility functions.
6#
7import re
8import csv
9import gzip
10from io import StringIO
def encoding_fingerprint(encoding):
    """
    Return a normalized form of an encoding name: lowercased, with every
    hyphen removed (e.g. 'UTF-8' becomes 'utf8').
    """
    lowered = encoding.lower()
    return ''.join(lowered.split('-'))
def ensure_open(p, encoding='utf-8', mode='r'):
    """
    Return an open file handle for the given target.

    Args:
        p: Either a path given as a string, or any other object. Anything
            that is not a string is returned untouched.
        encoding: Text encoding used when the file is opened in text mode.
            Ignored when `mode` contains 'b'.
        mode: Mode forwarded to `gzip.open` or `open`.

    Returns:
        `p` itself when it is not a string, else a file object opened with
        `gzip.open` when the path ends with '.gz', or with the built-in
        `open` otherwise.
    """
    if not isinstance(p, str):
        return p

    binary = 'b' in mode

    if p.endswith('.gz'):
        if binary:
            return gzip.open(p, mode=mode)

        # gzip.open defaults to binary, so text mode must be requested
        # explicitly for the encoding to apply.
        return gzip.open(p, encoding=encoding, mode=mode + 't')

    # Binary handles cannot take an encoding.
    if binary:
        return open(p, mode=mode)

    # The built-in open handles any codec, so the requested encoding is
    # always passed explicitly rather than relying on the locale default
    # or on the `codecs` module.
    return open(p, encoding=encoding, mode=mode)
# Matches a single byte order mark sitting at the very start of a string.
BOM_RE = re.compile(r'^\ufeff')


def suppress_BOM(string):
    """
    Return the given string without its leading byte order mark, if any.
    """
    return BOM_RE.sub('', string)
def size_of_row_in_memory(row):
    """
    Estimate how many bytes the given row occupies in the Python program's
    memory.

    The constants used here are based on `sys.getsizeof`.
    """
    # Container cost: fixed overhead plus one slot per cell
    total = 64 + 8 * len(row)

    # String cost: fixed overhead plus one unit per character
    for cell in row:
        total += 49 + len(cell)

    return total
def size_of_row_in_file(row):
    """
    Estimate how many bytes the given row originally took in its CSV file,
    assuming that the delimiter is encoded on a single byte.

    Quotes that may have surrounded escaped cells (2 bytes per cell) are
    not counted.

    Cells are measured with `len`, so characters encoded on more than one
    byte are probably undercounted.
    """
    # One delimiter between each pair of consecutive cells
    delimiters = len(row) - 1

    if delimiters < 0:
        delimiters = 0

    return delimiters + sum(map(len, row))
def CsvCellIO(column, value):
    """
    Build an in-memory CSV file made of a single column: one header line
    holding `column`, followed by one line holding `value`.

    The returned StringIO buffer is rewound to its start.
    """
    output = StringIO()

    csv.writer(
        output,
        dialect=csv.unix_dialect,
        quoting=csv.QUOTE_MINIMAL
    ).writerows([[column], [value]])

    output.seek(0)

    return output
def CsvRowIO(columns, row):
    """
    Build an in-memory CSV file made of two lines: the header given by
    `columns`, followed by the given `row`.

    The returned StringIO buffer is rewound to its start.
    """
    output = StringIO()
    writer = csv.writer(
        output,
        dialect=csv.unix_dialect,
        quoting=csv.QUOTE_MINIMAL
    )

    for line in (columns, row):
        writer.writerow(line)

    output.seek(0)

    return output