Coverage for flair/flair/visual/ner_html.py: 0%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
import html
from typing import Dict, List, Optional, Union

from flair.data import Sentence
# Template for one tagged entity: a highlighted <mark> containing the entity
# text followed by its label. Placeholders: {color}, {entity}, {label}.
TAGGED_ENTITY = """
<mark class="entity" style="background: {color}; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 3; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone">
 {entity}
 <span style="font-size: 0.8em; font-weight: bold; line-height: 3; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem">{label}</span>
</mark>
"""

# Template wrapping the rendered fragments of one sentence. Placeholder: {sentence}.
PARAGRAPH = """<p>{sentence}</p>"""

# Template for a standalone HTML document. Placeholders: {title}, {text}.
HTML_PAGE = """
<!DOCTYPE html>
<html lang="en">
 <head>
 <title>{title}</title>
 </head>
 <body style="font-size: 16px; font-family: 'Segoe UI'; padding: 4rem 2rem">{text}</body>
</html>
"""
def split_to_spans(s: Sentence):
    """
    Split a sentence into consecutive text fragments, separating tagged entities from plain text.

    :param s: sentence whose "ner" spans mark the entity fragments
    :return: list of (text, tag) tuples; tag is None for text that lies outside any entity
    """
    orig = s.to_original_text()
    last_idx = 0
    spans = []
    # NOTE(review): assumes get_spans returns entities in text order and
    # without overlaps - confirm against flair.data.Sentence.
    for ent in s.get_spans("ner"):
        # Emit the untagged text between the previous entity and this one, if any.
        if last_idx != ent.start_pos:
            spans.append((orig[last_idx : ent.start_pos], None))
        spans.append((ent.text, ent.tag))
        last_idx = ent.end_pos
    # Emit whatever follows the last entity. Comparing against len(orig)
    # (not len(orig) - 1) keeps a single trailing character such as ".".
    if last_idx < len(orig):
        spans.append((orig[last_idx:], None))
    return spans
def render_ner_html(
    sentences: Union[List[Sentence], Sentence],
    title: str = "Flair",
    colors: Optional[Dict[str, str]] = None,
    default_color: str = "#ddd",
    wrap_page=True,
) -> str:
    """
    Render one or more NER-tagged sentences as HTML with highlighted entities.

    :param sentences: single sentence or list of sentences to convert to HTML
    :param title: title of the HTML page; it is HTML-escaped before insertion
    :param colors: dict where keys are tags and values are color HTML codes; when None, a built-in palette for PER, ORG, LOC, MISC and O is used
    :param default_color: color to use if colors parameter is missing a tag
    :param wrap_page: if True method returns result of processing sentences wrapped by <html> and <body> tags, otherwise - without these tags
    :return: HTML as a string
    """
    if colors is None:
        # Built per call so no mutable dict is shared through the signature default.
        colors = {
            "PER": "#F7FF53",
            "ORG": "#E8902E",
            "LOC": "#FF40A3",
            "MISC": "#4647EB",
            "O": "#ddd",
        }
    if isinstance(sentences, Sentence):
        sentences = [sentences]

    sentences_html = []
    for s in sentences:
        spans_html = []
        for fragment, tag in split_to_spans(s):
            # Escape the raw text first, then turn newlines into explicit line breaks.
            escaped_fragment = html.escape(fragment).replace("\n", "<br/>")
            if tag:
                escaped_fragment = TAGGED_ENTITY.format(
                    entity=escaped_fragment,
                    # The label is escaped like the text so markup characters
                    # in a tag name cannot break the generated HTML.
                    label=html.escape(tag),
                    color=colors.get(tag, default_color),
                )
            spans_html.append(escaped_fragment)
        sentences_html.append(PARAGRAPH.format(sentence="".join(spans_html)))

    final_text = "".join(sentences_html)
    if wrap_page:
        return HTML_PAGE.format(text=final_text, title=html.escape(title))
    return final_text