Coverage for flair/flair/visual/ner_html.py: 95%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

37 statements  

1import html 

2from typing import Union, List 

3 

4from flair.data import Sentence 

5 

# HTML template for a single tagged entity: a highlighted <mark> element that
# holds the entity text followed by a small <span> with its label.
# Placeholders filled via str.format: {color}, {entity}, {label}.
TAGGED_ENTITY = """
<mark class="entity" style="background: {color}; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 3; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone">
    {entity}
    <span style="font-size: 0.8em; font-weight: bold; line-height: 3; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem">{label}</span>
</mark>
"""

12 

# HTML template that wraps one rendered sentence in a <p> element.
# Placeholder filled via str.format: {sentence}.
PARAGRAPH = """<p>{sentence}</p>"""

14 

# HTML template for a complete standalone page.
# Placeholders filled via str.format: {title} (page <title>) and {text}
# (markup placed inside <body>).
HTML_PAGE = """
<!DOCTYPE html>
<html lang="en">
    <head>
        <title>{title}</title>
    </head>

    <body style="font-size: 16px; font-family: 'Segoe UI'; padding: 4rem 2rem">{text}</body>
</html>
"""

25 

26 

def split_to_spans(s: "Sentence"):
    """
    Split a sentence's original text into consecutive fragments, marking the
    fragments that are NER spans with their tag.

    :param s: sentence providing to_original_text() and get_spans("ner")
    :return: list of (fragment, tag) tuples, entities in the order returned by
        get_spans; tag is None for text lying outside any entity
    """
    orig = s.to_original_text()
    last_idx = 0
    spans = []
    for ent in s.get_spans("ner"):
        # Untagged text between the previous entity (or the start) and this one.
        if last_idx != ent.start_pos:
            spans.append((orig[last_idx : ent.start_pos], None))
        spans.append((ent.text, ent.tag))
        last_idx = ent.end_pos
    # Keep everything after the last entity, including a single trailing
    # character such as sentence-final punctuation.
    if last_idx < len(orig):
        spans.append((orig[last_idx:], None))
    return spans

40 

41 

42def render_ner_html( 

43 sentences: Union[List[Sentence], Sentence], 

44 title: str = "Flair", 

45 colors={ 

46 "PER": "#F7FF53", 

47 "ORG": "#E8902E", 

48 "LOC": "#FF40A3", 

49 "MISC": "#4647EB", 

50 "O": "#ddd", 

51 }, 

52 default_color: str = "#ddd", 

53 wrap_page=True, 

54) -> str: 

55 """ 

56 :param sentences: single sentence or list of sentences to convert to HTML 

57 :param title: title of the HTML page 

58 :param colors: dict where keys are tags and values are color HTML codes 

59 :param default_color: color to use if colors parameter is missing a tag 

60 :param wrap_page: if True method returns result of processing sentences wrapped by &lt;html&gt; and &lt;body&gt; tags, otherwise - without these tags 

61 :return: HTML as a string 

62 """ 

63 if isinstance(sentences, Sentence): 

64 sentences = [sentences] 

65 sentences_html = [] 

66 for s in sentences: 

67 spans = split_to_spans(s) 

68 spans_html = list() 

69 for fragment, tag in spans: 

70 escaped_fragment = html.escape(fragment).replace("\n", "<br/>") 

71 if tag: 

72 escaped_fragment = TAGGED_ENTITY.format( 

73 entity=escaped_fragment, 

74 label=tag, 

75 color=colors.get(tag, default_color), 

76 ) 

77 spans_html.append(escaped_fragment) 

78 line = PARAGRAPH.format(sentence="".join(spans_html)) 

79 sentences_html.append(line) 

80 

81 final_text = "".join(sentences_html) 

82 

83 if wrap_page: 

84 return HTML_PAGE.format(text=final_text, title=title) 

85 else: 

86 return final_text