"""Helpers for reading text elements out of a parsed CDF design document.

The document is handled as plain nested ``dict``/``list`` data (already decoded
from JSON).  Only the keys this module actually reads are documented below;
the rest of the schema is not visible from here.
"""
import functools
import json
import logging
from copy import deepcopy
from dataclasses import asdict, dataclass
from typing import Any, Dict, Iterator, List, Optional, Tuple

try:
    # NOTE(review): ``merge`` is not referenced anywhere in this module and
    # looks like an accidental auto-import.  It is kept (so nothing that may
    # rely on it breaks) but guarded, so the parser does not hard-require
    # OpenCV to be installed.
    from cv2 import merge  # noqa: F401
except ImportError:
    pass

# Location of the font catalogue, resolved relative to the current working
# directory (same path the module has always used).
FONTS_PATH = "../canva.fonts.json"


def _load_fonts(path: str) -> Dict[str, dict]:
    """Load the font catalogue at *path* and index it by ``"<_id>,<index>"``.

    The file must contain a JSON list of records that each have ``_id`` and
    ``index`` keys.  If the file does not exist, a warning is logged and an
    empty catalogue is returned, so the parser stays usable without font
    metadata (text elements then simply carry no ``font-info``).
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(path, "r", encoding="utf-8") as font_file:
            records = json.load(font_file)
    except FileNotFoundError:
        logging.warning(
            "Font catalogue %s not found; font-info will not be attached", path
        )
        return {}
    return {f"{font['_id']},{font['index']}": font for font in records}


# Font records keyed by "<_id>,<index>"; looked up by ``CdfParser.get_text``.
fonts = _load_fonts(FONTS_PATH)


def catch_keyerror(default):
    """Build a method decorator that turns a ``KeyError`` into *default*.

    When the wrapped callable raises ``KeyError``, a warning naming the
    missing key, the callable and the ``id`` attribute of its first positional
    argument (if any) is logged, and a deep copy of *default* is returned.
    Returning a copy means a mutable default such as ``[]`` is never shared
    between calls.
    """
    def _catch_keyerror(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except KeyError as e:
                owner_id = getattr(args[0], "id", None) if args else None
                logging.warning(
                    "Key %s not found at %s(%s)", e, func.__name__, owner_id
                )
                return deepcopy(default)
        return wrapper
    return _catch_keyerror


@dataclass
class TextElement():
    """One text element extracted from a CDF document."""

    # First text run of the element.
    text: str
    # Values of the element's 'A', 'B', 'C', 'D' keys, in that order.
    # NOTE(review): presumably a bounding box - confirm the meaning of each.
    position: Tuple[float, float, float, float]
    # Value of the element's optional 'N' key (None when absent).
    label: Optional[str]
    # Merged style attributes, plus 'font-info' when the font is known.
    style: dict


@dataclass
class TextStyle():
    """Container for a style ``transform`` mapping (unused in this module)."""

    transform: dict


class CdfParser(object):
    """Read-and-edit wrapper around one parsed CDF document.

    Args:
        cdf: the already-decoded document (nested dicts/lists).  It is stored
            by reference, so the mutating methods change the caller's object.
        id: identifier used only in log messages.
    """

    def __init__(self, cdf, id):
        # self.data = json.loads(cdf)
        self.data = cdf
        self.id = id

    # FIXME: Not all cdf have title
    @catch_keyerror(None)
    def get_title(self):
        """Return ``data['content']['D']``, or ``None`` if a key is missing."""
        return self.data['content']['D']

    @catch_keyerror([])
    def get_elements(self) -> list:
        """Return the element list of the first layout.

        The returned list is the live list stored inside ``self.data`` (not a
        copy), so mutating it mutates the document.  An empty list is returned
        when a key is missing, when there are no layouts, or when the layout's
        element list is ``None``.  A warning is logged if the document has
        more than one layout; only the first one is used.
        """
        layouts = self.data['content']['A']
        if not layouts:
            # No layouts at all: nothing to index into.
            return []
        if len(layouts) > 1:
            logging.warning(f"More than 1 layout at {self.id}")
        elements = layouts[0]['E']
        return [] if elements is None else elements

    @staticmethod
    def get_text(element) -> Optional[TextElement]:
        """Convert one text element dict into a :class:`TextElement`.

        Only the first text run (``element['a']['A'][0]``) is read.  Style
        entries whose ``'A?'`` is ``'A'`` are merged in order, later entries
        overriding earlier ones; for each attribute the first value of its
        value-dict is kept.  If the merged ``'font-family'`` is a key of the
        module-level ``fonts`` catalogue, the record is attached as
        ``'font-info'``.

        Returns:
            The ``TextElement``, or ``None`` (after logging a warning) when
            the element's style list is ``None``.

        Raises:
            KeyError: if a required key is missing from *element*.
        """
        position = (element['A'], element['B'], element['C'], element['D'])
        text = element['a']['A'][0]['A']
        label = element.get('N')
        styles = element['a']['B']
        if styles is None:
            logging.warning("Empty font styles")
            return None

        merged_style: Dict[str, Any] = {}
        for style in styles:
            if style['A?'] != 'A':
                continue
            merged_style.update({
                attribute_name: list(attr_value_dict.values())[0]
                for attribute_name, attr_value_dict in style['A'].items()
            })

        try:
            merged_style['font-info'] = fonts[merged_style['font-family']]
        except (KeyError, TypeError):
            # Font info is best-effort: no font-family set, a family that is
            # not in the catalogue (KeyError), or an unhashable value
            # (TypeError) just means no 'font-info' entry.
            pass
        return TextElement(text, position, label, merged_style)

    def _text_entry(self, element, index: int) -> Optional[dict]:
        """Return ``get_text(element)`` as a dict tagged with *index*, or None."""
        text_element = self.get_text(element)
        if text_element is None:
            return None
        entry = asdict(text_element)
        entry['index'] = index
        return entry

    def get_texts(self) -> list:
        """Collect every text element of the first layout.

        Elements are visited last-to-first, so ``index`` 0 is the last
        element - the one ``remove_elements_iter`` pops first.

        Returns:
            A list with one item per element that yielded text: a dict
            (``TextElement`` fields plus ``'index'``) for a ``'K'`` element,
            or a non-empty list of such dicts for an ``'H'`` group.  Children
            of a group carry the index of the group itself.
        """
        res: List[Any] = []
        # Enumerate backwards to align with pop() in remove_elements_iter()
        for index, element in enumerate(self.get_elements()[::-1]):
            kind = element['A?']
            if kind == 'K':
                if len(element['a']['A']) > 1:
                    # Only the first run is extracted; flag it and carry on
                    # instead of stopping in the debugger.
                    logging.warning(f"More than 1 text element at {self.id}")
                entry = self._text_entry(element, index)
                if entry is not None:
                    res.append(entry)
            elif kind == 'H':
                subres = []
                for child_element in element['c']:
                    if child_element['A?'] != "K":
                        continue
                    entry = self._text_entry(child_element, index)
                    if entry is not None:
                        subres.append(entry)
                if subres:
                    res.append(subres)
        return res

    def get_data(self):
        """Return the underlying document object (not a copy)."""
        return self.data

    def remote_texts(self) -> None:
        """Blank the first text run of every text element, in place.

        Handles top-level ``'K'`` elements and ``'K'`` children of ``'H'``
        groups; every other element is left untouched.
        """
        for element in self.get_elements():
            if element['A?'] == "K":
                element['a']['A'][0]['A'] = ""  # clear texts
            elif element['A?'] == "H":  # Grouped text
                for child in element['c']:
                    if child['A?'] == "K":
                        child['a']['A'][0]['A'] = ""

    # NOTE(review): "remote_texts" looks like a typo of "remove_texts"; the
    # original name is kept for existing callers and this alias added.
    remove_texts = remote_texts

    # remove elements one by one and return a generator of self
    def remove_elements_iter(self) -> Iterator[Any]:
        """Pop elements off the first layout one at a time.

        After each removal (last element first) a deep copy of the whole
        document is yielded.  The document held by this parser is mutated and
        has no elements left once the generator is exhausted.  Nothing is
        yielded when there are no elements.
        """
        elements = self.get_elements()
        while elements:
            elements.pop()
            yield deepcopy(self.data)


if __name__ == "__main__":
    # Manual debugging entry point: build a sample element and drop into the
    # debugger to inspect it.
    t = TextElement("abc", (1, 1, 1, 1), "label", {"s": "style"})
    breakpoint()