"""Extract text layers from CDF records and append them to a JSON-lines file.

Each line of the input file is a JSON object with at least the keys
``content`` and ``rendered_folder``. For every record the text elements
returned by ``CdfParser.get_texts()`` are stored under ``text_layer``, the
``content`` key is dropped, and the record is appended as one line to
``cdfs_with_masks.json``.
"""

import argparse
import json
import sys
from re import L
import cv2
import pprint
import os.path as osp
from cdf_parser import CdfParser
from threading import Lock
from concurrent.futures import ThreadPoolExecutor

# Size of the worker pool shared by all input lines.
MAX_WORKERS = 24

# Shared append-mode output handle; every write must hold ``output_lock``.
output_f = open("cdfs_with_masks.json", "a+")
output_lock = Lock()


def get_text_mask(line):
    """Process one JSON record and append the result to the output file.

    Args:
        line: A JSON document (one line of the input file) containing the
            keys ``content`` and ``rendered_folder``.

    Raises:
        json.JSONDecodeError: If ``line`` is not valid JSON.
        KeyError: If ``content`` or ``rendered_folder`` is missing.
    """
    data = json.loads(line)
    cdf_parser = CdfParser(data['content'], data['rendered_folder'])
    data["text_layer"] = cdf_parser.get_texts()
    del data["content"]

    # Serialize outside the lock so only the actual write is exclusive.
    serialized = json.dumps(data) + "\n"
    with output_lock:
        # Worker threads share one file object; the lock keeps each record
        # on its own uninterrupted line.
        output_f.write(serialized)


def _report_failure(future):
    """Print the exception of a failed worker task to stderr, if any."""
    error = future.exception()
    if error is not None:
        print(f"Failed to process record: {error!r}", file=sys.stderr)


def main(cdf_file):
    """Run ``get_text_mask`` over every non-blank line of ``cdf_file``.

    A single thread pool is shared by all lines, so records are processed
    concurrently and the order of lines in the output file may differ from
    the input order. A failing record is reported on stderr and does not
    stop the remaining records.

    Args:
        cdf_file: Path to the JSON-lines input file.
    """
    # One pool for the whole file: building a pool per line would wait for
    # each task before reading the next line, i.e. run everything serially.
    # NOTE: pending lines sit in the executor's unbounded queue until a
    # worker picks them up.
    with open(cdf_file, 'r') as f, \
            ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        for line in f:
            if not line.strip():
                # Blank lines are not JSON records; skip them.
                continue
            future = executor.submit(get_text_mask, line)
            # Without this, exceptions raised in workers vanish silently.
            future.add_done_callback(_report_failure)

    # All workers are finished once the executor context exits.
    output_f.flush()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--cdf", type=str, required=True)
    args = parser.parse_args()
    try:
        main(args.cdf)
    finally:
        # The script owns the module-level handle; close it on the way out.
        output_f.close()