33 lines
No EOL
905 B
Python
33 lines
No EOL
905 B
Python
import argparse
|
|
import json
|
|
from re import L
|
|
import cv2
|
|
import pprint
|
|
import os.path as osp
|
|
from cdf_parser import CdfParser
|
|
from threading import Lock
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
|
|
# Lock intended for serializing access to the shared output file below.
output_lock = Lock()

# Shared output handle, opened once at import time in append mode ("a+"),
# so existing contents of the file are kept and new data is added at the end.
output_f = open("cdfs_with_masks.json", mode="a+")
|
|
|
|
def get_text_mask(line):
    """Extract the text layer of one CDF record and append it to the output.

    The record's ``content`` and ``rendered_folder`` values are handed to
    ``CdfParser``; the result of ``get_texts()`` is stored under
    ``text_layer``, ``content`` is removed, and the record is written to
    ``output_f`` as a single JSON line.

    Args:
        line: One JSON-encoded record (a JSON object).

    Raises:
        json.JSONDecodeError: If ``line`` is not valid JSON.
        KeyError: If ``content`` or ``rendered_folder`` is missing.
    """
    data = json.loads(line)
    cdf_parser = CdfParser(data['content'], data['rendered_folder'])
    data["text_layer"] = cdf_parser.get_texts()
    del data["content"]

    # Serialize before taking the lock so only the write itself is exclusive.
    record = json.dumps(data) + "\n"
    # This function runs on several worker threads; without the lock,
    # concurrent writes to the shared text file could interleave.
    with output_lock:
        output_f.write(record)


def main(cdf_file):
    """Run ``get_text_mask`` over every record of a JSON-lines file.

    Records are processed concurrently on a single thread pool, so the
    order of lines in the output file is not guaranteed to match the input.
    A record that fails is reported on stderr and skipped; the remaining
    records are still processed.

    Args:
        cdf_file: Path to a text file holding one JSON record per line.
    """
    # Imported locally so this function stays self-contained with respect
    # to the module's import block.
    import sys
    from concurrent.futures import FIRST_COMPLETED, wait

    max_workers = 24
    # Cap the number of queued records so a large input file is streamed
    # rather than held in memory all at once.
    max_in_flight = max_workers * 4

    def report(finished):
        # Surface worker failures instead of silently dropping the record.
        for future in finished:
            error = future.exception()
            if error is not None:
                print(f"get_text_mask failed: {error!r}", file=sys.stderr)

    in_flight = set()
    # One pool for the whole file: building a pool per line and waiting for
    # it to shut down would process the lines strictly one at a time.
    with open(cdf_file, 'r') as f, \
            ThreadPoolExecutor(max_workers=max_workers) as executor:
        for line in f:
            if not line.strip():
                continue  # blank lines are not records
            if len(in_flight) >= max_in_flight:
                finished, in_flight = wait(
                    in_flight, return_when=FIRST_COMPLETED
                )
                report(finished)
            in_flight.add(executor.submit(get_text_mask, line))

        finished, _ = wait(in_flight)
        report(finished)

    # The shared handle stays open for the life of the module, so push
    # buffered records to disk once all work is done.
    output_f.flush()
|
|
|
|
if __name__ == "__main__":
    # Script entry point: the required --cdf option is passed on to main().
    cli = argparse.ArgumentParser()
    cli.add_argument("--cdf", required=True, type=str)
    options = cli.parse_args()
    main(options.cdf)