text_proofreading/app.py

129 lines
8.0 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import gradio as gr
import json
import random
import sys
sys.path.append('./api')
from main import correct_api
# def correct(text='今天上学不要赤道啊'):
def correct(text):
    """Run the proofreading backend on ``text`` and keep the usable findings.

    The string returned by ``correct_api(text)`` is parsed as JSON and echoed
    to stdout. Each entry of its ``error_details`` whose ``have_target`` flag
    is truthy is kept, and that entry's ``target_text`` is replaced in place
    by its first element (presumably the top-ranked suggestion -- confirm
    against the backend).

    NOTE(review): the nesting of the loop body was reconstructed from a copy
    of this file that had lost its indentation; confirm that entries without
    a target are meant to be dropped rather than passed through.

    Returns:
        A dict with ``error_details`` (the kept entries) and ``error_count``
        (how many were kept). Illustrative shape::

            {"error_count": 1,
             "error_details": [{"index_start": 6, "index_end": 8,
                                "source_text": "赤道", "target_text": "迟到"}]}
    """
    parsed = json.loads(correct_api(text))
    print(parsed)

    kept = []
    for detail in parsed["error_details"]:
        if not detail['have_target']:
            continue
        # Collapse the suggestion list to a single string.
        detail['target_text'] = detail['target_text'][0]
        kept.append(detail)

    return {'error_details': kept, 'error_count': len(kept)}
def convert_to_ner(text):
    """Proofread ``text`` and build the three values shown after a check.

    Returns a 3-tuple:
        1. ``{'text': ..., 'entities': [...]}`` where every entity carries the
           suggested replacement as ``entity`` and the ``start``/``end``
           offsets of the span it applies to,
        2. the dict returned by ``correct`` with the original input stored
           under ``'text'``,
        3. the card produced by ``get_error_item`` for that dict.
    """
    model_output = correct(text)
    model_output['text'] = text

    spans = [
        {
            'entity': detail['target_text'],
            'start': detail['index_start'],
            'end': detail['index_end'],
        }
        for detail in model_output['error_details']
    ]
    highlighted = {'text': text, 'entities': spans}
    return highlighted, model_output, get_error_item(model_output)
def get_error_item(error_json):
    """Build the one-line card describing the first pending error.

    ``error_json`` must provide ``error_details`` (a list) and, when that list
    is non-empty, ``error_count``. The card text has the form
    ``(k/n)建议将<source>修改为<target>`` where ``n`` is ``error_count`` and
    ``k`` is ``1 + n - len(error_details)``. Two entities mark the source span
    as ``'wrong'`` and the target span as ``'right'``.

    When no errors are pending, a fixed seven-character "nothing found"
    message is returned, marked ``'right'`` over its whole length.
    """
    pending = error_json['error_details']
    if not pending:
        return {'text':'未发现错误:)', 'entities':[{'entity':'right', 'start':0, 'end':7}]}

    current = pending[0]
    total = error_json["error_count"]
    position = 1 + total - len(pending)
    wrong = current["source_text"]
    right = current["target_text"]

    header = f'({position}/{total})建议将'
    joiner = '修改为'

    # Offsets are measured in characters of the card text built below.
    wrong_start = len(header)
    wrong_end = wrong_start + len(wrong)
    right_start = wrong_end + len(joiner)
    right_end = right_start + len(right)

    return {
        'text': f'{header}{wrong}{joiner}{right}',
        'entities': [
            {'entity': 'wrong', 'start': wrong_start, 'end': wrong_end},
            {'entity': 'right', 'start': right_start, 'end': right_end},
        ],
    }
def get_example():
    """Return the built-in sample article used to fill the input box.

    NOTE(review): the article appears to contain deliberate misspellings so
    the proofreader has something to find -- do not "correct" the literal.
    The whitespace at the start of its lines is part of the returned string.
    """
    return '''奋辑笃行促跨越 深圳擎画发展蓝图
  “130多名员工反岗生产设备满产满开日产90万平方米隔膜。”深圳中兴新材技术股份有限公司下称“中兴新材”战略规划部部长肖智贤告诉记者根据疫情防控工作指引中兴新材从3月18日开始生产经营秩序逐步恢复。“今年新能源市场需求旺盛我们将做好相关防疫要求全力确保订单按时交付。”
  封闭停产一周后,坐落于东莞石排镇的东莞铭普光磁股份有限公司(下称“铭普光磁”)迎来复工。“在全力做好疫情防控工作同时,公司抓紧时间补上订单,补充原材料,开足马力稳生产。”铭普光磁总经理李竞舟说,在订单原原不断的情况下,现在要想办法抓紧把停产的订单补上,我们卯足干劲,有信心能够完成今年经济指标。
  中兴新材、铭普光磁等企业“火力全开”,是深圳、东莞等珠三角受疫情影响城市有序复苏的缩影,是工业用电量、地铁客流量受疫情影响地区生产生活秩序有序复苏的证明。
3月22日深圳疫情防控新闻发布会召开。深圳市交通运输局副局长徐炜通报3月21日深圳地铁客流量达400.66万人次公交客流量达到146.66万人次多条地铁线路增加23列列车上线载客。深圳市工业和信息化局局长余锡泉介绍当日深圳工业用电量已恢复至3月13日之前的水平展现出深圳科学的发展路经工业经济强大的韧性。
  生产车间生产率 明列前茅
  广东"双引擎战略"换档提速,为经济增长作出贡献。今年以来,省委、省政府一如即往地重视支持中小企业发展。为确保产业链供应链安全稳定,珠三角受疫情影响地区产业链供应链企业,实行“一企一策”“一厂一案”。新家坡的一名经营跨镜电商的企业家来到深圳,考察当地生产环境,他与记者分亨了在深圳的见闻。
  法雷奥是世界排名前十的汽车零部件供应商现有员工人数近2500名。按照政府防疫要求今年春节后深圳法雷奥迅速落实各项防疫举措建立起一套完整的防疫体系。制定闭环管理和白名单制度后还定期向辖区相关部门汇报防疫工作及时完善防疫举措。
  一手抓防疫,一手抓生产,春节以来,深圳法雷奥产能满负荷运转,努力拼博,全力满足所有客户的交付。在深圳法雷奥高级工厂总经理贺显军看来,政府支持,管理层参与,员工团结一致,是企业实现疫情防控和生产运行“两不误”的关键。
  3月25日记者在位于东莞常平镇岗梓村的东莞市深鹏电子有限公司下称“深鹏电子”生产车间看到工人们佩戴着口罩保持一定距离有条不闻赶制订单。'''
def get_tri_res(error_json):
    """Derive the three displayed values from the current review state.

    ``error_json`` must carry ``text`` and ``error_details``; each detail
    supplies ``target_text``, ``index_start`` and ``index_end``.

    Returns a 3-tuple:
        1. ``{'text': ..., 'entities': [...]}`` with one entity per pending
           detail (``entity`` is the suggested replacement),
        2. ``error_json`` itself, unchanged,
        3. the card produced by ``get_error_item`` for ``error_json``.
    """
    spans = [
        {
            'entity': detail['target_text'],
            'start': detail['index_start'],
            'end': detail['index_end'],
        }
        for detail in error_json['error_details']
    ]
    highlighted = {'text': error_json['text'], 'entities': spans}
    return highlighted, error_json, get_error_item(error_json)
def accept(error_json):
    """Apply the first pending suggestion to the text and advance the review.

    The first entry of ``error_json['error_details']`` is removed and the
    slice ``[index_start:index_end]`` of ``error_json['text']`` is replaced by
    that entry's ``target_text``. If the replacement differs from
    ``source_text``, the offsets of every remaining entry are moved by the
    length difference. ``error_json`` is modified in place.

    NOTE(review): all remaining entries are shifted, which is only right when
    they lie after the replaced span -- confirm the backend returns details
    ordered by position.

    Returns:
        The 3-tuple produced by ``get_tri_res`` for the updated state.
    """
    before = error_json['text']
    if error_json['error_details']:
        applied = error_json['error_details'].pop(0)
        replacement = applied['target_text']

        head = before[:applied['index_start']]
        tail = before[applied['index_end']:]
        error_json['text'] = head + replacement + tail

        if replacement != applied['source_text']:
            shift = len(replacement) - len(applied['source_text'])
            for pending in error_json['error_details']:
                pending['index_start'] += shift
                pending['index_end'] += shift
    return get_tri_res(error_json)
def reject(error_json):
    """Discard the first pending suggestion without touching the text.

    ``error_json['error_details']`` loses its first entry (if it has one);
    ``error_json`` is modified in place.

    Returns:
        The 3-tuple produced by ``get_tri_res`` for the updated state.
    """
    pending = error_json['error_details']
    if pending:
        del pending[0]
    return get_tri_res(error_json)
if __name__ == '__main__':
    # Script entry point: build the two-column review UI and serve it.
    # NOTE(review): the layout nesting was reconstructed from a copy of this
    # file that had lost its indentation; confirm against the running app.
    with gr.Blocks() as demo:
        with gr.Row():
            gr.Markdown('# <center> 文本智能校对Demo')
        with gr.Row():
            # Left column: text entry plus the two action buttons.
            with gr.Column(scale=1):
                # A fixed-height variant (lines=25, max_lines=25) was used here before.
                source_box = gr.Textbox(label='input', placeholder='请输入待校对的文本:').style(show_copy_button=True)
                example_btn = gr.Button(value="example")
                check_btn = gr.Button(value="check")
            # Right column: highlighted result, hidden state, current error card.
            with gr.Column(scale=1):
                result_view = gr.HighlightedText(label='result', show_label=True)
                # Invisible JSON component that carries the review state between clicks.
                review_state = gr.JSON(visible=False)
                error_card = gr.HighlightedText(label='error_item',show_label=False).style(color_map={'wrong':'red', 'right':'green'})
                with gr.Row():
                    accept_btn = gr.Button(value="accept")
                    reject_btn = gr.Button(value="reject")

        # Wire the buttons; check/accept/reject all refresh the same three outputs.
        example_btn.click(get_example, inputs=[], outputs=[source_box])
        check_btn.click(convert_to_ner, inputs=source_box, outputs=[result_view, review_state, error_card])
        accept_btn.click(accept, inputs=review_state, outputs=[result_view, review_state, error_card])
        reject_btn.click(reject, inputs=review_state, outputs=[result_view, review_state, error_card])

    # Bind to every network interface.
    demo.launch(server_name='0.0.0.0')