From 621e0bc65c1511bb822e297f34accd1d1220667b Mon Sep 17 00:00:00 2001 From: lihengda Date: Mon, 13 May 2024 19:18:37 +0800 Subject: [PATCH] =?UTF-8?q?=E5=9C=A8=20openai-translator=20gradio=20?= =?UTF-8?q?=E5=9B=BE=E5=BD=A2=E5=8C=96=E7=95=8C=E9=9D=A2=E5=9F=BA=E7=A1=80?= =?UTF-8?q?=E4=B8=8A=EF=BC=8C=E6=94=AF=E6=8C=81=E9=A3=8E=E6=A0=BC=E5=8C=96?= =?UTF-8?q?=E7=BF=BB=E8=AF=91=EF=BC=8C=E5=A6=82=EF=BC=9A=E5=B0=8F=E8=AF=B4?= =?UTF-8?q?=E3=80=81=E6=96=B0=E9=97=BB=E7=A8=BF=E3=80=81=E4=BD=9C=E5=AE=B6?= =?UTF-8?q?=E9=A3=8E=E6=A0=BC=E7=AD=89=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../{ => ai_translator}/config.yaml | 0 .../ai_translator/gradio_server.py | 12 +++++++----- .../ai_translator/translator/pdf_translator.py | 18 ++++++++++-------- .../translator/translation_chain.py | 9 ++++++--- 4 files changed, 23 insertions(+), 16 deletions(-) rename langchain/openai-translator/{ => ai_translator}/config.yaml (100%) diff --git a/langchain/openai-translator/config.yaml b/langchain/openai-translator/ai_translator/config.yaml similarity index 100% rename from langchain/openai-translator/config.yaml rename to langchain/openai-translator/ai_translator/config.yaml diff --git a/langchain/openai-translator/ai_translator/gradio_server.py b/langchain/openai-translator/ai_translator/gradio_server.py index 8f7d8569..ba2d6e25 100644 --- a/langchain/openai-translator/ai_translator/gradio_server.py +++ b/langchain/openai-translator/ai_translator/gradio_server.py @@ -8,11 +8,11 @@ from translator import PDFTranslator, TranslationConfig -def translation(input_file, source_language, target_language): - LOG.debug(f"[翻译任务]\n源文件: {input_file.name}\n源语言: {source_language}\n目标语言: {target_language}") +def translation(input_file, source_language, target_language, style): + LOG.debug(f"[翻译任务]\n源文件: {input_file.name}\n源语言: {source_language}\n目标语言: {target_language}\n风格:{style}") output_file_path = Translator.translate_pdf( - input_file.name, source_language=source_language, target_language=target_language) + input_file.name, source_language=source_language, target_language=target_language, style=style) return output_file_path @@ -24,10 +24,12 @@ def launch_gradio(): inputs=[ gr.File(label="上传PDF文件"), gr.Textbox(label="源语言(默认:英文)", placeholder="English", value="English"), - gr.Textbox(label="目标语言(默认:中文)", placeholder="Chinese", value="Chinese") + gr.Textbox(label="目标语言(默认:中文)", placeholder="Chinese", value="Chinese"), + gr.Textbox(label="风格(默认:小说)", placeholder="Novel", value="novel") ], outputs=[ - gr.File(label="下载翻译文件") + gr.File(label="下载翻译文件"), + # gr.Markdown(label="预览文件") ], allow_flagging="never" ) diff --git a/langchain/openai-translator/ai_translator/translator/pdf_translator.py b/langchain/openai-translator/ai_translator/translator/pdf_translator.py index 873dee77..2334c741 100644 --- a/langchain/openai-translator/ai_translator/translator/pdf_translator.py +++ b/langchain/openai-translator/ai_translator/translator/pdf_translator.py @@ -4,6 +4,7 @@ from translator.translation_chain import TranslationChain from utils import LOG + class PDFTranslator: def __init__(self, model_name: str): self.translate_chain = TranslationChain(model_name) @@ -11,19 +12,20 @@ def __init__(self, model_name: str): self.writer = Writer() def translate_pdf(self, - input_file: str, - output_file_format: str = 'markdown', - source_language: str = "English", - target_language: str = 'Chinese', - pages: Optional[int] = None): - + input_file: str, + output_file_format: str = 'markdown', + source_language: str = "English", + target_language: str = 'Chinese', + style: str = 'novel', + pages: Optional[int] = None): + self.book = self.pdf_parser.parse_pdf(input_file, pages) for page_idx, page in enumerate(self.book.pages): for content_idx, content in enumerate(page.contents): # Translate content.original - translation, status = self.translate_chain.run(content, source_language, target_language) + translation, status = self.translate_chain.run(content, source_language, target_language, style) # Update the content in self.book.pages directly self.book.pages[page_idx].contents[content_idx].set_translation(translation, status) - + return self.writer.save_translated_book(self.book, output_file_format) diff --git a/langchain/openai-translator/ai_translator/translator/translation_chain.py b/langchain/openai-translator/ai_translator/translator/translation_chain.py index dbcf53fc..d0dbb7a7 100644 --- a/langchain/openai-translator/ai_translator/translator/translation_chain.py +++ b/langchain/openai-translator/ai_translator/translator/translation_chain.py @@ -9,12 +9,14 @@ from utils import LOG + class TranslationChain: def __init__(self, model_name: str = "gpt-3.5-turbo", verbose: bool = True): - + # 翻译任务指令始终由 System 角色承担 template = ( - """You are a translation expert, proficient in various languages. \n + """You are a translation expert, proficient in various languages, \n + and you can navigate a wide variety of translation styles. Now use '{style}' style Translates {source_language} to {target_language}.""" ) system_message_prompt = SystemMessagePromptTemplate.from_template(template) @@ -33,13 +35,14 @@ def __init__(self, model_name: str = "gpt-3.5-turbo", verbose: bool = True): self.chain = LLMChain(llm=chat, prompt=chat_prompt_template, verbose=verbose) - def run(self, text: str, source_language: str, target_language: str) -> (str, bool): + def run(self, text: str, source_language: str, target_language: str, style: str) -> (str, bool): result = "" try: result = self.chain.run({ "text": text, "source_language": source_language, "target_language": target_language, + "style": style }) except Exception as e: LOG.error(f"An error occurred during translation: {e}")