Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

风格化翻译 #100

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions langchain/openai-translator/ai_translator/gradio_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
from translator import PDFTranslator, TranslationConfig


def translation(input_file, source_language, target_language, style="novel"):
    """Translate an uploaded PDF and return the path of the translated file.

    Receives the values of the form inputs (file, source language, target
    language, style) and delegates the work to the module-level
    ``Translator``.

    Args:
        input_file: Uploaded file object. Only its ``name`` attribute is
            read and passed on as the input path.
        source_language: Language of the document being translated.
        target_language: Language to translate the document into.
        style: Translation style passed to ``Translator.translate_pdf``.
            Defaults to ``"novel"`` so callers that still pass only three
            arguments keep working.

    Returns:
        The value returned by ``Translator.translate_pdf`` (the output file
        path).
    """
    # A cleared textbox is submitted as an empty string; fall back to the
    # default style rather than sending an empty style name downstream.
    style = (style or "").strip() or "novel"

    LOG.debug(f"[翻译任务]\n源文件: {input_file.name}\n源语言: {source_language}\n目标语言: {target_language}\n风格:{style}")

    output_file_path = Translator.translate_pdf(
        input_file.name,
        source_language=source_language,
        target_language=target_language,
        style=style,
    )

    return output_file_path

Expand All @@ -24,10 +24,12 @@ def launch_gradio():
inputs=[
gr.File(label="上传PDF文件"),
gr.Textbox(label="源语言(默认:英文)", placeholder="English", value="English"),
gr.Textbox(label="目标语言(默认:中文)", placeholder="Chinese", value="Chinese")
gr.Textbox(label="目标语言(默认:中文)", placeholder="Chinese", value="Chinese"),
gr.Textbox(label="风格(默认:小说)", placeholder="Novel", value="novel")
],
outputs=[
gr.File(label="下载翻译文件")
gr.File(label="下载翻译文件"),
# gr.Markdown(label="预览文件")
],
allow_flagging="never"
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,28 @@
from translator.translation_chain import TranslationChain
from utils import LOG


class PDFTranslator:
    """Parse a PDF, translate each content item, and write the result out."""

    def __init__(self, model_name: str):
        """Create the translation chain, PDF parser and writer.

        Args:
            model_name: Model name handed to ``TranslationChain``.
        """
        self.translate_chain = TranslationChain(model_name)
        self.pdf_parser = PDFParser()
        self.writer = Writer()

    def translate_pdf(self,
                      input_file: str,
                      output_file_format: str = 'markdown',
                      source_language: str = "English",
                      target_language: str = 'Chinese',
                      style: str = 'novel',
                      pages: Optional[int] = None):
        """Translate ``input_file`` and save it in ``output_file_format``.

        NOTE(review): ``style`` sits before ``pages`` in the positional
        order. Callers that used to pass ``pages`` as the fifth positional
        argument would now bind it to ``style`` -- confirm that every caller
        passes ``pages`` by keyword.

        Args:
            input_file: Path of the PDF handed to the parser.
            output_file_format: Format name forwarded to the writer.
            source_language: Language of the original text.
            target_language: Language to translate into.
            style: Translation style forwarded to the translation chain.
            pages: Forwarded unchanged to ``PDFParser.parse_pdf``.

        Returns:
            The value returned by ``Writer.save_translated_book``.
        """
        # The parsed book is kept on the instance; it is replaced on every
        # call to this method.
        self.book = self.pdf_parser.parse_pdf(input_file, pages)

        for page_idx, page in enumerate(self.book.pages):
            for content_idx, content in enumerate(page.contents):
                # Translate this content item in the requested style.
                translation, status = self.translate_chain.run(
                    content, source_language, target_language, style)
                # Store the result on the item held by self.book.pages.
                self.book.pages[page_idx].contents[content_idx].set_translation(translation, status)

        return self.writer.save_translated_book(self.book, output_file_format)
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@

from utils import LOG


class TranslationChain:
def __init__(self, model_name: str = "gpt-3.5-turbo", verbose: bool = True):

# 翻译任务指令始终由 System 角色承担
template = (
"""You are a translation expert, proficient in various languages. \n
"""You are a translation expert, proficient in various languages, \n
and you can navigate a wide variety of translation styles. Now use '{style}' style
Translates {source_language} to {target_language}."""
)
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
Expand All @@ -33,13 +35,14 @@ def __init__(self, model_name: str = "gpt-3.5-turbo", verbose: bool = True):

self.chain = LLMChain(llm=chat, prompt=chat_prompt_template, verbose=verbose)

def run(self, text: str, source_language: str, target_language: str) -> (str, bool):
def run(self, text: str, source_language: str, target_language: str, style: str) -> (str, bool):
result = ""
try:
result = self.chain.run({
"text": text,
"source_language": source_language,
"target_language": target_language,
"style": style
})
except Exception as e:
LOG.error(f"An error occurred during translation: {e}")
Expand Down