最近碰到一個需求:需要把外文文本批量翻譯成中文,於是寫了一個 Python 腳本。
step1:
#!/usr/bin/env python3
"""
文件翻譯腳本
功能:讀取本地文本文件,將內容翻譯成中文,保存為新文件
"""
import re
import json
import urllib.parse
import urllib.request
from pathlib import Path
def is_chinese(text: str) -> bool:
    """Return True if *text* contains at least one Chinese (Han) character.

    Only the basic CJK Unified Ideographs block (U+4E00-U+9FFF) is checked,
    so this is a heuristic: a single ideograph anywhere in the string is
    enough, and ideographs outside that block are not detected.

    Args:
        text: String to inspect. Any falsy value (``None``, ``""``) is
            treated as "not Chinese".

    Returns:
        ``True`` when any character lies in U+4E00-U+9FFF, else ``False``.
    """
    if not text:
        return False
    return re.search(r'[\u4e00-\u9fff]', text) is not None
def _extract_translations(data):
    """Collect candidate translations from a decoded API reply.

    The reply layout is not documented; the positions read here (0 = main
    translation segments, 5 = alternatives, 1 = dictionary entries) are the
    ones this script has always assumed -- TODO confirm against live replies.

    Args:
        data: The JSON-decoded response body.

    Returns:
        A list of unique, non-empty strings with the main translation first.
        Empty when nothing usable was found or *data* is not a list.
    """
    results = []
    if not isinstance(data, list):
        return results

    # 1. Main translation: concatenate the first field of every segment.
    if data and isinstance(data[0], list):
        main_translation = "".join(
            str(segment[0])
            for segment in data[0]
            if isinstance(segment, list) and segment and segment[0]
        )
        if main_translation:
            results.append(main_translation)

    # 2. Alternatives (index 5), then 3. dictionary entries (index 1).
    #    Both are read the same way: each group carries a list of strings
    #    in its second field.
    for index in (5, 1):
        if len(data) <= index or not isinstance(data[index], list):
            continue
        for group in data[index]:
            if not (isinstance(group, list) and len(group) > 1
                    and isinstance(group[1], list)):
                continue
            for candidate in group[1]:
                if (isinstance(candidate, str) and candidate
                        and candidate not in results):
                    results.append(candidate)
    return results


def translate_text(text, target_lang='zh-CN', timeout=10):
    """Translate a single piece of text via the Google Translate web endpoint.

    Args:
        text: Text to translate. ``None``, empty and whitespace-only input
            yields an empty list without any network access.
        target_lang: Target language code passed as the ``tl`` query
            parameter. When it is ``'zh-CN'`` and *text* already contains
            Chinese characters, *text* is returned unchanged.
        timeout: Seconds to wait for the HTTP request before giving up, so
            an unresponsive server cannot hang the caller forever.

    Returns:
        A list of translations, best first. The list is empty when the
        request fails or the reply contains no translation; failures are
        reported by printing, never as list entries, so callers can safely
        treat ``results[0]`` as translated text.
    """
    if not text or not text.strip():
        return []
    # Already Chinese: nothing to do for a Chinese target.
    if target_lang == 'zh-CN' and is_chinese(text):
        return [text]

    # Build the query with urlencode so every value (including target_lang)
    # is escaped; quote_via=quote keeps spaces as %20 like the original URL.
    query = urllib.parse.urlencode(
        [
            ('client', 'gtx'),
            ('sl', 'auto'),
            ('tl', target_lang),
            ('dt', 't'),
            ('dt', 'at'),
            ('dt', 'bd'),
            ('q', text),
        ],
        quote_via=urllib.parse.quote,
    )
    url = f"https://translate.googleapis.com/translate_a/single?{query}"

    try:
        req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        with urllib.request.urlopen(req, timeout=timeout) as response:
            data = json.loads(response.read().decode('utf-8'))
    except Exception as exc:
        # Deliberate best-effort boundary: any network or decoding problem
        # means "no translation" rather than a crash.
        print(f"翻譯錯誤: {exc}")
        return []

    return _extract_translations(data)
def translate_file_content(content, target_lang='zh-CN'):
    """Translate text line by line while keeping its line structure.

    Each line is right-stripped. Blank lines are kept as empty lines, lines
    that already contain Chinese are copied as-is when the target is
    ``'zh-CN'``, and every other line is sent to ``translate_text``. A line
    whose translation fails is kept in its original form. Progress is
    printed for every processed line.

    Args:
        content: Full text to translate. Falsy input returns ``""``.
        target_lang: Target language code forwarded to ``translate_text``.

    Returns:
        The translated text joined with ``"\\n"``. If *content* ended with a
        line break, the result ends with ``"\\n"`` as well.
    """
    if not content:
        return ""

    # Failure can be reported in-band as a message string instead of an
    # empty result; such messages must never be written out as translations.
    failure_markers = ("翻譯錯誤: ", "未找到翻譯結果")

    lines = content.splitlines()
    total = len(lines)
    translated_lines = []

    for number, raw_line in enumerate(lines, start=1):
        line = raw_line.rstrip()

        # Blank lines are preserved to keep paragraph spacing.
        if not line:
            translated_lines.append("")
            continue

        # Lines that are already Chinese need no translation.
        if target_lang == 'zh-CN' and is_chinese(line):
            translated_lines.append(line)
            print(f"第 {number} 行: 已是中文,跳過翻譯")
            continue

        preview = f"{line[:50]}..." if len(line) > 50 else line
        print(f"翻譯第 {number}/{total} 行: {preview}")

        try:
            results = translate_text(line, target_lang)
        except Exception as e:
            print(f" -> 翻譯錯誤: {e}")
            translated_lines.append(line)  # keep the source line on error
            continue

        best = results[0] if results else ""
        if best and not best.startswith(failure_markers):
            translated_lines.append(best)
            if len(results) > 1:
                print(f" -> 備選結果: {', '.join(results[1:3])}")
        else:
            if best:
                print(f" -> {best}")  # surface the reported reason
            translated_lines.append(line)  # keep the source line on failure
            print(f" -> 翻譯失敗,保留原文")

    output = "\n".join(translated_lines)
    # splitlines() drops the final line break; put it back so the output
    # file ends the same way the input did.
    if content.endswith(("\n", "\r")):
        output += "\n"
    return output
def translate_file(file_path, target_lang='zh-CN'):
    """Read a text file, translate it and save the result next to it.

    The output file is written in UTF-8 to the same directory as the input,
    named ``<stem>_translated<suffix>``. Progress and a short line-count
    summary are printed.

    Args:
        file_path: Path of the UTF-8 text file to translate.
        target_lang: Target language code forwarded to
            ``translate_file_content``.

    Returns:
        ``True`` when the translated file was written. ``False`` when the
        path does not exist, is not a regular file, is empty or
        whitespace-only, is not valid UTF-8, or any other error occurs
        (the error is printed, not raised).
    """
    path = Path(file_path)
    if not path.exists():
        print(f"錯誤: 文件不存在 - {file_path}")
        return False
    if not path.is_file():
        print(f"錯誤: 不是有效的文件 - {file_path}")
        return False

    new_file_path = path.with_name(f"{path.stem}_translated{path.suffix}")

    try:
        print(f"正在讀取文件: {file_path}")
        # utf-8-sig reads plain UTF-8 too, but also drops a leading BOM so
        # it is not sent to the translator as part of the first line.
        with open(path, 'r', encoding='utf-8-sig') as f:
            content = f.read()
        print(f"文件大小: {len(content)} 字符")

        if not content.strip():
            print("文件內容為空")
            return False

        print(f"開始翻譯,目標語言: {target_lang}")
        print("-" * 50)
        translated_content = translate_file_content(content, target_lang)
        print("-" * 50)
        print("翻譯完成!")

        print(f"正在保存文件: {new_file_path}")
        with open(new_file_path, 'w', encoding='utf-8') as f:
            f.write(translated_content)

        # Summary counts only non-blank lines on both sides.
        original_lines = sum(
            1 for row in content.splitlines() if row.strip()
        )
        translated_lines = sum(
            1 for row in translated_content.splitlines() if row.strip()
        )
        print(f"\n翻譯統計:")
        print(f" - 原文行數: {original_lines}")
        print(f" - 譯文行數: {translated_lines}")
        print(f" - 保存位置: {new_file_path}")
        return True
    except UnicodeDecodeError:
        print(f"錯誤: 文件編碼不是UTF-8,請確保文件使用UTF-8編碼")
        return False
    except Exception as e:
        # Top-level boundary for this tool: report and signal failure.
        print(f"錯誤: {e}")
        return False
def main():
    """Run the interactive translation tool.

    Translates the hard-coded default file. If that file does not exist,
    up to five ``*.txt`` files from the hard-coded downloads folder are
    listed (sorted by path) and the user is asked to pick one by number.
    Any other answer, or a closed standard input, cancels the run.
    """
    # File to translate.
    file_path = r"D:\Users\wangrusheng\Downloads\za.txt"

    print("=" * 60)
    print("文件翻譯工具 v1.0")
    print("功能: 讀取文本文件,翻譯成中文")
    print("=" * 60)

    if not Path(file_path).exists():
        print(f"警告: 指定文件不存在")
        print(f"文件路徑: {file_path}")

        # Fall back to offering other text files from the downloads folder.
        downloads_dir = Path(r"D:\Users\wangrusheng\Downloads")
        # Sorted so the numbering is stable between runs; only the first
        # five are offered.
        candidates = sorted(downloads_dir.glob("*.txt"))[:5]
        if not candidates:
            print("未找到任何txt文件,請檢查文件路徑")
            return

        print(f"\n在下載文件夾中找到以下txt文件:")
        for i, f in enumerate(candidates, 1):
            print(f" {i}. {f.name}")

        try:
            choice = input("\n請選擇要翻譯的文件編號(1-5),或按回車退出: ").strip()
        except EOFError:
            # No interactive input available: behave like "Enter to quit".
            choice = ""

        # isdecimal() only accepts characters int() can parse, unlike
        # isdigit(), which also accepts e.g. superscript digits.
        if choice.isdecimal() and 1 <= int(choice) <= len(candidates):
            file_path = str(candidates[int(choice) - 1])
            print(f"已選擇: {file_path}")
        else:
            print("已取消")
            return

    print(f"\n開始處理文件: {file_path}")
    success = translate_file(file_path)
    if success:
        print(f"\n✓ 翻譯完成!")
    else:
        print(f"\n✗ 翻譯失敗")
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
親測可用,效果很好,速來體驗,需要可自取。
end