Sleeping-post-detection-fir.../docs/update_translations.py

181 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Script to fix broken Markdown links and front matter in language-specific directories such as zh, ko, ja, ru, de, fr,
es, pt, hi and ar.
This script processes markdown files in language-specific directories (like /zh/). It finds Markdown links and checks
their existence. If a link is broken and does not exist in the language-specific directory but exists in the /en/
directory, the script updates the link to point to the corresponding file in the /en/ directory.
It also ensures that front matter keywords like 'comments:', 'description:', and 'keywords:' are not translated and
remain in English.
"""
import re
from pathlib import Path
class MarkdownLinkFixer:
    """
    Fix Markdown links, front matter, admonitions and HTML snippets in language-specific docs directories.

    Attributes:
        base_dir (Path): Resolved docs root that contains the two-letter language directories.
        update_links (bool): Whether broken '.md' links are redirected to their /en/ counterpart.
        update_text (bool): Whether front matter, admonitions, iframes and HTML tags are normalized.
        md_link_regex (re.Pattern): Matches '[text](path.md)' links whose target contains no ':' (i.e. no URL scheme).
    """

    def __init__(self, base_dir, update_links=True, update_text=True):
        """
        Initialize the MarkdownLinkFixer with the base directory.

        Args:
            base_dir (str | Path): Docs root directory holding the language subdirectories.
            update_links (bool): Enable link redirection.
            update_text (bool): Enable front matter, admonition, iframe and HTML tag fixes.
        """
        # Resolve once so that every later relative_to() call compares absolute paths with absolute paths
        self.base_dir = Path(base_dir).resolve()
        self.update_links = update_links
        self.update_text = update_text
        self.md_link_regex = re.compile(r'\[([^]]+)]\(([^:)]+)\.md\)')

    @staticmethod
    def replace_front_matter(content, lang_dir):
        """
        Ensure front matter keywords remain in English.

        Args:
            content (str): Markdown file content.
            lang_dir (Path): Language directory; its stem (e.g. 'zh') selects the translation table.

        Returns:
            (str): Content with translated 'comments', 'description' and 'keywords' keys restored to English.
                Languages without a translation table are returned unchanged.
        """
        english = ['comments', 'description', 'keywords']
        translations = {
            'zh': ['评论', '描述', '关键词'],  # Mandarin Chinese (Simplified) warning, sometimes translates as 关键字
            'es': ['comentarios', 'descripción', 'palabras clave'],  # Spanish
            'ru': ['комментарии', 'описание', 'ключевые слова'],  # Russian
            'pt': ['comentários', 'descrição', 'palavras-chave'],  # Portuguese
            'fr': ['commentaires', 'description', 'mots-clés'],  # French
            'de': ['kommentare', 'beschreibung', 'schlüsselwörter'],  # German
            'ja': ['コメント', '説明', 'キーワード'],  # Japanese
            'ko': ['댓글', '설명', '키워드'],  # Korean
            'hi': ['टिप्पणियाँ', 'विवरण', 'कीवर्ड'],  # Hindi
            'ar': ['التعليقات', 'الوصف', 'الكلمات الرئيسية']  # Arabic
        }  # front matter translations for comments, description, keyword

        for term, eng_key in zip(translations.get(lang_dir.stem, []), english):
            term_re = re.escape(term)  # terms are literal text, never regex syntax
            if eng_key == 'comments':
                # The value of 'comments' is discarded as well and forced to 'true'
                content = re.sub(rf'{term_re} *[:].*', f'{eng_key}: true', content, flags=re.IGNORECASE)
            else:
                # Only the key is replaced; the (translated) value is kept
                content = re.sub(rf'{term_re} *[:] *', f'{eng_key}: ', content, flags=re.IGNORECASE)
        return content

    @staticmethod
    def replace_admonitions(content, lang_dir):
        """
        Ensure admonition types remain in English while keeping the translated word as the admonition title.

        For non-English directories a bare marker such as '!!! Note' or '!!! Nota' becomes '!!! Note "Nota"'.
        Any other marker that starts with a translated type gets that type replaced by the English one, and a marker
        followed directly by a quoted title gets the 'Example' type.

        Args:
            content (str): Markdown file content.
            lang_dir (Path): Language directory; its stem (e.g. 'es') selects the translation table.

        Returns:
            (str): Content with normalized admonition markers.
        """
        english = [
            'Note', 'Summary', 'Tip', 'Info', 'Success', 'Question', 'Warning', 'Failure', 'Danger', 'Bug', 'Example',
            'Quote', 'Abstract', 'Seealso', 'Admonition']
        translations = {
            'en':
                english,
            'zh': ['笔记', '摘要', '提示', '信息', '成功', '问题', '警告', '失败', '危险', '故障', '示例', '引用', '摘要', '另见', '警告'],
            'es': [
                'Nota', 'Resumen', 'Consejo', 'Información', 'Éxito', 'Pregunta', 'Advertencia', 'Fracaso', 'Peligro',
                'Error', 'Ejemplo', 'Cita', 'Abstracto', 'Véase También', 'Amonestación'],
            'ru': [
                'Заметка', 'Сводка', 'Совет', 'Информация', 'Успех', 'Вопрос', 'Предупреждение', 'Неудача', 'Опасность',
                'Ошибка', 'Пример', 'Цитата', 'Абстракт', 'См. Также', 'Предостережение'],
            'pt': [
                'Nota', 'Resumo', 'Dica', 'Informação', 'Sucesso', 'Questão', 'Aviso', 'Falha', 'Perigo', 'Bug',
                'Exemplo', 'Citação', 'Abstrato', 'Veja Também', 'Advertência'],
            'fr': [
                'Note', 'Résumé', 'Conseil', 'Info', 'Succès', 'Question', 'Avertissement', 'Échec', 'Danger', 'Bug',
                'Exemple', 'Citation', 'Abstrait', 'Voir Aussi', 'Admonestation'],
            'de': [
                'Hinweis', 'Zusammenfassung', 'Tipp', 'Info', 'Erfolg', 'Frage', 'Warnung', 'Ausfall', 'Gefahr',
                'Fehler', 'Beispiel', 'Zitat', 'Abstrakt', 'Siehe Auch', 'Ermahnung'],
            'ja': ['ノート', '要約', 'ヒント', '情報', '成功', '質問', '警告', '失敗', '危険', 'バグ', '例', '引用', '抄録', '参照', '訓告'],
            'ko': ['노트', '요약', '팁', '정보', '성공', '질문', '경고', '실패', '위험', '버그', '예제', '인용', '추상', '참조', '경고'],
            'hi': [
                'नोट', 'सारांश', 'सुझाव', 'जानकारी', 'सफलता', 'प्रश्न', 'चेतावनी', 'विफलता', 'खतरा', 'बग', 'उदाहरण',
                'उद्धरण', 'सार', 'देखें भी', 'आगाही'],
            'ar': [
                'ملاحظة', 'ملخص', 'نصيحة', 'معلومات', 'نجاح', 'سؤال', 'تحذير', 'فشل', 'خطر', 'عطل', 'مثال', 'اقتباس',
                'ملخص', 'انظر أيضاً', 'تحذير']}

        is_translation = lang_dir.stem != 'en'
        for term, eng_key in zip(translations.get(lang_dir.stem, []), english):
            if not term:
                # An empty term would turn '!!! *{term}' into a pattern matching EVERY admonition marker
                continue
            term_re = re.escape(term)  # e.g. the '.' in 'См. Также' must match a literal dot only
            if is_translation:
                # Bare markers ('!!! Note' or '!!! Nota' at end of line) get the translated word as their title
                content = re.sub(rf'!!! *{eng_key} *\n', f'!!! {eng_key} "{term}"\n', content, flags=re.IGNORECASE)
                content = re.sub(rf'!!! *{term_re} *\n', f'!!! {eng_key} "{term}"\n', content, flags=re.IGNORECASE)
            # Remaining markers starting with the translated type (e.g. followed by a title) get the English type
            content = re.sub(rf'!!! *{term_re}', f'!!! {eng_key}', content, flags=re.IGNORECASE)
            # Markers that have a quoted title but no type at all default to 'Example'
            content = re.sub(r'!!! *"', '!!! Example "', content, flags=re.IGNORECASE)
        return content

    @staticmethod
    def update_iframe(content):
        """
        Reset the 'allow' attribute of iframes whose value does not start with the expected English permissions.

        Args:
            content (str): Markdown file content.

        Returns:
            (str): Content in which every non-conforming allow="..." attribute carries the English permission list.
        """
        english = 'accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share'
        # [^"]* keeps the match inside one attribute value, so an empty allow="" cannot swallow the next attribute
        pattern = re.compile(f'allow="(?!{re.escape(english)})[^"]*"')
        return pattern.sub(f'allow="{english}"', content)

    def link_replacer(self, match, parent_dir, lang_dir, use_abs_link=False):
        """
        Replace broken links with corresponding links in the /en/ directory.

        Args:
            match (re.Match): Match of `md_link_regex`; group 1 is the link text, group 2 the target without '.md'.
            parent_dir (Path): Directory of the Markdown file that contains the link.
            lang_dir (Path): Language directory currently being processed.
            use_abs_link (bool): Build an absolute-style link instead of a relative one. Flagged as buggy, keep False.

        Returns:
            (str): The rewritten link if the target only exists under /en/, otherwise the original link text.
        """
        text, path = match.groups()
        # Work on resolved paths throughout so relative_to() and the string replacement below stay consistent
        parent_dir = Path(parent_dir).resolve()
        lang_dir = Path(lang_dir).resolve()
        # Append '.md' rather than with_suffix(), which would clobber dotted names such as 'yolov8.1'
        linked_path = (parent_dir / f'{path}.md').resolve()

        if not linked_path.exists():
            en_linked_path = Path(str(linked_path).replace(str(lang_dir), str(lang_dir.parent / 'en')))
            if en_linked_path.exists():
                if use_abs_link:
                    # Use absolute links WARNING: BUGS, DO NOT USE
                    docs_root_relative_path = en_linked_path.relative_to(lang_dir.parent)
                    updated_path = str(docs_root_relative_path).replace('en/', '/../')
                else:
                    # Use relative links: climb from the current file up to the docs root, then descend into en/
                    steps_up = len(parent_dir.relative_to(self.base_dir).parts)
                    updated_path = Path('../' * steps_up) / en_linked_path.relative_to(self.base_dir)
                    # Links always use '/', whatever the OS separator. The '/en/' segment is dropped from the link
                    # NOTE(review): presumably because English pages are served from the site root - confirm
                    updated_path = updated_path.as_posix().replace('/en/', '/')

                print(f"Redirecting link '[{text}]({path})' from {parent_dir} to {updated_path}")
                return f'[{text}]({updated_path})'
            else:
                print(f"Warning: Broken link '[{text}]({path})' found in {parent_dir} does not exist in /docs/en/.")

        return match.group(0)

    @staticmethod
    def update_html_tags(content):
        """
        Update HTML tags in docs.

        Removes the closing slash of self-closing tags and gives Markdown and HTML images without alternative text
        a placeholder.

        Args:
            content (str): Markdown file content.

        Returns:
            (str): Content with normalized tags and placeholder alt text.
        """
        alt_tag = 'MISSING'

        # Remove closing slashes from self-closing HTML tags
        self_closing = re.compile(r'<([^>]+?)\s*/>')
        content = self_closing.sub(r'<\1>', content)

        # Find all images without alt tags and add placeholder alt text
        md_image = re.compile(r'!\[(.*?)\]\((.*?)\)')
        content = md_image.sub(lambda m: f'![{m.group(1) or alt_tag}]({m.group(2)})', content)

        # Add missing alt tags to HTML images. The lookahead is limited to the tag itself ([^>]*) and to a real
        # attribute ('alt' followed by '='), so neighbouring tags or file names containing 'alt' do not count
        html_image = re.compile(r'<img\s+(?![^>]*\balt\s*=)[^>]*src=["\'](.*?)["\'][^>]*>')
        content = html_image.sub(lambda m: m.group(0).replace('>', f' alt="{alt_tag}">', 1), content)

        return content

    def process_markdown_file(self, md_file_path, lang_dir):
        """
        Process each markdown file in the language directory.

        Args:
            md_file_path (Path): Markdown file to fix in place.
            lang_dir (Path): Language directory the file belongs to.
        """
        print(f'Processing file: {md_file_path}')
        md_file_path = Path(md_file_path)
        with open(md_file_path, encoding='utf-8') as file:
            original = file.read()
        content = original

        if self.update_links:
            content = self.md_link_regex.sub(lambda m: self.link_replacer(m, md_file_path.parent, lang_dir), content)

        if self.update_text:
            content = self.replace_front_matter(content, lang_dir)
            content = self.replace_admonitions(content, lang_dir)
            content = self.update_iframe(content)
            content = self.update_html_tags(content)

        # Leave untouched files alone so their modification time does not change on every run
        if content != original:
            with open(md_file_path, 'w', encoding='utf-8') as file:
                file.write(content)

    def process_language_directory(self, lang_dir):
        """
        Process each language-specific directory.

        Args:
            lang_dir (Path): Language directory whose '*.md' files are processed recursively.
        """
        print(f'Processing language directory: {lang_dir}')
        for md_file in lang_dir.rglob('*.md'):
            self.process_markdown_file(md_file, lang_dir)

    def run(self):
        """Run the link fixing and front matter updating process for each language-specific directory."""
        for subdir in self.base_dir.iterdir():
            # Language directories are recognised by their two-character name (e.g. 'zh', 'en')
            if subdir.is_dir() and re.match(r'^\w\w$', subdir.name):
                self.process_language_directory(subdir)
if __name__ == '__main__':
    # The docs root defaults to the folder containing this script; point it at your MkDocs 'docs' directory if needed
    script_dir = Path(__file__).parent.resolve()
    MarkdownLinkFixer(str(script_dir), update_links=True, update_text=True).run()