You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

249 lines
7.1 KiB

  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. Author: Sotiris Papadopoulos <ytubedlg@gmail.com>
  5. Last-Revision: 2017-04-19
  6. Script to automatically check PO files
  7. """
  8. from __future__ import unicode_literals
  9. import os
  10. import sys
  11. import logging
  12. import argparse
  13. from time import sleep
  14. from datetime import datetime, timedelta
  15. try:
  16. import polib
  17. import google_translate
  18. except ImportError as error:
  19. print(error)
  20. sys.exit(1)
  21. WTIME = 2.0 # Time in seconds to wait between requests to avoid ban
  22. PACKAGE = "youtube_dl_gui"
  23. PO_FILENAME = "{}.po".format(PACKAGE)
  24. LOCALE_PATH_TMPL = os.path.join(PACKAGE, "locale", "{lang}", "LC_MESSAGES", PO_FILENAME)
  25. logging.basicConfig(level=logging.ERROR)
  26. def parse():
  27. """Parse command line arguments."""
  28. parser = argparse.ArgumentParser(description="Script to automatically check PO files")
  29. parser.add_argument("language", help="language of the PO file to check")
  30. parser.add_argument("-w", "--werror", action="store_true", help="treat all warning messages as errors")
  31. parser.add_argument("-o", "--only-headers", action="store_true", help="check only the PO file headers")
  32. parser.add_argument("-n", "--no-translate", action="store_true", help="do not use the translator to check 'msgstr' fields")
  33. return parser.parse_args()
  34. def parse_date(date_string):
  35. """Parse date string into a datetime object."""
  36. return datetime.strptime(date_string, "%Y-%m-%d %H:%M+%Z")
  37. # Print helpers
  38. def my_print(msg, char="*", value=None, exit=False):
  39. """Print 'msg', debug 'value' and exit if 'exit' is True."""
  40. print("[{}] {}".format(char, msg))
  41. if value is not None:
  42. print("\tvalue= \"{}\"".format(value))
  43. if exit:
  44. sys.exit(1)
  45. def perror(msg, value=None):
  46. my_print(msg, "-", value, True)
  47. def pwarn(msg, value=None, exit=False):
  48. my_print(msg, "!", value, exit)
  49. def pinfo(msg):
  50. my_print(msg)
  51. #############################
  52. def main(args):
  53. os.chdir("..")
  54. # setup
  55. pot_file_path = LOCALE_PATH_TMPL.format(lang="en_US")
  56. po_file_path = LOCALE_PATH_TMPL.format(lang=args.language)
  57. if not os.path.exists(pot_file_path):
  58. perror("Failed to locate POT file, exiting...", pot_file_path)
  59. if not os.path.exists(po_file_path):
  60. perror("Failed to locate PO file, exiting...", po_file_path)
  61. pot_file = polib.pofile(pot_file_path)
  62. po_file = polib.pofile(po_file_path)
  63. # check headers
  64. pinfo("Checking PO headers")
  65. pot_headers = pot_file.metadata
  66. po_headers = po_file.metadata
  67. if pot_headers["Project-Id-Version"] != po_headers["Project-Id-Version"]:
  68. pwarn("'Project-Id-Version' headers do not match", exit=args.werror)
  69. if pot_headers["POT-Creation-Date"] != po_headers["POT-Creation-Date"]:
  70. pwarn("'POT-Creation-Date' headers do not match", exit=args.werror)
  71. po_creation_date = parse_date(po_headers["POT-Creation-Date"])
  72. po_revision_date = parse_date(po_headers["PO-Revision-Date"])
  73. timediff = po_revision_date - po_creation_date
  74. if timediff.days <= 0:
  75. pwarn("PO file seems outdated", exit=args.werror)
  76. if "Language" in po_headers and po_headers["Language"] != args.language:
  77. pwarn("'Language' header does not match with the given language", po_headers["Language"], args.werror)
  78. pinfo("Last-Translator: {}".format(po_headers["Last-Translator"]))
  79. # check translations
  80. if args.only_headers:
  81. sys.exit(0)
  82. pinfo("Checking translations, this might take a while...")
  83. eta = timedelta(seconds=len(pot_file) * WTIME)
  84. pinfo("Approximate time to check translations online: {}".format(eta))
  85. pot_msgid = [entry.msgid for entry in pot_file]
  86. po_msgid = [entry.msgid for entry in po_file]
  87. # lists to hold reports
  88. missing_msgid = []
  89. not_translated = []
  90. same_msgstr = []
  91. with_typo = []
  92. verify_trans = []
  93. fuzzy_trans = po_file.fuzzy_entries()
  94. for msgid in pot_msgid:
  95. if msgid not in po_msgid:
  96. missing_msgid.append(msgid)
  97. translator = google_translate.GoogleTranslator(timeout=5.0, retries=2, wait_time=WTIME)
  98. # Get a valid source language for Google
  99. # for example convert 'ar_SA' to 'ar' or 'zh_CN' to 'zh-CN'
  100. src_lang = args.language
  101. if src_lang not in translator._lang_dict:
  102. src_lang = src_lang.replace("_", "-")
  103. if src_lang not in translator._lang_dict:
  104. src_lang = src_lang.split("-")[0]
  105. for entry in po_file:
  106. if not entry.translated():
  107. not_translated.append(entry)
  108. elif entry.msgid == entry.msgstr:
  109. same_msgstr.append(entry)
  110. else:
  111. if args.no_translate:
  112. continue
  113. word_dict = translator.get_info_dict(entry.msgstr, "en", src_lang)
  114. if word_dict is not None:
  115. if word_dict["has_typo"]:
  116. with_typo.append(entry)
  117. if word_dict["translation"].lower() != entry.msgid.lower():
  118. found = False
  119. # Check verbs, nouns, adverbs, etc..
  120. for key in word_dict["extra"]:
  121. if entry.msgid.lower() in word_dict["extra"][key].keys():
  122. found = True
  123. break
  124. if not found:
  125. verify_trans.append((entry, word_dict["translation"]))
  126. sleep(WTIME)
  127. # time to report
  128. print("=" * 25 + "Report" + "=" * 25)
  129. if missing_msgid:
  130. print("Missing msgids")
  131. for msgid in missing_msgid:
  132. print(" \"{}\"".format(msgid))
  133. if not_translated:
  134. print("Not translated")
  135. for entry in not_translated:
  136. print(" line: {} msgid: \"{}\"".format(entry.linenum, entry.msgid))
  137. if same_msgstr:
  138. print("Same msgstr")
  139. for entry in same_msgstr:
  140. print(" line: {} msgid: \"{}\"".format(entry.linenum, entry.msgid))
  141. if with_typo:
  142. print("With typo")
  143. for entry in with_typo:
  144. print(" line: {} msgid: \"{}\" msgstr: \"{}\"".format(entry.linenum, entry.msgid, entry.msgstr))
  145. if verify_trans:
  146. print("Verify translation")
  147. for item in verify_trans:
  148. entry, translation = item
  149. print(" line: {} msgid: \"{}\" trans: \"{}\"".format(entry.linenum, entry.msgid, translation))
  150. if fuzzy_trans:
  151. print("Fuzzy translations")
  152. for entry in fuzzy_trans:
  153. print(" line: {} msgid: \"{}\"".format(entry.linenum, entry.msgid))
  154. total = len(missing_msgid) + len(not_translated) + len(same_msgstr) + len(with_typo) + len(verify_trans) + len(fuzzy_trans)
  155. print("")
  156. print("Missing msgids\t\t: {}".format(len(missing_msgid)))
  157. print("Not translated\t\t: {}".format(len(not_translated)))
  158. print("Same msgstr\t\t: {}".format(len(same_msgstr)))
  159. print("With typo\t\t: {}".format(len(with_typo)))
  160. print("Verify translation\t: {}".format(len(verify_trans)))
  161. print("Fuzzy translations\t: {}".format(len(fuzzy_trans)))
  162. print("Total\t\t\t: {}".format(total))
# Script entry point: parse the command line and run the checks.
# A Ctrl-C during the run is reported with a short message instead of a traceback.
if __name__ == "__main__":
    try:
        main(parse())
    except KeyboardInterrupt:
        print("KeyboardInterrupt")