You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

300 lines
8.5 KiB

  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. Author: Sotiris Papadopoulos <ytubedlg@gmail.com>
  5. Last-Revision: 2017-04-19
  6. Script to automatically check PO files
  7. """
  8. from __future__ import unicode_literals
  9. import os
  10. import sys
  11. import logging
  12. import argparse
  13. from time import sleep
  14. from datetime import datetime, timedelta, tzinfo
  15. try:
  16. import polib
  17. import google_translate
  18. except ImportError as error:
  19. print(error)
  20. sys.exit(1)
  21. WTIME = 2.0 # Time in seconds to wait between requests to avoid ban
  22. PACKAGE = "youtube_dl_gui"
  23. PO_FILENAME = "{}.po".format(PACKAGE)
  24. LOCALE_PATH_TMPL = os.path.join(PACKAGE, "locale", "{lang}", "LC_MESSAGES", PO_FILENAME)
  25. logging.basicConfig(level=logging.ERROR)
  26. def parse():
  27. """Parse command line arguments."""
  28. parser = argparse.ArgumentParser(description="Script to automatically check PO files")
  29. parser.add_argument("language", help="language of the PO file to check")
  30. parser.add_argument("-w", "--werror", action="store_true", help="treat all warning messages as errors")
  31. parser.add_argument("-o", "--only-headers", action="store_true", help="check only the PO file headers")
  32. parser.add_argument("-n", "--no-translate", action="store_true", help="do not use the translator to check 'msgstr' fields")
  33. return parser.parse_args()
  34. class UTC_Offset_Timezone(tzinfo):
  35. """Class that represents a UTC offset in the format +/-0000."""
  36. def __init__(self, offset_string):
  37. self.offset = timedelta(seconds=UTC_Offset_Timezone.parse_offset(offset_string))
  38. def utcoffset(self, dt):
  39. return self.offset + self.dst(dt)
  40. def dst(self, dt):
  41. return timedelta(0)
  42. @staticmethod
  43. def parse_offset(offset_string):
  44. """Parse the offset string into seconds."""
  45. if len(offset_string) != 5:
  46. raise ValueError("Invalid length for offset string ({})".format(offset_string))
  47. hours = offset_string[1:3]
  48. minutes = offset_string[3:5]
  49. offset = int(hours) * 3600 + int(minutes) * 60
  50. if offset_string[0] == "-":
  51. return -1 * offset
  52. return offset
  53. def parse_date(date_string):
  54. """Parse date string into an aware datetime object."""
  55. # Just a small list with the most common timezones
  56. offset_list = [
  57. ("EEST", "0300"),
  58. ("EET", "0200"),
  59. ("GMT", "0000"),
  60. ("UTC", "0000")
  61. ]
  62. # Replace all the timezones with the offset
  63. for item in offset_list:
  64. timezone, offset = item
  65. date_string = date_string.replace(timezone, offset)
  66. datetime_string = date_string[:16]
  67. offset_string = date_string[16:]
  68. naive_date = datetime.strptime(datetime_string, "%Y-%m-%d %H:%M")
  69. # Create & return an aware datetime object based on the offset
  70. return naive_date.replace(tzinfo=UTC_Offset_Timezone(offset_string))
  71. # Print helpers
  72. def my_print(msg, char="*", value=None, exit=False):
  73. """Print 'msg', debug 'value' and exit if 'exit' is True."""
  74. print("[{}] {}".format(char, msg))
  75. if value is not None:
  76. print("\tvalue= \"{}\"".format(value))
  77. if exit:
  78. sys.exit(1)
  79. def perror(msg, value=None):
  80. my_print(msg, "-", value, True)
  81. def pwarn(msg, value=None, exit=False):
  82. my_print(msg, "!", value, exit)
  83. def pinfo(msg):
  84. my_print(msg)
  85. #############################
  86. def main(args):
  87. os.chdir("..")
  88. # setup
  89. pot_file_path = LOCALE_PATH_TMPL.format(lang="en_US")
  90. po_file_path = LOCALE_PATH_TMPL.format(lang=args.language)
  91. if not os.path.exists(pot_file_path):
  92. perror("Failed to locate POT file, exiting...", pot_file_path)
  93. if not os.path.exists(po_file_path):
  94. perror("Failed to locate PO file, exiting...", po_file_path)
  95. pot_file = polib.pofile(pot_file_path)
  96. po_file = polib.pofile(po_file_path)
  97. # check headers
  98. pinfo("Checking PO headers")
  99. pot_headers = pot_file.metadata
  100. po_headers = po_file.metadata
  101. if pot_headers["Project-Id-Version"] != po_headers["Project-Id-Version"]:
  102. pwarn("'Project-Id-Version' headers do not match", exit=args.werror)
  103. if pot_headers["POT-Creation-Date"] != po_headers["POT-Creation-Date"]:
  104. pwarn("'POT-Creation-Date' headers do not match", exit=args.werror)
  105. po_creation_date = parse_date(po_headers["POT-Creation-Date"])
  106. po_revision_date = parse_date(po_headers["PO-Revision-Date"])
  107. # Aware datetimes convert to UTC automatically when comparing
  108. if po_revision_date <= po_creation_date:
  109. pwarn("PO file seems outdated", exit=args.werror)
  110. if "Language" in po_headers and po_headers["Language"] != args.language:
  111. pwarn("'Language' header does not match with the given language", po_headers["Language"], args.werror)
  112. pinfo("Last-Translator: {}".format(po_headers["Last-Translator"]))
  113. # check translations
  114. if args.only_headers:
  115. sys.exit(0)
  116. pinfo("Checking translations, this might take a while...")
  117. eta = timedelta(seconds=len(pot_file) * WTIME)
  118. pinfo("Approximate time to check translations online: {}".format(eta))
  119. pot_msgid = [entry.msgid for entry in pot_file]
  120. po_msgid = [entry.msgid for entry in po_file]
  121. # lists to hold reports
  122. missing_msgid = []
  123. not_translated = []
  124. same_msgstr = []
  125. with_typo = []
  126. verify_trans = []
  127. fuzzy_trans = po_file.fuzzy_entries()
  128. for msgid in pot_msgid:
  129. if msgid not in po_msgid:
  130. missing_msgid.append(msgid)
  131. translator = google_translate.GoogleTranslator(timeout=5.0, retries=2, wait_time=WTIME)
  132. # Get a valid source language for Google
  133. # for example convert 'ar_SA' to 'ar' or 'zh_CN' to 'zh-CN'
  134. src_lang = args.language
  135. if src_lang not in translator._lang_dict:
  136. src_lang = src_lang.replace("_", "-")
  137. if src_lang not in translator._lang_dict:
  138. src_lang = src_lang.split("-")[0]
  139. for entry in po_file:
  140. if not entry.translated():
  141. not_translated.append(entry)
  142. elif entry.msgid == entry.msgstr:
  143. same_msgstr.append(entry)
  144. else:
  145. if args.no_translate:
  146. continue
  147. word_dict = translator.get_info_dict(entry.msgstr, "en", src_lang)
  148. if word_dict is not None:
  149. if word_dict["has_typo"]:
  150. with_typo.append(entry)
  151. if word_dict["translation"].lower() != entry.msgid.lower():
  152. found = False
  153. # Check verbs, nouns, adverbs, etc..
  154. for key in word_dict["extra"]:
  155. if entry.msgid.lower() in word_dict["extra"][key].keys():
  156. found = True
  157. break
  158. if not found:
  159. verify_trans.append((entry, word_dict["translation"]))
  160. sleep(WTIME)
  161. # time to report
  162. print("=" * 25 + "Report" + "=" * 25)
  163. if missing_msgid:
  164. print("Missing msgids")
  165. for msgid in missing_msgid:
  166. print(" \"{}\"".format(msgid))
  167. if not_translated:
  168. print("Not translated")
  169. for entry in not_translated:
  170. print(" line: {} msgid: \"{}\"".format(entry.linenum, entry.msgid))
  171. if same_msgstr:
  172. print("Same msgstr")
  173. for entry in same_msgstr:
  174. print(" line: {} msgid: \"{}\"".format(entry.linenum, entry.msgid))
  175. if with_typo:
  176. print("With typo")
  177. for entry in with_typo:
  178. print(" line: {} msgid: \"{}\" msgstr: \"{}\"".format(entry.linenum, entry.msgid, entry.msgstr))
  179. if verify_trans:
  180. print("Verify translation")
  181. for item in verify_trans:
  182. entry, translation = item
  183. print(" line: {} msgid: \"{}\" trans: \"{}\"".format(entry.linenum, entry.msgid, translation))
  184. if fuzzy_trans:
  185. print("Fuzzy translations")
  186. for entry in fuzzy_trans:
  187. print(" line: {} msgid: \"{}\"".format(entry.linenum, entry.msgid))
  188. total = len(missing_msgid) + len(not_translated) + len(same_msgstr) + len(with_typo) + len(verify_trans) + len(fuzzy_trans)
  189. print("")
  190. print("Missing msgids\t\t: {}".format(len(missing_msgid)))
  191. print("Not translated\t\t: {}".format(len(not_translated)))
  192. print("Same msgstr\t\t: {}".format(len(same_msgstr)))
  193. print("With typo\t\t: {}".format(len(with_typo)))
  194. print("Verify translation\t: {}".format(len(verify_trans)))
  195. print("Fuzzy translations\t: {}".format(len(fuzzy_trans)))
  196. print("Total\t\t\t: {}".format(total))
  197. if __name__ == "__main__":
  198. try:
  199. main(parse())
  200. except KeyboardInterrupt:
  201. print("KeyboardInterrupt")