Source code for jasy.core.Locale

#
# Jasy - Web Tooling Framework
# Copyright 2010-2012 Zynga Inc.
# Copyright 2013-2014 Sebastian Werner
#

import os
import json
import re
import xml.etree.ElementTree

import jasy.core.Console as Console
from jasy import datadir, __version__

import jasy.core.File

# Public API of this module. The trailing comma is required: without it the
# parentheses are plain grouping and ``__all__`` would be a string, which
# breaks ``from jasy.core.Locale import *``.
__all__ = ("LocaleParser",)


# Directory the CLDR XML files are loaded from
CLDR_DIR = os.path.join(datadir, "cldr")

# Regular expressions used for parsing CLDR plural rules
# - REGEXP_REL splits a rule on the words "and" / "or" (kept in the result
#   because of the capturing group)
# - REGEXP_IS matches "<expr> is [not] <number>"
# - REGEXP_IN matches "<expr> [not] in|within <low>..<high>"
REGEXP_REL = re.compile(r"(\band\b|\bor\b)")
REGEXP_IS = re.compile(r"^(.*?) is (not )?([0-9]+)")
REGEXP_IN = re.compile(r"^(.*?) (not )?(within|in) ([0-9]+)\.\.([0-9]+)")

# Script template as used to generate JS files
# Placeholders: Jasy version, module name, JSON encoded data
SCRIPT_TEMPLATE = "// Automatically generated by Jasy %s\ncore.Module(\"%s\", %s);"


def camelCaseToUpper(input):
    """
    Converts a camelCase identifier into UPPER_UNDERSCORE notation.

    Input which is already identical to its upper-cased form is returned
    unchanged. Otherwise every character is upper-cased and each character
    which already equals its own upper-cased form (upper-case letters, but
    also digits and punctuation) gets an underscore put in front of it,
    except when it is the very first character.
    """

    if input == input.upper():
        return input

    pieces = []
    for index, char in enumerate(input):
        upper = char.upper()

        # Characters which do not change when upper-cased mark a word boundary
        if index > 0 and upper == char:
            pieces.append("_")

        pieces.append(upper)

    return "".join(pieces)
def pluralToJavaScript(expr):
    """
    Translates the CLDR plural rules from
    http://cldr.unicode.org/index/cldr-spec/plural-rules
    into JavaScript expressions

    The rule is lower-cased and split on the words "and" / "or". Every
    remaining relation has to be either "<expr> is [not] <number>" or
    "<expr> [not] in|within <low>..<high>". Any other relation raises an
    Exception.
    """

    moduloOperator = re.compile(r"\s+mod\s+")
    connectors = {"and": "&&", "or": "||"}

    def toOperand(raw):
        # Operands containing spaces (e.g. "n mod 10") are wrapped in
        # parentheses with "mod" replaced by the JavaScript "%" operator
        operand = raw.strip()
        if " " in operand:
            return "(%s)" % moduloOperator.sub("%", operand)
        return operand

    pieces = []
    for relation in REGEXP_REL.split(expr.lower()):
        if relation in connectors:
            pieces.append(connectors[relation])
            continue

        isMatch = REGEXP_IS.match(relation)
        if isMatch:
            left, negated, number = isMatch.groups()
            comparator = "==" if negated is None else "!="
            pieces.append(toOperand(left) + comparator + number)
            continue

        rangeMatch = REGEXP_IN.match(relation)
        if not rangeMatch:
            raise Exception("Unsupported relation: %s" % relation)

        left, negated, keyword, low, high = rangeMatch.groups()
        left = toOperand(left)

        if negated is not None:
            pieces.append("!")

        pieces.append("(")
        if keyword == "in":
            # Fast integer check via: http://jsperf.com/simple-integer-check
            pieces.append("~~%s==%s&&" % (left, left))
        pieces.append("%s>=%s&&%s<=%s" % (left, low, left, high))
        pieces.append(")")

    return "".join(pieces)
class LocaleParser():
    """
    Parses CLDR locales into JavaScript files.

    The constructor reads the CLDR XML files for the given locale below
    CLDR_DIR and collects the extracted values in a nested dictionary.
    Use getData() to access that dictionary or export() to write it out as
    one JavaScript module per leaf section.
    """

    def __init__(self, locale):
        """
        Parses all CLDR data for the given locale.

        :param locale: Locale identifier with "_" as separator, e.g. "de" or
            "de_DE". The first part is used as language, the second part
            (when available) as territory.
        """

        Console.info("Parsing CLDR files for %s..." % locale)
        Console.indent()

        splits = locale.split("_")

        # Store for internal usage
        self.__locale = locale
        self.__language = splits[0]
        self.__territory = splits[1] if len(splits) > 1 else None

        # This will hold all extracted data
        self.__data = {}

        # Add info section
        self.__data["info"] = {
            "LOCALE": self.__locale,
            "LANGUAGE": self.__language,
            "TERRITORY": self.__territory
        }

        # Add keys (fallback to C-default locale)
        path = "%s.xml" % os.path.join(CLDR_DIR, "keys", self.__language)
        try:
            Console.info("Processing %s..." % os.path.relpath(path, CLDR_DIR))
            tree = xml.etree.ElementTree.parse(path)
        except IOError:
            path = "%s.xml" % os.path.join(CLDR_DIR, "keys", "C")
            Console.info("Processing %s..." % os.path.relpath(path, CLDR_DIR))
            tree = xml.etree.ElementTree.parse(path)

        self.__data["key"] = {
            "Short": {key.get("type"): key.text for key in tree.findall("./keys/short/key")},
            "Full": {key.get("type"): key.text for key in tree.findall("./keys/full/key")}
        }

        # Add main CLDR data: Fallback chain for locales
        # (e.g. "de_DE" => de_DE.xml, de.xml), most specific file first
        main = os.path.join(CLDR_DIR, "main")
        files = []
        candidate = locale
        while True:
            filename = "%s.xml" % os.path.join(main, candidate)
            if os.path.isfile(filename):
                files.append(filename)

            if "_" not in candidate:
                break

            candidate = candidate[:candidate.rindex("_")]

        # Extend data with root data
        files.append(os.path.join(main, "root.xml"))

        # Finally import all these files in order (root.xml is processed first)
        for path in reversed(files):
            Console.info("Processing %s..." % os.path.relpath(path, CLDR_DIR))
            tree = xml.etree.ElementTree.parse(path)

            self.__addDisplayNames(tree)
            self.__addDelimiters(tree)
            self.__addCalendars(tree)
            self.__addNumbers(tree)

        # Add supplemental CLDR data
        self.__addSupplementals(self.__territory)

        Console.outdent()


    def getData(self):
        """Returns a Python object with the parsed CLDR data."""

        return self.__data


    def export(self, path):
        """
        Writes the parsed data as a Jasy project into the given directory.

        Creates a "jasyproject.yaml" and one JavaScript file per exported
        section below "src".

        :param path: Target directory
        """

        Console.info("Writing result...")
        Console.info("Target directory: %s", path)
        Console.indent()

        jasy.core.File.write(os.path.join(path, "jasyproject.yaml"), 'name: locale\npackage: ""\n')
        count = self.__exportRecurser(self.__data, "locale", path)

        Console.info("Created %s classes", count)
        Console.outdent()


    def __exportRecurser(self, data, prefix, project):
        """
        Writes one JavaScript file for every entry of data which has no
        dictionary as child and recurses into all other entries.

        :param data: Dictionary to export
        :param prefix: Dotted module name of the given dictionary
        :param project: Target directory of the project
        :returns: Number of written files
        """

        counter = 0

        for key in data:
            # Ignore invalid values
            if key is None:
                continue

            value = data[key]
            hasDictChild = any(isinstance(value[childKey], dict) for childKey in value)

            if hasDictChild:
                # Namespace: keep key as is and descend
                name = "%s.%s" % (prefix, key)
                counter += self.__exportRecurser(value, name, project)
            else:
                # Leaf: module name uses an upper-cased first character
                name = "%s.%s%s" % (prefix, key[0].upper(), key[1:])
                encoded = json.dumps(value, sort_keys=True, indent=2, ensure_ascii=False)
                result = SCRIPT_TEMPLATE % (__version__, name, encoded)
                filename = "%s.js" % name.replace(".", os.path.sep)
                jasy.core.File.write(os.path.join(project, "src", filename), result)
                counter += 1

        return counter


    def __getStore(self, parent, name):
        """
        Manages data fields.

        Returns the dictionary stored as parent[name] and creates an empty
        one first when the key does not exist yet.
        """

        return parent.setdefault(name, {})


    def __pickTerritoryValue(self, items, territory, attribute):
        """
        Returns the given attribute of the item matching the territory.

        The first item registered for the world ("001") is used as the
        default. The first item which lists the given territory in its
        "territories" attribute replaces that default and ends the search.
        Returns None when nothing matches.
        """

        value = None
        for item in items:
            if item.get("territories") == "001" and value is None:
                value = item.get(attribute)
            elif territory in item.get("territories").split(" "):
                value = item.get(attribute)
                break

        return value


    def __addSupplementals(self, territory):
        """
        Converts data from supplemental folder.

        Fills the sections "Plural", "PhoneCode", "PostalCode",
        "CalendarPref", "Week" and "Measurement". "PhoneCode" and
        "PostalCode" are only created when an entry for the territory exists.

        :param territory: Territory code to look up (may be None)
        """

        supplemental = os.path.join(CLDR_DIR, "supplemental")

        # Plurals
        path = os.path.join(supplemental, "plurals.xml")
        Console.info("Processing %s..." % os.path.relpath(path, CLDR_DIR))
        tree = xml.etree.ElementTree.parse(path)
        plurals = self.__data["Plural"] = {}
        for item in tree.findall("plurals/pluralRules"):
            locales = item.get("locales")
            if locales is None or self.__language not in locales.split(" "):
                continue

            for rule in item.findall("pluralRule"):
                jsPlural = pluralToJavaScript(rule.text)
                plurals[rule.get("count").upper()] = jsPlural

        # Telephone Codes
        path = os.path.join(supplemental, "telephoneCodeData.xml")
        Console.info("Processing %s..." % os.path.relpath(path, CLDR_DIR))
        tree = xml.etree.ElementTree.parse(path)
        for item in tree.findall("telephoneCodeData/codesByTerritory"):
            if item.get("territory") == territory:
                for rule in item.findall("telephoneCountryCode"):
                    self.__data["PhoneCode"] = {"CODE": int(rule.get("code"))}
                    # Respect first only
                    break

        # Postal Codes
        path = os.path.join(supplemental, "postalCodeData.xml")
        Console.info("Processing %s..." % os.path.relpath(path, CLDR_DIR))
        tree = xml.etree.ElementTree.parse(path)
        for item in tree.findall("postalCodeData/postCodeRegex"):
            if territory == item.get("territoryId"):
                self.__data["PostalCode"] = {"CODE": item.text}
                break

        # Supplemental Data
        path = os.path.join(supplemental, "supplementalData.xml")
        Console.info("Processing %s..." % os.path.relpath(path, CLDR_DIR))
        tree = xml.etree.ElementTree.parse(path)

        # :: Calendar Preference
        ordering = self.__pickTerritoryValue(
            tree.findall("calendarPreferenceData/calendarPreference"), territory, "ordering")
        self.__data["CalendarPref"] = {"ORDERING": ordering.split(" ")}

        # :: Week Data
        week = self.__data["Week"] = {}
        weekData = tree.find("weekData")
        for key in ["firstDay", "weekendStart", "weekendEnd"]:
            week[camelCaseToUpper(key)] = self.__pickTerritoryValue(
                weekData.findall(key), territory, "day")

        # :: Measurement System
        measurement = self.__data["Measurement"] = {}
        measurementData = tree.find("measurementData")
        for key in ["measurementSystem", "paperSize"]:
            measurement[camelCaseToUpper(key)] = self.__pickTerritoryValue(
                measurementData.findall(key), territory, "type")


    def __addDisplayNames(self, tree):
        """Adds CLDR display names section."""

        display = self.__getStore(self.__data, "display")

        for key in ["languages", "scripts", "territories", "variants", "keys", "types", "measurementSystemNames"]:
            # make it a little bit shorter, there is not really any conflict potential
            if key == "measurementSystemNames":
                store = self.__getStore(display, "Measure")
            elif key == "territories":
                store = self.__getStore(display, "Territory")
            else:
                # remove last character "s" to force singular
                store = self.__getStore(display, key[:-1])

            for element in tree.findall("./localeDisplayNames/%s/*" % key):
                if not element.get("draft"):
                    field = element.get("type")

                    # NOTE(review): the guard tests the raw type while the value
                    # is stored under the converted key, so existing entries are
                    # only protected for types camelCaseToUpper() leaves
                    # unchanged. Kept as is - confirm which file should win.
                    if field not in store:
                        store[camelCaseToUpper(field)] = element.text


    def __addDelimiters(self, tree):
        """Adds CLDR delimiters."""

        delimiters = self.__getStore(self.__data, "delimiter")

        for element in tree.findall("./delimiters/*"):
            if not element.get("draft"):
                field = element.tag

                # NOTE(review): same raw vs. converted key mismatch as for the
                # display names. Kept as is - confirm which file should win.
                if field not in delimiters:
                    delimiters[camelCaseToUpper(field)] = element.text


    def __addCalendars(self, tree, key="dates/calendars"):
        """Loops through all CLDR calendars and adds them."""

        calendars = self.__getStore(self.__data, "calendar")

        for element in tree.findall("./%s/*" % key):
            if not element.get("draft"):
                self.__addCalendar(calendars, element)


    def __addWidths(self, calendar, element, storeName, widthPath, itemTag):
        """
        Adds named items (months, days, quarters) grouped by their width.

        Does nothing when the calendar element has no matching width
        elements. Draft entries are ignored and already stored names are
        never replaced.
        """

        widths = element.findall(widthPath)
        if not widths:
            return

        store = self.__getStore(calendar, storeName)
        for width in widths:
            if width.get("draft"):
                continue

            entries = store.setdefault(width.get("type"), {})
            for item in width.findall(itemTag):
                if item.get("draft"):
                    continue

                name = item.get("type").upper()
                if name not in entries:
                    entries[name] = item.text


    def __addFormatLengths(self, calendar, element, storeName, lengthPath, patternPath):
        """
        Adds date or time patterns keyed by their upper-cased length type.

        Does nothing when the calendar element has no matching length
        elements. Draft entries are ignored and already stored lengths are
        never replaced.
        """

        lengths = element.findall(lengthPath)
        if not lengths:
            return

        store = self.__getStore(calendar, storeName)
        for length in lengths:
            if length.get("draft"):
                continue

            name = length.get("type").upper()

            # Entries without an own pattern (e.g. alias only) have nothing to
            # contribute. Skip them instead of failing on a missing element.
            pattern = length.find(patternPath)
            if pattern is None:
                continue

            if name not in store:
                store[name] = pattern.text


    def __addCalendar(self, store, element):
        """
        Adds data from a CLDR calendar section.

        :param store: Dictionary holding all calendars by type
        :param element: XML element of a single calendar
        """

        calendar = self.__getStore(store, element.get("type"))

        # Months / Day / Quarter Widths
        self.__addWidths(calendar, element, "month", "months/monthContext/monthWidth", "month")
        self.__addWidths(calendar, element, "day", "days/dayContext/dayWidth", "day")
        self.__addWidths(calendar, element, "quarter", "quarters/quarterContext/quarterWidth", "quarter")

        # Date / Time Formats
        self.__addFormatLengths(calendar, element, "date", "dateFormats/dateFormatLength", "dateFormat/pattern")
        self.__addFormatLengths(calendar, element, "time", "timeFormats/timeFormatLength", "timeFormat/pattern")

        # DateTime Formats
        if element.find("dateTimeFormats/availableFormats") is not None:
            available = self.__getStore(calendar, "datetime")
            for child in element.findall("dateTimeFormats/availableFormats/dateFormatItem"):
                if not child.get("draft"):
                    # no uppercase here, because of intentional camelcase
                    formatId = child.get("id")
                    if formatId not in available:
                        available[formatId] = child.text

        fieldElements = element.findall("fields/field")
        if fieldElements:
            # Fields
            fields = self.__getStore(calendar, "field")
            for child in fieldElements:
                if child.get("draft"):
                    continue

                fieldName = child.get("type").upper()
                for nameChild in child.findall("displayName"):
                    if not nameChild.get("draft"):
                        if fieldName not in fields:
                            fields[fieldName] = nameChild.text

                        # Only the first non-draft display name is relevant
                        break

            # Relative
            relatives = self.__getStore(calendar, "relative")
            for child in fieldElements:
                if child.get("draft"):
                    continue

                relativeElements = child.findall("relative")
                if not relativeElements:
                    continue

                relativeField = self.__getStore(relatives, child.get("type"))
                for relChild in relativeElements:
                    if not relChild.get("draft"):
                        pos = relChild.get("type")
                        if pos not in relativeField:
                            relativeField[pos] = relChild.text


    def __addNumbers(self, tree):
        """
        Adds number symbols, number format patterns and currency data.

        Values found in the given tree replace values stored by previously
        processed trees. The former guards in this method compared against
        the wrong container respectively a left-over loop variable and
        therefore never prevented an overwrite; the assignments are now
        explicit to keep the output while removing these accidental
        dependencies.
        """

        store = self.__getStore(self.__data, "number")

        # Symbols
        symbols = self.__getStore(store, "symbol")
        for element in tree.findall("numbers/symbols/*"):
            if not element.get("draft"):
                symbols[camelCaseToUpper(element.tag)] = element.text

        # Formats
        formats = self.__getStore(store, "format")
        for kind in ["decimal", "scientific", "percent", "currency"]:
            # The last pattern found for each kind wins
            for element in tree.findall("numbers//%sFormat/pattern" % kind):
                formats[camelCaseToUpper(kind)] = element.text

        # Currencies
        currencies = self.__getStore(store, "currencyName")
        currenciesSymbols = self.__getStore(store, "currencySymbol")

        for child in tree.findall("numbers/currencies/currency"):
            if child.get("draft"):
                continue

            short = child.get("type")

            # Only the first non-draft display name is relevant
            for nameChild in child.findall("displayName"):
                if not nameChild.get("draft"):
                    currencies[short] = nameChild.text
                    break

            for symbolChild in child.findall("symbol"):
                currenciesSymbols[short] = symbolChild.text