"""
babel.core
~~~~~~~~~~
Core locale representation and locale data access.
:copyright: (c) 2013-2023 by the Babel Team.
:license: BSD, see LICENSE for more details.
"""
from __future__ import annotations
import os
import pickle
from collections.abc import Iterable, Mapping
from typing import TYPE_CHECKING, Any
from babel import localedata
from babel.plural import PluralRule
# Names exported by ``from babel.core import *``.
__all__ = ['UnknownLocaleError', 'Locale', 'default_locale', 'negotiate_locale',
'parse_locale']
# Typing-only definitions: this branch is skipped at runtime, so
# ``typing_extensions`` is only needed by static type checkers.
if TYPE_CHECKING:
from typing_extensions import Literal, TypeAlias
# The closed set of keys accepted by ``get_global``.
_GLOBAL_KEY: TypeAlias = Literal[
"all_currencies",
"currency_fractions",
"language_aliases",
"likely_subtags",
"parent_exceptions",
"script_aliases",
"territory_aliases",
"territory_currencies",
"territory_languages",
"territory_zones",
"variant_aliases",
"windows_zone_mapping",
"zone_aliases",
"zone_territories",
]
# Declared type of the module-level cache assigned just below.
_global_data: Mapping[_GLOBAL_KEY, Mapping[str, Any]] | None
# Cache for the unpickled global data; stays ``None`` until ``get_global``
# loads it on first use.
_global_data = None
# Plural rule built from an empty rule mapping.
# NOTE(review): presumably the fallback for locales without plural data --
# its users are not visible in this part of the file; confirm.
_default_plural_rule = PluralRule({})
def _raise_no_data_error():
raise RuntimeError('The babel data files are not available. '
'This usually happens because you are using '
'a source checkout from Babel and you did '
'not build the data files. Just make sure '
'to run "python setup.py import_cldr" before '
'installing the library.')
def get_global(key: _GLOBAL_KEY) -> Mapping[str, Any]:
    """Look up one section of the locale-independent global data.

    The data lives in the ``global.dat`` file located next to this module.
    It is unpickled on the first call and kept in a module-level cache for
    all later calls.

    >>> get_global('zone_aliases')['UTC']
    u'Etc/UTC'
    >>> get_global('zone_territories')['Europe/Berlin']
    u'DE'

    The keys available are:

    - ``all_currencies``
    - ``currency_fractions``
    - ``language_aliases``
    - ``likely_subtags``
    - ``parent_exceptions``
    - ``script_aliases``
    - ``territory_aliases``
    - ``territory_currencies``
    - ``territory_languages``
    - ``territory_zones``
    - ``variant_aliases``
    - ``windows_zone_mapping``
    - ``zone_aliases``
    - ``zone_territories``

    .. note:: The internal structure of the data may change between versions.

    .. versionadded:: 0.9

    :param key: the data key
    :return: the mapping stored under `key`, or an empty dictionary if the
             key is not present in the data
    :raise `RuntimeError`: if the ``global.dat`` file does not exist
    """
    global _global_data

    # Fast path: the data file has already been loaded.
    if _global_data is not None:
        return _global_data.get(key, {})

    data_path = os.path.join(os.path.dirname(__file__), 'global.dat')
    if not os.path.isfile(data_path):
        _raise_no_data_error()
    with open(data_path, 'rb') as stream:
        _global_data = pickle.load(stream)
        assert _global_data is not None
    return _global_data.get(key, {})
# Maps a bare language code to the full locale identifier used for it.
LOCALE_ALIASES = {
    'ar': 'ar_SY',
    'bg': 'bg_BG',
    'bs': 'bs_BA',
    'ca': 'ca_ES',
    'cs': 'cs_CZ',
    'da': 'da_DK',
    'de': 'de_DE',
    'el': 'el_GR',
    'en': 'en_US',
    'es': 'es_ES',
    'et': 'et_EE',
    'fa': 'fa_IR',
    'fi': 'fi_FI',
    'fr': 'fr_FR',
    'gl': 'gl_ES',
    'he': 'he_IL',
    'hu': 'hu_HU',
    'id': 'id_ID',
    'is': 'is_IS',
    'it': 'it_IT',
    'ja': 'ja_JP',
    'km': 'km_KH',
    'ko': 'ko_KR',
    'lt': 'lt_LT',
    'lv': 'lv_LV',
    'mk': 'mk_MK',
    'nl': 'nl_NL',
    'nn': 'nn_NO',
    'no': 'nb_NO',
    'pl': 'pl_PL',
    'pt': 'pt_PT',
    'ro': 'ro_RO',
    'ru': 'ru_RU',
    'sk': 'sk_SK',
    'sl': 'sl_SI',
    'sv': 'sv_SE',
    'th': 'th_TH',
    'tr': 'tr_TR',
    'uk': 'uk_UA',
}
class UnknownLocaleError(Exception):
    """Raised when a locale is requested for which no locale data is
    available.
    """

    def __init__(self, identifier: str) -> None:
        """Build the exception for the given locale identifier.

        :param identifier: the identifier string of the unsupported locale
        """
        super().__init__(f"unknown locale {identifier!r}")

        #: The identifier of the locale that could not be found.
        self.identifier = identifier
class Locale:
"""Representation of a specific locale.
>>> locale = Locale('en', 'US')
>>> repr(locale)
"Locale('en', territory='US')"
>>> locale.display_name
u'English (United States)'
A `Locale` object can also be instantiated from a raw locale string:
>>> locale = Locale.parse('en-US', sep='-')
>>> repr(locale)
"Locale('en', territory='US')"
`Locale` objects provide access to a collection of locale data, such as
territory and language names, number and date format patterns, and more:
>>> locale.number_symbols['decimal']
u'.'
If a locale is requested for which no locale data is available, an
`UnknownLocaleError` is raised:
>>> Locale.parse('en_XX')
Traceback (most recent call last):
...
UnknownLocaleError: unknown locale 'en_XX'
For more information see :rfc:`3066`.
"""
def __init__(
    self,
    language: str,
    territory: str | None = None,
    script: str | None = None,
    variant: str | None = None,
    modifier: str | None = None,
) -> None:
    """Create a locale from its individual identifier components.

    >>> locale = Locale('en', 'US')
    >>> locale.language
    'en'
    >>> locale.territory
    'US'

    :param language: the language code
    :param territory: the territory (country or region) code
    :param script: the script code
    :param variant: the variant code
    :param modifier: a modifier (following the '@' symbol, sometimes called '@variant')
    :raise `UnknownLocaleError`: if no locale data is available for the
                                 requested locale
    """
    #: the language code
    self.language = language
    #: the territory (country or region) code
    self.territory = territory
    #: the script code
    self.script = script
    #: the variant code
    self.variant = variant
    #: the modifier
    self.modifier = modifier
    # Locale data is loaded lazily by the ``_data`` property.
    self.__data = None

    full_identifier = str(self)
    # Data availability is checked for the identifier without its
    # ``@modifier`` part; the error still reports the full identifier.
    base_identifier, _, _ = full_identifier.partition('@')
    if localedata.exists(base_identifier):
        return
    raise UnknownLocaleError(full_identifier)
@classmethod
def default(cls, category: str | None = None, aliases: Mapping[str, str] = LOCALE_ALIASES) -> Locale:
    """Build the locale that the system environment designates as default.

    >>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LC_MESSAGES']:
    ...     os.environ[name] = ''
    >>> os.environ['LANG'] = 'fr_FR.UTF-8'
    >>> Locale.default('LC_MESSAGES')
    Locale('fr', territory='FR')

    The following fallbacks to the variable are always considered:

    - ``LANGUAGE``
    - ``LC_ALL``
    - ``LC_CTYPE``
    - ``LANG``

    :param category: one of the ``LC_XXX`` environment variable names
    :param aliases: a dictionary of aliases for locale identifiers
    """
    # XXX: use likely subtag expansion here instead of the
    #      aliases dictionary.
    return cls.parse(default_locale(category, aliases=aliases))
@classmethod
def negotiate(
    cls,
    preferred: Iterable[str],
    available: Iterable[str],
    sep: str = '_',
    aliases: Mapping[str, str] = LOCALE_ALIASES,
) -> Locale | None:
    """Find the best match between available and requested locale strings.

    >>> Locale.negotiate(['de_DE', 'en_US'], ['de_DE', 'de_AT'])
    Locale('de', territory='DE')
    >>> Locale.negotiate(['de_DE', 'en_US'], ['en', 'de'])
    Locale('de')
    >>> Locale.negotiate(['de_DE', 'de'], ['en_US'])

    You can specify the character used in the locale identifiers to separate
    the different components. This separator is applied to both lists. Also,
    case is ignored in the comparison:

    >>> Locale.negotiate(['de-DE', 'de'], ['en-us', 'de-de'], sep='-')
    Locale('de', territory='DE')

    :param preferred: the list of locale identifiers preferred by the user
    :param available: the list of locale identifiers available
    :param sep: the character separating the components of the identifiers
                in both lists
    :param aliases: a dictionary of aliases for locale identifiers
    :return: the locale for the negotiated identifier, or ``None`` if the
             negotiation did not produce an identifier
    """
    identifier = negotiate_locale(preferred, available, sep=sep,
                                  aliases=aliases)
    if not identifier:
        return None
    # Go through ``cls`` (as ``default`` does) so that calling this
    # alternate constructor on a subclass yields an instance of that
    # subclass rather than always a plain ``Locale``.
    return cls.parse(identifier, sep=sep)
@classmethod
def parse(
    cls,
    identifier: str | Locale | None,
    sep: str = '_',
    resolve_likely_subtags: bool = True,
) -> Locale:
    """Turn a locale identifier into a `Locale` instance.

    >>> l = Locale.parse('de-DE', sep='-')
    >>> l.display_name
    u'Deutsch (Deutschland)'

    If the `identifier` parameter is not a string, but actually a `Locale`
    object, that object is returned:

    >>> Locale.parse(l)
    Locale('de', territory='DE')

    This also can perform resolving of likely subtags which it does
    by default.  This is for instance useful to figure out the most
    likely locale for a territory you can use ``'und'`` as the
    language tag:

    >>> Locale.parse('und_AT')
    Locale('de', territory='AT')

    Modifiers are optional, and always at the end, separated by "@":

    >>> Locale.parse('de_AT@euro')
    Locale('de', territory='AT', modifier='euro')

    :param identifier: the locale identifier string
    :param sep: optional component separator
    :param resolve_likely_subtags: if this is specified then a locale will
                                   have its likely subtag resolved if the
                                   locale otherwise does not exist.  For
                                   instance ``zh_TW`` by itself is not a
                                   locale that exists but Babel can
                                   automatically expand it to the full
                                   form of ``zh_hant_TW``.  Note that this
                                   expansion is only taking place if no
                                   locale exists otherwise.  For instance
                                   there is a locale ``en`` that can exist
                                   by itself.
    :raise `ValueError`: if the string does not appear to be a valid locale
                         identifier
    :raise `UnknownLocaleError`: if no locale data is available for the
                                 requested locale
    :raise `TypeError`: if the identifier is not a string or a `Locale`
    """
    if isinstance(identifier, Locale):
        return identifier
    if not isinstance(identifier, str):
        raise TypeError(f"Unexpected value for identifier: {identifier!r}")

    parts = parse_locale(identifier, sep=sep)
    input_id = get_locale_identifier(parts)

    def _load_or_none(components):
        # Instantiate the locale; "no data for it" becomes ``None``.
        try:
            return cls(*components)
        except UnknownLocaleError:
            return None

    def _load_reducing(components):
        # Try the complete component tuple first, then only its first two
        # entries (language and territory, i.e. no script and variant).
        for candidate in (components, components[:2]):
            found = _load_or_none(candidate)
            if found is not None:
                return found
        return None

    def _with_modifier(components):
        # Normalise a parsed identifier to five entries; a four-entry
        # tuple has no modifier.
        if len(components) == 5:
            return components
        lang, terr, scr, var = components
        return lang, terr, scr, var, None

    exact_match = _load_or_none(parts)
    if exact_match is not None:
        return exact_match
    if not resolve_likely_subtags:
        raise UnknownLocaleError(input_id)

    # Everything below is a rough approximation of likely-subtag
    # resolution.  It is known not to be entirely correct, but it is good
    # enough to keep identifiers such as zh_TW working.  A proper
    # solution would be ICU-like fuzzy locale objects with a way to
    # maximize and minimize locale tags.
    language, territory, script, variant, modifier = _with_modifier(parts)

    # Replace each component by its alias where one is defined.
    language = get_global('language_aliases').get(language, language)
    territory = get_global('territory_aliases').get(territory or '', (territory,))[0]
    script = get_global('script_aliases').get(script or '', script)
    variant = get_global('variant_aliases').get(variant or '', variant)

    # 'ZZ' and 'Zzzz' are treated as "no territory" / "no script".
    if territory == 'ZZ':
        territory = None
    if script == 'Zzzz':
        script = None

    parts = language, territory, script, variant, modifier

    # First attempt: look up the complete (aliased) identifier.
    new_id = get_locale_identifier(parts)
    likely_subtags = get_global('likely_subtags')
    expansion = likely_subtags.get(new_id)
    if expansion is not None:
        resolved = _load_reducing(parse_locale(expansion))
        if resolved is not None:
            return resolved

    # Second attempt: look up the language alone and combine its likely
    # script/variant with the territory that was actually requested.
    expansion = likely_subtags.get(language)
    if expansion is not None:
        language2, _, script2, variant2, modifier2 = _with_modifier(parse_locale(expansion))
        resolved = _load_reducing((language2, territory, script2, variant2, modifier2))
        if resolved is not None:
            return resolved

    raise UnknownLocaleError(input_id)
def __eq__(self, other: object) -> bool:
for key in ('language', 'territory', 'script', 'variant', 'modifier'):
if not hasattr(other, key):
return False
return (
self.language == getattr(other, 'language') and # noqa: B009
self.territory == getattr(other, 'territory') and # noqa: B009
self.script == getattr(other, 'script') and # noqa: B009
self.variant == getattr(other, 'variant') and # noqa: B009
self.modifier == getattr(other, 'modifier') # noqa: B009
)
def __ne__(self, other: object) -> bool:
return not self.__eq__(other)
def __hash__(self) -> int:
return hash((self.language, self.territory, self.script,
self.variant, self.modifier))
def __repr__(self) -> str:
parameters = ['']
for key in ('territory', 'script', 'variant', 'modifier'):
value = getattr(self, key)
if value is not None:
parameters.append(f"{key}={value!r}")
return f"Locale({self.language!r}{', '.join(parameters)})"
def __str__(self) -> str:
    """Return the locale identifier assembled from the five components."""
    components = (
        self.language,
        self.territory,
        self.script,
        self.variant,
        self.modifier,
    )
    return get_locale_identifier(components)
@property
def _data(self) -> localedata.LocaleDataDict:
    """Locale data of this locale, loaded on first access and then cached.

    ``__init__`` only verifies that data exists for the identifier
    *without* its ``@modifier`` part, so loading ``str(self)`` verbatim
    could request a data set that is not there.  The full identifier is
    used when data exists for it; otherwise the modifier is dropped,
    which mirrors the existence check done in ``__init__``.
    """
    if self.__data is None:
        identifier = str(self)
        if not localedata.exists(identifier):
            # No dedicated data for the modified identifier: fall back to
            # the identifier that ``__init__`` validated.
            identifier = identifier.partition('@')[0]
        self.__data = localedata.LocaleDataDict(localedata.load(identifier))
    return self.__data
def get_display_name(self, locale: Locale | str | None = None) -> str | None:
"""Return the display name of the locale using the given locale.
The display name will include the language, territory, script, and
variant, if those are specified.
>>> Locale('zh', 'CN', script='Hans').get_display_name('en')
u'Chinese (Simplified, China)'
Modifiers are currently passed through verbatim:
>>> Locale('it', 'IT', modifier='euro').get_display_name('en')
u'Italian (Italy, euro)'
:param locale: the locale to use
"""
if locale is None:
locale = self
locale = Locale.parse(locale)
retval = locale.languages.get(self.language)
if retval and (self.territory or self.script or self.variant):
details = []
if self.script:
details.append(locale.scripts.get(self.script))
if self.territory:
details.append(locale.territories.get(self.territory))
if self.variant:
details.append(locale.variants.get(self.variant))
if self.modifier:
details.append(self.modifier)
details = filter(None, details)
if details:
retval += f" ({', '.join(details)})"
return retval
display_name = property(get_display_name, doc="""\
The localized display name of the locale.
>>> Locale('en').display_name
u'English'
>>> Locale('en', 'US').display_name
u'English (United States)'
>>> Locale('sv').display_name
u'svenska'
:type: `unicode`
""")
def get_language_name(self, locale: Locale | str | None = None) -> str | None:
"""Return the language of this locale in the given locale.
>>> Locale('zh', 'CN', script='Hans').get_language_name('de')
u'Chinesisch'
.. versionadded:: 1.0
:param locale: the locale to use
"""
if locale is None:
locale = self
locale = Locale.parse(locale)
return locale.languages.get(self.language)
language_name = property(get_language_name, doc="""\
The localized language name of the locale.
>>> Locale('en', 'US').language_name
u'English'
""")
def get_territory_name(self, locale: Locale | str | None = None) -> str | None:
"""Return the territory name in the given locale."""
if locale is None:
locale = self
locale = Locale.parse(locale)
return locale.territories.get(self.territory or '')
territory_name = property(get_territory_name, doc="""\
The localized territory name of the locale if available.
>>> Locale('de', 'DE').territory_name
u'Deutschland'
""")
def get_script_name(self, locale: Locale | str | None = None) -> str | None:
"""Return the script name in the given locale."""
if locale is None:
locale = self
locale = Locale.parse(locale)
return locale.scripts.get(self.script or '')
script_name = property(get_script_name, doc="""\
The localized script name of the locale if available.
>>> Locale('sr', 'ME', script='Latn').script_name
u'latinica'
""")
@property
def english_name(self) -> str | None:
    """The english display name of the locale.

    >>> Locale('de').english_name
    u'German'
    >>> Locale('de', 'DE').english_name
    u'German (Germany)'

    :type: `unicode`"""
    english = Locale('en')
    return self.get_display_name(english)
# { General Locale Display Names
@property
def languages(self) -> localedata.LocaleDataDict:
"""Mapping of language codes to translated language names.
>>> Locale('de', 'DE').languages['ja']
u'Japanisch'
See `ISO 639