-
Notifications
You must be signed in to change notification settings - Fork 272
/
Copy pathlanguage.ts
91 lines (78 loc) · 2.81 KB
/
language.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
// This function guesses the human language (writing system) of the given
// JavaScript string, using the Unicode Alias in extended RegExp.
//
// You can learn more about this in:
// - https://en.wikipedia.org/wiki/Script_(Unicode)
// - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Unicode_Property_Escapes
// - https://unicode.org/reports/tr18/#General_Category_Property
// - https://tc39.es/ecma262/multipage/text-processing.html#table-unicode-script-values
import createEmojiRegex from 'emoji-regex'
// Remove the "g" flag
const emojiRegex = new RegExp(createEmojiRegex(), '')
// Supported languages. The order matters.
// Usually, this is only for "special cases" like CJKV languages as latin
// characters are usually included in the base font, and can be safely fallback
// to the Noto Sans font. A list of special cases we want to support can be
// found here (sort by popularity):
// - https://fonts.google.com/noto/fonts?sort=popularity¬o.query=sans
//
// We can't tell if a hanzi(kanji) is Chinese or Japanese by regular expressions.
// - https://unicode.org/faq/han_cjk.html
const specialCode = {
emoji: emojiRegex,
symbol: /\p{Symbol}/u,
math: /\p{Math}/u,
} as const
const code = {
'ja-JP': /\p{scx=Hira}|\p{scx=Kana}|\p{scx=Han}|[\u3000]|[\uFF00-\uFFEF]/u,
'ko-KR': /\p{scx=Hangul}/u,
'zh-CN': /\p{scx=Han}/u,
'zh-TW': /\p{scx=Han}/u,
'zh-HK': /\p{scx=Han}/u,
'th-TH': /\p{scx=Thai}/u,
'bn-IN': /\p{scx=Bengali}/u,
'ar-AR': /\p{scx=Arabic}/u,
'ta-IN': /\p{scx=Tamil}/u,
'ml-IN': /\p{scx=Malayalam}/u,
'he-IL': /\p{scx=Hebrew}/u,
'te-IN': /\p{scx=Telugu}/u,
devanagari: /\p{scx=Devanagari}/u,
kannada: /\p{scx=Kannada}/u,
} as const
type SpecialCodeKey = keyof typeof specialCode
type CodeKey = keyof typeof specialCode | keyof typeof code
export type Locale = keyof typeof code
export type LangCode = CodeKey | 'unknown'
export const locales = Object.keys({ ...code, ...specialCode }) as Locale[]
export function isValidLocale(x: any): x is Locale {
return locales.includes(x)
}
export function detectLanguageCode(
segment: string,
locale?: Locale
): Array<Locale> | ['unknown'] | [SpecialCodeKey] {
for (const c of Object.keys(specialCode) as SpecialCodeKey[]) {
if (specialCode[c].test(segment)) {
return [c]
}
}
const languages = Object.keys(code).filter((lang) =>
code[lang].test(segment)
) as Locale[]
if (languages.length === 0) {
return ['unknown']
}
if (locale) {
const index = languages.findIndex((lang) => lang === locale)
if (index !== -1) {
languages.splice(index, 1)
languages.unshift(locale)
}
}
return languages
}
export function normalizeLocale(locale?: string): Locale | undefined {
if (locale) {
return locales.find((l) => l.toLowerCase().startsWith(locale.toLowerCase()))
}
}