/**
* Sanscript * * Sanscript is a Sanskrit transliteration library. Currently, it supports * other Indian languages only incidentally. * * License: MIT */
function exportSanscriptSingleton(global){
"use strict"; var Sanscript = {}; // First, we define the Sanscript singleton, with its variables and methods. Sanscript.defaults = { "skip_sgml": false, "syncope": false }; /* Schemes * ======= * Schemes are of two kinds: "Brahmic" and "roman." "Brahmic" schemes * describe abugida scripts found in India. "Roman" schemes describe * manufactured alphabets that are meant to describe or encode Brahmi * scripts. Abugidas and alphabets are processed by separate algorithms * because of the unique difficulties involved with each. * * Brahmic consonants are stated without a virama. Roman consonants are * stated without the vowel 'a'. * * (Since "abugida" is not a well-known term, Sanscript uses "Brahmic" * and "roman" for clarity.) */ var schemes = Sanscript.schemes = { /* Bengali * ------- * 'va' and 'ba' are both rendered as ব. */ "bengali": { "vowels": ["অ", "আ", "ই", "ঈ", "উ", "ঊ", "ঋ", "ৠ", "ঌ", "ৡ", "", "এ", "ঐ", "", "ও", "ঔ"], "vowel_marks": ["া", "ি", "ী", "ু", "ূ", "ৃ", "ৄ", "ৢ", "ৣ", "", "ে", "ৈ", "", "ো", "ৌ"], "other_marks": ["ং", "ঃ", "ঁ"], "virama": ["্"], "consonants": ["ক", "খ", "গ", "ঘ", "ঙ", "চ", "ছ", "জ", "ঝ", "ঞ", "ট", "ঠ", "ড", "ঢ", "ণ", "ত", "থ", "দ", "ধ", "ন", "প", "ফ", "ব", "ভ", "ম", "য", "র", "ল", "ব", "শ", "ষ", "স", "হ", "ळ", "ক্ষ", "জ্ঞ"], "symbols": ["০", "১", "২", "৩", "৪", "৫", "৬", "৭", "৮", "৯", "ॐ", "ঽ", "।", "॥"], "other": ["", "", "", "", "ড", "ঢ", "", "য", ""] }, /* Devanagari * ---------- * The most comprehensive and unambiguous Brahmic script listed. */ "devanagari": { // "Independent" forms of the vowels. These are used whenever the // vowel does not immediately follow a consonant. "vowels": ["अ", "आ", "इ", "ई", "उ", "ऊ", "ऋ", "ॠ", "ऌ", "ॡ", "ऎ", "ए", "ऐ", "ऒ", "ओ", "औ"], // "Dependent" forms of the vowels. These are used whenever the // vowel immediately follows a consonant. If a letter is not // listed in `vowels`, it should not be listed here. "vowel_marks": ["ा", "ि", "ी", "ु", "ू", "ृ", "ॄ", "ॢ", "ॣ", "ॆ", "े", "ै", "ॊ", "ो", "ौ"], // Miscellaneous marks, all of which are used in Sanskrit. "other_marks": ["ं", "ः", "ँ"], // In syllabic scripts like Devanagari, consonants have an inherent // vowel that must be suppressed explicitly. We do so by putting a // virama after the consonant. "virama": ["्"], // Various Sanskrit consonants and consonant clusters. Every token // here has an explicit vowel. Thus "क" is "ka" instead of "k". "consonants": ["क", "ख", "ग", "घ", "ङ", "च", "छ", "ज", "झ", "ञ", "ट", "ठ", "ड", "ढ", "ण", "त", "थ", "द", "ध", "न", "प", "फ", "ब", "भ", "म", "य", "र", "ल", "व", "श", "ष", "स", "ह", "ळ", "क्ष", "ज्ञ"], // Numbers and punctuation "symbols": ["०", "१", "२", "३", "४", "५", "६", "७", "८", "९", "ॐ", "ऽ", "।", "॥"], // Zero-width joiner. This is used to separate a consonant cluster // and avoid a complex ligature. "zwj": ["\u200D"], // Dummy consonant. This is used in ITRANS to prevert certain types // of parser ambiguity. Thus "barau" -> बरौ but "bara_u" -> बरउ. "skip": [""], // Vedic accent. Udatta and anudatta. "accent": ["\u0951", "\u0952"], // Accent combined with anusvara and and visarga. For compatibility // with ITRANS, which allows the reverse of these four. "combo_accent": ["ः॑", "ः॒", "ं॑", "ं॒"], "candra": ["ॅ"], // Non-Sanskrit consonants "other": ["क़", "ख़", "ग़", "ज़", "ड़", "ढ़", "फ़", "य़", "ऱ"] }, /* Gujarati * -------- * Sanskrit-complete. */ "gujarati": { "vowels": ["અ", "આ", "ઇ", "ઈ", "ઉ", "ઊ", "ઋ", "ૠ", "ઌ", "ૡ", "", "એ", "ઐ", "", "ઓ", "ઔ"], "vowel_marks": ["ા", "િ", "ી", "ુ", "ૂ", "ૃ", "ૄ", "ૢ", "ૣ", "", "ે", "ૈ", "", "ો", "ૌ"], "other_marks": ["ં", "ઃ", "ઁ"], "virama": ["્"], "consonants": ["ક", "ખ", "ગ", "ઘ", "ઙ", "ચ", "છ", "જ", "ઝ", "ઞ", "ટ", "ઠ", "ડ", "ઢ", "ણ", "ત", "થ", "દ", "ધ", "ન", "પ", "ફ", "બ", "ભ", "મ", "ય", "ર", "લ", "વ", "શ", "ષ", "સ", "હ", "ળ", "ક્ષ", "જ્ઞ"], "symbols": ["૦", "૧", "૨", "૩", "૪", "૫", "૬", "૭", "૮", "૯", "ૐ", "ઽ", "", ""], "candra": ["ૅ"] }, /* Gurmukhi * -------- * Missing R/RR/lR/lRR */ "gurmukhi": { "vowels": ["ਅ", "ਆ", "ਇ", "ਈ", "ਉ", "ਊ", "", "", "", "", "", "ਏ", "ਐ", "", "ਓ", "ਔ"], "vowel_marks": ["ਾ", "ਿ", "ੀ", "ੁ", "ੂ", "", "", "", "", "", "ੇ", "ੈ", "", "ੋ", "ੌ"], "other_marks": ["ਂ", "ਃ", "ਁ"], "virama": ["੍"], "consonants": ["ਕ", "ਖ", "ਗ", "ਘ", "ਙ", "ਚ", "ਛ", "ਜ", "ਝ", "ਞ", "ਟ", "ਠ", "ਡ", "ਢ", "ਣ", "ਤ", "ਥ", "ਦ", "ਧ", "ਨ", "ਪ", "ਫ", "ਬ", "ਭ", "ਮ", "ਯ", "ਰ", "ਲ", "ਵ", "ਸ਼", "ਸ਼", "ਸ", "ਹ", "ਲ਼", "ਕ੍ਸ਼", "ਜ੍ਞ"], "symbols": ["੦", "੧", "੨", "੩", "੪", "੫", "੬", "੭", "੮", "੯", "ॐ", "ऽ", "।", "॥"], "other": ["", "ਖ", "ਗ", "ਜ", "ਡ", "", "ਫ", "", ""] }, /* Kannada * ------- * Sanskrit-complete. */ "kannada": { "vowels": ["ಅ", "ಆ", "ಇ", "ಈ", "ಉ", "ಊ", "ಋ", "ೠ", "ಌ", "ೡ", "ಎ", "ಏ", "ಐ", "ಒ", "ಓ", "ಔ"], "vowel_marks": ["ಾ", "ಿ", "ೀ", "ು", "ೂ", "ೃ", "ೄ", "ೢ", "ೣ", "ೆ", "ೇ", "ೈ", "ೊ", "ೋ", "ೌ"], "other_marks": ["ಂ", "ಃ", "ँ"], "virama": ["್"], "consonants": ["ಕ", "ಖ", "ಗ", "ಘ", "ಙ", "ಚ", "ಛ", "ಜ", "ಝ", "ಞ", "ಟ", "ಠ", "ಡ", "ಢ", "ಣ", "ತ", "ಥ", "ದ", "ಧ", "ನ", "ಪ", "ಫ", "ಬ", "ಭ", "ಮ", "ಯ", "ರ", "ಲ", "ವ", "ಶ", "ಷ", "ಸ", "ಹ", "ಳ", "ಕ್ಷ", "ಜ್ಞ"], "symbols": ["೦", "೧", "೨", "೩", "೪", "೫", "೬", "೭", "೮", "೯", "ಓಂ", "ಽ", "।", "॥"], "other": ["", "", "", "", "", "", "ಫ", "", "ಱ"] }, /* Malayalam * --------- * Sanskrit-complete. */ "malayalam": { "vowels": ["അ", "ആ", "ഇ", "ഈ", "ഉ", "ഊ", "ഋ", "ൠ", "ഌ", "ൡ", "എ", "ഏ", "ഐ", "ഒ", "ഓ", "ഔ"], "vowel_marks": ["ാ", "ി", "ീ", "ു", "ൂ", "ൃ", "ൄ", "ൢ", "ൣ", "െ", "േ", "ൈ", "ൊ", "ോ", "ൌ"], "other_marks": ["ം", "ഃ", "ँ"], "virama": ["്"], "consonants": ["ക", "ഖ", "ഗ", "ഘ", "ങ", "ച", "ഛ", "ജ", "ഝ", "ഞ", "ട", "ഠ", "ഡ", "ഢ", "ണ", "ത", "ഥ", "ദ", "ധ", "ന", "പ", "ഫ", "ബ", "ഭ", "മ", "യ", "ര", "ല", "വ", "ശ", "ഷ", "സ", "ഹ", "ള", "ക്ഷ", "ജ്ഞ"], "symbols": ["൦", "൧", "൨", "൩", "൪", "൫", "൬", "൭", "൮", "൯", "ഓം", "ഽ", "।", "॥"], "other": ["", "", "", "", "", "", "", "", "റ"] }, /* Oriya * ----- * Sanskrit-complete. */ "oriya": { "vowels": ["ଅ", "ଆ", "ଇ", "ଈ", "ଉ", "ଊ", "ଋ", "ୠ", "ଌ", "ୡ", "", "ଏ", "ଐ", "", "ଓ", "ଔ"], "vowel_marks": ["ା", "ି", "ୀ", "ୁ", "ୂ", "ୃ", "ୄ", "ୢ", "ୣ", "", "େ", "ୈ", "", "ୋ", "ୌ"], "other_marks": ["ଂ", "ଃ", "ଁ"], "virama": ["୍"], "consonants": ["କ", "ଖ", "ଗ", "ଘ", "ଙ", "ଚ", "ଛ", "ଜ", "ଝ", "ଞ", "ଟ", "ଠ", "ଡ", "ଢ", "ଣ", "ତ", "ଥ", "ଦ", "ଧ", "ନ", "ପ", "ଫ", "ବ", "ଭ", "ମ", "ଯ", "ର", "ଲ", "ଵ", "ଶ", "ଷ", "ସ", "ହ", "ଳ", "କ୍ଷ", "ଜ୍ଞ"], "symbols": ["୦", "୧", "୨", "୩", "୪", "୫", "୬", "୭", "୮", "୯", "ଓଂ", "ଽ", "।", "॥"], "other": ["", "", "", "", "ଡ", "ଢ", "", "ଯ", ""] }, /* Tamil * ----- * Missing R/RR/lR/lRR vowel marks and voice/aspiration distinctions. * The most incomplete of the Sanskrit schemes here. */ "tamil": { "vowels": ["அ", "ஆ", "இ", "ஈ", "உ", "ஊ", "", "", "", "", "எ", "ஏ", "ஐ", "ஒ", "ஓ", "ஔ"], "vowel_marks": ["ா", "ி", "ீ", "ு", "ூ", "", "", "", "", "ெ", "ே", "ை", "ொ", "ோ", "ௌ"], "other_marks": ["ஂ", "ஃ", ""], "virama": ["்"], "consonants": ["க", "க", "க", "க", "ங", "ச", "ச", "ஜ", "ச", "ஞ", "ட", "ட", "ட", "ட", "ண", "த", "த", "த", "த", "ந", "ப", "ப", "ப", "ப", "ம", "ய", "ர", "ல", "வ", "ஶ", "ஷ", "ஸ", "ஹ", "ள", "க்ஷ", "ஜ்ஞ"], "symbols": ["௦", "௧", "௨", "௩", "௪", "௫", "௬", "௭", "௮", "௯", "ௐ", "ऽ", "।", "॥"], "other": ["", "", "", "", "", "", "", "", "ற"] }, /* Telugu * ------ * Sanskrit-complete. */ "telugu": { "vowels": ["అ", "ఆ", "ఇ", "ఈ", "ఉ", "ఊ", "ఋ", "ౠ", "ఌ", "ౡ", "ఎ", "ఏ", "ఐ", "ఒ", "ఓ", "ఔ"], "vowel_marks": ["ా", "ి", "ీ", "ు", "ూ", "ృ", "ౄ", "ౢ", "ౣ", "ె", "ే", "ై", "ొ", "ో", "ౌ"], "other_marks": ["ం", "ః", "ఁ"], "virama": ["్"], "consonants": ["క", "ఖ", "గ", "ఘ", "ఙ", "చ", "ఛ", "జ", "ఝ", "ఞ", "ట", "ఠ", "డ", "ఢ", "ణ", "త", "థ", "ద", "ధ", "న", "ప", "ఫ", "బ", "భ", "మ", "య", "ర", "ల", "వ", "శ", "ష", "స", "హ", "ళ", "క్ష", "జ్ఞ"], "symbols": ["౦", "౧", "౨", "౩", "౪", "౫", "౬", "౭", "౮", "౯", "ఓం", "ఽ", "।", "॥"], "other": ["", "", "", "", "", "", "", "", "ఱ"] }, /* International Alphabet of Sanskrit Transliteration * -------------------------------------------------- * The most "professional" Sanskrit romanization scheme. */ "iast": { "vowels": ["a", "ā", "i", "ī", "u", "ū", "ṛ", "ṝ", "ḷ", "ḹ", "", "e", "ai", "", "o", "au"], "other_marks": ["ṃ", "ḥ", "~"], "virama": [""], "consonants": ["k", "kh", "g", "gh", "ṅ", "c", "ch", "j", "jh", "ñ", "ṭ", "ṭh", "ḍ", "ḍh", "ṇ", "t", "th", "d", "dh", "n", "p", "ph", "b", "bh", "m", "y", "r", "l", "v", "ś", "ṣ", "s", "h", "ḻ", "kṣ", "jñ"], "symbols": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "oṃ", "'", "।", "॥"] }, /* ITRANS * ------ * One of the first romanization schemes -- and one of the most * complicated. For alternate forms, see the "allAlternates" variable * below. * * '_' is a "null" letter, which allows adjacent vowels. */ "itrans": { "vowels": ["a", "A", "i", "I", "u", "U", "RRi", "RRI", "LLi", "LLI", "", "e", "ai", "", "o", "au"], "other_marks": ["M", "H", ".N"], "virama": [""], "consonants": ["k", "kh", "g", "gh", "~N", "ch", "Ch", "j", "jh", "~n", "T", "Th", "D", "Dh", "N", "t", "th", "d", "dh", "n", "p", "ph", "b", "bh", "m", "y", "r", "l", "v", "sh", "Sh", "s", "h", "L", "kSh", "j~n"], "symbols": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "OM", ".a", "|", "||"], "candra": [".c"], "zwj": ["{}"], "skip": "_", "accent": ["\\'", "\\_"], "combo_accent": ["\\'H", "\\_H", "\\'M", "\\_M"], "other": ["q", "K", "G", "z", ".D", ".Dh", "f", "Y", "R"] }, /* Harvard-Kyoto * ------------- * A simple 1:1 mapping. */ "hk": { "vowels": ["a", "A", "i", "I", "u", "U", "R", "RR", "lR", "lRR", "", "e", "ai", "", "o", "au"], "other_marks": ["M", "H", "~",], "virama": [""], "consonants": ["k", "kh", "g", "gh", "G", "c", "ch", "j", "jh", "J", "T", "Th", "D", "Dh", "N", "t", "th", "d", "dh", "n", "p", "ph", "b", "bh", "m", "y", "r", "l", "v", "z", "S", "s", "h", "L", "kS", "jJ"], "symbols": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "OM", "'", "|", "||"] }, /* National Library at Kolkata * --------------------------- * Apart from using "ē" and "ō" instead of "e" and "o", this scheme is * identical to IAST. ṝ, ḷ, and ḹ are not part of the scheme proper. * * This is defined further below. */ /* Sanskrit Library Phonetic Basic * ------------------------------- * With one ASCII letter per phoneme, this is the tersest transliteration * scheme in use today and is especially suited to computer processing. */ "slp1": { "vowels": ["a", "A", "i", "I", "u", "U", "f", "F", "x", "X", "", "e", "E", "", "o", "O"], "other_marks": ["M", "H", "~"], "virama": [""], "consonants": ["k", "K", "g", "G", "N", "c", "C", "j", "J", "Y", "w", "W", "q", "Q", "R", "t", "T", "d", "D", "n", "p", "P", "b", "B", "m", "y", "r", "l", "v", "S", "z", "s", "h", "L", "kz", "jY"], "symbols": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "oM", "'", ".", ".."] }, /* Velthuis * -------- * A case-insensitive Sanskrit encoding. */ "velthuis": { "vowels": ["a", "aa", "i", "ii", "u", "uu", ".r", ".rr", ".li", ".ll", "", "e", "ai", "", "o", "au"], "other_marks": [".m", ".h", ""], "virama": [""], "consonants": ["k", "kh", "g", "gh", "\"n", "c", "ch", "j", "jh", "~n", ".t", ".th", ".d", ".d", ".n", "t", "th", "d", "dh", "n", "p", "ph", "b", "bh", "m", "y", "r", "l", "v", "~s", ".s", "s", "h", "L", "k.s", "j~n"], "symbols": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "o.m", "'", "|", "||"] }, /* WX * -- * As terse as SLP1. */ "wx": { "vowels": ["a", "A", "i", "I", "u", "U", "q", "Q", "L", "", "", "e", "E", "", "o", "O"], "other_marks": ["M", "H", "z"], "virama": [""], "consonants": ["k", "K", "g", "G", "f", "c", "C", "j", "J", "F", "t", "T", "d", "D", "N", "w", "W", "x", "X", "n", "p", "P", "b", "B", "m", "y", "r", "l", "v", "S", "R", "s", "h", "", "kR", "jF"], "symbols": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "oM", "'", "|", "||"] }, }, // Set of names of schemes romanSchemes = {}, // Map of alternate encodings. allAlternates = { "itrans": { "A": ["aa"], "I": ["ii", "ee"], "U": ["uu", "oo"], "RRi": ["R^i"], "RRI": ["R^I"], "LLi": ["L^i"], "LLI": ["L^I"], "M": [".m", ".n"], "~N": ["N^"], "ch": ["c"], "Ch": ["C", "chh"], "~n": ["JN"], "v": ["w"], "Sh": ["S", "shh"], "kSh": ["kS", "x"], "j~n": ["GY", "dny"], "OM": ["AUM"], "\\_": ["\\`"], "\\_H": ["\\`H"], "\\'M": ["\\'.m", "\\'.n"], "\\_M": ["\\_.m", "\\_.n", "\\`M", "\\`.m", "\\`.n"], ".a": ["~"], "|": ["."], "||": [".."], "z": ["J"] } }, // object cache cache = {}; /** * Check whether the given scheme encodes romanized Sanskrit. * * @param name the scheme name * @return boolean */ Sanscript.isRomanScheme = function(name) { return romanSchemes.hasOwnProperty(name); }; /** * Add a Brahmic scheme to Sanscript. * * Schemes are of two types: "Brahmic" and "roman". Brahmic consonants * have an inherent vowel sound, but roman consonants do not. This is the * main difference between these two types of scheme. * * A scheme definition is an object ("{}") that maps a group name to a * list of characters. For illustration, see the "devanagari" scheme at * the top of this file. * * You can use whatever group names you like, but for the best results, * you should use the same group names that Sanscript does. * * @param name the scheme name * @param scheme the scheme data itself. This should be constructed as * described above. */ Sanscript.addBrahmicScheme = function(name, scheme) { Sanscript.schemes[name] = scheme; }; /** * Add a roman scheme to Sanscript. * * See the comments on Sanscript.addBrahmicScheme. The "vowel_marks" field * can be omitted. * * @param name the scheme name * @param scheme the scheme data itself */ Sanscript.addRomanScheme = function(name, scheme) { if (!('vowel_marks' in scheme)) { scheme.vowel_marks = scheme.vowels.slice(1); } Sanscript.schemes[name] = scheme; romanSchemes[name] = true; }; /** * Create a deep copy of an object, for certain kinds of objects. * * @param scheme the scheme to copy * @return the copy */ var cheapCopy = function(scheme) { var copy = {}; for (var key in scheme) { if (!scheme.hasOwnProperty(key)) { continue; } copy[key] = scheme[key].slice(0); } return copy; }; // Set up various schemes (function() { // Set up roman schemes var kolkata = schemes.kolkata = cheapCopy(schemes.iast), schemeNames = ["iast", "itrans", "hk", "kolkata", "slp1", "velthuis", "wx"]; kolkata.vowels = ["a", "ā", "i", "ī", "u", "ū", "ṛ", "ṝ", "ḷ", "ḹ", "e", "ē", "ai", "o", "ō", "au"]; // These schemes already belong to Sanscript.schemes. But by adding // them again with `addRomanScheme`, we automatically build up // `romanSchemes` and define a `vowel_marks` field for each one. for (var i = 0, name; (name = schemeNames[i]); i++) { Sanscript.addRomanScheme(name, schemes[name]); } // ITRANS variant, which supports Dravidian short 'e' and 'o'. var itrans_dravidian = cheapCopy(schemes.itrans); itrans_dravidian.vowels = ["a", "A", "i", "I", "u", "U", "Ri", "RRI", "LLi", "LLi", "e", "E", "ai", "o", "O", "au"]; itrans_dravidian.vowel_marks = itrans_dravidian.vowels.slice(1); allAlternates.itrans_dravidian = allAlternates.itrans; Sanscript.addRomanScheme('itrans_dravidian', itrans_dravidian); }()); /** * Create a map from every character in `from` to its partner in `to`. * Also, store any "marks" that `from` might have. * * @param from input scheme * @param to output scheme * @param options scheme options */ var makeMap = function(from, to, options) { var alternates = allAlternates[from] || {}, consonants = {}, fromScheme = Sanscript.schemes[from], letters = {}, tokenLengths = [], marks = {}, toScheme = Sanscript.schemes[to]; for (var group in fromScheme) { if (!fromScheme.hasOwnProperty(group)) { continue; } var fromGroup = fromScheme[group], toGroup = toScheme[group]; if (toGroup === undefined) { continue; } for (var i = 0; i < fromGroup.length; i++) { var F = fromGroup[i], T = toGroup[i], alts = alternates[F] || [], numAlts = alts.length, j = 0; tokenLengths.push(F.length); for (j = 0; j < numAlts; j++) { tokenLengths.push(alts[j].length); } if (group === 'vowel_marks' || group === 'virama') { marks[F] = T; for (j = 0; j < numAlts; j++) { marks[alts[j]] = T; } } else { letters[F] = T; for (j = 0; j < numAlts; j++) { letters[alts[j]] = T; } if (group === 'consonants' || group === 'other') { consonants[F] = T; for (j = 0; j < numAlts; j++) { consonants[alts[j]] = T; } } } } } return {consonants: consonants, fromRoman: Sanscript.isRomanScheme(from), letters: letters, marks: marks, maxTokenLength: Math.max.apply(Math, tokenLengths), toRoman: Sanscript.isRomanScheme(to), virama: toScheme.virama}; }; /** * Transliterate from a romanized script. * * @param data the string to transliterate * @param map map data generated from makeMap() * @param options transliteration options * @return the finished string */ var transliterateRoman = function(data, map, options) { var buf = [], consonants = map.consonants, dataLength = data.length, hadConsonant = false, letters = map.letters, marks = map.marks, maxTokenLength = map.maxTokenLength, optSkipSGML = options.skip_sgml, optSyncope = options.syncope, tempLetter, tempMark, tokenBuffer = '', toRoman = map.toRoman, virama = map.virama; // Transliteration state. It's controlled by these values: // - `skippingSGML`: are we in SGML? // - `toggledTrans`: are we in a toggled region? // // We combine these values into a single variable `skippingTrans`: // // `skippingTrans` = skippingSGML || toggledTrans; // // If (and only if) this value is true, don't transliterate. var skippingSGML = false, skippingTrans = false, toggledTrans = false; for (var i = 0, L; (L = data.charAt(i)) || tokenBuffer; i++) { // Fill the token buffer, if possible. var difference = maxTokenLength - tokenBuffer.length; if (difference > 0 && i < dataLength) { tokenBuffer += L; if (difference > 1) { continue; } } // Match all token substrings to our map. for (var j = 0; j < maxTokenLength; j++) { var token = tokenBuffer.substr(0,maxTokenLength-j); if (skippingSGML === true) { skippingSGML = (token !== '>'); } else if (token === '<') { skippingSGML = optSkipSGML; } else if (token === '##') { toggledTrans = !toggledTrans; tokenBuffer = tokenBuffer.substr(2); break; } skippingTrans = skippingSGML || toggledTrans; if ((tempLetter = letters[token]) !== undefined && !skippingTrans) { if (toRoman) { buf.push(tempLetter); } else { // Handle the implicit vowel. Ignore 'a' and force // vowels to appear as marks if we've just seen a // consonant. if (hadConsonant) { if ((tempMark = marks[token])) { buf.push(tempMark); } else if (token !== 'a') { buf.push(virama); buf.push(tempLetter); } } else { buf.push(tempLetter); } hadConsonant = token in consonants; } tokenBuffer = tokenBuffer.substr(maxTokenLength-j); break; } else if (j === maxTokenLength - 1) { if (hadConsonant) { hadConsonant = false; if (!optSyncope) { buf.push(virama); } } buf.push(token); tokenBuffer = tokenBuffer.substr(1); // 'break' is redundant here, "j == ..." is true only on // the last iteration. } } } if (hadConsonant && !optSyncope) { buf.push(virama); } return buf.join(''); }; /** * Transliterate from a Brahmic script. * * @param data the string to transliterate * @param map map data generated from makeMap() * @param options transliteration options * @return the finished string */ var transliterateBrahmic = function(data, map, options) { var buf = [], consonants = map.consonants, danglingHash = false, hadRomanConsonant = false, letters = map.letters, marks = map.marks, temp, toRoman = map.toRoman, skippingTrans = false; for (var i = 0, L; (L = data.charAt(i)); i++) { // Toggle transliteration state if (L === '#') { if (danglingHash) { skippingTrans = !skippingTrans; danglingHash = false; } else { danglingHash = true; } if (hadRomanConsonant) { buf.push('a'); hadRomanConsonant = false; } continue; } else if (skippingTrans) { buf.push(L); continue; } if ((temp = marks[L]) !== undefined) { buf.push(temp); hadRomanConsonant = false; } else { if (danglingHash) { buf.push('#'); danglingHash = false; } if (hadRomanConsonant) { buf.push('a'); hadRomanConsonant = false; } // Push transliterated letter if possible. Otherwise, push // the letter itself. if ((temp = letters[L])) { buf.push(temp); hadRomanConsonant = toRoman && (L in consonants); } else { buf.push(L); } } } if (hadRomanConsonant) { buf.push('a'); } return buf.join(''); }; /** * Transliterate from one script to another. * * @param data the string to transliterate * @param from the source script * @param to the destination script * @param options transliteration options * @return the finished string */ Sanscript.t = function(data, from, to, options) { options = options || {}; var cachedOptions = cache.options || {}, defaults = Sanscript.defaults, hasPriorState = (cache.from === from && cache.to === to), map; // Here we simultaneously build up an `options` object and compare // these options to the options from the last run. for (var key in defaults) { if (defaults.hasOwnProperty(key)) { var value = defaults[key]; if (key in options) { value = options[key]; } options[key] = value; // This comparison method is not generalizable, but since these // objects are associative arrays with identical keys and with // values of known type, it works fine here. if (value !== cachedOptions[key]) { hasPriorState = false; } } } if (hasPriorState) { map = cache.map; } else { map = makeMap(from, to, options); cache = { from: from, map: map, options: options, to: to}; } // Easy way out for "{\m+}", "\", and ".h". if (from === 'itrans') { data = data.replace(/\{\\m\+\}/g, ".h.N"); data = data.replace(/\.h/g, ''); data = data.replace(/\\([^'`_]|$)/g, "##$1##"); } if (map.fromRoman) { return transliterateRoman(data, map, options); } else { return transliterateBrahmic(data, map, options); } }; // Now that Sanscript is fully defined, we now safely export it for use elsewhere. // The below block was copied from https://www.npmjs.com/package/sanscript . // define seems to be a requirejs thing https://requirejs.org/docs/whyamd.html#amd . if (typeof define === 'function' && define.amd) { define(function(){ return Sanscript; }); } else if(typeof exports !== 'undefined'){ if (typeof module !== 'undefined' && module.exports) { exports = module.exports = Sanscript; } exports.Sanscript = Sanscript; }else { global.Sanscript = Sanscript; }
};
exportSanscriptSingleton(this);