package org.madore.android.unicodeMap;

import java.util.Arrays;
import java.util.Set;
import java.util.Map;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Formatter;
import java.io.ByteArrayOutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.IOException;

public class UnicodeCharacter implements UnicodeDisplayable {

    /**
     * Named, contiguous code point ranges (Unicode blocks).
     *
     * <p>Each constant is declared with its first and last code point
     * (both inclusive) and a display name.  Internally the upper bound
     * is kept exclusive, i.e. {@link #getTo()} is one past the last
     * code point given in the declaration.</p>
     */
    public static enum Range implements UnicodeDisplayable, UnicodeRangeable {
	BASIC_LATIN(0x0000, 0x007F, "Basic Latin"),
	LATIN_1_SUPPLEMENT(0x0080, 0x00FF, "Latin-1 Supplement"),
	LATIN_EXTENDED_A(0x0100, 0x017F, "Latin Extended-A"),
	LATIN_EXTENDED_B(0x0180, 0x024F, "Latin Extended-B"),
	IPA_EXTENSIONS(0x0250, 0x02AF, "IPA Extensions"),
	SPACING_MODIFIER_LETTERS(0x02B0, 0x02FF, "Spacing Modifier Letters"),
	COMBINING_DIACRITICAL_MARKS(0x0300, 0x036F, "Combining Diacritical Marks"),
	GREEK_AND_COPTIC(0x0370, 0x03FF, "Greek and Coptic"),
	CYRILLIC(0x0400, 0x04FF, "Cyrillic"),
	CYRILLIC_SUPPLEMENT(0x0500, 0x052F, "Cyrillic Supplement"),
	ARMENIAN(0x0530, 0x058F, "Armenian"),
	HEBREW(0x0590, 0x05FF, "Hebrew"),
	ARABIC(0x0600, 0x06FF, "Arabic"),
	SYRIAC(0x0700, 0x074F, "Syriac"),
	ARABIC_SUPPLEMENT(0x0750, 0x077F, "Arabic Supplement"),
	THAANA(0x0780, 0x07BF, "Thaana"),
	NKO(0x07C0, 0x07FF, "NKo"),
	SAMARITAN(0x0800, 0x083F, "Samaritan"),
	MANDAIC(0x0840, 0x085F, "Mandaic"),
	ARABIC_EXTENDED_A(0x08A0, 0x08FF, "Arabic Extended-A"),
	DEVANAGARI(0x0900, 0x097F, "Devanagari"),
	BENGALI(0x0980, 0x09FF, "Bengali"),
	GURMUKHI(0x0A00, 0x0A7F, "Gurmukhi"),
	GUJARATI(0x0A80, 0x0AFF, "Gujarati"),
	ORIYA(0x0B00, 0x0B7F, "Oriya"),
	TAMIL(0x0B80, 0x0BFF, "Tamil"),
	TELUGU(0x0C00, 0x0C7F, "Telugu"),
	KANNADA(0x0C80, 0x0CFF, "Kannada"),
	MALAYALAM(0x0D00, 0x0D7F, "Malayalam"),
	SINHALA(0x0D80, 0x0DFF, "Sinhala"),
	THAI(0x0E00, 0x0E7F, "Thai"),
	LAO(0x0E80, 0x0EFF, "Lao"),
	TIBETAN(0x0F00, 0x0FFF, "Tibetan"),
	MYANMAR(0x1000, 0x109F, "Myanmar"),
	GEORGIAN(0x10A0, 0x10FF, "Georgian"),
	HANGUL_JAMO(0x1100, 0x11FF, "Hangul Jamo"),
	ETHIOPIC(0x1200, 0x137F, "Ethiopic"),
	ETHIOPIC_SUPPLEMENT(0x1380, 0x139F, "Ethiopic Supplement"),
	CHEROKEE(0x13A0, 0x13FF, "Cherokee"),
	UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS(0x1400, 0x167F, "Unified Canadian Aboriginal Syllabics"),
	OGHAM(0x1680, 0x169F, "Ogham"),
	RUNIC(0x16A0, 0x16FF, "Runic"),
	TAGALOG(0x1700, 0x171F, "Tagalog"),
	HANUNOO(0x1720, 0x173F, "Hanunoo"),
	BUHID(0x1740, 0x175F, "Buhid"),
	TAGBANWA(0x1760, 0x177F, "Tagbanwa"),
	KHMER(0x1780, 0x17FF, "Khmer"),
	MONGOLIAN(0x1800, 0x18AF, "Mongolian"),
	UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED(0x18B0, 0x18FF, "Unified Canadian Aboriginal Syllabics Extended"),
	LIMBU(0x1900, 0x194F, "Limbu"),
	TAI_LE(0x1950, 0x197F, "Tai Le"),
	NEW_TAI_LUE(0x1980, 0x19DF, "New Tai Lue"),
	KHMER_SYMBOLS(0x19E0, 0x19FF, "Khmer Symbols"),
	BUGINESE(0x1A00, 0x1A1F, "Buginese"),
	TAI_THAM(0x1A20, 0x1AAF, "Tai Tham"),
	BALINESE(0x1B00, 0x1B7F, "Balinese"),
	SUNDANESE(0x1B80, 0x1BBF, "Sundanese"),
	BATAK(0x1BC0, 0x1BFF, "Batak"),
	LEPCHA(0x1C00, 0x1C4F, "Lepcha"),
	OL_CHIKI(0x1C50, 0x1C7F, "Ol Chiki"),
	SUNDANESE_SUPPLEMENT(0x1CC0, 0x1CCF, "Sundanese Supplement"),
	VEDIC_EXTENSIONS(0x1CD0, 0x1CFF, "Vedic Extensions"),
	PHONETIC_EXTENSIONS(0x1D00, 0x1D7F, "Phonetic Extensions"),
	PHONETIC_EXTENSIONS_SUPPLEMENT(0x1D80, 0x1DBF, "Phonetic Extensions Supplement"),
	COMBINING_DIACRITICAL_MARKS_SUPPLEMENT(0x1DC0, 0x1DFF, "Combining Diacritical Marks Supplement"),
	LATIN_EXTENDED_ADDITIONAL(0x1E00, 0x1EFF, "Latin Extended Additional"),
	GREEK_EXTENDED(0x1F00, 0x1FFF, "Greek Extended"),
	GENERAL_PUNCTUATION(0x2000, 0x206F, "General Punctuation"),
	SUPERSCRIPTS_AND_SUBSCRIPTS(0x2070, 0x209F, "Superscripts and Subscripts"),
	CURRENCY_SYMBOLS(0x20A0, 0x20CF, "Currency Symbols"),
	COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS(0x20D0, 0x20FF, "Combining Diacritical Marks for Symbols"),
	LETTERLIKE_SYMBOLS(0x2100, 0x214F, "Letterlike Symbols"),
	NUMBER_FORMS(0x2150, 0x218F, "Number Forms"),
	ARROWS(0x2190, 0x21FF, "Arrows"),
	MATHEMATICAL_OPERATORS(0x2200, 0x22FF, "Mathematical Operators"),
	MISCELLANEOUS_TECHNICAL(0x2300, 0x23FF, "Miscellaneous Technical"),
	CONTROL_PICTURES(0x2400, 0x243F, "Control Pictures"),
	OPTICAL_CHARACTER_RECOGNITION(0x2440, 0x245F, "Optical Character Recognition"),
	ENCLOSED_ALPHANUMERICS(0x2460, 0x24FF, "Enclosed Alphanumerics"),
	BOX_DRAWING(0x2500, 0x257F, "Box Drawing"),
	BLOCK_ELEMENTS(0x2580, 0x259F, "Block Elements"),
	GEOMETRIC_SHAPES(0x25A0, 0x25FF, "Geometric Shapes"),
	MISCELLANEOUS_SYMBOLS(0x2600, 0x26FF, "Miscellaneous Symbols"),
	DINGBATS(0x2700, 0x27BF, "Dingbats"),
	MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A(0x27C0, 0x27EF, "Miscellaneous Mathematical Symbols-A"),
	SUPPLEMENTAL_ARROWS_A(0x27F0, 0x27FF, "Supplemental Arrows-A"),
	BRAILLE_PATTERNS(0x2800, 0x28FF, "Braille Patterns"),
	SUPPLEMENTAL_ARROWS_B(0x2900, 0x297F, "Supplemental Arrows-B"),
	MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B(0x2980, 0x29FF, "Miscellaneous Mathematical Symbols-B"),
	SUPPLEMENTAL_MATHEMATICAL_OPERATORS(0x2A00, 0x2AFF, "Supplemental Mathematical Operators"),
	MISCELLANEOUS_SYMBOLS_AND_ARROWS(0x2B00, 0x2BFF, "Miscellaneous Symbols and Arrows"),
	GLAGOLITIC(0x2C00, 0x2C5F, "Glagolitic"),
	LATIN_EXTENDED_C(0x2C60, 0x2C7F, "Latin Extended-C"),
	COPTIC(0x2C80, 0x2CFF, "Coptic"),
	GEORGIAN_SUPPLEMENT(0x2D00, 0x2D2F, "Georgian Supplement"),
	TIFINAGH(0x2D30, 0x2D7F, "Tifinagh"),
	ETHIOPIC_EXTENDED(0x2D80, 0x2DDF, "Ethiopic Extended"),
	CYRILLIC_EXTENDED_A(0x2DE0, 0x2DFF, "Cyrillic Extended-A"),
	SUPPLEMENTAL_PUNCTUATION(0x2E00, 0x2E7F, "Supplemental Punctuation"),
	CJK_RADICALS_SUPPLEMENT(0x2E80, 0x2EFF, "CJK Radicals Supplement"),
	KANGXI_RADICALS(0x2F00, 0x2FDF, "Kangxi Radicals"),
	IDEOGRAPHIC_DESCRIPTION_CHARACTERS(0x2FF0, 0x2FFF, "Ideographic Description Characters"),
	CJK_SYMBOLS_AND_PUNCTUATION(0x3000, 0x303F, "CJK Symbols and Punctuation"),
	HIRAGANA(0x3040, 0x309F, "Hiragana"),
	KATAKANA(0x30A0, 0x30FF, "Katakana"),
	BOPOMOFO(0x3100, 0x312F, "Bopomofo"),
	HANGUL_COMPATIBILITY_JAMO(0x3130, 0x318F, "Hangul Compatibility Jamo"),
	KANBUN(0x3190, 0x319F, "Kanbun"),
	BOPOMOFO_EXTENDED(0x31A0, 0x31BF, "Bopomofo Extended"),
	CJK_STROKES(0x31C0, 0x31EF, "CJK Strokes"),
	KATAKANA_PHONETIC_EXTENSIONS(0x31F0, 0x31FF, "Katakana Phonetic Extensions"),
	ENCLOSED_CJK_LETTERS_AND_MONTHS(0x3200, 0x32FF, "Enclosed CJK Letters and Months"),
	CJK_COMPATIBILITY(0x3300, 0x33FF, "CJK Compatibility"),
	CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A(0x3400, 0x4DB5, "CJK Unified Ideographs Extension A"),
	YIJING_HEXAGRAM_SYMBOLS(0x4DC0, 0x4DFF, "Yijing Hexagram Symbols"),
	CJK_UNIFIED_IDEOGRAPHS(0x4E00, 0x9FCC, "CJK Unified Ideographs"),
	YI_SYLLABLES(0xA000, 0xA48F, "Yi Syllables"),
	YI_RADICALS(0xA490, 0xA4CF, "Yi Radicals"),
	LISU(0xA4D0, 0xA4FF, "Lisu"),
	VAI(0xA500, 0xA63F, "Vai"),
	CYRILLIC_EXTENDED_B(0xA640, 0xA69F, "Cyrillic Extended-B"),
	BAMUM(0xA6A0, 0xA6FF, "Bamum"),
	MODIFIER_TONE_LETTERS(0xA700, 0xA71F, "Modifier Tone Letters"),
	LATIN_EXTENDED_D(0xA720, 0xA7FF, "Latin Extended-D"),
	SYLOTI_NAGRI(0xA800, 0xA82F, "Syloti Nagri"),
	COMMON_INDIC_NUMBER_FORMS(0xA830, 0xA83F, "Common Indic Number Forms"),
	PHAGS_PA(0xA840, 0xA87F, "Phags-pa"),
	SAURASHTRA(0xA880, 0xA8DF, "Saurashtra"),
	DEVANAGARI_EXTENDED(0xA8E0, 0xA8FF, "Devanagari Extended"),
	KAYAH_LI(0xA900, 0xA92F, "Kayah Li"),
	REJANG(0xA930, 0xA95F, "Rejang"),
	HANGUL_JAMO_EXTENDED_A(0xA960, 0xA97F, "Hangul Jamo Extended-A"),
	JAVANESE(0xA980, 0xA9DF, "Javanese"),
	CHAM(0xAA00, 0xAA5F, "Cham"),
	MYANMAR_EXTENDED_A(0xAA60, 0xAA7F, "Myanmar Extended-A"),
	TAI_VIET(0xAA80, 0xAADF, "Tai Viet"),
	MEETEI_MAYEK_EXTENSIONS(0xAAE0, 0xAAFF, "Meetei Mayek Extensions"),
	ETHIOPIC_EXTENDED_A(0xAB00, 0xAB2F, "Ethiopic Extended-A"),
	MEETEI_MAYEK(0xABC0, 0xABFF, "Meetei Mayek"),
	HANGUL_SYLLABLES(0xAC00, 0xD7A3, "Hangul Syllables"),
	HANGUL_JAMO_EXTENDED_B(0xD7B0, 0xD7FF, "Hangul Jamo Extended-B"),
	HIGH_SURROGATES(0xD800, 0xDB7F, "High Surrogates"),
	HIGH_PRIVATE_USE_SURROGATES(0xDB80, 0xDBFF, "High Private Use Surrogates"),
	LOW_SURROGATES(0xDC00, 0xDFFF, "Low Surrogates"),
	PRIVATE_USE_AREA(0xE000, 0xF8FF, "Private Use Area"),
	CJK_COMPATIBILITY_IDEOGRAPHS(0xF900, 0xFAFF, "CJK Compatibility Ideographs"),
	ALPHABETIC_PRESENTATION_FORMS(0xFB00, 0xFB4F, "Alphabetic Presentation Forms"),
	ARABIC_PRESENTATION_FORMS_A(0xFB50, 0xFDFF, "Arabic Presentation Forms-A"),
	VARIATION_SELECTORS(0xFE00, 0xFE0F, "Variation Selectors"),
	VERTICAL_FORMS(0xFE10, 0xFE1F, "Vertical Forms"),
	COMBINING_HALF_MARKS(0xFE20, 0xFE2F, "Combining Half Marks"),
	CJK_COMPATIBILITY_FORMS(0xFE30, 0xFE4F, "CJK Compatibility Forms"),
	SMALL_FORM_VARIANTS(0xFE50, 0xFE6F, "Small Form Variants"),
	ARABIC_PRESENTATION_FORMS_B(0xFE70, 0xFEFF, "Arabic Presentation Forms-B"),
	HALFWIDTH_AND_FULLWIDTH_FORMS(0xFF00, 0xFFEF, "Halfwidth and Fullwidth Forms"),
	SPECIALS(0xFFF0, 0xFFFF, "Specials"),
	LINEAR_B_SYLLABARY(0x10000, 0x1007F, "Linear B Syllabary"),
	LINEAR_B_IDEOGRAMS(0x10080, 0x100FF, "Linear B Ideograms"),
	AEGEAN_NUMBERS(0x10100, 0x1013F, "Aegean Numbers"),
	ANCIENT_GREEK_NUMBERS(0x10140, 0x1018F, "Ancient Greek Numbers"),
	ANCIENT_SYMBOLS(0x10190, 0x101CF, "Ancient Symbols"),
	PHAISTOS_DISC(0x101D0, 0x101FF, "Phaistos Disc"),
	LYCIAN(0x10280, 0x1029F, "Lycian"),
	CARIAN(0x102A0, 0x102DF, "Carian"),
	OLD_ITALIC(0x10300, 0x1032F, "Old Italic"),
	GOTHIC(0x10330, 0x1034F, "Gothic"),
	UGARITIC(0x10380, 0x1039F, "Ugaritic"),
	OLD_PERSIAN(0x103A0, 0x103DF, "Old Persian"),
	DESERET(0x10400, 0x1044F, "Deseret"),
	SHAVIAN(0x10450, 0x1047F, "Shavian"),
	OSMANYA(0x10480, 0x104AF, "Osmanya"),
	CYPRIOT_SYLLABARY(0x10800, 0x1083F, "Cypriot Syllabary"),
	IMPERIAL_ARAMAIC(0x10840, 0x1085F, "Imperial Aramaic"),
	PHOENICIAN(0x10900, 0x1091F, "Phoenician"),
	LYDIAN(0x10920, 0x1093F, "Lydian"),
	MEROITIC_HIEROGLYPHS(0x10980, 0x1099F, "Meroitic Hieroglyphs"),
	MEROITIC_CURSIVE(0x109A0, 0x109FF, "Meroitic Cursive"),
	KHAROSHTHI(0x10A00, 0x10A5F, "Kharoshthi"),
	OLD_SOUTH_ARABIAN(0x10A60, 0x10A7F, "Old South Arabian"),
	AVESTAN(0x10B00, 0x10B3F, "Avestan"),
	INSCRIPTIONAL_PARTHIAN(0x10B40, 0x10B5F, "Inscriptional Parthian"),
	INSCRIPTIONAL_PAHLAVI(0x10B60, 0x10B7F, "Inscriptional Pahlavi"),
	OLD_TURKIC(0x10C00, 0x10C4F, "Old Turkic"),
	RUMI_NUMERAL_SYMBOLS(0x10E60, 0x10E7F, "Rumi Numeral Symbols"),
	BRAHMI(0x11000, 0x1107F, "Brahmi"),
	KAITHI(0x11080, 0x110CF, "Kaithi"),
	SORA_SOMPENG(0x110D0, 0x110FF, "Sora Sompeng"),
	CHAKMA(0x11100, 0x1114F, "Chakma"),
	SHARADA(0x11180, 0x111DF, "Sharada"),
	TAKRI(0x11680, 0x116CF, "Takri"),
	CUNEIFORM(0x12000, 0x123FF, "Cuneiform"),
	CUNEIFORM_NUMBERS_AND_PUNCTUATION(0x12400, 0x1247F, "Cuneiform Numbers and Punctuation"),
	EGYPTIAN_HIEROGLYPHS(0x13000, 0x1342F, "Egyptian Hieroglyphs"),
	BAMUM_SUPPLEMENT(0x16800, 0x16A3F, "Bamum Supplement"),
	MIAO(0x16F00, 0x16F9F, "Miao"),
	KANA_SUPPLEMENT(0x1B000, 0x1B0FF, "Kana Supplement"),
	BYZANTINE_MUSICAL_SYMBOLS(0x1D000, 0x1D0FF, "Byzantine Musical Symbols"),
	MUSICAL_SYMBOLS(0x1D100, 0x1D1FF, "Musical Symbols"),
	ANCIENT_GREEK_MUSICAL_NOTATION(0x1D200, 0x1D24F, "Ancient Greek Musical Notation"),
	TAI_XUAN_JING_SYMBOLS(0x1D300, 0x1D35F, "Tai Xuan Jing Symbols"),
	COUNTING_ROD_NUMERALS(0x1D360, 0x1D37F, "Counting Rod Numerals"),
	MATHEMATICAL_ALPHANUMERIC_SYMBOLS(0x1D400, 0x1D7FF, "Mathematical Alphanumeric Symbols"),
	ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS(0x1EE00, 0x1EEFF, "Arabic Mathematical Alphabetic Symbols"),
	MAHJONG_TILES(0x1F000, 0x1F02F, "Mahjong Tiles"),
	DOMINO_TILES(0x1F030, 0x1F09F, "Domino Tiles"),
	PLAYING_CARDS(0x1F0A0, 0x1F0FF, "Playing Cards"),
	ENCLOSED_ALPHANUMERIC_SUPPLEMENT(0x1F100, 0x1F1FF, "Enclosed Alphanumeric Supplement"),
	ENCLOSED_IDEOGRAPHIC_SUPPLEMENT(0x1F200, 0x1F2FF, "Enclosed Ideographic Supplement"),
	MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS(0x1F300, 0x1F5FF, "Miscellaneous Symbols and Pictographs"),
	EMOTICONS(0x1F600, 0x1F64F, "Emoticons"),
	TRANSPORT_AND_MAP_SYMBOLS(0x1F680, 0x1F6FF, "Transport and Map Symbols"),
	ALCHEMICAL_SYMBOLS(0x1F700, 0x1F77F, "Alchemical Symbols"),
	CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B(0x20000, 0x2A6D6, "CJK Unified Ideographs Extension B"),
	CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C(0x2A700, 0x2B734, "CJK Unified Ideographs Extension C"),
	CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D(0x2B740, 0x2B81D, "CJK Unified Ideographs Extension D"),
	CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT(0x2F800, 0x2FA1F, "CJK Compatibility Ideographs Supplement"),
	TAGS(0xE0000, 0xE007F, "Tags"),
	VARIATION_SELECTORS_SUPPLEMENT(0xE0100, 0xE01EF, "Variation Selectors Supplement"),
	SUPPLEMENTARY_PRIVATE_USE_AREA_A(0xF0000, 0xFFFFF, "Supplementary Private Use Area-A"),
	SUPPLEMENTARY_PRIVATE_USE_AREA_B(0x100000, 0x10FFFF, "Supplementary Private Use Area-B");

	/** First code point of the range (inclusive). */
	protected final int from;
	/** One past the last code point of the range (exclusive bound). */
	protected final int to;
	/** Display name of the range. */
	protected final String descr;

	/**
	 * @param first first code point of the range (inclusive)
	 * @param last  last code point of the range (inclusive); stored
	 *              internally as the exclusive bound {@code last+1}
	 * @param title display name of the range
	 */
	Range(int first, int last, String title) {
	    this.descr = title;
	    this.from = first;
	    this.to = last + 1;
	}

	/** @return the first code point of the range (inclusive) */
	public int getFrom() {
	    return from;
	}

	/** @return one past the last code point of the range */
	public int getTo() {
	    return to;
	}

	/** @return the display name of the range */
	public String getDescr() {
	    return descr;
	}

	/** @return the display name of the range (same as {@link #getDescr()}) */
	public String getTitle() {
	    return descr;
	}

	/**
	 * Tells whether a code point lies in the half-open interval
	 * {@code [getFrom(), getTo())}.
	 */
	public boolean belongs(int codePoint) {
	    return !( codePoint < from || codePoint >= to );
	}

	/** Same test as {@link #belongs(int)}, applied to the character's code point. */
	public boolean belongs(UnicodeCharacter ch) {
	    return belongs(ch.codePoint);
	}

	/** @return the display name of the range */
	@Override
	public String toString() {
	    return descr;
	}
    }

    /**
     * Code point ranges whose character names are computed from the
     * code point rather than stored: the CJK unified ideograph ranges
     * and the Hangul syllables.
     *
     * <p>Constants are declared with inclusive first/last code points;
     * internally the upper bound is exclusive ({@code to = last+1}).</p>
     */
    public static enum SpecialRange {
	CJK_IDEOGRAPH_EXTENSION_A(0x3400, 0x4DB5, Category.OTHER_LETTER) {
	    @Override
	    public String getName(int codePoint) {
		return cjkIdeographName(codePoint);
	    }
	},
	CJK_IDEOGRAPH(0x4E00, 0x9FCC, Category.OTHER_LETTER) {
	    @Override
	    public String getName(int codePoint) {
		return cjkIdeographName(codePoint);
	    }
	},
	HANGUL_SYLLABLE(0xAC00, 0xD7A3, Category.OTHER_LETTER) {
	    @Override
	    public String getName(int codePoint) {
		return hangulSyllableName(codePoint);
	    }
	},
	CJK_IDEOGRAPH_EXTENSION_B(0x20000, 0x2A6D6, Category.OTHER_LETTER) {
	    @Override
	    public String getName(int codePoint) {
		return cjkIdeographName(codePoint);
	    }
	},
	CJK_IDEOGRAPH_EXTENSION_C(0x2A700, 0x2B734, Category.OTHER_LETTER) {
	    @Override
	    public String getName(int codePoint) {
		return cjkIdeographName(codePoint);
	    }
	},
	CJK_IDEOGRAPH_EXTENSION_D(0x2B740, 0x2B81D, Category.OTHER_LETTER) {
	    @Override
	    public String getName(int codePoint) {
		return cjkIdeographName(codePoint);
	    }
	};

	/** Number of trailing-consonant slots per (leading, vowel) pair. */
	private static final int HANGUL_T_COUNT = 28;
	/** Number of syllables sharing one leading consonant (21 vowels * 28 trailing). */
	private static final int HANGUL_N_COUNT = 21 * HANGUL_T_COUNT;

	/** Name fragments for the leading consonant, indexed by L index. */
	private static final String[] HANGUL_PART_L = {
	    "G", "GG", "N", "D", "DD", "R", "M", "B", "BB", "S",
	    "SS", "", "J", "JJ", "C", "K", "T", "P", "H"
	};
	/** Name fragments for the vowel, indexed by V index. */
	private static final String[] HANGUL_PART_V = {
	    "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O", "WA",
	    "WAE", "OE", "YO", "U", "WEO", "WE", "WI", "YU", "EU", "YI",
	    "I"
	};
	/** Name fragments for the trailing consonant, indexed by T index (0 = none). */
	private static final String[] HANGUL_PART_T = {
	    "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG",
	    "LM", "LB", "LS", "LT", "LP", "LH", "M", "B", "BS", "S",
	    "SS", "NG", "J", "C", "K", "T", "P", "H"
	};

	/** First code point of the range (inclusive). */
	protected final int from;
	/** One past the last code point of the range (exclusive bound). */
	protected final int to;
	/** General category reported for the range by {@link #getCategory()}. */
	protected Category category;

	/**
	 * @param from     first code point (inclusive)
	 * @param last     last code point (inclusive); stored as {@code last+1}
	 * @param category category returned by {@link #getCategory()}
	 */
	SpecialRange(int from, int last, Category category) {
	    this.from = from;
	    this.to = last+1;
	    this.category = category;
	}

	/** @return the first code point of the range (inclusive) */
	public int getFrom() { return this.from; }
	/** @return one past the last code point of the range */
	public int getTo() { return this.to; }
	/** @return the category this range was declared with */
	public Category getCategory() { return this.category; }

	/**
	 * Computes the name of a code point in this range.  Every
	 * constant declared above overrides this; the base version
	 * returns {@code null}.
	 */
	public String getName(int codePoint) { return null; }

	/** Tells whether {@code codePoint} lies in {@code [getFrom(), getTo())}. */
	public boolean belongs(int codePoint) {
	    return ( codePoint>=this.from && codePoint<this.to );
	}

	/**
	 * Counts the code points common to this range and the half-open
	 * interval {@code [from, to)}.
	 *
	 * @return the size of the intersection, or 0 if they are disjoint
	 */
	public int interCount(int from, int to) {
	    int from0 = Math.max(this.from, from);
	    int to0 = Math.min(this.to, to);
	    return Math.max(to0-from0, 0);
	}

	/**
	 * Tells whether the half-open interval {@code [from, to)} is
	 * entirely contained in this range.
	 */
	public boolean inside(int from, int to) {
	    return ( from >= this.from && to <= this.to );
	}

	/**
	 * Tells whether a code point falls in any of the CJK unified
	 * ideograph ranges declared in this enum (the main range and
	 * extensions A through D).
	 */
	public static boolean isCjkIdeograph(int codePoint) {
	    // Extension D used to be missing from this list although it is
	    // named through cjkIdeographName() like the other CJK ranges.
	    return ( CJK_IDEOGRAPH.belongs(codePoint)
		     || CJK_IDEOGRAPH_EXTENSION_A.belongs(codePoint)
		     || CJK_IDEOGRAPH_EXTENSION_B.belongs(codePoint)
		     || CJK_IDEOGRAPH_EXTENSION_C.belongs(codePoint)
		     || CJK_IDEOGRAPH_EXTENSION_D.belongs(codePoint) );
	}

	/** Tells whether a code point falls in {@link #HANGUL_SYLLABLE}. */
	public static boolean isHangulSyllable(int codePoint) {
	    return HANGUL_SYLLABLE.belongs(codePoint);
	}

	/**
	 * Builds the name {@code CJK UNIFIED IDEOGRAPH-XXXX}, where XXXX is
	 * the code point in uppercase hexadecimal (at least four digits).
	 * No range check is performed on {@code codePoint}.
	 */
	protected static String cjkIdeographName(int codePoint) {
	    return String.format("CJK UNIFIED IDEOGRAPH-%04X", codePoint);
	}

	/**
	 * Builds the name {@code HANGUL SYLLABLE <L><V><T>} by splitting the
	 * offset of {@code codePoint} from the start of
	 * {@link #HANGUL_SYLLABLE} into leading-consonant, vowel and
	 * trailing-consonant indices.
	 *
	 * @throws ArrayIndexOutOfBoundsException if {@code codePoint} is
	 *         outside the Hangul syllable range (no explicit check is
	 *         made; the table lookups fail)
	 */
	protected static String hangulSyllableName(int codePoint) {
	    int index = codePoint - HANGUL_SYLLABLE.getFrom();
	    int l = index/HANGUL_N_COUNT;
	    int v = (index%HANGUL_N_COUNT)/HANGUL_T_COUNT;
	    int t = index%HANGUL_T_COUNT;
	    return String.format("HANGUL SYLLABLE %s%s%s",
				 HANGUL_PART_L[l], HANGUL_PART_V[v],
				 HANGUL_PART_T[t]);
	}
    }

    /**
     * Unicode general categories.  Each constant carries its
     * two-letter code, the matching {@link Character} category
     * constant, and a human-readable description.
     */
    public static enum Category {
	UPPERCASE_LETTER("Lu", Character.UPPERCASE_LETTER, "Letter, Uppercase"),
	LOWERCASE_LETTER("Ll", Character.LOWERCASE_LETTER, "Letter, Lowercase"),
	TITLECASE_LETTER("Lt", Character.TITLECASE_LETTER, "Letter, Titlecase"),
	MODIFIER_LETTER("Lm", Character.MODIFIER_LETTER, "Letter, Modifier"),
	OTHER_LETTER("Lo", Character.OTHER_LETTER, "Letter, Other"),
	NON_SPACING_MARK("Mn", Character.NON_SPACING_MARK, "Mark, Nonspacing"),
	COMBINING_SPACING_MARK("Mc", Character.COMBINING_SPACING_MARK, "Mark, Spacing Combining"),
	ENCLOSING_MARK("Me", Character.ENCLOSING_MARK, "Mark, Enclosing"),
	DECIMAL_DIGIT_NUMBER("Nd", Character.DECIMAL_DIGIT_NUMBER, "Number, Decimal Digit"),
	LETTER_NUMBER("Nl", Character.LETTER_NUMBER, "Number, Letter"),
	OTHER_NUMBER("No", Character.OTHER_NUMBER, "Number, Other"),
	CONNECTOR_PUNCTUATION("Pc", Character.CONNECTOR_PUNCTUATION, "Punctuation, Connector"),
	DASH_PUNCTUATION("Pd", Character.DASH_PUNCTUATION, "Punctuation, Dash"),
	START_PUNCTUATION("Ps", Character.START_PUNCTUATION, "Punctuation, Open"),
	END_PUNCTUATION("Pe", Character.END_PUNCTUATION, "Punctuation, Close"),
	INITIAL_QUOTE_PUNCTUATION("Pi", Character.INITIAL_QUOTE_PUNCTUATION, "Punctuation, Initial quote"),
	FINAL_QUOTE_PUNCTUATION("Pf", Character.FINAL_QUOTE_PUNCTUATION, "Punctuation, Final quote"),
	OTHER_PUNCTUATION("Po", Character.OTHER_PUNCTUATION, "Punctuation, Other"),
	MATH_SYMBOL("Sm", Character.MATH_SYMBOL, "Symbol, Math"),
	CURRENCY_SYMBOL("Sc", Character.CURRENCY_SYMBOL, "Symbol, Currency"),
	MODIFIER_SYMBOL("Sk", Character.MODIFIER_SYMBOL, "Symbol, Modifier"),
	OTHER_SYMBOL("So", Character.OTHER_SYMBOL, "Symbol, Other"),
	SPACE_SEPARATOR("Zs", Character.SPACE_SEPARATOR, "Separator, Space"),
	LINE_SEPARATOR("Zl", Character.LINE_SEPARATOR, "Separator, Line"),
	PARAGRAPH_SEPARATOR("Zp", Character.PARAGRAPH_SEPARATOR, "Separator, Paragraph"),
	CONTROL("Cc", Character.CONTROL, "Other, Control"),
	FORMAT("Cf", Character.FORMAT, "Other, Format"),
	SURROGATE("Cs", Character.SURROGATE, "Other, Surrogate"),
	PRIVATE_USE("Co", Character.PRIVATE_USE, "Other, Private Use"),
	UNASSIGNED("Cn", Character.UNASSIGNED, "Other, Not Assigned");

	/** Two-letter category code, e.g. {@code "Lu"}. */
	protected final String code;
	/** The corresponding {@code java.lang.Character} category constant. */
	protected final byte javaValue;
	/** Human-readable description, e.g. {@code "Letter, Uppercase"}. */
	protected final String descr;

	Category(String code, byte javaValue, String descr) {
	    this.descr = descr;
	    this.javaValue = javaValue;
	    this.code = code;
	}

	/** Lookup table from two-letter code to category, used by {@link #fromCode}. */
	protected final static Map<String,Category> revMap
	    = new HashMap<String,Category>();
	static {
	    final Category[] all = values();
	    for ( int i = 0 ; i < all.length ; i++ )
		revMap.put(all[i].code, all[i]);
	}

	/**
	 * Finds the category with the given two-letter code.
	 *
	 * @param code the code to look up
	 * @return the matching category, or {@link #UNASSIGNED} when no
	 *         category has that code
	 */
	public static Category fromCode(String code) {
	    final Category found = revMap.get(code);
	    return ( found != null ) ? found : UNASSIGNED;
	}

	/** @return the two-letter category code */
	public String getCode() {
	    return code;
	}

	/** @return the corresponding {@code java.lang.Character} category constant */
	public byte getJavaValue() {
	    return javaValue;
	}

	/** @return the human-readable description */
	public String getDescr() {
	    return descr;
	}
    }

    protected final static Set<Category> printable;

    static {
	Category[] prlist = new Category[] {
	    Category.UPPERCASE_LETTER, Category.LOWERCASE_LETTER,
	    Category.TITLECASE_LETTER, Category.MODIFIER_LETTER,
	    Category.OTHER_LETTER, Category.NON_SPACING_MARK,
	    Category.COMBINING_SPACING_MARK, Category.ENCLOSING_MARK,
	    Category.DECIMAL_DIGIT_NUMBER, Category.LETTER_NUMBER,
	    Category.OTHER_NUMBER, Category.CONNECTOR_PUNCTUATION,
	    Category.DASH_PUNCTUATION, Category.START_PUNCTUATION,
	    Category.END_PUNCTUATION, Category.INITIAL_QUOTE_PUNCTUATION,
	    Category.FINAL_QUOTE_PUNCTUATION, Category.OTHER_PUNCTUATION,
	    Category.MATH_SYMBOL, Category.CURRENCY_SYMBOL,
	    Category.MODIFIER_SYMBOL, Category.OTHER_SYMBOL
	};
	printable = EnumSet.copyOf(Arrays.asList(prlist));
    }

    /** Code point of this character, as given to the constructor. */
    protected final int codePoint;
    /** Character name, as given to the constructor; also used in the label. */
    protected final String name;
    /** General category, as given to the constructor. */
    protected final Category category;
    /**
     * Flag given to the four-argument constructor (the three-argument
     * constructor sets it to true).  When false, {@link #isPrintable()}
     * answers false regardless of the category.
     */
    protected final boolean isUnicode;
    /** Result of {@link #makeCharStr()}, computed once at construction. */
    protected final String charStr;
    /** Result of {@link #makeLabel()}, computed once at construction. */
    protected final String label;

    /**
     * Builds the string made of the UTF-16 code unit(s) that encode
     * {@link #codePoint}.
     */
    protected String makeCharStr() {
	final char[] units = Character.toChars(this.codePoint);
	return String.valueOf(units);
    }

    /**
     * Builds the label {@code U+XXXX name}, where XXXX is the code
     * point in uppercase hexadecimal, zero-padded to at least four
     * digits.
     */
    protected String makeLabel() {
	return String.format("U+%04X %s", this.codePoint, this.name);
    }

    /**
     * Creates a character whose {@code isUnicode} flag is true.
     *
     * @param codePoint the code point
     * @param name      the character name
     * @param category  the general category
     */
    public UnicodeCharacter(int codePoint, String name, Category category) {
	this(codePoint, name, category, true);
    }

    /**
     * Creates a character and precomputes its string form and label.
     *
     * @param codePoint the code point
     * @param name      the character name
     * @param category  the general category
     * @param isUnicode flag consulted by {@link #isPrintable()}
     */
    public UnicodeCharacter(int codePoint, String name, Category category,
			    boolean isUnicode) {
	// Plain attributes first: the derived values below read them.
	this.isUnicode = isUnicode;
	this.category = category;
	this.name = name;
	this.codePoint = codePoint;
	// Derived values, computed once.
	this.charStr = makeCharStr();
	this.label = makeLabel();
    }

    /** @return the code point of this character */
    public int getCodePoint() {
	return codePoint;
    }

    /** @return the character name given at construction */
    public String getName() {
	return name;
    }

    /** @return the general category given at construction */
    public Category getCategory() {
	return category;
    }

    /** @return the character as a string, as precomputed at construction */
    public String getChar() {
	return charStr;
    }

    /** @return the label precomputed at construction */
    public String getLabel() {
	return label;
    }

    /** @return the {@code isUnicode} flag given at construction */
    public boolean isUnicode() {
	return isUnicode;
    }

    /**
     * Tells whether this character is flagged as Unicode and its
     * category is one of the {@code printable} categories.
     */
    public boolean isPrintable() {
	if ( ! isUnicode )
	    return false;
	return printable.contains(category);
    }

    /** @return the same string as {@link #getLabel()} */
    @Override
    public String toString() {
	final String text = getLabel();
	return text;
    }

    /**
     * Encodes a string as UTF-8.
     *
     * @param s the string to encode
     * @return the UTF-8 bytes of {@code s} (empty for the empty string)
     * @throws AssertionError if the platform reports that UTF-8 is
     *         unsupported or that writing to the in-memory buffer failed;
     *         the underlying exception is attached as the cause
     */
    static byte[] toUtf8(String s) {
	try {
	    ByteArrayOutputStream buf = new ByteArrayOutputStream(8);
	    OutputStreamWriter writer = new OutputStreamWriter(buf, "UTF-8");
	    writer.write(s, 0, s.length());
	    // close() flushes the encoder into buf
	    writer.close();
	    return buf.toByteArray();
	} catch (UnsupportedEncodingException e) {
	    // initCause() keeps the original exception for diagnostics
	    throw (AssertionError)
		new AssertionError("UTF-8 encoding unsupported").initCause(e);
	} catch (IOException e) {
	    throw (AssertionError)
		new AssertionError("this is impossible").initCause(e);
	}
    }

    /**
     * Encodes a string as big-endian UTF-16 (charset {@code UTF-16BE},
     * which writes no byte order mark).
     *
     * @param s the string to encode
     * @return the UTF-16BE bytes of {@code s} (empty for the empty string)
     * @throws AssertionError if the platform reports that UTF-16BE is
     *         unsupported or that writing to the in-memory buffer failed;
     *         the underlying exception is attached as the cause
     */
    static byte[] toUtf16(String s) {
	try {
	    ByteArrayOutputStream buf = new ByteArrayOutputStream(8);
	    OutputStreamWriter writer = new OutputStreamWriter(buf, "UTF-16BE");
	    writer.write(s, 0, s.length());
	    // close() flushes the encoder into buf
	    writer.close();
	    return buf.toByteArray();
	} catch (UnsupportedEncodingException e) {
	    // initCause() keeps the original exception for diagnostics
	    throw (AssertionError)
		new AssertionError("UTF-16BE encoding unsupported").initCause(e);
	} catch (IOException e) {
	    throw (AssertionError)
		new AssertionError("this is impossible").initCause(e);
	}
    }

}
