(*---------------------------------------------------------------------------
   Copyright (c) 2012 The uucd programmers. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(* [str fmt ...] formats its arguments to a string; shorthand for
   [Printf.sprintf]. *)
let str fmt = Printf.sprintf fmt
(* [str_of_name (uri, local)] renders a name pair as ["{uri}local"]. *)
let str_of_name (uri, local) = Printf.sprintf "{%s}%s" uri local
(* [split_string s sep] splits [s] at every occurrence of the character
   [sep] and returns the non-empty pieces in their original order.
   The string is scanned from right to left so that pieces can be consed
   onto the accumulator without a final reversal. *)
let split_string s sep =
  (* Empty pieces (adjacent, leading or trailing separators) are dropped. *)
  let keep piece acc = if piece = "" then acc else piece :: acc in
  let rec scan acc last =
    match String.rindex_from s last sep with
    | exception Not_found ->
        (* No separator left: the remaining prefix is the first piece. *)
        keep (String.sub s 0 (last + 1)) acc
    | at ->
        scan (keep (String.sub s (at + 1) (last - at)) acc) (at - 1)
  in
  scan [] (String.length s - 1)

(* Error messages *)

(* [err msg] raises [Failure msg]. *)
let err msg = failwith msg

(* Constant messages. *)
let err_data = "character data not allowed here"
let err_exp_el_end = "expected end of element"
let err_exp_data = "expected character data"
let err_wf = "document not well formed"
let err_empty_cps = "empty code point sequence"
let err_invalid_cp_spec = "invalid code point specification"
let err_invalid_name_alias_spec = "invalid name alias specification"

(* Messages that embed the offending name or attribute value. *)
let err_dup name = Printf.sprintf "duplicate element (%s)" (str_of_name name)
let err_miss_att att = Printf.sprintf "missing attribute (%s)" att
let err_att_val value = Printf.sprintf "invalid attribute value (\"%s\")" value
let err_invalid_cp value = Printf.sprintf "invalid code point (\"%s\")" value
let err_exp_ucd found =
  Printf.sprintf "expected ucd element found %s" (str_of_name found)

(* Code points *)

(* Code points represented as plain integers, with an integer-specialised
   total order so the module can be used as a [Map.Make] argument. *)
module Cp = struct
  type t = int
  let compare (a : t) (b : t) : int = compare a b
end

(* Alias for [Cp.t], i.e. a code point stored as an [int]. *)
type cp = Cp.t

(* [is_cp i] is [true] iff [i] lies in the range [0x0000..0x10FFFF]. *)
let is_cp i = i >= 0 && i <= 0x10FFFF
(* [is_scalar_value i] is [true] iff [i] lies in [0x0000..0x10FFFF] but
   outside the gap [0xD800..0xDFFF]. *)
let is_scalar_value i =
  let in_range = 0 <= i && i <= 0x10FFFF in
  let in_gap = 0xD800 <= i && i <= 0xDFFF in
  in_range && not in_gap

(* [cp_of_string v] parses a code point value.

   [v] must consist only of the hexadecimal digits [0-9] and uppercase
   [A-F]; lowercase digits are rejected. The empty string yields [0].

   Raises [Failure] (through [err]) with message [err_invalid_cp v] if [v]
   contains any other character or denotes a value outside the range
   accepted by [is_cp].

   The range is checked after every digit rather than once at the end:
   native [int] arithmetic wraps silently, so an over-long digit string
   could otherwise overflow and wrap back to a value that looks valid. *)
let cp_of_string v =
  let is_hex c = (0x30 <= c && c <= 0x39) || (0x41 <= c && c <= 0x46) in
  let cp = ref 0 in
  for k = 0 to String.length v - 1 do
    let c = Char.code v.[k] in
    if not (is_hex c) then err (err_invalid_cp v);
    (* '0'..'9' are 0x30..0x39 (offset 48); 'A'..'F' map to 10..15. *)
    cp := !cp * 16 + (if c <= 0x39 then c - 48 else c - 55);
    (* The value never decreases, so exceeding the range is final. *)
    if not (is_cp !cp) then err (err_invalid_cp v)
  done;
  !cp

(* [cps_of_string ~empty v] parses a code point sequence value: code point
   values separated by spaces. An empty [v] yields [[]] when [empty] is
   [true] (default [false]) and otherwise fails with [err_empty_cps]. *)
let cps_of_string ?(empty = false) v =
  match v with
  | "" when empty -> []
  | "" -> err err_empty_cps
  | _ -> List.map cp_of_string (split_string v ' ')

(* Maps keyed by code points, ordered by [Cp.compare]. *)
module Cpmap = Map.Make (Cp)

(* Properties *)

(* Property keys. Each known property has its own constant constructor;
   the constructors after the "Unihan" and "Unikemet" markers belong to
   those groups. [Other] carries the expanded XML name (a pair of strings)
   of a property that has no dedicated constructor. *)
type key =                            (* the type for property keys (names). *)
| Age
| Alphabetic
| Ascii_hex_digit
| Bidi_class
| Bidi_control
| Bidi_mirrored
| Bidi_mirroring_glyph
| Bidi_paired_bracket
| Bidi_paired_bracket_type
| Block
| Canonical_combining_class
| Cased
| Case_folding
| Case_ignorable
| Changes_when_casefolded
| Changes_when_casemapped
| Changes_when_lowercased
| Changes_when_nfkc_casefolded
| Changes_when_titlecased
| Changes_when_uppercased
| Composition_exclusion
| Dash
| Decomposition_mapping
| Decomposition_type
| Default_ignorable_code_point
| Deprecated
| Diacritic
| East_asian_width
| Emoji
| Emoji_presentation
| Emoji_modifier
| Emoji_modifier_base
| Emoji_component
| Equivalent_unified_ideograph
| Extender
| Extended_pictographic
| Full_composition_exclusion
| General_category
| Grapheme_base
| Grapheme_cluster_break
| Grapheme_extend
| Hangul_syllable_type
| Hex_digit
| Id_continue
| Id_compat_math_continue
| Id_compat_math_start
| Id_start
| Ideographic
| Ids_binary_operator
| Ids_trinary_operator
| Ids_unary_operator
| Indic_conjunct_break
| Indic_syllabic_category
| Indic_matra_category
| Indic_positional_category
| Jamo_short_name
| Join_control
| Joining_group
| Joining_type
| Line_break
| Logical_order_exception
| Lowercase
| Lowercase_mapping
| Math
| Modifier_combining_mark
| Name
| Name_alias
| Nfc_quick_check
| Nfd_quick_check
| Nfkc_quick_check
| Nfkc_casefold
| Nfkc_simple_casefold
| Nfkd_quick_check
| Noncharacter_code_point
| Numeric_type
| Numeric_value
| Other_alphabetic
| Other_default_ignorable_code_point
| Other_grapheme_extend
| Other_id_continue
| Other_id_start
| Other_lowercase
| Other_math
| Other_uppercase
| Pattern_syntax
| Pattern_white_space
| Prepended_concatenation_mark
| Quotation_mark
| Radical
| Regional_indicator
| Script
| Script_extensions
| Sentence_break
| Simple_case_folding
| Simple_lowercase_mapping
| Simple_titlecase_mapping
| Simple_uppercase_mapping
| Soft_dotted
| Sterm
| Terminal_punctuation
| Titlecase_mapping
| UAX_42_element
| Unicode_1_name
| Unified_ideograph
| Uppercase
| Uppercase_mapping
| Variation_selector
| Vertical_orientation
| White_space
| Word_break
| Xid_continue
| Xid_start
(* Unihan *)
| KAccountingNumeric
| KAlternateHanYu
| KAlternateJEF
| KAlternateKangXi
| KAlternateMorohashi
| KAlternateTotalStrokes
| KBigFive
| KCCCII
| KCNS1986
| KCNS1992
| KCangjie
| KCantonese
| KCheungBauer
| KCheungBauerIndex
| KCihaiT
| KCompatibilityVariant
| KCowles
| KDaeJaweon
| KDefinition
| KEACC
| KFanqie
| KFenn
| KFennIndex
| KFourCornerCode
| KFrequency
| KGB0
| KGB1
| KGB3
| KGB5
| KGB8
| KGSR
| KGradeLevel
| KHDZRadBreak
| KHKGlyph
| KHKSCS
| KHanYu
| KHangul
| KHanyuPinlu
| KHanyuPinyin
| KIBMJapan
| KIICore
| KIRGDaeJaweon
| KIRGDaiKanwaZiten
| KIRGHanyuDaZidian
| KIRGKangXi
| KIRG_GSource
| KIRG_HSource
| KIRG_JSource
| KIRG_KPSource
| KIRG_KSource
| KIRG_MSource
| KIRG_SSource
| KIRG_TSource
| KIRG_USource
| KIRG_UKSource
| KIRG_VSource
| KJapanese
| KJHJ
| KJIS0213
| KJapaneseKun
| KJapaneseOn
| KJinmeiyoKanji
| KJis0
| KJis1
| KJoyoKanji
| KKPS0
| KKPS1
| KKSC0
| KKSC1
| KKangXi
| KKarlgren
| KKorean
| KKoreanEducationHanja
| KKoreanName
| KLau
| KMainlandTelegraph
| KMandarin
| KMatthews
| KMeyerWempe
| KMojiJoho
| KMorohashi
| KNelson
| KNSHU_DubenSrc
| KNSHU_Reading
| KOtherNumeric
| KPhonetic
| KPrimaryNumeric
| KPseudoGB1
| KRSAdobe_Japan1_6
| KRSJapanese
| KRSKanWa
| KRSKangXi
| KRSKorean
| KRSMerged
| KRSUnicode
| KSBGY
| KSemanticVariant
| KSimplifiedVariant
| KSMSZD2003Index
| KSMSZD2003Readings
| KSpecializedSemanticVariant
| KSpoofingVariant
| KStrange
| KTGH
| KTGHZ2013
| KTGT_MergedSrc
| KTGT_RSUnicode
| KTaiwanTelegraph
| KTang
| KTayNumeric
| KTotalStrokes
| KTraditionalVariant
| KUnihanCore2020
| KVietnamese
| KVietnameseNumeric
| KWubi
| KXHC1983
| KXerox
| KZhuang
| KZhuangNumeric
| KZVariant
(* Unikemet *)
| KEH_Cat
| KEH_Core
| KEH_Desc
| KEH_Func
| KEH_FVal
| KEH_UniK
| KEH_JSesh
| KEH_HG
| KEH_IFAO
| KEH_NoMirror
| KEH_NoRotate
| KEH_AltSeq
| Other of (string * string)                           (* expanded XML name. *)

(* Script tags, carried by the [Script_v] and [Script_extensions_v] value
   constructors. NOTE(review): the four-letter tags look like ISO 15924
   script codes -- confirm against the data source. *)
type script = [
| `Adlm
| `Aghb
| `Ahom
| `Arab
| `Armi
| `Armn
| `Avst
| `Bali
| `Bamu
| `Bass
| `Batk
| `Beng
| `Berf
| `Bhks
| `Bopo
| `Brah
| `Brai
| `Bugi
| `Buhd
| `Cakm
| `Cans
| `Cari
| `Cham
| `Cher
| `Chrs
| `Copt
| `Cpmn
| `Cprt
| `Cyrl
| `Deva
| `Diak
| `Dogr
| `Dsrt
| `Dupl
| `Egyp
| `Elba
| `Elym
| `Ethi
| `Gara
| `Geor
| `Glag
| `Gong
| `Gonm
| `Goth
| `Gran
| `Grek
| `Gujr
| `Gukh
| `Guru
| `Hang
| `Hani
| `Hano
| `Hatr
| `Hebr
| `Hira
| `Hluw
| `Hmng
| `Hmnp
| `Hrkt
| `Hung
| `Ital
| `Java
| `Kali
| `Kana
| `Kawi
| `Khar
| `Khmr
| `Khoj
| `Knda
| `Krai
| `Kthi
| `Kits
| `Lana
| `Laoo
| `Latn
| `Lepc
| `Limb
| `Lina
| `Linb
| `Lisu
| `Lyci
| `Lydi
| `Mahj
| `Maka
| `Mand
| `Mani
| `Marc
| `Medf
| `Mend
| `Merc
| `Mero
| `Mlym
| `Modi
| `Mong
| `Mroo
| `Mtei
| `Mult
| `Mymr
| `Nagm
| `Nand
| `Narb
| `Nbat
| `Newa
| `Nkoo
| `Nshu
| `Ogam
| `Olck
| `Onao
| `Orkh
| `Orya
| `Osge
| `Osma
| `Ougr
| `Palm
| `Pauc
| `Perm
| `Phag
| `Phli
| `Phlp
| `Phnx
| `Plrd
| `Prti
| `Qaai
| `Rjng
| `Rohg
| `Runr
| `Samr
| `Sarb
| `Saur
| `Sgnw
| `Shaw
| `Shrd
| `Sidd
| `Sidt
| `Sind
| `Sinh
| `Sogd
| `Sogo
| `Sora
| `Soyo
| `Sund
| `Sunu
| `Sylo
| `Syrc
| `Tagb
| `Takr
| `Tale
| `Talu
| `Taml
| `Tang
| `Tavt
| `Tayo
| `Telu
| `Tfng
| `Tglg
| `Thaa
| `Thai
| `Tibt
| `Tirh
| `Tnsa
| `Todr
| `Tols
| `Toto
| `Tutg
| `Ugar
| `Vaii
| `Vith
| `Wara
| `Wcho
| `Xpeo
| `Xsux
| `Yezi
| `Yiii
| `Zanb
| `Zinh
| `Zyyy
| `Zzzz
]

(* Block tags, carried by the [Block_v] value constructor. [i_block]
   produces each tag from the attribute string spelled the same way, so a
   tag added here needs a matching case there. *)
type block_prop = [
| `ASCII
| `Adlam
| `Aegean_Numbers
| `Ahom
| `Alchemical
| `Alphabetic_PF
| `Anatolian_Hieroglyphs
| `Ancient_Greek_Music
| `Ancient_Greek_Numbers
| `Ancient_Symbols
| `Arabic
| `Arabic_Ext_A
| `Arabic_Ext_B
| `Arabic_Ext_C
| `Arabic_Math
| `Arabic_PF_A
| `Arabic_PF_B
| `Arabic_Sup
| `Armenian
| `Arrows
| `Avestan
| `Balinese
| `Bamum
| `Bamum_Sup
| `Bassa_Vah
| `Batak
| `Bengali
| `Beria_Erfe
| `Bhaiksuki
| `Block_Elements
| `Bopomofo
| `Bopomofo_Ext
| `Box_Drawing
| `Brahmi
| `Braille
| `Buginese
| `Buhid
| `Byzantine_Music
| `CJK
| `CJK_Compat
| `CJK_Compat_Forms
| `CJK_Compat_Ideographs
| `CJK_Compat_Ideographs_Sup
| `CJK_Ext_A
| `CJK_Ext_B
| `CJK_Ext_C
| `CJK_Ext_D
| `CJK_Ext_E
| `CJK_Ext_F
| `CJK_Ext_G
| `CJK_Ext_H
| `CJK_Ext_I
| `CJK_Ext_J
| `CJK_Radicals_Sup
| `CJK_Strokes
| `CJK_Symbols
| `Carian
| `Caucasian_Albanian
| `Chakma
| `Cham
| `Cherokee
| `Cherokee_Sup
| `Chess_Symbols
| `Chorasmian
| `Compat_Jamo
| `Control_Pictures
| `Coptic
| `Coptic_Epact_Numbers
| `Counting_Rod
| `Cuneiform
| `Cuneiform_Numbers
| `Currency_Symbols
| `Cypriot_Syllabary
| `Cypro_Minoan
| `Cyrillic
| `Cyrillic_Ext_A
| `Cyrillic_Ext_B
| `Cyrillic_Ext_C
| `Cyrillic_Ext_D
| `Cyrillic_Sup
| `Deseret
| `Devanagari
| `Devanagari_Ext
| `Devanagari_Ext_A
| `Diacriticals
| `Diacriticals_Ext
| `Diacriticals_For_Symbols
| `Diacriticals_Sup
| `Dingbats
| `Dives_Akuru
| `Dogra
| `Domino
| `Duployan
| `Early_Dynastic_Cuneiform
| `Egyptian_Hieroglyph_Format_Controls
| `Egyptian_Hieroglyphs
| `Egyptian_Hieroglyphs_Ext_A
| `Elbasan
| `Elymaic
| `Emoticons
| `Enclosed_Alphanum
| `Enclosed_Alphanum_Sup
| `Enclosed_CJK
| `Enclosed_Ideographic_Sup
| `Ethiopic
| `Ethiopic_Ext
| `Ethiopic_Ext_A
| `Ethiopic_Ext_B
| `Ethiopic_Sup
| `Garay
| `Geometric_Shapes
| `Geometric_Shapes_Ext
| `Georgian
| `Georgian_Ext
| `Georgian_Sup
| `Glagolitic
| `Glagolitic_Sup
| `Gothic
| `Grantha
| `Greek
| `Greek_Ext
| `Gujarati
| `Gunjala_Gondi
| `Gurmukhi
| `Gurung_Khema
| `Half_And_Full_Forms
| `Half_Marks
| `Hangul
| `Hanifi_Rohingya
| `Hanunoo
| `Hatran
| `Hebrew
| `High_PU_Surrogates
| `High_Surrogates
| `Hiragana
| `IDC
| `IPA_Ext
| `Ideographic_Symbols
| `Imperial_Aramaic
| `Indic_Number_Forms
| `Indic_Siyaq_Numbers
| `Inscriptional_Pahlavi
| `Inscriptional_Parthian
| `Jamo
| `Jamo_Ext_A
| `Jamo_Ext_B
| `Javanese
| `Kaithi
| `Kaktovik_Numerals
| `Kana_Ext_A
| `Kana_Ext_B
| `Kana_Sup
| `Kanbun
| `Kangxi
| `Kannada
| `Katakana
| `Katakana_Ext
| `Kawi
| `Kayah_Li
| `Kharoshthi
| `Khitan_Small_Script
| `Khmer
| `Khmer_Symbols
| `Khojki
| `Khudawadi
| `Kirat_Rai
| `Lao
| `Latin_1_Sup
| `Latin_Ext_A
| `Latin_Ext_Additional
| `Latin_Ext_B
| `Latin_Ext_C
| `Latin_Ext_D
| `Latin_Ext_E
| `Latin_Ext_F
| `Latin_Ext_G
| `Lepcha
| `Letterlike_Symbols
| `Limbu
| `Linear_A
| `Linear_B_Ideograms
| `Linear_B_Syllabary
| `Lisu
| `Lisu_Sup
| `Low_Surrogates
| `Lycian
| `Lydian
| `Mahajani
| `Mahjong
| `Makasar
| `Malayalam
| `Mandaic
| `Manichaean
| `Marchen
| `Masaram_Gondi
| `Math_Alphanum
| `Math_Operators
| `Mayan_Numerals
| `Medefaidrin
| `Meetei_Mayek
| `Meetei_Mayek_Ext
| `Mende_Kikakui
| `Meroitic_Cursive
| `Meroitic_Hieroglyphs
| `Miao
| `Misc_Arrows
| `Misc_Math_Symbols_A
| `Misc_Math_Symbols_B
| `Misc_Pictographs
| `Misc_Symbols
| `Misc_Symbols_Sup
| `Misc_Technical
| `Modi
| `Modifier_Letters
| `Modifier_Tone_Letters
| `Mongolian
| `Mongolian_Sup
| `Mro
| `Multani
| `Music
| `Myanmar
| `Myanmar_Ext_A
| `Myanmar_Ext_B
| `Myanmar_Ext_C
| `NB
| `NKo
| `Nabataean
| `Nag_Mundari
| `Nandinagari
| `New_Tai_Lue
| `Newa
| `Number_Forms
| `Nushu
| `Nyiakeng_Puachue_Hmong
| `OCR
| `Ogham
| `Ol_Onal
| `Ol_Chiki
| `Old_Hungarian
| `Old_Italic
| `Old_North_Arabian
| `Old_Permic
| `Old_Persian
| `Old_Sogdian
| `Old_South_Arabian
| `Old_Turkic
| `Old_Uyghur
| `Oriya
| `Ornamental_Dingbats
| `Osage
| `Osmanya
| `Ottoman_Siyaq_Numbers
| `PUA
| `Pahawh_Hmong
| `Palmyrene
| `Pau_Cin_Hau
| `Phags_Pa
| `Phaistos
| `Phoenician
| `Phonetic_Ext
| `Phonetic_Ext_Sup
| `Playing_Cards
| `Psalter_Pahlavi
| `Punctuation
| `Rejang
| `Rumi
| `Runic
| `Samaritan
| `Saurashtra
| `Sharada
| `Sharada_Sup
| `Shavian
| `Shorthand_Format_Controls
| `Siddham
| `Sidetic
| `Sinhala
| `Sinhala_Archaic_Numbers
| `Small_Forms
| `Small_Kana_Ext
| `Sogdian
| `Sora_Sompeng
| `Soyombo
| `Specials
| `Sundanese
| `Sundanese_Sup
| `Sunuwar
| `Sup_Arrows_A
| `Sup_Arrows_B
| `Sup_Arrows_C
| `Sup_Math_Operators
| `Sup_PUA_A
| `Sup_PUA_B
| `Sup_Punctuation
| `Sup_Symbols_And_Pictographs
| `Super_And_Sub
| `Sutton_SignWriting
| `Syloti_Nagri
| `Symbols_And_Pictographs_Ext_A
| `Symbols_For_Legacy_Computing
| `Symbols_For_Legacy_Computing_Sup
| `Syriac
| `Syriac_Sup
| `Tagalog
| `Tagbanwa
| `Tags
| `Tai_Le
| `Tai_Tham
| `Tai_Viet
| `Tai_Xuan_Jing
| `Tai_Yo
| `Takri
| `Tamil
| `Tamil_Sup
| `Tangsa
| `Tangut
| `Tangut_Components
| `Tangut_Components_Sup
| `Tangut_Sup
| `Telugu
| `Thaana
| `Thai
| `Tibetan
| `Tifinagh
| `Tirhuta
| `Todhri
| `Tolong_Siki
| `Toto
| `Transport_And_Map
| `Tulu_Tigalari
| `UCAS
| `UCAS_Ext
| `UCAS_Ext_A
| `Ugaritic
| `VS
| `VS_Sup
| `Vai
| `Vedic_Ext
| `Vertical_Forms
| `Vithkuqi
| `Wancho
| `Warang_Citi
| `Yezidi
| `Yi_Radicals
| `Yi_Syllables
| `Yijing
| `Zanabazar_Square
| `Znamenny_Music
]

(* Property values. One constructor per value shape: enumerated properties
   carry a closed polymorphic variant, the remaining constructors carry
   booleans, integers, strings, code points or code point sequences. The
   [o_*] functions project a constructor's payload back out and the [i_*]
   functions build a value from an attribute string. *)
type value =                                (* the type for property values. *)
| Age_v of [ `Version of int * int | `Unassigned ]
| Block_v of block_prop
| Bidi_class_v of [
    | `AL
    | `AN
    | `B
    | `BN
    | `CS
    | `EN
    | `ES
    | `ET
    | `L
    | `LRE
    | `LRO
    | `NSM
    | `ON
    | `PDF
    | `R
    | `RLE
    | `RLO
    | `S
    | `WS
    | `LRI
    | `RLI
    | `FSI
    | `PDI
  ]
| Bidi_paired_bracket_type_v of [ `O | `C | `N ]
| Bool_v of bool
| Bool_maybe_v of [ `True | `False | `Maybe ]
| Cp_v of cp
| Cp_map_v of [ `Self | `Cp of cp ]
| Cp_opt_v of cp option
| Decomposition_type_v of [
    | `Can
    | `Com
    | `Enc
    | `Fin
    | `Font
    | `Fra
    | `Init
    | `Iso
    | `Med
    | `Nar
    | `Nb
    | `Sml
    | `Sqr
    | `Sub
    | `Sup
    | `Vert
    | `Wide
    | `None
  ]
| East_asian_width_v of [ `A | `F | `H | `N | `Na | `W ]
| General_category_v of [
    | `Lu
    | `Ll
    | `Lt
    | `Lm
    | `Lo
    | `Mn
    | `Mc
    | `Me
    | `Nd
    | `Nl
    | `No
    | `Pc
    | `Pd
    | `Ps
    | `Pe
    | `Pi
    | `Pf
    | `Po
    | `Sm
    | `Sc
    | `Sk
    | `So
    | `Zs
    | `Zl
    | `Zp
    | `Cc
    | `Cf
    | `Cs
    | `Co
    | `Cn
  ]
| Grapheme_cluster_break_v of [
  | `CN
  | `CR
  | `EB
  | `EBG
  | `EM
  | `EX
  | `GAZ
  | `L
  | `LF
  | `LV
  | `LVT
  | `PP
  | `RI
  | `SM
  | `T
  | `V
  | `XX
  | `ZWJ ]
| Hangul_syllable_type_v of [ `L | `LV | `LVT | `T | `V | `NA ]
| Int_v of int
| Indic_conjunct_break_v of
    [ `Consonant
    | `Extend
    | `Linker
    | `None ]
| Indic_syllabic_category_v of
    [ `Avagraha
    | `Bindu
    | `Brahmi_Joining_Number
    | `Cantillation_Mark
    | `Consonant
    | `Consonant_Dead
    | `Consonant_Final
    | `Consonant_Head_Letter
    | `Consonant_Initial_Postfixed
    | `Consonant_Killer
    | `Consonant_Medial
    | `Consonant_Placeholder
    | `Consonant_Preceding_Repha
    | `Consonant_Prefixed
    | `Consonant_Repha
    | `Consonant_Subjoined
    | `Consonant_Succeeding_Repha
    | `Consonant_With_Stacker
    | `Gemination_Mark
    | `Invisible_Stacker
    | `Joiner
    | `Modifying_Letter
    | `Non_Joiner
    | `Nukta
    | `Number
    | `Number_Joiner
    | `Other
    | `Pure_Killer
    | `Reordering_Killer
    | `Register_Shifter
    | `Syllable_Modifier
    | `Tone_Letter
    | `Tone_Mark
    | `Virama
    | `Visarga
    | `Vowel
    | `Vowel_Dependent
    | `Vowel_Independent ]
| Indic_matra_category_v of [
    | `Right
    | `Left
    | `Visual_Order_Left
    | `Left_And_Right
    | `Top
    | `Bottom
    | `Top_And_Bottom
    | `Top_And_Right
    | `Top_And_Left
    | `Top_And_Left_And_Right
    | `Bottom_And_Right
    | `Top_And_Bottom_And_Right
    | `Overstruck
    | `Invisible
    | `NA
  ]
| Indic_positional_category_v of [
    | `Bottom
    | `Bottom_And_Left
    | `Bottom_And_Right
    | `Invisible
    | `Left
    | `Left_And_Right
    | `NA
    | `Overstruck
    | `Right
    | `Top
    | `Top_And_Bottom
    | `Top_And_Bottom_And_Left
    | `Top_And_Bottom_And_Right
    | `Top_And_Left
    | `Top_And_Left_And_Right
    | `Top_And_Right
    | `Visual_Order_Left
    ]
| Joining_group_v of [
    | `African_Feh
    | `African_Noon
    | `African_Qaf
    | `Ain
    | `Alaph
    | `Alef
    | `Alef_Maqsurah
    | `Beh
    | `Beth
    | `Burushaski_Yeh_Barree
    | `Dal
    | `Dalath_Rish
    | `E
    | `Farsi_Yeh
    | `Fe
    | `Feh
    | `Final_Semkath
    | `Gaf
    | `Gamal
    | `Hah
    | `Hanifi_Rohingya_Kinna_Ya
    | `Hanifi_Rohingya_Pa
    | `Hamza_On_Heh_Goal
    | `He
    | `Heh
    | `Heh_Goal
    | `Heth
    | `Kaf
    | `Kaph
    | `Kashmiri_Yeh
    | `Khaph
    | `Knotted_Heh
    | `Lam
    | `Lamadh
    | `Malayalam_Bha
    | `Malayalam_Ja
    | `Malayalam_Lla
    | `Malayalam_Llla
    | `Malayalam_Nga
    | `Malayalam_Nna
    | `Malayalam_Nnna
    | `Malayalam_Nya
    | `Malayalam_Ra
    | `Malayalam_Ssa
    | `Malayalam_Tta
    | `Manichaean_Aleph
    | `Manichaean_Ayin
    | `Manichaean_Beth
    | `Manichaean_Daleth
    | `Manichaean_Dhamedh
    | `Manichaean_Five
    | `Manichaean_Gimel
    | `Manichaean_Heth
    | `Manichaean_Hundred
    | `Manichaean_Kaph
    | `Manichaean_Lamedh
    | `Manichaean_Mem
    | `Manichaean_Nun
    | `Manichaean_One
    | `Manichaean_Pe
    | `Manichaean_Qoph
    | `Manichaean_Resh
    | `Manichaean_Sadhe
    | `Manichaean_Samekh
    | `Manichaean_Taw
    | `Manichaean_Ten
    | `Manichaean_Teth
    | `Manichaean_Thamedh
    | `Manichaean_Twenty
    | `Manichaean_Waw
    | `Manichaean_Yodh
    | `Manichaean_Zayin
    | `Meem
    | `Mim
    | `No_Joining_Group
    | `Noon
    | `Nun
    | `Nya
    | `Pe
    | `Qaf
    | `Qaph
    | `Reh
    | `Reversed_Pe
    | `Rohingya_Yeh
    | `Sad
    | `Sadhe
    | `Seen
    | `Semkath
    | `Shin
    | `Straight_Waw
    | `Swash_Kaf
    | `Syriac_Waw
    | `Tah
    | `Taw
    | `Teh_Marbuta
    | `Teh_Marbuta_Goal
    | `Teth
    | `Thin_Noon
    | `Thin_Yeh
    | `Vertical_Tail
    | `Waw
    | `Yeh
    | `Yeh_Barree
    | `Yeh_With_Tail
    | `Yudh
    | `Yudh_He
    | `Zain
    | `Zhain
    | `BAA
    | `FA
    | `HAA
    | `HA_GOAL
    | `HA
    | `CAF
    | `KNOTTED_HA
    | `RA
    | `SWASH_CAF
    | `HAMZAH_ON_HA_GOAL
    | `TAA_MARBUTAH
    | `YA_BARREE
    | `YA
    | `ALEF_MAQSURAH ]
| Joining_type_v of [ `U | `C | `T | `D | `L | `R ]
| Line_break_v of [
    | `AI
    | `AK
    | `AL
    | `AP
    | `AS
    | `B2
    | `BA
    | `BB
    | `BK
    | `CB
    | `CJ
    | `CL
    | `CM
    | `CP
    | `CR
    | `EB
    | `EM
    | `EX
    | `GL
    | `H2
    | `H3
    | `HH
    | `HL
    | `HY
    | `ID
    | `IN
    | `IS
    | `JL
    | `JT
    | `JV
    | `LF
    | `NL
    | `NS
    | `NU
    | `OP
    | `PO
    | `PR
    | `QU
    | `RI
    | `SA
    | `SG
    | `SP
    | `SY
    | `VF
    | `VI
    | `WJ
    | `XX
    | `ZW
    | `ZWJ
  ]
| Name_v of [`Pattern of string | `Name of string ]
| Name_alias_v of
    (string * [`Abbreviation | `Alternate | `Control | `Correction | `Figment])
      list
| Numeric_type_v of [ `None | `De | `Di | `Nu ]
| Numeric_value_v of
    [ `NaN | `Nums of [`Frac of int * int | `Num of int64 ] list]
| Script_v of script
| Script_extensions_v of script list
| Sentence_break_v of [
    | `AT
    | `CL
    | `CR
    | `EX
    | `FO
    | `LE
    | `LF
    | `LO
    | `NU
    | `SC
    | `SE
    | `SP
    | `ST
    | `UP
    | `XX
  ]
| Cps_v of cp list
| Cps_map_v of [ `Self | `Cps of cp list ]
| String_v of string
| UAX_42_element_v of [ `Reserved | `Noncharacter | `Surrogate | `Char ]
| Vertical_orientation_v of [ `U | `R | `Tu | `Tr ]
| Word_break_v of [
    | `CR
    | `DQ
    | `EB
    | `EBG
    | `EM
    | `EX
    | `Extend
    | `FO
    | `GAZ
    | `HL
    | `KA
    | `LE
    | `LF
    | `MB
    | `ML
    | `MN
    | `NL
    | `NU
    | `RI
    | `SQ
    | `WSegSpace
    | `XX
    | `ZWJ
  ]

(* property value projection *)

(* Each [o_*] function extracts the payload of exactly one [value]
   constructor. Applying it to any other constructor hits [assert false]. *)

let o_age v = match v with Age_v x -> x | _ -> assert false
let o_bidi_class v = match v with Bidi_class_v x -> x | _ -> assert false
let o_bidi_paired_bracket_type v =
  match v with Bidi_paired_bracket_type_v x -> x | _ -> assert false

let o_block v = match v with Block_v x -> x | _ -> assert false
let o_bool v = match v with Bool_v x -> x | _ -> assert false
let o_bool_maybe v = match v with Bool_maybe_v x -> x | _ -> assert false
let o_cp v = match v with Cp_v x -> x | _ -> assert false
let o_cp_map v = match v with Cp_map_v x -> x | _ -> assert false
let o_cp_opt v = match v with Cp_opt_v x -> x | _ -> assert false
let o_decomposition_type v =
  match v with Decomposition_type_v x -> x | _ -> assert false

let o_east_asian_width v =
  match v with East_asian_width_v x -> x | _ -> assert false

let o_general_category v =
  match v with General_category_v x -> x | _ -> assert false

let o_grapheme_cluster_break v =
  match v with Grapheme_cluster_break_v x -> x | _ -> assert false

let o_hangul_syllable_type v =
  match v with Hangul_syllable_type_v x -> x | _ -> assert false

let o_int v = match v with Int_v x -> x | _ -> assert false

let o_indic_conjunct_break v =
  match v with Indic_conjunct_break_v x -> x | _ -> assert false

let o_indic_syllabic_category v =
  match v with Indic_syllabic_category_v x -> x | _ -> assert false

let o_indic_matra_category v =
  match v with Indic_matra_category_v x -> x | _ -> assert false

let o_indic_positional_category v =
  match v with Indic_positional_category_v x -> x | _ -> assert false

let o_joining_group v = match v with Joining_group_v x -> x | _ -> assert false
let o_joining_type v = match v with Joining_type_v x -> x | _ -> assert false
let o_line_break v = match v with Line_break_v x -> x | _ -> assert false
let o_name v = match v with Name_v x -> x | _ -> assert false
let o_name_alias v = match v with Name_alias_v x -> x | _ -> assert false
let o_numeric_type v = match v with Numeric_type_v x -> x | _ -> assert false
let o_numeric_value v = match v with Numeric_value_v x -> x | _ -> assert false
let o_script v = match v with Script_v x -> x | _ -> assert false
let o_script_extensions v =
  match v with Script_extensions_v x -> x | _ -> assert false

let o_sentence_break v =
  match v with Sentence_break_v x -> x | _ -> assert false

let o_cps v = match v with Cps_v x -> x | _ -> assert false
let o_cps_map v = match v with Cps_map_v x -> x | _ -> assert false
let o_string v = match v with String_v x -> x | _ -> assert false
let o_uax_42_element v =
  match v with UAX_42_element_v x -> x | _ -> assert false

let o_vertical_orientation v =
  match v with Vertical_orientation_v x -> x | _ -> assert false

let o_word_break v = match v with Word_break_v x -> x | _ -> assert false

(* property value injection *)

(* [i_age v] builds an [Age_v] from an attribute string: ["unassigned"]
   gives [`Unassigned]; otherwise [v] must split on ['.'] into exactly two
   integers, giving [`Version (major, minor)]. Anything else fails with
   [err_att_val v]. *)
let i_age v =
  let version_of s =
    match List.map int_of_string (split_string s '.') with
    | [major; minor] -> `Version (major, minor)
    | _ -> err (err_att_val s)
    | exception Failure _ -> err (err_att_val s)
  in
  Age_v (if v = "unassigned" then `Unassigned else version_of v)

(* [i_bidi_class v] builds a [Bidi_class_v] from the attribute string [v];
   an unrecognised string fails with [err_att_val v]. *)
let i_bidi_class v =
  let cls = match v with
    | "AL" -> `AL
    | "AN" -> `AN
    | "B" -> `B
    | "BN" -> `BN
    | "CS" -> `CS
    | "EN" -> `EN
    | "ES" -> `ES
    | "ET" -> `ET
    | "L" -> `L
    | "LRE" -> `LRE
    | "LRO" -> `LRO
    | "NSM" -> `NSM
    | "ON" -> `ON
    | "PDF" -> `PDF
    | "R" -> `R
    | "RLE" -> `RLE
    | "RLO" -> `RLO
    | "S" -> `S
    | "WS" -> `WS
    | "LRI" -> `LRI
    | "RLI" -> `RLI
    | "FSI" -> `FSI
    | "PDI" -> `PDI
    | other -> err (err_att_val other)
  in
  Bidi_class_v cls

(* [i_bidi_paired_bracket_type v] builds a [Bidi_paired_bracket_type_v]
   from the lowercase strings ["o"], ["c"] or ["n"]; any other string
   fails with [err_att_val v]. *)
let i_bidi_paired_bracket_type v =
  let kind = match v with
    | "o" -> `O
    | "c" -> `C
    | "n" -> `N
    | other -> err (err_att_val other)
  in
  Bidi_paired_bracket_type_v kind

(* [i_block v] builds a [Block_v] from the block attribute string [v].

   Every recognised string maps to the [block_prop] tag with the same
   spelling (in particular ["Arabic_Ext_B"] gives [`Arabic_Ext_B], not
   [`Arabic_Ext_A]). Any other string raises [Failure] through
   [err (err_att_val v)]. *)
let i_block v = Block_v begin match v with
| "ASCII" -> `ASCII
| "Adlam" -> `Adlam
| "Aegean_Numbers" -> `Aegean_Numbers
| "Ahom" -> `Ahom
| "Alchemical" -> `Alchemical
| "Alphabetic_PF" -> `Alphabetic_PF
| "Anatolian_Hieroglyphs" -> `Anatolian_Hieroglyphs
| "Ancient_Greek_Music" -> `Ancient_Greek_Music
| "Ancient_Greek_Numbers" -> `Ancient_Greek_Numbers
| "Ancient_Symbols" -> `Ancient_Symbols
| "Arabic" -> `Arabic
| "Arabic_Ext_A" -> `Arabic_Ext_A
| "Arabic_Ext_B" -> `Arabic_Ext_B
| "Arabic_Ext_C" -> `Arabic_Ext_C
| "Arabic_Math" -> `Arabic_Math
| "Arabic_PF_A" -> `Arabic_PF_A
| "Arabic_PF_B" -> `Arabic_PF_B
| "Arabic_Sup" -> `Arabic_Sup
| "Armenian" -> `Armenian
| "Arrows" -> `Arrows
| "Avestan" -> `Avestan
| "Balinese" -> `Balinese
| "Bamum" -> `Bamum
| "Bamum_Sup" -> `Bamum_Sup
| "Bassa_Vah" -> `Bassa_Vah
| "Batak" -> `Batak
| "Bengali" -> `Bengali
| "Beria_Erfe" -> `Beria_Erfe
| "Bhaiksuki" -> `Bhaiksuki
| "Block_Elements" -> `Block_Elements
| "Bopomofo" -> `Bopomofo
| "Bopomofo_Ext" -> `Bopomofo_Ext
| "Box_Drawing" -> `Box_Drawing
| "Brahmi" -> `Brahmi
| "Braille" -> `Braille
| "Buginese" -> `Buginese
| "Buhid" -> `Buhid
| "Byzantine_Music" -> `Byzantine_Music
| "CJK" -> `CJK
| "CJK_Compat" -> `CJK_Compat
| "CJK_Compat_Forms" -> `CJK_Compat_Forms
| "CJK_Compat_Ideographs" -> `CJK_Compat_Ideographs
| "CJK_Compat_Ideographs_Sup" -> `CJK_Compat_Ideographs_Sup
| "CJK_Ext_A" -> `CJK_Ext_A
| "CJK_Ext_B" -> `CJK_Ext_B
| "CJK_Ext_C" -> `CJK_Ext_C
| "CJK_Ext_D" -> `CJK_Ext_D
| "CJK_Ext_E" -> `CJK_Ext_E
| "CJK_Ext_F" -> `CJK_Ext_F
| "CJK_Ext_G" -> `CJK_Ext_G
| "CJK_Ext_H" -> `CJK_Ext_H
| "CJK_Ext_I" -> `CJK_Ext_I
| "CJK_Ext_J" -> `CJK_Ext_J
| "CJK_Radicals_Sup" -> `CJK_Radicals_Sup
| "CJK_Strokes" -> `CJK_Strokes
| "CJK_Symbols" -> `CJK_Symbols
| "Carian" -> `Carian
| "Caucasian_Albanian" -> `Caucasian_Albanian
| "Chakma" -> `Chakma
| "Cham" -> `Cham
| "Cherokee" -> `Cherokee
| "Cherokee_Sup" -> `Cherokee_Sup
| "Chess_Symbols" -> `Chess_Symbols
| "Chorasmian" -> `Chorasmian
| "Compat_Jamo" -> `Compat_Jamo
| "Control_Pictures" -> `Control_Pictures
| "Coptic" -> `Coptic
| "Coptic_Epact_Numbers" -> `Coptic_Epact_Numbers
| "Counting_Rod" -> `Counting_Rod
| "Cuneiform" -> `Cuneiform
| "Cuneiform_Numbers" -> `Cuneiform_Numbers
| "Currency_Symbols" -> `Currency_Symbols
| "Cypriot_Syllabary" -> `Cypriot_Syllabary
| "Cypro_Minoan" -> `Cypro_Minoan
| "Cyrillic" -> `Cyrillic
| "Cyrillic_Ext_A" -> `Cyrillic_Ext_A
| "Cyrillic_Ext_B" -> `Cyrillic_Ext_B
| "Cyrillic_Ext_C" -> `Cyrillic_Ext_C
| "Cyrillic_Ext_D" -> `Cyrillic_Ext_D
| "Cyrillic_Sup" -> `Cyrillic_Sup
| "Deseret" -> `Deseret
| "Devanagari" -> `Devanagari
| "Devanagari_Ext" -> `Devanagari_Ext
| "Devanagari_Ext_A" -> `Devanagari_Ext_A
| "Diacriticals" -> `Diacriticals
| "Diacriticals_Ext" -> `Diacriticals_Ext
| "Diacriticals_For_Symbols" -> `Diacriticals_For_Symbols
| "Diacriticals_Sup" -> `Diacriticals_Sup
| "Dingbats" -> `Dingbats
| "Dives_Akuru" -> `Dives_Akuru
| "Dogra" -> `Dogra
| "Domino" -> `Domino
| "Duployan" -> `Duployan
| "Early_Dynastic_Cuneiform" -> `Early_Dynastic_Cuneiform
| "Egyptian_Hieroglyph_Format_Controls" -> `Egyptian_Hieroglyph_Format_Controls
| "Egyptian_Hieroglyphs" -> `Egyptian_Hieroglyphs
| "Egyptian_Hieroglyphs_Ext_A" -> `Egyptian_Hieroglyphs_Ext_A
| "Elbasan" -> `Elbasan
| "Elymaic" -> `Elymaic
| "Emoticons" -> `Emoticons
| "Enclosed_Alphanum" -> `Enclosed_Alphanum
| "Enclosed_Alphanum_Sup" -> `Enclosed_Alphanum_Sup
| "Enclosed_CJK" -> `Enclosed_CJK
| "Enclosed_Ideographic_Sup" -> `Enclosed_Ideographic_Sup
| "Ethiopic" -> `Ethiopic
| "Ethiopic_Ext" -> `Ethiopic_Ext
| "Ethiopic_Ext_A" -> `Ethiopic_Ext_A
| "Ethiopic_Ext_B" -> `Ethiopic_Ext_B
| "Ethiopic_Sup" -> `Ethiopic_Sup
| "Garay" -> `Garay
| "Geometric_Shapes" -> `Geometric_Shapes
| "Geometric_Shapes_Ext" -> `Geometric_Shapes_Ext
| "Georgian" -> `Georgian
| "Georgian_Ext" -> `Georgian_Ext
| "Georgian_Sup" -> `Georgian_Sup
| "Glagolitic" -> `Glagolitic
| "Glagolitic_Sup" -> `Glagolitic_Sup
| "Gothic" -> `Gothic
| "Grantha" -> `Grantha
| "Greek" -> `Greek
| "Greek_Ext" -> `Greek_Ext
| "Gujarati" -> `Gujarati
| "Gunjala_Gondi" -> `Gunjala_Gondi
| "Gurmukhi" -> `Gurmukhi
| "Gurung_Khema" -> `Gurung_Khema
| "Half_And_Full_Forms" -> `Half_And_Full_Forms
| "Half_Marks" -> `Half_Marks
| "Hangul" -> `Hangul
| "Hanifi_Rohingya" -> `Hanifi_Rohingya
| "Hanunoo" -> `Hanunoo
| "Hatran" -> `Hatran
| "Hebrew" -> `Hebrew
| "High_PU_Surrogates" -> `High_PU_Surrogates
| "High_Surrogates" -> `High_Surrogates
| "Hiragana" -> `Hiragana
| "IDC" -> `IDC
| "IPA_Ext" -> `IPA_Ext
| "Ideographic_Symbols" -> `Ideographic_Symbols
| "Imperial_Aramaic" -> `Imperial_Aramaic
| "Indic_Number_Forms" -> `Indic_Number_Forms
| "Indic_Siyaq_Numbers" -> `Indic_Siyaq_Numbers
| "Inscriptional_Pahlavi" -> `Inscriptional_Pahlavi
| "Inscriptional_Parthian" -> `Inscriptional_Parthian
| "Jamo" -> `Jamo
| "Jamo_Ext_A" -> `Jamo_Ext_A
| "Jamo_Ext_B" -> `Jamo_Ext_B
| "Javanese" -> `Javanese
| "Kaithi" -> `Kaithi
| "Kaktovik_Numerals" -> `Kaktovik_Numerals
| "Kana_Ext_A" -> `Kana_Ext_A
| "Kana_Ext_B" -> `Kana_Ext_B
| "Kawi" -> `Kawi
| "Kana_Sup" -> `Kana_Sup
| "Kanbun" -> `Kanbun
| "Kangxi" -> `Kangxi
| "Kannada" -> `Kannada
| "Katakana" -> `Katakana
| "Katakana_Ext" -> `Katakana_Ext
| "Kayah_Li" -> `Kayah_Li
| "Kharoshthi" -> `Kharoshthi
| "Khitan_Small_Script" -> `Khitan_Small_Script
| "Khmer" -> `Khmer
| "Khmer_Symbols" -> `Khmer_Symbols
| "Khojki" -> `Khojki
| "Khudawadi" -> `Khudawadi
| "Kirat_Rai" -> `Kirat_Rai
| "Lao" -> `Lao
| "Latin_1_Sup" -> `Latin_1_Sup
| "Latin_Ext_A" -> `Latin_Ext_A
| "Latin_Ext_Additional" -> `Latin_Ext_Additional
| "Latin_Ext_B" -> `Latin_Ext_B
| "Latin_Ext_C" -> `Latin_Ext_C
| "Latin_Ext_D" -> `Latin_Ext_D
| "Latin_Ext_E" -> `Latin_Ext_E
| "Latin_Ext_F" -> `Latin_Ext_F
| "Latin_Ext_G" -> `Latin_Ext_G
| "Lepcha" -> `Lepcha
| "Letterlike_Symbols" -> `Letterlike_Symbols
| "Limbu" -> `Limbu
| "Linear_A" -> `Linear_A
| "Linear_B_Ideograms" -> `Linear_B_Ideograms
| "Linear_B_Syllabary" -> `Linear_B_Syllabary
| "Lisu" -> `Lisu
| "Lisu_Sup" -> `Lisu_Sup
| "Low_Surrogates" -> `Low_Surrogates
| "Lycian" -> `Lycian
| "Lydian" -> `Lydian
| "Mahajani" -> `Mahajani
| "Mahjong" -> `Mahjong
| "Makasar" -> `Makasar
| "Malayalam" -> `Malayalam
| "Mandaic" -> `Mandaic
| "Manichaean" -> `Manichaean
| "Marchen" -> `Marchen
| "Masaram_Gondi" -> `Masaram_Gondi
| "Math_Alphanum" -> `Math_Alphanum
| "Math_Operators" -> `Math_Operators
| "Mayan_Numerals" -> `Mayan_Numerals
| "Medefaidrin" -> `Medefaidrin
| "Meetei_Mayek" -> `Meetei_Mayek
| "Meetei_Mayek_Ext" -> `Meetei_Mayek_Ext
| "Mende_Kikakui" -> `Mende_Kikakui
| "Meroitic_Cursive" -> `Meroitic_Cursive
| "Meroitic_Hieroglyphs" -> `Meroitic_Hieroglyphs
| "Miao" -> `Miao
| "Misc_Arrows" -> `Misc_Arrows
| "Misc_Math_Symbols_A" -> `Misc_Math_Symbols_A
| "Misc_Math_Symbols_B" -> `Misc_Math_Symbols_B
| "Misc_Pictographs" -> `Misc_Pictographs
| "Misc_Symbols" -> `Misc_Symbols
| "Misc_Symbols_Sup" -> `Misc_Symbols_Sup
| "Misc_Technical" -> `Misc_Technical
| "Modi" -> `Modi
| "Modifier_Letters" -> `Modifier_Letters
| "Modifier_Tone_Letters" -> `Modifier_Tone_Letters
| "Mongolian" -> `Mongolian
| "Mongolian_Sup" -> `Mongolian_Sup
| "Mro" -> `Mro
| "Multani" -> `Multani
| "Music" -> `Music
| "Myanmar" -> `Myanmar
| "Myanmar_Ext_A" -> `Myanmar_Ext_A
| "Myanmar_Ext_B" -> `Myanmar_Ext_B
| "Myanmar_Ext_C" -> `Myanmar_Ext_C
| "NB" -> `NB
| "NKo" -> `NKo
| "Nabataean" -> `Nabataean
| "Nag_Mundari" -> `Nag_Mundari
| "Nandinagari" -> `Nandinagari
| "New_Tai_Lue" -> `New_Tai_Lue
| "Newa" -> `Newa
| "Number_Forms" -> `Number_Forms
| "Nushu" -> `Nushu
| "Nyiakeng_Puachue_Hmong" -> `Nyiakeng_Puachue_Hmong
| "OCR" -> `OCR
| "Ogham" -> `Ogham
| "Ol_Chiki" -> `Ol_Chiki
| "Ol_Onal" -> `Ol_Onal
| "Old_Hungarian" -> `Old_Hungarian
| "Old_Italic" -> `Old_Italic
| "Old_North_Arabian" -> `Old_North_Arabian
| "Old_Permic" -> `Old_Permic
| "Old_Persian" -> `Old_Persian
| "Old_Sogdian" -> `Old_Sogdian
| "Old_South_Arabian" -> `Old_South_Arabian
| "Old_Turkic" -> `Old_Turkic
| "Old_Uyghur" -> `Old_Uyghur
| "Oriya" -> `Oriya
| "Ornamental_Dingbats" -> `Ornamental_Dingbats
| "Osage" -> `Osage
| "Osmanya" -> `Osmanya
| "Ottoman_Siyaq_Numbers" -> `Ottoman_Siyaq_Numbers
| "PUA" -> `PUA
| "Pahawh_Hmong" -> `Pahawh_Hmong
| "Palmyrene" -> `Palmyrene
| "Pau_Cin_Hau" -> `Pau_Cin_Hau
| "Phags_Pa" -> `Phags_Pa
| "Phaistos" -> `Phaistos
| "Phoenician" -> `Phoenician
| "Phonetic_Ext" -> `Phonetic_Ext
| "Phonetic_Ext_Sup" -> `Phonetic_Ext_Sup
| "Playing_Cards" -> `Playing_Cards
| "Psalter_Pahlavi" -> `Psalter_Pahlavi
| "Punctuation" -> `Punctuation
| "Rejang" -> `Rejang
| "Rumi" -> `Rumi
| "Runic" -> `Runic
| "Samaritan" -> `Samaritan
| "Saurashtra" -> `Saurashtra
| "Sharada" -> `Sharada
| "Sharada_Sup" -> `Sharada_Sup
| "Shavian" -> `Shavian
| "Shorthand_Format_Controls" -> `Shorthand_Format_Controls
| "Siddham" -> `Siddham
| "Sidetic" -> `Sidetic
| "Sinhala" -> `Sinhala
| "Sinhala_Archaic_Numbers" -> `Sinhala_Archaic_Numbers
| "Small_Forms" -> `Small_Forms
| "Small_Kana_Ext" -> `Small_Kana_Ext
| "Sogdian" -> `Sogdian
| "Sora_Sompeng" -> `Sora_Sompeng
| "Soyombo" -> `Soyombo
| "Specials" -> `Specials
| "Sundanese" -> `Sundanese
| "Sundanese_Sup" -> `Sundanese_Sup
| "Sunuwar" -> `Sunuwar
| "Sup_Arrows_A" -> `Sup_Arrows_A
| "Sup_Arrows_B" -> `Sup_Arrows_B
| "Sup_Arrows_C" -> `Sup_Arrows_C
| "Sup_Math_Operators" -> `Sup_Math_Operators
| "Sup_PUA_A" -> `Sup_PUA_A
| "Sup_PUA_B" -> `Sup_PUA_B
| "Sup_Punctuation" -> `Sup_Punctuation
| "Sup_Symbols_And_Pictographs" -> `Sup_Symbols_And_Pictographs
| "Super_And_Sub" -> `Super_And_Sub
| "Sutton_SignWriting" -> `Sutton_SignWriting
| "Syloti_Nagri" -> `Syloti_Nagri
| "Symbols_And_Pictographs_Ext_A" -> `Symbols_And_Pictographs_Ext_A
| "Symbols_For_Legacy_Computing" -> `Symbols_For_Legacy_Computing
| "Symbols_For_Legacy_Computing_Sup" -> `Symbols_For_Legacy_Computing_Sup
| "Syriac" -> `Syriac
| "Syriac_Sup" -> `Syriac_Sup
| "Tagalog" -> `Tagalog
| "Tagbanwa" -> `Tagbanwa
| "Tags" -> `Tags
| "Tai_Le" -> `Tai_Le
| "Tai_Tham" -> `Tai_Tham
| "Tai_Viet" -> `Tai_Viet
| "Tai_Xuan_Jing" -> `Tai_Xuan_Jing
| "Tai_Yo" -> `Tai_Yo
| "Takri" -> `Takri
| "Tamil" -> `Tamil
| "Tamil_Sup" -> `Tamil_Sup
| "Tangsa" -> `Tangsa
| "Tangut" -> `Tangut
| "Tangut_Components" -> `Tangut_Components
| "Tangut_Components_Sup" -> `Tangut_Components_Sup
| "Tangut_Sup" -> `Tangut_Sup
| "Telugu" -> `Telugu
| "Thaana" -> `Thaana
| "Thai" -> `Thai
| "Tibetan" -> `Tibetan
| "Tifinagh" -> `Tifinagh
| "Tirhuta" -> `Tirhuta
| "Todhri" -> `Todhri
| "Tolong_Siki" -> `Tolong_Siki
| "Toto" -> `Toto
| "Transport_And_Map" -> `Transport_And_Map
| "Tulu_Tigalari" -> `Tulu_Tigalari
| "UCAS" -> `UCAS
| "UCAS_Ext" -> `UCAS_Ext
| "UCAS_Ext_A" -> `UCAS_Ext_A
| "Ugaritic" -> `Ugaritic
| "VS" -> `VS
| "VS_Sup" -> `VS_Sup
| "Vai" -> `Vai
| "Vedic_Ext" -> `Vedic_Ext
| "Vertical_Forms" -> `Vertical_Forms
| "Vithkuqi" -> `Vithkuqi
| "Wancho" -> `Wancho
| "Warang_Citi" -> `Warang_Citi
| "Yezidi" -> `Yezidi
| "Yi_Radicals" -> `Yi_Radicals
| "Yi_Syllables" -> `Yi_Syllables
| "Yijing" -> `Yijing
| "Zanabazar_Square" -> `Zanabazar_Square
| "Znamenny_Music" -> `Znamenny_Music
| v -> err (err_att_val v)
end

(* Maps "Y" to [Bool_v true] and "N" to [Bool_v false]; any other string
   fails through [err] with an invalid attribute value message. *)
let i_bool v =
  let b = match v with
  | "Y" -> true
  | "N" -> false
  | other -> err (err_att_val other)
  in
  Bool_v b

(* Three-valued parser: "Y", "N" and "M" yield [`True], [`False] and
   [`Maybe] wrapped in [Bool_maybe_v]; other strings fail through [err]. *)
let i_bool_maybe v =
  let b = match v with
  | "Y" -> `True
  | "N" -> `False
  | "M" -> `Maybe
  | other -> err (err_att_val other)
  in
  Bool_maybe_v b

(* Parses [v] with [cp_of_string] and wraps the code point in [Cp_v]. *)
let i_cp v =
  let cp = cp_of_string v in
  Cp_v cp
(* "#" yields [`Self]; any other string is parsed by [cp_of_string] and
   yields [`Cp cp]. The result is wrapped in [Cp_map_v]. *)
let i_cp_map v =
  let target = match v with
  | "#" -> `Self
  | hex -> `Cp (cp_of_string hex)
  in
  Cp_map_v target

(* The empty string yields [None]; any other string is parsed by
   [cp_of_string]. The result is wrapped in [Cp_opt_v]. *)
let i_cp_opt v =
  let cp = match v with
  | "" -> None
  | hex -> Some (cp_of_string hex)
  in
  Cp_opt_v cp

(* Parses [v] with [cps_of_string], forwarding the optional [empty]
   argument unchanged, and wraps the result in [Cps_v]. *)
let i_cps ?empty v =
  let cps = cps_of_string ?empty v in
  Cps_v cps
(* "#" yields [`Self]; any other string is parsed by [cps_of_string]
   (with [empty] forwarded) and yields [`Cps cps]. The result is wrapped
   in [Cps_map_v]. *)
let i_cps_map ?empty v =
  let target = match v with
  | "#" -> `Self
  | seq -> `Cps (cps_of_string ?empty seq)
  in
  Cps_map_v target

(* Converts a decomposition type string to a [Decomposition_type_v] value.
   Unrecognized strings fail through [err]. *)
let i_decomposition_type v =
  let tag = match v with
  | "can" -> `Can
  | "com" -> `Com
  | "enc" -> `Enc
  | "fin" -> `Fin
  | "font" -> `Font
  | "fra" -> `Fra
  | "init" -> `Init
  | "iso" -> `Iso
  | "med" -> `Med
  | "nar" -> `Nar
  | "nb" -> `Nb
  | "sml" -> `Sml
  | "sqr" -> `Sqr
  | "sub" -> `Sub
  | "sup" -> `Sup
  | "vert" -> `Vert
  | "wide" -> `Wide
  | "none" -> `None
  | other -> err (err_att_val other)
  in
  Decomposition_type_v tag

(* Converts an east asian width string to an [East_asian_width_v] value.
   Unrecognized strings fail through [err]. *)
let i_east_asian_width v =
  let tag = match v with
  | "A" -> `A
  | "F" -> `F
  | "H" -> `H
  | "N" -> `N
  | "Na" -> `Na
  | "W" -> `W
  | other -> err (err_att_val other)
  in
  East_asian_width_v tag

(* Converts a two-letter general category string to a
   [General_category_v] value. Unrecognized strings fail through [err]. *)
let i_general_category v =
  let tag = match v with
  | "Lu" -> `Lu
  | "Ll" -> `Ll
  | "Lt" -> `Lt
  | "Lm" -> `Lm
  | "Lo" -> `Lo
  | "Mn" -> `Mn
  | "Mc" -> `Mc
  | "Me" -> `Me
  | "Nd" -> `Nd
  | "Nl" -> `Nl
  | "No" -> `No
  | "Pc" -> `Pc
  | "Pd" -> `Pd
  | "Ps" -> `Ps
  | "Pe" -> `Pe
  | "Pi" -> `Pi
  | "Pf" -> `Pf
  | "Po" -> `Po
  | "Sm" -> `Sm
  | "Sc" -> `Sc
  | "Sk" -> `Sk
  | "So" -> `So
  | "Zs" -> `Zs
  | "Zl" -> `Zl
  | "Zp" -> `Zp
  | "Cc" -> `Cc
  | "Cf" -> `Cf
  | "Cs" -> `Cs
  | "Co" -> `Co
  | "Cn" -> `Cn
  | other -> err (err_att_val other)
  in
  General_category_v tag

(* Converts a grapheme cluster break string to a
   [Grapheme_cluster_break_v] value. Unrecognized strings fail through
   [err]. *)
let i_grapheme_cluster_break v =
  let tag = match v with
  | "CN" -> `CN
  | "CR" -> `CR
  | "EB" -> `EB
  | "EBG" -> `EBG
  | "EM" -> `EM
  | "EX" -> `EX
  | "GAZ" -> `GAZ
  | "L" -> `L
  | "LF" -> `LF
  | "LV" -> `LV
  | "LVT" -> `LVT
  | "PP" -> `PP
  | "RI" -> `RI
  | "SM" -> `SM
  | "T" -> `T
  | "V" -> `V
  | "XX" -> `XX
  | "ZWJ" -> `ZWJ
  | other -> err (err_att_val other)
  in
  Grapheme_cluster_break_v tag

(* Converts a hangul syllable type string to a [Hangul_syllable_type_v]
   value. Unrecognized strings fail through [err]. *)
let i_hangul_syllable_type v =
  let tag = match v with
  | "L" -> `L
  | "LV" -> `LV
  | "LVT" -> `LVT
  | "T" -> `T
  | "V" -> `V
  | "NA" -> `NA
  | other -> err (err_att_val other)
  in
  Hangul_syllable_type_v tag

(* Parses [v] with [int_of_string] and wraps the integer in [Int_v]; a
   string [int_of_string] rejects is reported through [err]. *)
let i_int v = match int_of_string v with
| i -> Int_v i
| exception Failure _ -> err (err_att_val v)

(* Converts an indic conjunct break string to an
   [Indic_conjunct_break_v] value. Unrecognized strings fail through
   [err]. *)
let i_indic_conjunct_break v =
  let tag = match v with
  | "Consonant" -> `Consonant
  | "Extend" -> `Extend
  | "Linker" -> `Linker
  | "None" -> `None
  | other -> err (err_att_val other)
  in
  Indic_conjunct_break_v tag

(* Converts an indic syllabic category string to an
   [Indic_syllabic_category_v] value. Unrecognized strings fail through
   [err]. *)
let i_indic_syllabic_category v =
  let tag = match v with
  | "Avagraha" -> `Avagraha
  | "Bindu" -> `Bindu
  | "Brahmi_Joining_Number" -> `Brahmi_Joining_Number
  | "Cantillation_Mark" -> `Cantillation_Mark
  | "Consonant" -> `Consonant
  | "Consonant_Dead" -> `Consonant_Dead
  | "Consonant_Final" -> `Consonant_Final
  | "Consonant_Head_Letter" -> `Consonant_Head_Letter
  | "Consonant_Initial_Postfixed" -> `Consonant_Initial_Postfixed
  | "Consonant_Killer" -> `Consonant_Killer
  | "Consonant_Medial" -> `Consonant_Medial
  | "Consonant_Placeholder" -> `Consonant_Placeholder
  | "Consonant_Preceding_Repha" -> `Consonant_Preceding_Repha
  | "Consonant_Prefixed" -> `Consonant_Prefixed
  | "Consonant_Repha" -> `Consonant_Repha
  | "Consonant_Subjoined" -> `Consonant_Subjoined
  | "Consonant_Succeeding_Repha" -> `Consonant_Succeeding_Repha
  | "Consonant_With_Stacker" -> `Consonant_With_Stacker
  | "Gemination_Mark" -> `Gemination_Mark
  | "Invisible_Stacker" -> `Invisible_Stacker
  | "Joiner" -> `Joiner
  | "Modifying_Letter" -> `Modifying_Letter
  | "Non_Joiner" -> `Non_Joiner
  | "Nukta" -> `Nukta
  | "Number" -> `Number
  | "Number_Joiner" -> `Number_Joiner
  | "Other" -> `Other
  | "Pure_Killer" -> `Pure_Killer
  | "Reordering_Killer" -> `Reordering_Killer
  | "Register_Shifter" -> `Register_Shifter
  | "Syllable_Modifier" -> `Syllable_Modifier
  | "Tone_Letter" -> `Tone_Letter
  | "Tone_Mark" -> `Tone_Mark
  | "Virama" -> `Virama
  | "Visarga" -> `Visarga
  | "Vowel" -> `Vowel
  | "Vowel_Dependent" -> `Vowel_Dependent
  | "Vowel_Independent" -> `Vowel_Independent
  | other -> err (err_att_val other)
  in
  Indic_syllabic_category_v tag

(* Converts an indic matra category string to an
   [Indic_matra_category_v] value. Unrecognized strings fail through
   [err]. *)
let i_indic_matra_category v =
  let tag = match v with
  | "Right" -> `Right
  | "Left" -> `Left
  | "Visual_Order_Left" -> `Visual_Order_Left
  | "Left_And_Right" -> `Left_And_Right
  | "Top" -> `Top
  | "Bottom" -> `Bottom
  | "Top_And_Bottom" -> `Top_And_Bottom
  | "Top_And_Right" -> `Top_And_Right
  | "Top_And_Left" -> `Top_And_Left
  | "Top_And_Left_And_Right" -> `Top_And_Left_And_Right
  | "Bottom_And_Right" -> `Bottom_And_Right
  | "Top_And_Bottom_And_Right" -> `Top_And_Bottom_And_Right
  | "Overstruck" -> `Overstruck
  | "Invisible" -> `Invisible
  | "NA" -> `NA
  | other -> err (err_att_val other)
  in
  Indic_matra_category_v tag

(* Converts an indic positional category string to an
   [Indic_positional_category_v] value. Each recognized string maps to the
   polymorphic variant tag of the same spelling; in particular
   "Bottom_And_Left" yields [`Bottom_And_Left] and is kept distinct from
   "Bottom_And_Right". Unrecognized strings fail through [err] with an
   invalid attribute value message. *)
let i_indic_positional_category v = Indic_positional_category_v
begin match v with
| "Bottom" -> `Bottom
| "Bottom_And_Left" -> `Bottom_And_Left
| "Bottom_And_Right" -> `Bottom_And_Right
| "Invisible" -> `Invisible
| "Left" -> `Left
| "Left_And_Right" -> `Left_And_Right
| "NA" -> `NA
| "Overstruck" -> `Overstruck
| "Right" -> `Right
| "Top" -> `Top
| "Top_And_Bottom" -> `Top_And_Bottom
| "Top_And_Bottom_And_Left" -> `Top_And_Bottom_And_Left
| "Top_And_Bottom_And_Right" -> `Top_And_Bottom_And_Right
| "Top_And_Left" -> `Top_And_Left
| "Top_And_Left_And_Right" -> `Top_And_Left_And_Right
| "Top_And_Right" -> `Top_And_Right
| "Visual_Order_Left" -> `Visual_Order_Left
| v -> err (err_att_val v)
end

(* Converts a joining group string to a [Joining_group_v] value. Each
   recognized string maps to the polymorphic variant tag of the same
   spelling; the table holds both mixed-case and all-uppercase names.
   Unrecognized strings fail through [err] with an invalid attribute value
   message. *)
let i_joining_group v = Joining_group_v begin match v with
| "African_Feh" -> `African_Feh
| "African_Noon" -> `African_Noon
| "African_Qaf" -> `African_Qaf
| "Ain" -> `Ain
| "Alaph" -> `Alaph
| "Alef" -> `Alef
| "Alef_Maqsurah" -> `Alef_Maqsurah
| "Beh" -> `Beh
| "Beth" -> `Beth
| "Burushaski_Yeh_Barree" -> `Burushaski_Yeh_Barree
| "Dal" -> `Dal
| "Dalath_Rish" -> `Dalath_Rish
| "E" -> `E
| "Farsi_Yeh" -> `Farsi_Yeh
| "Fe" -> `Fe
| "Feh" -> `Feh
| "Final_Semkath" -> `Final_Semkath
| "Gaf" -> `Gaf
| "Gamal" -> `Gamal
| "Hah" -> `Hah
| "Hanifi_Rohingya_Kinna_Ya" -> `Hanifi_Rohingya_Kinna_Ya
| "Hanifi_Rohingya_Pa" -> `Hanifi_Rohingya_Pa
| "Hamza_On_Heh_Goal" -> `Hamza_On_Heh_Goal
| "He" -> `He
| "Heh" -> `Heh
| "Heh_Goal" -> `Heh_Goal
| "Heth" -> `Heth
| "Kaf" -> `Kaf
| "Kaph" -> `Kaph
| "Kashmiri_Yeh" -> `Kashmiri_Yeh
| "Khaph" -> `Khaph
| "Knotted_Heh" -> `Knotted_Heh
| "Lam" -> `Lam
| "Lamadh" -> `Lamadh
| "Malayalam_Bha" -> `Malayalam_Bha
| "Malayalam_Ja" -> `Malayalam_Ja
| "Malayalam_Lla" -> `Malayalam_Lla
| "Malayalam_Llla" -> `Malayalam_Llla
| "Malayalam_Nna" -> `Malayalam_Nna
| "Malayalam_Nnna" -> `Malayalam_Nnna
| "Malayalam_Nya" -> `Malayalam_Nya
| "Malayalam_Ra" -> `Malayalam_Ra
| "Malayalam_Ssa" -> `Malayalam_Ssa
| "Malayalam_Tta" -> `Malayalam_Tta
| "Malayalam_Nga" -> `Malayalam_Nga
| "Manichaean_Aleph" -> `Manichaean_Aleph
| "Manichaean_Ayin" -> `Manichaean_Ayin
| "Manichaean_Beth" -> `Manichaean_Beth
| "Manichaean_Daleth" -> `Manichaean_Daleth
| "Manichaean_Dhamedh" -> `Manichaean_Dhamedh
| "Manichaean_Five" -> `Manichaean_Five
| "Manichaean_Gimel" -> `Manichaean_Gimel
| "Manichaean_Heth" -> `Manichaean_Heth
| "Manichaean_Hundred" -> `Manichaean_Hundred
| "Manichaean_Kaph" -> `Manichaean_Kaph
| "Manichaean_Lamedh" -> `Manichaean_Lamedh
| "Manichaean_Mem" -> `Manichaean_Mem
| "Manichaean_Nun" -> `Manichaean_Nun
| "Manichaean_One" -> `Manichaean_One
| "Manichaean_Pe" -> `Manichaean_Pe
| "Manichaean_Qoph" -> `Manichaean_Qoph
| "Manichaean_Resh" -> `Manichaean_Resh
| "Manichaean_Sadhe" -> `Manichaean_Sadhe
| "Manichaean_Samekh" -> `Manichaean_Samekh
| "Manichaean_Taw" -> `Manichaean_Taw
| "Manichaean_Ten" -> `Manichaean_Ten
| "Manichaean_Teth" -> `Manichaean_Teth
| "Manichaean_Thamedh" -> `Manichaean_Thamedh
| "Manichaean_Twenty" -> `Manichaean_Twenty
| "Manichaean_Waw" -> `Manichaean_Waw
| "Manichaean_Yodh" -> `Manichaean_Yodh
| "Manichaean_Zayin" -> `Manichaean_Zayin
| "Meem" -> `Meem
| "Mim" -> `Mim
| "No_Joining_Group" -> `No_Joining_Group
| "Noon" -> `Noon
| "Nun" -> `Nun
| "Nya" -> `Nya
| "Pe" -> `Pe
| "Qaf" -> `Qaf
| "Qaph" -> `Qaph
| "Reh" -> `Reh
| "Reversed_Pe" -> `Reversed_Pe
| "Rohingya_Yeh" -> `Rohingya_Yeh
| "Sad" -> `Sad
| "Sadhe" -> `Sadhe
| "Seen" -> `Seen
| "Semkath" -> `Semkath
| "Shin" -> `Shin
| "Straight_Waw" -> `Straight_Waw
| "Swash_Kaf" -> `Swash_Kaf
| "Syriac_Waw" -> `Syriac_Waw
| "Tah" -> `Tah
| "Taw" -> `Taw
| "Teh_Marbuta" -> `Teh_Marbuta
| "Teh_Marbuta_Goal" -> `Teh_Marbuta_Goal
| "Teth" -> `Teth
| "Thin_Noon" -> `Thin_Noon
| "Thin_Yeh" -> `Thin_Yeh
| "Vertical_Tail" -> `Vertical_Tail
| "Waw" -> `Waw
| "Yeh" -> `Yeh
| "Yeh_Barree" -> `Yeh_Barree
| "Yeh_With_Tail" -> `Yeh_With_Tail
| "Yudh" -> `Yudh
| "Yudh_He" -> `Yudh_He
| "Zain" -> `Zain
| "Zhain" -> `Zhain
| "BAA" -> `BAA
| "FA" -> `FA
| "HAA" -> `HAA
| "HA_GOAL" -> `HA_GOAL
| "HA" -> `HA
| "CAF" -> `CAF
| "KNOTTED_HA" -> `KNOTTED_HA
| "RA" -> `RA
| "SWASH_CAF" -> `SWASH_CAF
| "HAMZAH_ON_HA_GOAL" -> `HAMZAH_ON_HA_GOAL
| "TAA_MARBUTAH" -> `TAA_MARBUTAH
| "YA_BARREE" -> `YA_BARREE
| "YA" -> `YA
(* The spelling with a trailing space is the one this table historically
   matched; it is kept so that inputs accepted before stay accepted. *)
| "ALEF_MAQSURAH" | "ALEF_MAQSURAH " -> `ALEF_MAQSURAH
| v -> err (err_att_val v)
end

(* Converts a one-letter joining type string to a [Joining_type_v] value.
   Unrecognized strings fail through [err]. *)
let i_joining_type v =
  let tag = match v with
  | "U" -> `U
  | "C" -> `C
  | "T" -> `T
  | "D" -> `D
  | "L" -> `L
  | "R" -> `R
  | other -> err (err_att_val other)
  in
  Joining_type_v tag

(* Converts a line break class string to a [Line_break_v] value.
   Unrecognized strings fail through [err]. *)
let i_line_break v =
  let tag = match v with
  | "AI" -> `AI
  | "AK" -> `AK
  | "AL" -> `AL
  | "AP" -> `AP
  | "AS" -> `AS
  | "B2" -> `B2
  | "BA" -> `BA
  | "BB" -> `BB
  | "BK" -> `BK
  | "CB" -> `CB
  | "CJ" -> `CJ
  | "CL" -> `CL
  | "CM" -> `CM
  | "CP" -> `CP
  | "CR" -> `CR
  | "EB" -> `EB
  | "EM" -> `EM
  | "EX" -> `EX
  | "GL" -> `GL
  | "H2" -> `H2
  | "H3" -> `H3
  | "HH" -> `HH
  | "HL" -> `HL
  | "HY" -> `HY
  | "ID" -> `ID
  | "IN" -> `IN
  | "IS" -> `IS
  | "JL" -> `JL
  | "JT" -> `JT
  | "JV" -> `JV
  | "LF" -> `LF
  | "NL" -> `NL
  | "NS" -> `NS
  | "NU" -> `NU
  | "OP" -> `OP
  | "PO" -> `PO
  | "PR" -> `PR
  | "QU" -> `QU
  | "RI" -> `RI
  | "SA" -> `SA
  | "SG" -> `SG
  | "SP" -> `SP
  | "SY" -> `SY
  | "VF" -> `VF
  | "VI" -> `VI
  | "WJ" -> `WJ
  | "XX" -> `XX
  | "ZW" -> `ZW
  | "ZWJ" -> `ZWJ
  | other -> err (err_att_val other)
  in
  Line_break_v tag

(* A string containing '#' becomes [`Pattern v], any other string
   [`Name v]; the result is wrapped in [Name_v]. *)
let i_name v =
  let n = if String.contains v '#' then `Pattern v else `Name v in
  Name_v n

(* Converts a name alias type string to its polymorphic variant tag. The
   result is returned bare, without a value constructor. Unrecognized
   strings fail through [err]. *)
let i_name_alias_type v = match v with
| "abbreviation" -> `Abbreviation
| "alternate" -> `Alternate
| "control" -> `Control
| "correction" -> `Correction
| "figment" -> `Figment
| other -> err (err_att_val other)

(* Converts a numeric type string to a [Numeric_type_v] value.
   Unrecognized strings fail through [err]. *)
let i_numeric_type v =
  let tag = match v with
  | "None" -> `None
  | "De" -> `De
  | "Di" -> `Di
  | "Nu" -> `Nu
  | other -> err (err_att_val other)
  in
  Numeric_type_v tag

(* Parses a numeric value into a [Numeric_value_v]. After trimming, "NaN"
   yields [`NaN]; otherwise the string is split on spaces and every piece
   is read either as "num/denom" ([`Frac]) or as a single 64-bit integer
   ([`Num]), giving [`Nums]. Any [Failure] raised while parsing is turned
   into an invalid attribute value error on the original string [v]. *)
let i_numeric_value v =
  let parse_one piece = match split_string (String.trim piece) '/' with
  | [n; d] -> `Frac (int_of_string n, int_of_string d)
  | [n] -> `Num (Int64.of_string n)
  | _ -> failwith ""
  in
  let parse () = match String.trim v with
  | "NaN" -> `NaN
  | s -> `Nums (List.map parse_one (split_string s ' '))
  in
  Numeric_value_v (try parse () with Failure _ -> err (err_att_val v))

(* Converts a four-letter script code to a [Script_v] value. Each
   recognized code maps to the polymorphic variant tag of the same
   spelling; unrecognized strings fail through [err]. *)
let i_script v =
  let tag = match v with
  | "Adlm" -> `Adlm
  | "Aghb" -> `Aghb
  | "Ahom" -> `Ahom
  | "Arab" -> `Arab
  | "Armi" -> `Armi
  | "Armn" -> `Armn
  | "Avst" -> `Avst
  | "Bali" -> `Bali
  | "Bamu" -> `Bamu
  | "Bass" -> `Bass
  | "Batk" -> `Batk
  | "Beng" -> `Beng
  | "Berf" -> `Berf
  | "Bhks" -> `Bhks
  | "Bopo" -> `Bopo
  | "Brah" -> `Brah
  | "Brai" -> `Brai
  | "Bugi" -> `Bugi
  | "Buhd" -> `Buhd
  | "Cakm" -> `Cakm
  | "Cans" -> `Cans
  | "Cari" -> `Cari
  | "Cham" -> `Cham
  | "Cher" -> `Cher
  | "Chrs" -> `Chrs
  | "Copt" -> `Copt
  | "Cpmn" -> `Cpmn
  | "Cprt" -> `Cprt
  | "Cyrl" -> `Cyrl
  | "Deva" -> `Deva
  | "Diak" -> `Diak
  | "Dogr" -> `Dogr
  | "Dsrt" -> `Dsrt
  | "Dupl" -> `Dupl
  | "Egyp" -> `Egyp
  | "Elba" -> `Elba
  | "Elym" -> `Elym
  | "Ethi" -> `Ethi
  | "Gara" -> `Gara
  | "Geor" -> `Geor
  | "Glag" -> `Glag
  | "Gong" -> `Gong
  | "Gonm" -> `Gonm
  | "Goth" -> `Goth
  | "Gran" -> `Gran
  | "Grek" -> `Grek
  | "Gujr" -> `Gujr
  | "Gukh" -> `Gukh
  | "Guru" -> `Guru
  | "Hang" -> `Hang
  | "Hani" -> `Hani
  | "Hano" -> `Hano
  | "Hatr" -> `Hatr
  | "Hebr" -> `Hebr
  | "Hira" -> `Hira
  | "Hluw" -> `Hluw
  | "Hmng" -> `Hmng
  | "Hmnp" -> `Hmnp
  | "Hrkt" -> `Hrkt
  | "Hung" -> `Hung
  | "Ital" -> `Ital
  | "Java" -> `Java
  | "Kali" -> `Kali
  | "Kana" -> `Kana
  | "Kawi" -> `Kawi
  | "Khar" -> `Khar
  | "Khmr" -> `Khmr
  | "Khoj" -> `Khoj
  | "Knda" -> `Knda
  | "Krai" -> `Krai
  | "Kthi" -> `Kthi
  | "Kits" -> `Kits
  | "Lana" -> `Lana
  | "Laoo" -> `Laoo
  | "Latn" -> `Latn
  | "Lepc" -> `Lepc
  | "Limb" -> `Limb
  | "Lina" -> `Lina
  | "Linb" -> `Linb
  | "Lisu" -> `Lisu
  | "Lyci" -> `Lyci
  | "Lydi" -> `Lydi
  | "Mahj" -> `Mahj
  | "Maka" -> `Maka
  | "Mand" -> `Mand
  | "Mani" -> `Mani
  | "Marc" -> `Marc
  | "Medf" -> `Medf
  | "Mend" -> `Mend
  | "Merc" -> `Merc
  | "Mero" -> `Mero
  | "Mlym" -> `Mlym
  | "Modi" -> `Modi
  | "Mong" -> `Mong
  | "Mroo" -> `Mroo
  | "Mtei" -> `Mtei
  | "Mult" -> `Mult
  | "Mymr" -> `Mymr
  | "Nagm" -> `Nagm
  | "Nand" -> `Nand
  | "Narb" -> `Narb
  | "Nbat" -> `Nbat
  | "Newa" -> `Newa
  | "Nkoo" -> `Nkoo
  | "Nshu" -> `Nshu
  | "Ogam" -> `Ogam
  | "Olck" -> `Olck
  | "Onao" -> `Onao
  | "Orkh" -> `Orkh
  | "Orya" -> `Orya
  | "Osge" -> `Osge
  | "Osma" -> `Osma
  | "Ougr" -> `Ougr
  | "Palm" -> `Palm
  | "Pauc" -> `Pauc
  | "Perm" -> `Perm
  | "Phag" -> `Phag
  | "Phli" -> `Phli
  | "Phlp" -> `Phlp
  | "Phnx" -> `Phnx
  | "Plrd" -> `Plrd
  | "Prti" -> `Prti
  | "Qaai" -> `Qaai
  | "Rjng" -> `Rjng
  | "Rohg" -> `Rohg
  | "Runr" -> `Runr
  | "Samr" -> `Samr
  | "Sarb" -> `Sarb
  | "Saur" -> `Saur
  | "Sgnw" -> `Sgnw
  | "Shaw" -> `Shaw
  | "Shrd" -> `Shrd
  | "Sidd" -> `Sidd
  | "Sidt" -> `Sidt
  | "Sind" -> `Sind
  | "Sinh" -> `Sinh
  | "Sogd" -> `Sogd
  | "Sogo" -> `Sogo
  | "Sora" -> `Sora
  | "Soyo" -> `Soyo
  | "Sund" -> `Sund
  | "Sunu" -> `Sunu
  | "Sylo" -> `Sylo
  | "Syrc" -> `Syrc
  | "Tagb" -> `Tagb
  | "Takr" -> `Takr
  | "Tale" -> `Tale
  | "Talu" -> `Talu
  | "Taml" -> `Taml
  | "Tang" -> `Tang
  | "Tavt" -> `Tavt
  | "Tayo" -> `Tayo
  | "Telu" -> `Telu
  | "Tfng" -> `Tfng
  | "Tglg" -> `Tglg
  | "Thaa" -> `Thaa
  | "Thai" -> `Thai
  | "Tibt" -> `Tibt
  | "Tirh" -> `Tirh
  | "Tnsa" -> `Tnsa
  | "Todr" -> `Todr
  | "Tols" -> `Tols
  | "Toto" -> `Toto
  | "Tutg" -> `Tutg
  | "Ugar" -> `Ugar
  | "Vaii" -> `Vaii
  | "Vith" -> `Vith
  | "Wara" -> `Wara
  | "Wcho" -> `Wcho
  | "Xpeo" -> `Xpeo
  | "Xsux" -> `Xsux
  | "Yezi" -> `Yezi
  | "Yiii" -> `Yiii
  | "Zanb" -> `Zanb
  | "Zinh" -> `Zinh
  | "Zyyy" -> `Zyyy
  | "Zzzz" -> `Zzzz
  | other -> err (err_att_val other)
  in
  Script_v tag

(* Splits [v] on spaces, parses every piece with [i_script] and projects
   it back with [o_script]; the resulting list is wrapped in
   [Script_extensions_v]. *)
let i_script_seq v =
  let scripts =
    split_string v ' ' |> List.map (fun code -> o_script (i_script code))
  in
  Script_extensions_v scripts

(* Converts a sentence break string to a [Sentence_break_v] value.
   Unrecognized strings fail through [err]. *)
let i_sentence_break v =
  let tag = match v with
  | "AT" -> `AT
  | "CL" -> `CL
  | "CR" -> `CR
  | "EX" -> `EX
  | "FO" -> `FO
  | "LE" -> `LE
  | "LF" -> `LF
  | "LO" -> `LO
  | "NU" -> `NU
  | "SC" -> `SC
  | "SE" -> `SE
  | "SP" -> `SP
  | "ST" -> `ST
  | "UP" -> `UP
  | "XX" -> `XX
  | other -> err (err_att_val other)
  in
  Sentence_break_v tag

(* Wraps the string unchanged in [String_v]. *)
let i_string s = String_v s

(* Converts an element kind string to a [UAX_42_element_v] value.
   Unrecognized strings fail through [err]. *)
let i_uax_42_element v =
  let tag = match v with
  | "reserved" -> `Reserved
  | "noncharacter" -> `Noncharacter
  | "surrogate" -> `Surrogate
  | "char" -> `Char
  | other -> err (err_att_val other)
  in
  UAX_42_element_v tag

(* Converts a vertical orientation string to a [Vertical_orientation_v]
   value. Unrecognized strings fail through [err]. *)
let i_vertical_orientation v =
  let tag = match v with
  | "U" -> `U
  | "R" -> `R
  | "Tu" -> `Tu
  | "Tr" -> `Tr
  | other -> err (err_att_val other)
  in
  Vertical_orientation_v tag

(* Converts a word break string to a [Word_break_v] value. Unrecognized
   strings fail through [err]. *)
let i_word_break v =
  let tag = match v with
  | "CR" -> `CR
  | "DQ" -> `DQ
  | "EB" -> `EB
  | "EBG" -> `EBG
  | "EM" -> `EM
  | "EX" -> `EX
  | "Extend" -> `Extend
  | "FO" -> `FO
  | "GAZ" -> `GAZ
  | "HL" -> `HL
  | "KA" -> `KA
  | "LE" -> `LE
  | "LF" -> `LF
  | "MB" -> `MB
  | "ML" -> `ML
  | "MN" -> `MN
  | "NL" -> `NL
  | "NU" -> `NU
  | "RI" -> `RI
  | "SQ" -> `SQ
  | "WSegSpace" -> `WSegSpace
  | "XX" -> `XX
  | "ZWJ" -> `ZWJ
  | other -> err (err_att_val other)
  in
  Word_break_v tag

(* Property keys ordered with the polymorphic [compare], so that they can
   index the maps built by [Map.Make]. *)
module Pkey = struct
  type t = key
  let compare : key -> key -> int = compare
end

(* Maps indexed by property keys. *)
module Pmap = Map.Make (Pkey)

(* A property set: a map from property keys to values. *)
type props = value Pmap.t

(* A property: its key and the projection applied to the stored value. *)
type 'a prop = key * (value -> 'a)

(* Looks up the property's key in [props] and applies the property's
   projection to the stored value. [None] is returned when [Not_found] is
   raised, either by the lookup or by the projection itself. *)
let find props (pkey, project) =
  match project (Pmap.find pkey props) with
  | x -> Some x
  | exception Not_found -> None

(* Property keyed by [Other name] and projected with [o_string]. *)
let unknown_prop name = (Other name, o_string)


(* non unihan and non unikemet properties *)

(* Every binding below pairs a property key with the projection that
   reads the value stored under that key. *)

let uax_42_element = (UAX_42_element, o_uax_42_element)  (* artefact of Uucd *)

let age = (Age, o_age)
let alphabetic = (Alphabetic, o_bool)
let ascii_hex_digit = (Ascii_hex_digit, o_bool)
let bidi_class = (Bidi_class, o_bidi_class)
let bidi_control = (Bidi_control, o_bool)
let bidi_mirrored = (Bidi_mirrored, o_bool)
let bidi_mirroring_glyph = (Bidi_mirroring_glyph, o_cp_opt)
let bidi_paired_bracket = (Bidi_paired_bracket, o_cp_map)
let bidi_paired_bracket_type =
  (Bidi_paired_bracket_type, o_bidi_paired_bracket_type)

let block = (Block, o_block)
let canonical_combining_class = (Canonical_combining_class, o_int)
let cased = (Cased, o_bool)
let case_folding = (Case_folding, o_cps_map)
let case_ignorable = (Case_ignorable, o_bool)
let changes_when_casefolded = (Changes_when_casefolded, o_bool)
let changes_when_casemapped = (Changes_when_casemapped, o_bool)
let changes_when_lowercased = (Changes_when_lowercased, o_bool)
let changes_when_nfkc_casefolded = (Changes_when_nfkc_casefolded, o_bool)
let changes_when_titlecased = (Changes_when_titlecased, o_bool)
let changes_when_uppercased = (Changes_when_uppercased, o_bool)
let composition_exclusion = (Composition_exclusion, o_bool)
let dash = (Dash, o_bool)
let decomposition_mapping = (Decomposition_mapping, o_cps_map)
let decomposition_type = (Decomposition_type, o_decomposition_type)
let default_ignorable_code_point = (Default_ignorable_code_point, o_bool)
let deprecated = (Deprecated, o_bool)
let diacritic = (Diacritic, o_bool)
let east_asian_width = (East_asian_width, o_east_asian_width)
let emoji = (Emoji, o_bool)
let emoji_presentation = (Emoji_presentation, o_bool)
let emoji_modifier = (Emoji_modifier, o_bool)
let emoji_modifier_base = (Emoji_modifier_base, o_bool)
let emoji_component = (Emoji_component, o_bool)
let equivalent_unified_ideograph = (Equivalent_unified_ideograph, o_cp_opt)
let extended_pictographic = (Extended_pictographic, o_bool)
let extender = (Extender, o_bool)
let full_composition_exclusion = (Full_composition_exclusion, o_bool)
let general_category = (General_category, o_general_category)
let grapheme_base = (Grapheme_base, o_bool)
let grapheme_cluster_break =
  (Grapheme_cluster_break, o_grapheme_cluster_break)
let grapheme_extend = (Grapheme_extend, o_bool)
let hangul_syllable_type = (Hangul_syllable_type, o_hangul_syllable_type)
let hex_digit = (Hex_digit, o_bool)
let id_continue = (Id_continue, o_bool)
let id_compat_math_continue = (Id_compat_math_continue, o_bool)
let id_compat_math_start = (Id_compat_math_start, o_bool)
let id_start = (Id_start, o_bool)
let ideographic = (Ideographic, o_bool)
let ids_binary_operator = (Ids_binary_operator, o_bool)
let ids_trinary_operator = (Ids_trinary_operator, o_bool)
let ids_unary_operator = (Ids_unary_operator, o_bool)
let indic_conjunct_break = (Indic_conjunct_break, o_indic_conjunct_break)
let indic_syllabic_category =
  (Indic_syllabic_category, o_indic_syllabic_category)
let indic_matra_category = (Indic_matra_category, o_indic_matra_category)
let indic_positional_category =
  (Indic_positional_category, o_indic_positional_category)
let jamo_short_name = (Jamo_short_name, o_string)
let join_control = (Join_control, o_bool)
let joining_group = (Joining_group, o_joining_group)
let joining_type = (Joining_type, o_joining_type)
let line_break = (Line_break, o_line_break)
let logical_order_exception = (Logical_order_exception, o_bool)
let lowercase = (Lowercase, o_bool)
let lowercase_mapping = (Lowercase_mapping, o_cps_map)
let math = (Math, o_bool)
let modifier_combining_mark = (Modifier_combining_mark, o_bool)
let name = (Name, o_name)
let name_alias = (Name_alias, o_name_alias)
let nfc_quick_check = (Nfc_quick_check, o_bool_maybe)
let nfd_quick_check = (Nfd_quick_check, o_bool_maybe)
let nfkc_quick_check = (Nfkc_quick_check, o_bool_maybe)
let nfkc_casefold = (Nfkc_casefold, o_cps_map)
let nfkc_simple_casefold = (Nfkc_simple_casefold, o_cps_map)
let nfkd_quick_check = (Nfkd_quick_check, o_bool_maybe)
let noncharacter_code_point = (Noncharacter_code_point, o_bool)
let numeric_type = (Numeric_type, o_numeric_type)
let numeric_value = (Numeric_value, o_numeric_value)
let other_alphabetic = (Other_alphabetic, o_bool)
let other_default_ignorable_code_point =
  (Other_default_ignorable_code_point, o_bool)

let other_grapheme_extend = (Other_grapheme_extend, o_bool)
let other_id_continue = (Other_id_continue, o_bool)
let other_id_start = (Other_id_start, o_bool)
let other_lowercase = (Other_lowercase, o_bool)
let other_math = (Other_math, o_bool)
let other_uppercase = (Other_uppercase, o_bool)
let pattern_syntax = (Pattern_syntax, o_bool)
let pattern_white_space = (Pattern_white_space, o_bool)
let prepended_concatenation_mark = (Prepended_concatenation_mark, o_bool)
let quotation_mark = (Quotation_mark, o_bool)
let radical = (Radical, o_bool)
let regional_indicator = (Regional_indicator, o_bool)
let script = (Script, o_script)
let script_extensions = (Script_extensions, o_script_extensions)
let sentence_break = (Sentence_break, o_sentence_break)
let simple_case_folding = (Simple_case_folding, o_cp_map)
let simple_lowercase_mapping = (Simple_lowercase_mapping, o_cp_map)
let simple_titlecase_mapping = (Simple_titlecase_mapping, o_cp_map)
let simple_uppercase_mapping = (Simple_uppercase_mapping, o_cp_map)
let soft_dotted = (Soft_dotted, o_bool)
let sterm = (Sterm, o_bool)
let terminal_punctuation = (Terminal_punctuation, o_bool)
let titlecase_mapping = (Titlecase_mapping, o_cps_map)
let unicode_1_name = (Unicode_1_name, o_string)
let unified_ideograph = (Unified_ideograph, o_bool)
let uppercase = (Uppercase, o_bool)
let uppercase_mapping = (Uppercase_mapping, o_cps_map)
let variation_selector = (Variation_selector, o_bool)
let vertical_orientation = (Vertical_orientation, o_vertical_orientation)
let white_space = (White_space, o_bool)
let word_break = (Word_break, o_word_break)
let xid_continue = (Xid_continue, o_bool)
let xid_start = (Xid_start, o_bool)

(* unihan properties *)

(* Every binding below pairs a key with [o_string]: these properties are
   all read as uninterpreted strings. *)
let kAccountingNumeric = (KAccountingNumeric, o_string)
let kAlternateHanYu = (KAlternateHanYu, o_string)
let kAlternateJEF = (KAlternateJEF, o_string)
let kAlternateKangXi = (KAlternateKangXi, o_string)
let kAlternateMorohashi = (KAlternateMorohashi, o_string)
let kAlternateTotalStrokes = (KAlternateTotalStrokes, o_string)
let kBigFive = (KBigFive, o_string)
let kCCCII = (KCCCII, o_string)
let kCNS1986 = (KCNS1986, o_string)
let kCNS1992 = (KCNS1992, o_string)
let kCangjie = (KCangjie, o_string)
let kCantonese = (KCantonese, o_string)
let kCheungBauer = (KCheungBauer, o_string)
let kCheungBauerIndex = (KCheungBauerIndex, o_string)
let kCihaiT = (KCihaiT, o_string)
let kCompatibilityVariant = (KCompatibilityVariant, o_string)
let kCowles = (KCowles, o_string)
let kDaeJaweon = (KDaeJaweon, o_string)
let kDefinition = (KDefinition, o_string)
let kEACC = (KEACC, o_string)
let kFanqie = (KFanqie, o_string)
let kFenn = (KFenn, o_string)
let kFennIndex = (KFennIndex, o_string)
let kFourCornerCode = (KFourCornerCode, o_string)
let kFrequency = (KFrequency, o_string)
let kGB0 = (KGB0, o_string)
let kGB1 = (KGB1, o_string)
let kGB3 = (KGB3, o_string)
let kGB5 = (KGB5, o_string)
let kGB8 = (KGB8, o_string)
let kGSR = (KGSR, o_string)
let kGradeLevel = (KGradeLevel, o_string)
let kHDZRadBreak = (KHDZRadBreak, o_string)
let kHKGlyph = (KHKGlyph, o_string)
let kHKSCS = (KHKSCS, o_string)
let kHanYu = (KHanYu, o_string)
let kHangul = (KHangul, o_string)
let kHanyuPinlu = (KHanyuPinlu, o_string)
let kHanyuPinyin = (KHanyuPinyin, o_string)
let kIBMJapan = (KIBMJapan, o_string)
let kIICore = (KIICore, o_string)
let kIRGDaeJaweon = (KIRGDaeJaweon, o_string)
let kIRGDaiKanwaZiten = (KIRGDaiKanwaZiten, o_string)
let kIRGHanyuDaZidian = (KIRGHanyuDaZidian, o_string)
let kIRGKangXi = (KIRGKangXi, o_string)
let kIRG_GSource = (KIRG_GSource, o_string)
let kIRG_HSource = (KIRG_HSource, o_string)
let kIRG_JSource = (KIRG_JSource, o_string)
let kIRG_KPSource = (KIRG_KPSource, o_string)
let kIRG_KSource = (KIRG_KSource, o_string)
let kIRG_MSource = (KIRG_MSource, o_string)
let kIRG_SSource = (KIRG_SSource, o_string)
let kIRG_TSource = (KIRG_TSource, o_string)
let kIRG_USource = (KIRG_USource, o_string)
let kIRG_UKSource = (KIRG_UKSource, o_string)
let kIRG_VSource = (KIRG_VSource, o_string)
let kJHJ = (KJHJ, o_string)
let kJIS0213 = (KJIS0213, o_string)
let kJapanese = (KJapanese, o_string)
let kJapaneseKun = (KJapaneseKun, o_string)
let kJapaneseOn = (KJapaneseOn, o_string)
let kJinmeiyoKanji = (KJinmeiyoKanji, o_string)
let kJis0 = (KJis0, o_string)
let kJis1 = (KJis1, o_string)
let kJoyoKanji = (KJoyoKanji, o_string)
let kKPS0 = (KKPS0, o_string)
let kKPS1 = (KKPS1, o_string)
let kKSC0 = (KKSC0, o_string)
let kKSC1 = (KKSC1, o_string)
let kKangXi = (KKangXi, o_string)
let kKarlgren = (KKarlgren, o_string)
let kKorean = (KKorean, o_string)
let kKoreanEducationHanja = (KKoreanEducationHanja, o_string)
let kKoreanName = (KKoreanName, o_string)
let kLau = (KLau, o_string)
let kMainlandTelegraph = (KMainlandTelegraph, o_string)
let kMandarin = (KMandarin, o_string)
let kMatthews = (KMatthews, o_string)
let kMeyerWempe = (KMeyerWempe, o_string)
let kMojiJoho = (KMojiJoho, o_string)
let kMorohashi = (KMorohashi, o_string)
let kNelson = (KNelson, o_string)
let kNSHU_DubenSrc = (KNSHU_DubenSrc, o_string)
let kNSHU_Reading = (KNSHU_Reading, o_string)
let kOtherNumeric = (KOtherNumeric, o_string)
let kPhonetic = (KPhonetic, o_string)
let kPrimaryNumeric = (KPrimaryNumeric, o_string)
let kPseudoGB1 = (KPseudoGB1, o_string)
let kRSAdobe_Japan1_6 = (KRSAdobe_Japan1_6, o_string)
let kRSJapanese = (KRSJapanese, o_string)
let kRSKanWa = (KRSKanWa, o_string)
let kRSKangXi = (KRSKangXi, o_string)
let kRSKorean = (KRSKorean, o_string)
let kRSMerged = (KRSMerged, o_string)
let kRSUnicode = (KRSUnicode, o_string)
let kSBGY = (KSBGY, o_string)
let kSemanticVariant = (KSemanticVariant, o_string)
let kSimplifiedVariant = (KSimplifiedVariant, o_string)
let kSMSZD2003Index = (KSMSZD2003Index, o_string)
let kSMSZD2003Readings = (KSMSZD2003Readings, o_string)
let kSpecializedSemanticVariant = (KSpecializedSemanticVariant, o_string)
let kSpoofingVariant = (KSpoofingVariant, o_string)
let kStrange = (KStrange, o_string)
let kTGH = (KTGH, o_string)
let kTGHZ2013 = (KTGHZ2013, o_string)
let kTGT_MergedSrc = (KTGT_MergedSrc, o_string)
let kTGT_RSUnicode = (KTGT_RSUnicode, o_string)
let kTaiwanTelegraph = (KTaiwanTelegraph, o_string)
let kTang = (KTang, o_string)
let kTayNumeric = (KTayNumeric, o_string)
let kTotalStrokes = (KTotalStrokes, o_string)
let kTraditionalVariant = (KTraditionalVariant, o_string)
let kUnihanCore2020 = (KUnihanCore2020, o_string)
let kVietnamese = (KVietnamese, o_string)
let kVietnameseNumeric = (KVietnameseNumeric, o_string)
let kWubi = (KWubi, o_string)
let kXHC1983 = (KXHC1983, o_string)
let kXerox = (KXerox, o_string)
let kZhuang = (KZhuang, o_string)
let kZhuangNumeric = (KZhuangNumeric, o_string)
let kZVariant = (KZVariant, o_string)

(* Unikemet properties *)

(* All of these are read as strings except [kEH_NoMirror] and
   [kEH_NoRotate], which are read as booleans. *)
let kEH_Cat = (KEH_Cat, o_string)
let kEH_Core = (KEH_Core, o_string)
let kEH_Desc = (KEH_Desc, o_string)
let kEH_Func = (KEH_Func, o_string)
let kEH_FVal = (KEH_FVal, o_string)
let kEH_UniK = (KEH_UniK, o_string)
let kEH_JSesh = (KEH_JSesh, o_string)
let kEH_HG = (KEH_HG, o_string)
let kEH_IFAO = (KEH_IFAO, o_string)
let kEH_NoMirror = (KEH_NoMirror, o_bool)
let kEH_NoRotate = (KEH_NoRotate, o_bool)
let kEH_AltSeq = (KEH_AltSeq, o_string)


(* Unicode Character Databases *)

(* A pair of code points with a string (presumably the block's range and
   its name; confirm against the interface documentation). *)
type block =
  (cp * cp) * string

(* A string with a list of code points (presumably the sequence's name and
   its code points; confirm against the interface documentation). *)
type named_sequence =
  string * cp list

(* A list of code points, a string and a list of shaping positions. *)
type standardized_variant =
  cp list
  * string
  * [ `Isolate | `Initial | `Medial | `Final ] list

(* A string with two code points. *)
type cjk_radical =
  string * cp * cp

(* Two code point sequences, [instead_of] and [use], with a [because]
   string. *)
type do_not_emit = {
  instead_of : cp list;
  use : cp list;
  because : string;
}

(* A decoded character database. [repertoire] is the map that [cp_props]
   and [cp_prop] query by code point; the remaining fields hold the other
   lists carried by the database. *)
type t = {
  description : string;
  repertoire : props Cpmap.t;
  blocks : block list;
  named_sequences : named_sequence list;
  provisional_named_sequences : named_sequence list;
  standardized_variants : standardized_variant list;
  cjk_radicals : cjk_radical list;
  do_not_emit : do_not_emit list;
}

(* Returns the properties bound to [cp] in the repertoire of [db], or
   [None] when the lookup raises [Not_found]. *)
let cp_props db cp =
  match Cpmap.find cp db.repertoire with
  | props -> Some props
  | exception Not_found -> None

(* Returns property [p] of [cp] in [db] as computed by [find] on the
   properties of [cp], or [None] when [cp] is not in the repertoire. *)
let cp_prop db cp p =
  match find (Cpmap.find cp db.repertoire) p with
  | result -> result
  | exception Not_found -> None

(* Decode *)

(* Xml names *)

(* Namespace shared by all the names below. *)
let ns_ucd = "http://www.unicode.org/ns/2003/ucd/1.0"

(* Expanded names: each one pairs [ns_ucd] with a local name. *)
let n_block = ns_ucd, "block"
let n_blocks = ns_ucd, "blocks"
let n_char = ns_ucd, "char"
let n_cjk_radical = ns_ucd, "cjk-radical"
let n_cjk_radicals = ns_ucd, "cjk-radicals"
let n_do_not_emit = ns_ucd, "do-not-emit"
let n_description = ns_ucd, "description"
let n_group = ns_ucd, "group"
let n_instead = ns_ucd, "instead"
let n_name_alias = ns_ucd, "name-alias"
let n_named_sequence = ns_ucd, "named-sequence"
let n_named_sequences = ns_ucd, "named-sequences"
let n_noncharacter = ns_ucd, "noncharacter"
let n_provisional_named_sequences = ns_ucd, "provisional-named-sequences"
let n_repertoire = ns_ucd, "repertoire"
let n_reserved = ns_ucd, "reserved"
let n_standardized_variant = ns_ucd, "standardized-variant"
let n_standardized_variants = ns_ucd, "standardized-variants"
let n_surrogate = ns_ucd, "surrogate"
let n_ucd = ns_ucd, "ucd"

(* Attribute parsing *)

(* [add_prop m (n, v)] adds to [m] the property described by the XML
   attribute with name [n] and value [v].

   The table [h] is built once, when [add_prop] is defined. It maps the local
   name of an attribute to the property key and the function that converts
   the attribute value. An attribute with an empty namespace whose local name
   is in the table is stored under that key with the converted value. Any
   other attribute is stored under [Other n] with [i_string v]. *)
let add_prop : value Pmap.t -> Xmlm.attribute -> value Pmap.t =
  let h = Hashtbl.create 500 in
  let map = Hashtbl.add h in
  map "AHex" (Ascii_hex_digit, i_bool);
  map "Alpha" (Alphabetic, i_bool);
  map "Bidi_C" (Bidi_control, i_bool);
  map "Bidi_M" (Bidi_mirrored, i_bool);
  map "Cased" (Cased, i_bool);
  map "CI" (Case_ignorable, i_bool);
  map "CE" (Composition_exclusion, i_bool);
  map "CWCF" (Changes_when_casefolded, i_bool);
  map "CWCM" (Changes_when_casemapped, i_bool);
  map "CWL" (Changes_when_lowercased, i_bool);
  map "CWKCF" (Changes_when_nfkc_casefolded, i_bool);
  map "CWT" (Changes_when_titlecased, i_bool);
  map "CWU" (Changes_when_uppercased, i_bool);
  map "Comp_Ex" (Full_composition_exclusion, i_bool);
  map "DI" (Default_ignorable_code_point, i_bool);
  map "Dash" (Dash, i_bool);
  map "Dep" (Deprecated, i_bool);
  map "Dia" (Diacritic, i_bool);
  map "EqUIdeo" (Equivalent_unified_ideograph, i_cp_opt);
  map "Ext" (Extender, i_bool);
  map "GCB" (Grapheme_cluster_break, i_grapheme_cluster_break);
  map "Gr_Base" (Grapheme_base, i_bool);
  map "Gr_Ext" (Grapheme_extend, i_bool);
  map "Hex" (Hex_digit, i_bool);
  map "ID_Compat_Math_Continue" (Id_compat_math_continue, i_bool);
  map "ID_Compat_Math_Start" (Id_compat_math_start, i_bool);
  map "IDC" (Id_continue, i_bool);
  map "IDS" (Id_start, i_bool);
  map "IDSB" (Ids_binary_operator, i_bool);
  map "IDST" (Ids_trinary_operator, i_bool);
  map "IDSU" (Ids_unary_operator, i_bool);
  map "Ideo" (Ideographic, i_bool);
  map "InCB" (Indic_conjunct_break, i_indic_conjunct_break);
  map "InSC" (Indic_syllabic_category, i_indic_syllabic_category);
  map "InMC" (Indic_matra_category, i_indic_matra_category);
  map "InPC" (Indic_positional_category, i_indic_positional_category);
  map "JSN" (Jamo_short_name, i_string);
  map "Join_C" (Join_control, i_bool);
  map "LOE" (Logical_order_exception, i_bool);
  map "Lower" (Lowercase, i_bool);
  map "Math" (Math, i_bool);
  map "MCM" (Modifier_combining_mark, i_bool);
  map "NChar" (Noncharacter_code_point, i_bool);
  map "NFC_QC" (Nfc_quick_check, i_bool_maybe);
  map "NFD_QC" (Nfd_quick_check, i_bool_maybe);
  map "NFKC_QC" (Nfkc_quick_check, i_bool_maybe);
  map "NFKC_CF" (Nfkc_casefold, i_cps_map ~empty:true);
  map "NFKC_SCF" (Nfkc_simple_casefold, i_cps_map ~empty:true);
  map "NFKD_QC" (Nfkd_quick_check, i_bool_maybe);
  map "OAlpha" (Other_alphabetic, i_bool);
  map "ODI" (Other_default_ignorable_code_point, i_bool);
  map "OGr_Ext" (Other_grapheme_extend, i_bool);
  map "OIDC" (Other_id_continue, i_bool);
  map "OIDS" (Other_id_start, i_bool);
  map "OLower" (Other_lowercase, i_bool);
  map "OMath" (Other_math, i_bool);
  map "OUpper" (Other_uppercase, i_bool);
  map "Pat_Syn" (Pattern_syntax, i_bool);
  map "Pat_WS" (Pattern_white_space, i_bool);
  map "PCM" (Prepended_concatenation_mark, i_bool);
  map "QMark" (Quotation_mark, i_bool);
  map "Radical" (Radical, i_bool);
  map "RI" (Regional_indicator, i_bool);
  map "SB" (Sentence_break, i_sentence_break);
  map "SD" (Soft_dotted, i_bool);
  map "STerm" (Sterm, i_bool);
  map "Term" (Terminal_punctuation, i_bool);
  map "UIdeo" (Unified_ideograph, i_bool);
  map "Upper" (Uppercase, i_bool);
  map "VS" (Variation_selector, i_bool);
  map "vo" (Vertical_orientation, i_vertical_orientation);
  map "WB" (Word_break, i_word_break);
  map "WSpace" (White_space, i_bool);
  map "XIDC" (Xid_continue, i_bool);
  map "XIDS" (Xid_start, i_bool);
  map "age" (Age, i_age);
  map "bc" (Bidi_class, i_bidi_class);
  map "blk" (Block, i_block);
  map "bmg" (Bidi_mirroring_glyph, i_cp_opt);
  map "bpb" (Bidi_paired_bracket, i_cp_map);
  map "bpt" (Bidi_paired_bracket_type, i_bidi_paired_bracket_type);
  map "ccc" (Canonical_combining_class, i_int);
  map "cf" (Case_folding, i_cps_map ~empty:false);
  map "dm" (Decomposition_mapping,  (i_cps_map ~empty:true));
  map "dt" (Decomposition_type, i_decomposition_type);
  map "ea" (East_asian_width, i_east_asian_width);
  map "Emoji" (Emoji, i_bool);
  map "EPres" (Emoji_presentation, i_bool);
  map "EMod" (Emoji_modifier, i_bool);
  map "EBase" (Emoji_modifier_base, i_bool);
  map "EComp" (Emoji_component, i_bool);
  map "ExtPict" (Extended_pictographic, i_bool);
  map "gc" (General_category, i_general_category);
  map "hst" (Hangul_syllable_type, i_hangul_syllable_type);
  map "jg" (Joining_group, i_joining_group);
  map "jt" (Joining_type, i_joining_type);
  map "lb" (Line_break, i_line_break);
  map "lc" (Lowercase_mapping, i_cps_map ~empty:false);
  map "na" (Name, i_name);
  map "na1" (Unicode_1_name, i_string);
  map "nt" (Numeric_type, i_numeric_type);
  map "nv" (Numeric_value, i_numeric_value);
  map "sc" (Script, i_script);
  map "scf" (Simple_case_folding, i_cp_map);
  map "scx" (Script_extensions, i_script_seq);
  map "slc" (Simple_lowercase_mapping, i_cp_map);
  map "stc" (Simple_titlecase_mapping, i_cp_map);
  map "suc" (Simple_uppercase_mapping, i_cp_map);
  map "tc" (Titlecase_mapping, i_cps_map ~empty:false);
  (* Not a real XML attribute: [p_repertoire] prepends it to the attributes
     of an element to record the name of the element that was parsed. *)
  map "uax_42_element" (UAX_42_element, i_uax_42_element);       (* artefact *)
  map "uc" (Uppercase_mapping, i_cps_map ~empty:false);
  (* The k* attributes below are kept as strings, except for kEH_NoMirror
     and kEH_NoRotate which are converted with [i_bool]. *)
  map "kAccountingNumeric" (KAccountingNumeric, i_string);
  map "kAlternateHanYu" (KAlternateHanYu, i_string);
  map "kAlternateJEF" (KAlternateJEF, i_string);
  map "kAlternateKangXi" (KAlternateKangXi, i_string);
  map "kAlternateMorohashi" (KAlternateMorohashi, i_string);
  map "kBigFive" (KBigFive, i_string);
  map "kCCCII" (KCCCII, i_string);
  map "kCNS1986" (KCNS1986, i_string);
  map "kCNS1992" (KCNS1992, i_string);
  map "kCangjie" (KCangjie, i_string);
  map "kCantonese" (KCantonese, i_string);
  map "kCheungBauer" (KCheungBauer, i_string);
  map "kCheungBauerIndex" (KCheungBauerIndex, i_string);
  map "kCihaiT" (KCihaiT, i_string);
  map "kCompatibilityVariant" (KCompatibilityVariant, i_string);
  map "kCowles" (KCowles, i_string);
  map "kDaeJaweon" (KDaeJaweon, i_string);
  map "kDefinition" (KDefinition, i_string);
  map "kEACC" (KEACC, i_string);
  map "kFanqie" (KFanqie, i_string);
  map "kFenn" (KFenn, i_string);
  map "kFennIndex" (KFennIndex, i_string);
  map "kFourCornerCode" (KFourCornerCode, i_string);
  map "kFrequency" (KFrequency, i_string);
  map "kGB0" (KGB0, i_string);
  map "kGB1" (KGB1, i_string);
  map "kGB3" (KGB3, i_string);
  map "kGB5" (KGB5, i_string);
  map "kGB8" (KGB8, i_string);
  map "kGSR" (KGSR, i_string);
  map "kGradeLevel" (KGradeLevel, i_string);
  map "kHDZRadBreak" (KHDZRadBreak, i_string);
  map "kHKGlyph" (KHKGlyph, i_string);
  map "kHKSCS" (KHKSCS, i_string);
  map "kHanYu" (KHanYu, i_string);
  map "kHangul" (KHangul, i_string);
  map "kHanyuPinlu" (KHanyuPinlu, i_string);
  map "kHanyuPinyin" (KHanyuPinyin, i_string);
  map "kIBMJapan" (KIBMJapan, i_string);
  map "kIICore" (KIICore, i_string);
  map "kIRGDaeJaweon" (KIRGDaeJaweon, i_string);
  map "kIRGDaiKanwaZiten" (KIRGDaiKanwaZiten, i_string);
  map "kIRGHanyuDaZidian" (KIRGHanyuDaZidian, i_string);
  map "kIRGKangXi" (KIRGKangXi, i_string);
  map "kIRG_GSource" (KIRG_GSource, i_string);
  map "kIRG_HSource" (KIRG_HSource, i_string);
  map "kIRG_JSource" (KIRG_JSource, i_string);
  map "kIRG_KPSource" (KIRG_KPSource, i_string);
  map "kIRG_KSource" (KIRG_KSource, i_string);
  map "kIRG_MSource" (KIRG_MSource, i_string);
  map "kIRG_SSource" (KIRG_SSource, i_string);
  map "kIRG_TSource" (KIRG_TSource, i_string);
  map "kIRG_USource" (KIRG_USource, i_string);
  map "kIRG_UKSource" (KIRG_UKSource, i_string);
  map "kIRG_VSource" (KIRG_VSource, i_string);
  map "kJapanese" (KJapanese, i_string);
  map "kJHJ" (KJHJ, i_string);
  map "kJIS0213" (KJIS0213, i_string);
  map "kJapaneseKun" (KJapaneseKun, i_string);
  map "kJapaneseOn" (KJapaneseOn, i_string);
  map "kJinmeiyoKanji" (KJinmeiyoKanji, i_string);
  map "kJis0" (KJis0, i_string);
  map "kJis1" (KJis1, i_string);
  map "kJoyoKanji"  (KJoyoKanji, i_string);
  map "kKPS0" (KKPS0, i_string);
  map "kKPS1" (KKPS1, i_string);
  map "kKSC0" (KKSC0, i_string);
  map "kKSC1" (KKSC1, i_string);
  map "kKangXi" (KKangXi, i_string);
  map "kKarlgren" (KKarlgren, i_string);
  map "kKorean" (KKorean, i_string);
  map "kKoreanEducationHanja" (KKoreanEducationHanja, i_string);
  map "kKoreanName" (KKoreanName, i_string);
  map "kLau" (KLau, i_string);
  map "kMainlandTelegraph" (KMainlandTelegraph, i_string);
  map "kMandarin" (KMandarin, i_string);
  map "kMatthews" (KMatthews, i_string);
  map "kMeyerWempe" (KMeyerWempe, i_string);
  map "kMorohashi" (KMorohashi, i_string);
  map "kNelson" (KNelson, i_string);
  map "kNSHU_DubenSrc" (KNSHU_DubenSrc, i_string);
  map "kNSHU_Reading" (KNSHU_Reading, i_string);
  map "kOtherNumeric" (KOtherNumeric, i_string);
  map "kPhonetic" (KPhonetic, i_string);
  map "kPrimaryNumeric" (KPrimaryNumeric, i_string);
  map "kPseudoGB1" (KPseudoGB1, i_string);
  map "kRSAdobe_Japan1_6" (KRSAdobe_Japan1_6, i_string);
  map "kRSJapanese" (KRSJapanese, i_string);
  map "kRSKanWa" (KRSKanWa, i_string);
  map "kRSKangXi" (KRSKangXi, i_string);
  map "kRSKorean" (KRSKorean, i_string);
  map "kRSMerged" (KRSMerged, i_string);
  map "kRSUnicode" (KRSUnicode, i_string);
  map "kSBGY" (KSBGY, i_string);
  map "kSemanticVariant" (KSemanticVariant, i_string);
  map "kSimplifiedVariant" (KSimplifiedVariant, i_string);
  map "kSMSZD2003Index" (KSMSZD2003Index, i_string);
  map "kSMSZD2003Readings" (KSMSZD2003Readings, i_string);
  map "kSpecializedSemanticVariant" (KSpecializedSemanticVariant, i_string);
  map "kSpoofingVariant" (KSpoofingVariant, i_string);
  map "kTGH" (KTGH, i_string);
  map "kTGHZ2013" (KTGHZ2013, i_string);
  map "kTGT_MergedSrc" (KTGT_MergedSrc, i_string);
  map "kTGT_RSUnicode" (KTGT_RSUnicode, i_string);
  map "kTaiwanTelegraph" (KTaiwanTelegraph, i_string);
  map "kTang" (KTang, i_string);
  map "kTayNumeric" (KTayNumeric, i_string);
  map "kTotalStrokes" (KTotalStrokes, i_string);
  map "kTraditionalVariant" (KTraditionalVariant, i_string);
  map "kVietnamese" (KVietnamese, i_string);
  map "kVietnameseNumeric" (KVietnameseNumeric, i_string);
  map "kWubi" (KWubi, i_string);
  map "kXHC1983" (KXHC1983, i_string);
  map "kXerox" (KXerox, i_string);
  map "kZhuang" (KZhuang, i_string);
  map "kZhuangNumeric" (KZhuangNumeric, i_string);
  map "kZVariant" (KZVariant, i_string);
  map "kEH_Cat" (KEH_Cat, i_string);
  map "kEH_Core" (KEH_Core, i_string);
  map "kEH_Desc" (KEH_Desc, i_string);
  map "kEH_Func" (KEH_Func, i_string);
  map "kEH_FVal" (KEH_FVal, i_string);
  map "kEH_UniK" (KEH_UniK, i_string);
  map "kEH_JSesh" (KEH_JSesh, i_string);
  map "kEH_HG" (KEH_HG, i_string);
  map "kEH_IFAO" (KEH_IFAO, i_string);
  map "kEH_NoMirror" (KEH_NoMirror, i_bool);
  map "kEH_NoRotate" (KEH_NoRotate, i_bool);
  map "kEH_AltSeq" (KEH_AltSeq, i_string);
  (* The function returned once the table is filled. *)
  fun m (n, v) ->
    try match n with
    | ("", p) ->
        let k, conv = Hashtbl.find h p in
        Pmap.add k (conv v) m
    | _ -> raise Not_found             (* namespaced attribute: never looked up *)
    (* NOTE(review): this handler also covers [conv v], so a converter that
       raises Not_found would end up here as well. *)
    with Not_found -> Pmap.add (Other n) (i_string v) m

(* [attv n atts] is the value of the attribute of [atts] whose name has an
   empty namespace and local name [n]. Raises [Failure] with the message
   built by [err_miss_att] if [atts] has no such attribute. *)
let attv n atts =
  try snd (List.find (fun (en, _) -> en = ("", n)) atts) with
  | Not_found ->
      (* [err_miss_att] only formats the message. It has to go through [err]
         to raise, otherwise the message itself is returned as the value. *)
      err (err_miss_att n)

(* [skip_el d] inputs signals from [d] until the end signal that matches an
   element whose start signal was already input. Nested elements are skipped
   along with their content. *)
let skip_el d =
  (* [nested] counts the start signals seen whose end signal is pending. *)
  let rec drain nested = match Xmlm.input d with
  | `El_start _ -> drain (nested + 1)
  | `El_end -> if nested <> 0 then drain (nested - 1)
  | _ -> drain nested
  in
  drain 0

(* Parses a sequence of empty elements named n and a El_end. *)
(* [p_seq n p_atts d] inputs signals from [d] up to and including an end
   signal. Each element named [n] must be empty: its attributes are given to
   [p_atts] and the results are returned in document order. Elements with
   another name are skipped. Character data is an error. *)
let p_seq n p_atts d =
  let rec collect acc = match Xmlm.input d with
  | `El_start (name, atts) when name = n ->
      begin match Xmlm.input d with
      | `El_end -> collect (p_atts atts :: acc)
      | _ -> err err_exp_el_end
      end
  | `El_start _ -> skip_el d; collect acc
  | `El_end -> List.rev acc
  | `Data _ -> err err_data
  | _ -> assert false
  in
  collect []

(* [p_description d] inputs the content and end signal of an element whose
   start signal was input. The result is the character data of the element,
   or "" if it is empty. Anything else is an error. *)
let p_description d = match Xmlm.input d with
| `El_end -> ""
| `Data text ->
    begin match Xmlm.input d with
    | `El_end -> text
    | _ -> err err_exp_el_end
    end
| _ -> err err_exp_data

(* [p_name_aliases d] parses the name-alias children of the current element
   and returns their (alias, type) pairs in document order. Other child
   elements are skipped. The end signal of the current element is peeked but
   not input. *)
let p_name_aliases d =
  (* Reads the alias and type attributes; if one is repeated the last
     occurrence is kept. A missing alias yields "", a missing type is an
     error. *)
  let alias_of_atts atts =
    let scan (alias, atype) = function
    | ("", "alias"), v -> (v, atype)
    | ("", "type"), v -> (alias, Some (i_name_alias_type v))
    | _ -> (alias, atype)
    in
    match List.fold_left scan ("", None) atts with
    | _, None -> err err_invalid_name_alias_spec
    | alias, Some t -> (alias, t)
  in
  (* [opened] counts the name-alias elements whose end signal is pending. *)
  let rec loop opened acc = match Xmlm.peek d with
  | `El_start (n, atts) when n = n_name_alias ->
      ignore (Xmlm.input d);
      loop (opened + 1) (alias_of_atts atts :: acc)
  | `El_start _ -> ignore (Xmlm.input d); skip_el d; loop opened acc
  | `El_end ->
      if opened = 0 then List.rev acc else
      (ignore (Xmlm.input d); loop (opened - 1) acc)
  | `Data _ -> err err_data
  | _ -> assert false
  in
  loop 0 []

(* [p_cp d rep atts g_props] adds to [rep] the code points described by the
   attributes [atts] of an element whose start signal was input.

   The cp, first-cp and last-cp attributes select the code points: cp if
   present, otherwise the range from first-cp to last-cp. It is an error if
   neither is given. The remaining attributes are added to [g_props] with
   [add_prop]. The name aliases parsed from the element's children are stored
   under [Name_alias]. *)
let p_cp d rep atts g_props =
  let scan (props, single, first, last) ((n, v) as att) = match n with
  | ("", "cp") -> (props, Some (cp_of_string v), first, last)
  | ("", "first-cp") -> (props, single, Some (cp_of_string v), last)
  | ("", "last-cp") -> (props, single, first, Some (cp_of_string v))
  | _ -> (add_prop props att, single, first, last)
  in
  let props, single, first, last =
    List.fold_left scan (g_props, None, None, None) atts
  in
  let aliases = p_name_aliases d in
  let props = Pmap.add Name_alias (Name_alias_v aliases) props in
  match single, first, last with
  | Some cp, _, _ -> Cpmap.add cp props rep
  | None, Some f, Some l ->
      (* Every code point of the range gets the same properties. *)
      let acc = ref rep in
      for cp = f to l do acc := Cpmap.add cp props !acc done;
      !acc
  | None, _, _ -> err err_invalid_cp_spec

(* [p_repertoire d] parses the content and end signal of an element whose
   start signal was input. It returns the map from code points to properties
   built from the reserved, noncharacter, surrogate and char elements. These
   may be wrapped in group elements whose attributes are the initial
   properties of the elements they contain. Other elements are skipped. *)
let p_repertoire d =
  (* Value of the fake uax_42_element attribute for the elements that
     describe code points, [None] for any other element. *)
  let cp_element_kind n =
    if n = n_reserved then Some "reserved" else
    if n = n_noncharacter then Some "noncharacter" else
    if n = n_surrogate then Some "surrogate" else
    if n = n_char then Some "char" else
    None
  in
  (* [depth] counts the code point elements whose end signal is pending. *)
  let rec loop depth rep g_props = match Xmlm.input d with
  | `El_start (n, atts) ->
      begin match cp_element_kind n with
      | Some kind ->
          (* fake attribute for uniformity *)
          let atts = (("", "uax_42_element"), kind) :: atts in
          loop (depth + 1) (p_cp d rep atts g_props) g_props
      | None when n = n_group ->
          (* Starts from the empty map: there is no group hierarchy. *)
          let group_props = List.fold_left add_prop Pmap.empty atts in
          let rep = loop 0 rep group_props in
          loop depth rep Pmap.empty
      | None -> skip_el d; loop depth rep g_props         (* skip foreign *)
      end
  | `El_end -> if depth = 0 then rep else loop (depth - 1) rep g_props
  | `Data _ -> err err_data
  | _ -> assert false
  in
  loop 0 Cpmap.empty Pmap.empty

(* [p_blocks d] parses a sequence of block elements. Each one yields
   ((first-cp, last-cp), name) from the attributes of those names. *)
let p_blocks d =
  let cp_att name atts = cp_of_string (attv name atts) in
  let block_of_atts atts =
    (cp_att "first-cp" atts, cp_att "last-cp" atts), attv "name" atts
  in
  p_seq n_block block_of_atts d

(* [p_named_sequences d] parses a sequence of named-sequence elements. Each
   one yields (name, cps) from the attributes of those names. *)
let p_named_sequences d =
  p_seq n_named_sequence
    (fun atts -> (attv "name" atts, cps_of_string (attv "cps" atts)))
    d

(* [p_standardized_variants d] parses a sequence of standardized-variant
   elements. Each one yields (cps, desc, when) from the attributes of those
   names. The when attribute is a space separated list of the tokens
   isolate, initial, medial and final; any other token is an error. *)
let p_standardized_variants d =
  let when_tag = function
  | "isolate" -> `Isolate
  | "initial" -> `Initial
  | "medial" -> `Medial
  | "final" -> `Final
  | other -> err (err_att_val other)
  in
  let when_tags v = List.map when_tag (split_string v ' ') in
  let variant_of_atts atts =
    cps_of_string (attv "cps" atts),
    attv "desc" atts,
    when_tags (attv "when" atts)
  in
  p_seq n_standardized_variant variant_of_atts d

(* [p_cjk_radicals d] parses a sequence of cjk-radical elements. Each one
   yields (number, radical, ideograph) from the attributes of those names. *)
let p_cjk_radicals d =
  let cp_att name atts = cp_of_string (attv name atts) in
  let radical_of_atts atts =
    attv "number" atts, cp_att "radical" atts, cp_att "ideograph" atts
  in
  p_seq n_cjk_radical radical_of_atts d

(* [p_do_not_emit d] parses a sequence of instead elements. Each one yields
   a record filled from its of, use and because attributes. *)
let p_do_not_emit d =
  let cps_att name atts = cps_of_string (attv name atts) in
  let entry_of_atts atts =
    (* Attributes are read in this order: of, use, because. *)
    let replaced = cps_att "of" atts in
    let replacement = cps_att "use" atts in
    let reason = attv "because" atts in
    { instead_of = replaced; use = replacement; because = reason }
  in
  p_seq n_instead entry_of_atts d

(* [p_ucd d] parses the content and end signal of the ucd element, whose
   start signal was input, and checks that the end of input follows.

   Each known child element may occur at most once; a second occurrence is
   an error. Unknown elements are skipped. A child that is absent yields ""
   for the description and an empty map or list for the other fields. *)
let p_ucd d =
  let description = ref None in
  let repertoire = ref None in
  let blocks = ref None in
  let named_sequences = ref None in
  let provisional_named_sequences = ref None in
  let standardized_variants = ref None in
  let cjk_radicals = ref None in
  let do_not_emit = ref None in
  (* Fills [slot] with the result of [parse], unless it is already filled. *)
  let set_once n slot parse = match !slot with
  | Some _ -> err (err_dup n)
  | None -> slot := Some (parse d)
  in
  let p_child = function
  | `El_start (n, _) when n = n_description ->
      set_once n description p_description
  | `El_start (n, _) when n = n_repertoire ->
      set_once n repertoire p_repertoire
  | `El_start (n, _) when n = n_blocks ->
      set_once n blocks p_blocks
  | `El_start (n, _) when n = n_named_sequences ->
      set_once n named_sequences p_named_sequences
  | `El_start (n, _) when n = n_provisional_named_sequences ->
      set_once n provisional_named_sequences p_named_sequences
  | `El_start (n, _) when n = n_standardized_variants ->
      set_once n standardized_variants p_standardized_variants
  | `El_start (n, _) when n = n_cjk_radicals ->
      set_once n cjk_radicals p_cjk_radicals
  | `El_start (n, _) when n = n_do_not_emit ->
      set_once n do_not_emit p_do_not_emit
  | `El_start _ -> skip_el d                             (* foreign markup *)
  | `Data _ -> err err_data
  | _ -> assert false
  in
  let rec p_children () = match Xmlm.peek d with
  | `El_end -> ()
  | _ -> p_child (Xmlm.input d); p_children ()
  in
  p_children ();
  ignore (Xmlm.input d);                       (* end signal of the ucd element *)
  if not (Xmlm.eoi d) then err err_wf;
  let or_default default = function Some v -> v | None -> default in
  { description = or_default "" !description;
    repertoire = or_default Cpmap.empty !repertoire;
    blocks = or_default [] !blocks;
    named_sequences = or_default [] !named_sequences;
    provisional_named_sequences = or_default [] !provisional_named_sequences;
    standardized_variants = or_default [] !standardized_variants;
    cjk_radicals = or_default [] !cjk_radicals;
    do_not_emit = or_default [] !do_not_emit; }

(* Input sources accepted by [decoder]: an input channel or a string. *)
type src = [ `Channel of in_channel | `String of string ]
(* A decoder is the Xmlm input abstraction created by [decoder]. *)
type decoder = Xmlm.input

(* [decoder src] is an Xmlm input reading from [src], created with
   [~strip:true]. A string source is read from position 0. *)
let decoder src =
  let xml_src = match src with
  | `Channel _ as c -> c
  | `String s -> `String (0, s)
  in
  Xmlm.make_input ~strip:true xml_src

(* [decoded_range d] is the current Xmlm position of [d], used as both the
   start and the end of the range. *)
let decoded_range d =
  let pos = Xmlm.pos d in
  pos, pos
(* [decode d] decodes a database from [d]. The result is [`Ok db], or
   [`Error msg] when a parser raises [Failure] or Xmlm reports an error.
   The root element must be the ucd element. *)
let decode d =
  let parse () =
    ignore (Xmlm.input d); (* `Dtd *)
    match Xmlm.input d with
    | `El_start (n, _) when n = n_ucd -> `Ok (p_ucd d)
    | `El_start (n, _) -> err (err_exp_ucd n)
    | _ -> assert false
  in
  match parse () with
  | result -> result
  | exception Failure msg -> `Error msg
  | exception Xmlm.Error (_, e) -> `Error (Xmlm.error_message e)
