vaseboot/include/VasEBoot/unicode.h

363 lines
13 KiB
C

/*
* VAS_EBOOT -- GRand Unified Bootloader
* Copyright (C) 2010 Free Software Foundation, Inc.
*
* VAS_EBOOT is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* VAS_EBOOT is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with VAS_EBOOT. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef VAS_EBOOT_BIDI_HEADER
#define VAS_EBOOT_BIDI_HEADER 1
#include <VasEBoot/types.h>
#include <VasEBoot/mm.h>
#include <VasEBoot/misc.h>
/* A (key, replace) pair of 32-bit values. Entries of
VasEBoot_unicode_bidi_pairs[] have this type.
NOTE(review): presumably maps a code point (key) to its mirrored
counterpart (replace) for VasEBoot_unicode_mirror_code() -- confirm
against the code that searches the table. */
struct VasEBoot_unicode_bidi_pair
{
VasEBoot_uint32_t key;
VasEBoot_uint32_t replace;
};
/* Bit-packed property record. Entries of VasEBoot_unicode_compact[] have
this type. The six bit-fields total 47 bits and the struct is declared
VAS_EBOOT_PACKED, so field order and widths are part of the table format.
NOTE(review): presumably each entry covers `len' consecutive code points
starting at `start' that share the listed properties -- confirm against
the code that generates and searches the table. */
struct VasEBoot_unicode_compact_range
{
/* 21 bits: wide enough for 0x10ffff (VAS_EBOOT_UNICODE_LAST_VALID). */
unsigned start:21;
unsigned len:9;
/* 5 bits: wide enough for every enum VasEBoot_bidi_type value (0..18). */
unsigned bidi_type:5;
/* 8 bits: wide enough for every enum VasEBoot_comb_type value (0..255). */
unsigned comb_type:8;
unsigned bidi_mirror:1;
/* 3 bits: wide enough for every enum VasEBoot_join_type value (0..5). */
unsigned join_type:3;
} VAS_EBOOT_PACKED;
/* Old-style Arabic shaping. Used for "visual UTF-8" and
in VasEBoot-mkfont to find variant glyphs in absence of GPOS tables.
Each record holds five 32-bit values: `code' plus one value per joining
context (isolated, right-linked, both-linked, left-linked).
NOTE(review): presumably `code' is the nominal letter and the other four
are the code points of its presentation forms -- confirm against the
table definition. */
struct VasEBoot_unicode_arabic_shape
{
VasEBoot_uint32_t code;
VasEBoot_uint32_t isolated;
VasEBoot_uint32_t right_linked;
VasEBoot_uint32_t both_linked;
VasEBoot_uint32_t left_linked;
};
/* Shaping table defined outside this header; its length and any
terminator convention are not visible here. */
extern struct VasEBoot_unicode_arabic_shape VasEBoot_unicode_arabic_shapes[];
/* Bidirectional character types. The 19 enumerators take the values
0..18, so they fit the 5-bit bidi_type bit-fields declared in this header.
NOTE(review): the names appear to follow the Bidi_Class abbreviations of
the Unicode Bidirectional Algorithm (UAX #9) -- confirm. Do not reorder:
the numeric values are stored in bit-fields and presumably in generated
tables. */
enum VasEBoot_bidi_type
{
VAS_EBOOT_BIDI_TYPE_L = 0,
VAS_EBOOT_BIDI_TYPE_LRE,
VAS_EBOOT_BIDI_TYPE_LRO,
VAS_EBOOT_BIDI_TYPE_R,
VAS_EBOOT_BIDI_TYPE_AL,
VAS_EBOOT_BIDI_TYPE_RLE,
VAS_EBOOT_BIDI_TYPE_RLO,
VAS_EBOOT_BIDI_TYPE_PDF,
VAS_EBOOT_BIDI_TYPE_EN,
VAS_EBOOT_BIDI_TYPE_ES,
VAS_EBOOT_BIDI_TYPE_ET,
VAS_EBOOT_BIDI_TYPE_AN,
VAS_EBOOT_BIDI_TYPE_CS,
VAS_EBOOT_BIDI_TYPE_NSM,
VAS_EBOOT_BIDI_TYPE_BN,
VAS_EBOOT_BIDI_TYPE_B,
VAS_EBOOT_BIDI_TYPE_S,
VAS_EBOOT_BIDI_TYPE_WS,
VAS_EBOOT_BIDI_TYPE_ON
};
/* Joining behaviour of a character. Values run 0..5 and therefore fit
the 3-bit join_type field of struct VasEBoot_unicode_compact_range.
NOTE(review): presumably these correspond to the Unicode Joining_Type
property used for cursive scripts such as Arabic -- confirm. */
enum VasEBoot_join_type
{
VAS_EBOOT_JOIN_TYPE_NONJOINING = 0,
VAS_EBOOT_JOIN_TYPE_LEFT = 1,
VAS_EBOOT_JOIN_TYPE_RIGHT = 2,
VAS_EBOOT_JOIN_TYPE_DUAL = 3,
VAS_EBOOT_JOIN_TYPE_CAUSING = 4,
VAS_EBOOT_JOIN_TYPE_TRANSPARENT = 5
};
/* Combining type of a character. VAS_EBOOT_UNICODE_COMB_NONE (0) means
the character is not combining. All values lie in 0..255, so they fit
the 8-bit `comb_type' and `type' bit-fields declared in this header.
NOTE(review): the explicit numbers (10..36, 202..240) look like Unicode
canonical combining class values -- confirm against the table generator
before changing any of them. */
enum VasEBoot_comb_type
{
VAS_EBOOT_UNICODE_COMB_NONE = 0,
VAS_EBOOT_UNICODE_COMB_OVERLAY = 1,
VAS_EBOOT_UNICODE_COMB_HEBREW_SHEVA = 10,
VAS_EBOOT_UNICODE_COMB_HEBREW_HATAF_SEGOL = 11,
VAS_EBOOT_UNICODE_COMB_HEBREW_HATAF_PATAH = 12,
VAS_EBOOT_UNICODE_COMB_HEBREW_HATAF_QAMATS = 13,
VAS_EBOOT_UNICODE_COMB_HEBREW_HIRIQ = 14,
VAS_EBOOT_UNICODE_COMB_HEBREW_TSERE = 15,
VAS_EBOOT_UNICODE_COMB_HEBREW_SEGOL = 16,
VAS_EBOOT_UNICODE_COMB_HEBREW_PATAH = 17,
VAS_EBOOT_UNICODE_COMB_HEBREW_QAMATS = 18,
VAS_EBOOT_UNICODE_COMB_HEBREW_HOLAM = 19,
VAS_EBOOT_UNICODE_COMB_HEBREW_QUBUTS = 20,
VAS_EBOOT_UNICODE_COMB_HEBREW_DAGESH = 21,
VAS_EBOOT_UNICODE_COMB_HEBREW_METEG = 22,
VAS_EBOOT_UNICODE_COMB_HEBREW_RAFE = 23,
VAS_EBOOT_UNICODE_COMB_HEBREW_SHIN_DOT = 24,
VAS_EBOOT_UNICODE_COMB_HEBREW_SIN_DOT = 25,
VAS_EBOOT_UNICODE_COMB_HEBREW_VARIKA = 26,
VAS_EBOOT_UNICODE_COMB_ARABIC_FATHATAN = 27,
VAS_EBOOT_UNICODE_COMB_ARABIC_DAMMATAN = 28,
VAS_EBOOT_UNICODE_COMB_ARABIC_KASRATAN = 29,
VAS_EBOOT_UNICODE_COMB_ARABIC_FATHAH = 30,
VAS_EBOOT_UNICODE_COMB_ARABIC_DAMMAH = 31,
VAS_EBOOT_UNICODE_COMB_ARABIC_KASRA = 32,
VAS_EBOOT_UNICODE_COMB_ARABIC_SHADDA = 33,
VAS_EBOOT_UNICODE_COMB_ARABIC_SUKUN = 34,
VAS_EBOOT_UNICODE_COMB_ARABIC_SUPERSCRIPT_ALIF = 35,
VAS_EBOOT_UNICODE_COMB_SYRIAC_SUPERSCRIPT_ALAPH = 36,
VAS_EBOOT_UNICODE_STACK_ATTACHED_BELOW = 202,
VAS_EBOOT_UNICODE_STACK_ATTACHED_ABOVE = 214,
VAS_EBOOT_UNICODE_COMB_ATTACHED_ABOVE_RIGHT = 216,
VAS_EBOOT_UNICODE_STACK_BELOW = 220,
VAS_EBOOT_UNICODE_COMB_BELOW_RIGHT = 222,
VAS_EBOOT_UNICODE_COMB_ABOVE_LEFT = 228,
VAS_EBOOT_UNICODE_STACK_ABOVE = 230,
VAS_EBOOT_UNICODE_COMB_ABOVE_RIGHT = 232,
VAS_EBOOT_UNICODE_COMB_YPOGEGRAMMENI = 240,
/* If combining nature is indicated only by class and
not "combining type". */
VAS_EBOOT_UNICODE_COMB_ME = 253,
VAS_EBOOT_UNICODE_COMB_MC = 254,
VAS_EBOOT_UNICODE_COMB_MN = 255,
};
/* One combining entry attached to a glyph: a 21-bit code (wide enough
for 0x10ffff) together with its 8-bit combining type. struct
VasEBoot_unicode_glyph stores these either inline or in a heap array. */
struct VasEBoot_unicode_combining
{
VasEBoot_uint32_t code:21;
enum VasEBoot_comb_type type:8;
};
/* This structure describes a glyph as opposed to character.
It holds a base code, per-glyph flags and `ncomb' attached
VasEBoot_unicode_combining entries, which live either inline or behind a
pointer (see the union at the end). */
struct VasEBoot_unicode_glyph
{
VasEBoot_uint32_t base:23; /* minimum: 21 */
VasEBoot_uint16_t variant:9; /* minimum: 9 */
VasEBoot_uint8_t attributes:5; /* minimum: 5 */
VasEBoot_uint8_t bidi_level:6; /* minimum: 6 */
enum VasEBoot_bidi_type bidi_type:5; /* minimum: 5 */
/* Largest value the 8-bit ncomb field below can hold (255). */
#define VAS_EBOOT_UNICODE_NCOMB_MAX ((1 << 8) - 1)
unsigned ncomb:8;
/* Hint by unicode subsystem how wide this character usually is.
Real width is determined by font. Set only in UTF-8 stream. */
int estimated_width:8;
/* NOTE(review): presumably the index of this glyph's base character in
the original (logical) input -- confirm against the code that fills it. */
VasEBoot_size_t orig_pos;
/* Storage for the combining entries. The inline helpers in this header
use combining_ptr when ncomb > ARRAY_SIZE (combining_inline) and
combining_inline otherwise, so the active member follows from ncomb.
The inline array is sized to occupy no more space than one pointer. */
union
{
struct VasEBoot_unicode_combining combining_inline[sizeof (void *)
/ sizeof (struct VasEBoot_unicode_combining)];
struct VasEBoot_unicode_combining *combining_ptr;
};
};
/* Glyph attribute bit flags. The largest flag is 0x10, so all of them
fit in five bits.
NOTE(review): presumably these are the values stored in the 5-bit
`attributes' field of struct VasEBoot_unicode_glyph -- confirm. */
/* Bit 0. NOTE(review): presumably requests the mirrored form of the
glyph -- confirm. */
#define VAS_EBOOT_UNICODE_GLYPH_ATTRIBUTE_MIRROR 0x1
/* Shifting a LEFT_* flag left by this many bits yields the matching
RIGHT_* flag. */
#define VAS_EBOOT_UNICODE_GLYPH_ATTRIBUTES_JOIN_LEFT_TO_RIGHT_SHIFT 1
#define VAS_EBOOT_UNICODE_GLYPH_ATTRIBUTE_LEFT_JOINED 0x2
/* Evaluates to 0x4. */
#define VAS_EBOOT_UNICODE_GLYPH_ATTRIBUTE_RIGHT_JOINED \
(VAS_EBOOT_UNICODE_GLYPH_ATTRIBUTE_LEFT_JOINED \
<< VAS_EBOOT_UNICODE_GLYPH_ATTRIBUTES_JOIN_LEFT_TO_RIGHT_SHIFT)
/* Set iff the corresponding joining flags come from ZWJ or ZWNJ. */
#define VAS_EBOOT_UNICODE_GLYPH_ATTRIBUTE_LEFT_JOINED_EXPLICIT 0x8
/* Evaluates to 0x10. */
#define VAS_EBOOT_UNICODE_GLYPH_ATTRIBUTE_RIGHT_JOINED_EXPLICIT \
(VAS_EBOOT_UNICODE_GLYPH_ATTRIBUTE_LEFT_JOINED_EXPLICIT \
<< VAS_EBOOT_UNICODE_GLYPH_ATTRIBUTES_JOIN_LEFT_TO_RIGHT_SHIFT)
/* Mask of all four joining flags (0x1e); the MIRROR bit is excluded. */
#define VAS_EBOOT_UNICODE_GLYPH_ATTRIBUTES_JOIN \
(VAS_EBOOT_UNICODE_GLYPH_ATTRIBUTE_LEFT_JOINED \
| VAS_EBOOT_UNICODE_GLYPH_ATTRIBUTE_RIGHT_JOINED \
| VAS_EBOOT_UNICODE_GLYPH_ATTRIBUTE_LEFT_JOINED_EXPLICIT \
| VAS_EBOOT_UNICODE_GLYPH_ATTRIBUTE_RIGHT_JOINED_EXPLICIT)
/* Named constants for individual code points and code point range
boundaries, listed in ascending numeric order and ending with
VAS_EBOOT_UNICODE_LAST_VALID (0x10ffff).
NOTE(review): whether the *_END constants are inclusive or exclusive
bounds is not visible in this header (ARABIC_END is 0x0700 while TAG_END
is 0xe007f) -- check each user before relying on either reading. */
enum
{
VAS_EBOOT_UNICODE_DOTLESS_LOWERCASE_I = 0x0131,
VAS_EBOOT_UNICODE_DOTLESS_LOWERCASE_J = 0x0237,
VAS_EBOOT_UNICODE_COMBINING_GRAPHEME_JOINER = 0x034f,
VAS_EBOOT_UNICODE_HEBREW_WAW = 0x05d5,
VAS_EBOOT_UNICODE_ARABIC_START = 0x0600,
VAS_EBOOT_UNICODE_ARABIC_END = 0x0700,
VAS_EBOOT_UNICODE_THAANA_ABAFILI = 0x07a6,
VAS_EBOOT_UNICODE_THAANA_AABAAFILI = 0x07a7,
VAS_EBOOT_UNICODE_THAANA_IBIFILI = 0x07a8,
VAS_EBOOT_UNICODE_THAANA_EEBEEFILI = 0x07a9,
VAS_EBOOT_UNICODE_THAANA_UBUFILI = 0x07aa,
VAS_EBOOT_UNICODE_THAANA_OOBOOFILI = 0x07ab,
VAS_EBOOT_UNICODE_THAANA_EBEFILI = 0x07ac,
VAS_EBOOT_UNICODE_THAANA_EYBEYFILI = 0x07ad,
VAS_EBOOT_UNICODE_THAANA_OBOFILI = 0x07ae,
VAS_EBOOT_UNICODE_THAANA_OABOAFILI = 0x07af,
VAS_EBOOT_UNICODE_THAANA_SUKUN = 0x07b0,
VAS_EBOOT_UNICODE_ZWNJ = 0x200c,
VAS_EBOOT_UNICODE_ZWJ = 0x200d,
VAS_EBOOT_UNICODE_LRM = 0x200e,
VAS_EBOOT_UNICODE_RLM = 0x200f,
VAS_EBOOT_UNICODE_LRE = 0x202a,
VAS_EBOOT_UNICODE_RLE = 0x202b,
VAS_EBOOT_UNICODE_PDF = 0x202c,
VAS_EBOOT_UNICODE_LRO = 0x202d,
VAS_EBOOT_UNICODE_RLO = 0x202e,
VAS_EBOOT_UNICODE_LEFTARROW = 0x2190,
VAS_EBOOT_UNICODE_UPARROW = 0x2191,
VAS_EBOOT_UNICODE_RIGHTARROW = 0x2192,
VAS_EBOOT_UNICODE_DOWNARROW = 0x2193,
VAS_EBOOT_UNICODE_UPDOWNARROW = 0x2195,
VAS_EBOOT_UNICODE_LIGHT_HLINE = 0x2500,
VAS_EBOOT_UNICODE_HLINE = 0x2501,
VAS_EBOOT_UNICODE_LIGHT_VLINE = 0x2502,
VAS_EBOOT_UNICODE_VLINE = 0x2503,
VAS_EBOOT_UNICODE_LIGHT_CORNER_UL = 0x250c,
VAS_EBOOT_UNICODE_CORNER_UL = 0x250f,
VAS_EBOOT_UNICODE_LIGHT_CORNER_UR = 0x2510,
VAS_EBOOT_UNICODE_CORNER_UR = 0x2513,
VAS_EBOOT_UNICODE_LIGHT_CORNER_LL = 0x2514,
VAS_EBOOT_UNICODE_CORNER_LL = 0x2517,
VAS_EBOOT_UNICODE_LIGHT_CORNER_LR = 0x2518,
VAS_EBOOT_UNICODE_CORNER_LR = 0x251b,
VAS_EBOOT_UNICODE_BLACK_UP_TRIANGLE = 0x25b2,
VAS_EBOOT_UNICODE_BLACK_RIGHT_TRIANGLE = 0x25ba,
VAS_EBOOT_UNICODE_BLACK_DOWN_TRIANGLE = 0x25bc,
VAS_EBOOT_UNICODE_BLACK_LEFT_TRIANGLE = 0x25c4,
VAS_EBOOT_UNICODE_VARIATION_SELECTOR_1 = 0xfe00,
VAS_EBOOT_UNICODE_VARIATION_SELECTOR_16 = 0xfe0f,
VAS_EBOOT_UNICODE_TAG_START = 0xe0000,
VAS_EBOOT_UNICODE_TAG_END = 0xe007f,
VAS_EBOOT_UNICODE_VARIATION_SELECTOR_17 = 0xe0100,
VAS_EBOOT_UNICODE_VARIATION_SELECTOR_256 = 0xe01ef,
VAS_EBOOT_UNICODE_LAST_VALID = 0x10ffff
};
/* Property tables defined outside this header; their lengths and any
terminator convention are not visible here. */
extern struct VasEBoot_unicode_compact_range VasEBoot_unicode_compact[];
extern struct VasEBoot_unicode_bidi_pair VasEBoot_unicode_bidi_pairs[];
/* NOTE(review): presumably the exclusive upper bound of the code points
whose properties the implementation caches -- confirm against its user. */
#define VAS_EBOOT_UNICODE_MAX_CACHED_CHAR 0x20000
/* Unicode mandates an arbitrary limit. The value 61 fits the 6-bit
bidi_level field of struct VasEBoot_unicode_glyph. */
#define VAS_EBOOT_BIDI_MAX_EXPLICIT_LEVEL 61
/* Position record: a 1-bit validity flag plus x (15 bits) and y (16 bits),
for 32 bits of bit-fields in total.
NOTE(review): units and origin of x/y are not visible in this header --
presumably terminal cell coordinates; confirm against the callers. */
struct VasEBoot_term_pos
{
unsigned valid:1;
unsigned x:15, y:16;
};
/* Bidi entry point. Takes LOGICAL_LEN 32-bit values at LOGICAL and
reports its result through *VISUAL_OUT, returning a signed size.
GETCHARWIDTH is a caller-supplied callback that is handed a glyph
together with GETCHARWIDTH_ARG and returns a size.
NOTE(review): the definition is not in this header. Presumably LOGICAL
is in logical order, *VISUAL_OUT receives a newly allocated glyph array
in visual order that the caller must release, the return value is the
number of glyphs or negative on error, and MAX_WIDTH, START_WIDTH,
CODECHAR, POS and PRIMITIVE_WRAP control line wrapping -- confirm all of
this against the implementation before relying on it. */
VasEBoot_ssize_t
VasEBoot_bidi_logical_to_visual (const VasEBoot_uint32_t *logical,
VasEBoot_size_t logical_len,
struct VasEBoot_unicode_glyph **visual_out,
VasEBoot_size_t (*getcharwidth) (const struct VasEBoot_unicode_glyph *visual, void *getcharwidth_arg),
void *getcharwidth_arg,
VasEBoot_size_t max_width,
VasEBoot_size_t start_width, VasEBoot_uint32_t codechar,
struct VasEBoot_term_pos *pos,
int primitive_wrap);
/* Return the combining type for the 32-bit code C.
NOTE(review): defined outside this header; presumably returns
VAS_EBOOT_UNICODE_COMB_NONE for non-combining characters -- confirm. */
enum VasEBoot_comb_type
VasEBoot_unicode_get_comb_type (VasEBoot_uint32_t c);
/* Build the glyph *OUT from the INLEN 32-bit values at IN and return a
size.
NOTE(review): defined outside this header. Presumably it consumes one
base character plus the combining characters that follow it, returns the
number of input values consumed, and may allocate OUT's combining array
(to be released with VasEBoot_unicode_destroy_glyph) -- confirm. */
VasEBoot_size_t
VasEBoot_unicode_aglomerate_comb (const VasEBoot_uint32_t *in, VasEBoot_size_t inlen,
struct VasEBoot_unicode_glyph *out);
/* Return a read-only pointer to the combining entries of IN, or NULL when
   the glyph has none.  The entries live in the inline array unless their
   count exceeds its capacity, in which case the heap pointer is used.  */
static inline const struct VasEBoot_unicode_combining *
VasEBoot_unicode_get_comb (const struct VasEBoot_unicode_glyph *in)
{
  const unsigned count = in->ncomb;

  if (count == 0)
    return NULL;

  return (count > ARRAY_SIZE (in->combining_inline))
         ? in->combining_ptr
         : in->combining_inline;
}
/* Release the heap-allocated combining array of GLYPH, if it has one, and
   reset its combining count to zero.  The glyph structure itself is not
   freed.  */
static inline void
VasEBoot_unicode_destroy_glyph (struct VasEBoot_unicode_glyph *glyph)
{
  /* Only counts that overflow the inline array use combining_ptr.  */
  const int on_heap = glyph->ncomb > ARRAY_SIZE (glyph->combining_inline);

  if (on_heap)
    VasEBoot_free (glyph->combining_ptr);

  glyph->ncomb = 0;
}
/* Allocate and return a deep copy of IN, or NULL if an allocation fails.
   When IN keeps its combining entries on the heap, the copy receives its
   own array, so the two glyphs can be destroyed independently.  The
   returned glyph comes from VasEBoot_malloc.  */
static inline struct VasEBoot_unicode_glyph *
VasEBoot_unicode_glyph_dup (const struct VasEBoot_unicode_glyph *in)
{
  struct VasEBoot_unicode_glyph *copy;

  copy = VasEBoot_malloc (sizeof (*copy));
  if (copy == NULL)
    return NULL;

  /* Start from a byte-wise copy of every field, union included.  */
  VasEBoot_memcpy (copy, in, sizeof (*in));

  if (in->ncomb <= ARRAY_SIZE (copy->combining_inline))
    {
      /* Inline storage: the entries are part of the structure.  */
      VasEBoot_memcpy (&copy->combining_inline, &in->combining_inline,
                       sizeof (copy->combining_inline));
      return copy;
    }

  /* Heap storage: replace the aliased pointer with a private array.  */
  copy->combining_ptr = VasEBoot_calloc (in->ncomb,
                                         sizeof (copy->combining_ptr[0]));
  if (copy->combining_ptr == NULL)
    {
      VasEBoot_free (copy);
      return NULL;
    }

  VasEBoot_memcpy (copy->combining_ptr, in->combining_ptr,
                   in->ncomb * sizeof (copy->combining_ptr[0]));
  return copy;
}
/* Overwrite *OUT with a deep copy of *IN.

   OUT is overwritten without being inspected, so any combining array it
   previously owned must have been released by the caller beforehand.

   When IN stores its combining entries on the heap, OUT receives a
   freshly allocated array of its own.  If that allocation fails, OUT keeps
   the copied scalar fields but its combining count is reset to 0.  The
   function returns void, so this is the only way to leave OUT usable:
   without the reset OUT would claim `ncomb' entries behind a NULL
   combining_ptr, and a later walk over VasEBoot_unicode_get_comb() would
   dereference NULL.  */
static inline void
VasEBoot_unicode_set_glyph (struct VasEBoot_unicode_glyph *out,
                            const struct VasEBoot_unicode_glyph *in)
{
  /* Byte-wise copy of every field; the union is fixed up below.  */
  VasEBoot_memcpy (out, in, sizeof (*in));

  if (in->ncomb > ARRAY_SIZE (out->combining_inline))
    {
      /* The memcpy above aliased IN's heap array; give OUT its own.  */
      out->combining_ptr = VasEBoot_calloc (in->ncomb,
                                            sizeof (out->combining_ptr[0]));
      if (!out->combining_ptr)
        {
          /* Degrade to a glyph without combining marks rather than leave
             a non-zero count paired with a NULL array.  */
          out->ncomb = 0;
          return;
        }
      VasEBoot_memcpy (out->combining_ptr, in->combining_ptr,
                       in->ncomb * sizeof (out->combining_ptr[0]));
    }
  else
    VasEBoot_memcpy (&out->combining_inline, &in->combining_inline,
                     sizeof (out->combining_inline));
}
/* Allocate a zero-filled glyph whose base is CODE and return it, or NULL
   if the allocation fails.  Every other field, including the combining
   count, is zero.  */
static inline struct VasEBoot_unicode_glyph *
VasEBoot_unicode_glyph_from_code (VasEBoot_uint32_t code)
{
  struct VasEBoot_unicode_glyph *glyph = VasEBoot_zalloc (sizeof (*glyph));

  if (glyph != NULL)
    glyph->base = code;

  return glyph;
}
/* Reinitialise the caller-provided GLYPH in place: clear every byte of the
   structure, then store CODE as its base.  Whatever GLYPH held before is
   overwritten without being released.  */
static inline void
VasEBoot_unicode_set_glyph_from_code (struct VasEBoot_unicode_glyph *glyph,
                                      VasEBoot_uint32_t code)
{
  VasEBoot_memset (glyph, 0, sizeof (struct VasEBoot_unicode_glyph));

  glyph->base = code;
}
/* Map the 32-bit code IN to another 32-bit code.
NOTE(review): defined outside this header; presumably returns the
mirrored counterpart of IN (looked up in VasEBoot_unicode_bidi_pairs) or
IN itself when there is none -- confirm. */
VasEBoot_uint32_t
VasEBoot_unicode_mirror_code (VasEBoot_uint32_t in);
/* Map the 32-bit code IN to another 32-bit code under control of the
8-bit ATTR.
NOTE(review): defined outside this header; presumably ATTR carries
VAS_EBOOT_UNICODE_GLYPH_ATTRIBUTE_* joining flags and the result is the
matching Arabic presentation form from VasEBoot_unicode_arabic_shapes --
confirm. */
VasEBoot_uint32_t
VasEBoot_unicode_shape_code (VasEBoot_uint32_t in, VasEBoot_uint8_t attr);
/* Return a pointer into the 32-bit code sequence delimited by CUR and END.
NOTE(review): defined outside this header; presumably scans forward from
CUR, stops no later than END, and returns the position just past the
combining characters that follow CUR -- confirm. */
const VasEBoot_uint32_t *
VasEBoot_unicode_get_comb_end (const VasEBoot_uint32_t *end,
const VasEBoot_uint32_t *cur);
#endif