vaseboot/include/VasEBoot/unicode.h

361 lines
13 KiB
C

/*
* VasEBoot -- GRand Unified Bootloader
* Copyright (C) 2010 Free Software Foundation, Inc.
*
* VasEBoot is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* VasEBoot is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with VasEBoot. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef VasEBoot_BIDI_HEADER
#define VasEBoot_BIDI_HEADER 1
#include <VasEBoot/types.h>
#include <VasEBoot/mm.h>
#include <VasEBoot/misc.h>
/* Element type of the VasEBoot_unicode_bidi_pairs[] table: associates the
   code point KEY with the code point REPLACE.
   NOTE(review): presumably REPLACE is KEY's mirrored counterpart for
   right-to-left rendering (cf. VasEBoot_unicode_mirror_code) -- confirm
   against the table definition.  */
struct VasEBoot_unicode_bidi_pair
{
VasEBoot_uint32_t key;
VasEBoot_uint32_t replace;
};
/* Bit-packed element type of the VasEBoot_unicode_compact[] table.  The
   fields add up to 47 bits and the structure is declared VasEBoot_PACKED.
   NOTE(review): presumably each entry assigns one set of properties to the
   LEN consecutive code points beginning at START -- confirm against the
   code that generates and reads the table.  */
struct VasEBoot_unicode_compact_range
{
/* 21 bits: wide enough for VasEBoot_UNICODE_LAST_VALID (0x10ffff).  */
unsigned start:21;
unsigned len:9;
/* Presumably an enum VasEBoot_bidi_type value; 5 bits cover its 0..18.  */
unsigned bidi_type:5;
/* Presumably an enum VasEBoot_comb_type value; 8 bits cover its 0..255.  */
unsigned comb_type:8;
unsigned bidi_mirror:1;
/* Presumably an enum VasEBoot_join_type value; 3 bits cover its 0..5.  */
unsigned join_type:3;
} VasEBoot_PACKED;
/* Old-style Arabic shaping. Used for "visual UTF-8" and
in VasEBoot-mkfont to find variant glyphs in absence of GPOS tables.
Each entry holds one code point (CODE) together with four alternative
code points, one per joining context.
NOTE(review): presumably the four alternatives are the presentation forms
to substitute for CODE depending on which neighbours it joins with --
confirm against VasEBoot_unicode_shape_code.  */
struct VasEBoot_unicode_arabic_shape
{
VasEBoot_uint32_t code;
VasEBoot_uint32_t isolated;
VasEBoot_uint32_t right_linked;
VasEBoot_uint32_t both_linked;
VasEBoot_uint32_t left_linked;
};
/* Table of shaping entries, defined in another translation unit.  Its length
   and terminator convention are not visible from this header.  */
extern struct VasEBoot_unicode_arabic_shape VasEBoot_unicode_arabic_shapes[];
/* Bidirectional character types.  Values are numbered consecutively from
   0 (VasEBoot_BIDI_TYPE_L) to 18 (VasEBoot_BIDI_TYPE_ON), so every value fits
   in the 5-bit bidi_type fields declared elsewhere in this header.
   NOTE(review): the suffixes look like the Bidi_Class abbreviations of the
   Unicode Bidirectional Algorithm (UAX #9) -- confirm before relying on a
   one-to-one correspondence.  */
enum VasEBoot_bidi_type
{
VasEBoot_BIDI_TYPE_L = 0,
VasEBoot_BIDI_TYPE_LRE,
VasEBoot_BIDI_TYPE_LRO,
VasEBoot_BIDI_TYPE_R,
VasEBoot_BIDI_TYPE_AL,
VasEBoot_BIDI_TYPE_RLE,
VasEBoot_BIDI_TYPE_RLO,
VasEBoot_BIDI_TYPE_PDF,
VasEBoot_BIDI_TYPE_EN,
VasEBoot_BIDI_TYPE_ES,
VasEBoot_BIDI_TYPE_ET,
VasEBoot_BIDI_TYPE_AN,
VasEBoot_BIDI_TYPE_CS,
VasEBoot_BIDI_TYPE_NSM,
VasEBoot_BIDI_TYPE_BN,
VasEBoot_BIDI_TYPE_B,
VasEBoot_BIDI_TYPE_S,
VasEBoot_BIDI_TYPE_WS,
VasEBoot_BIDI_TYPE_ON
};
/* Joining behaviour of a character.  Values run from 0 to 5, so they fit in
   the 3-bit join_type field of struct VasEBoot_unicode_compact_range.
   NOTE(review): presumably mirrors the Unicode Joining_Type property used
   for cursive scripts -- confirm against the table generator.  */
enum VasEBoot_join_type
{
VasEBoot_JOIN_TYPE_NONJOINING = 0,
VasEBoot_JOIN_TYPE_LEFT = 1,
VasEBoot_JOIN_TYPE_RIGHT = 2,
VasEBoot_JOIN_TYPE_DUAL = 3,
VasEBoot_JOIN_TYPE_CAUSING = 4,
VasEBoot_JOIN_TYPE_TRANSPARENT = 5
};
/* Combining type of a character.  VasEBoot_UNICODE_COMB_NONE (0) marks a
   non-combining character; all values lie in 0..255 and therefore fit in
   the 8-bit comb_type / type bit-fields declared in this header.
   NOTE(review): the explicit numbers presumably match Unicode canonical
   combining class values -- confirm against UnicodeData.txt before adding
   or renumbering entries.  */
enum VasEBoot_comb_type
{
VasEBoot_UNICODE_COMB_NONE = 0,
VasEBoot_UNICODE_COMB_OVERLAY = 1,
VasEBoot_UNICODE_COMB_HEBREW_SHEVA = 10,
VasEBoot_UNICODE_COMB_HEBREW_HATAF_SEGOL = 11,
VasEBoot_UNICODE_COMB_HEBREW_HATAF_PATAH = 12,
VasEBoot_UNICODE_COMB_HEBREW_HATAF_QAMATS = 13,
VasEBoot_UNICODE_COMB_HEBREW_HIRIQ = 14,
VasEBoot_UNICODE_COMB_HEBREW_TSERE = 15,
VasEBoot_UNICODE_COMB_HEBREW_SEGOL = 16,
VasEBoot_UNICODE_COMB_HEBREW_PATAH = 17,
VasEBoot_UNICODE_COMB_HEBREW_QAMATS = 18,
VasEBoot_UNICODE_COMB_HEBREW_HOLAM = 19,
VasEBoot_UNICODE_COMB_HEBREW_QUBUTS = 20,
VasEBoot_UNICODE_COMB_HEBREW_DAGESH = 21,
VasEBoot_UNICODE_COMB_HEBREW_METEG = 22,
VasEBoot_UNICODE_COMB_HEBREW_RAFE = 23,
VasEBoot_UNICODE_COMB_HEBREW_SHIN_DOT = 24,
VasEBoot_UNICODE_COMB_HEBREW_SIN_DOT = 25,
VasEBoot_UNICODE_COMB_HEBREW_VARIKA = 26,
VasEBoot_UNICODE_COMB_ARABIC_FATHATAN = 27,
VasEBoot_UNICODE_COMB_ARABIC_DAMMATAN = 28,
VasEBoot_UNICODE_COMB_ARABIC_KASRATAN = 29,
VasEBoot_UNICODE_COMB_ARABIC_FATHAH = 30,
VasEBoot_UNICODE_COMB_ARABIC_DAMMAH = 31,
VasEBoot_UNICODE_COMB_ARABIC_KASRA = 32,
VasEBoot_UNICODE_COMB_ARABIC_SHADDA = 33,
VasEBoot_UNICODE_COMB_ARABIC_SUKUN = 34,
VasEBoot_UNICODE_COMB_ARABIC_SUPERSCRIPT_ALIF = 35,
VasEBoot_UNICODE_COMB_SYRIAC_SUPERSCRIPT_ALAPH = 36,
VasEBoot_UNICODE_STACK_ATTACHED_BELOW = 202,
VasEBoot_UNICODE_STACK_ATTACHED_ABOVE = 214,
VasEBoot_UNICODE_COMB_ATTACHED_ABOVE_RIGHT = 216,
VasEBoot_UNICODE_STACK_BELOW = 220,
VasEBoot_UNICODE_COMB_BELOW_RIGHT = 222,
VasEBoot_UNICODE_COMB_ABOVE_LEFT = 228,
VasEBoot_UNICODE_STACK_ABOVE = 230,
VasEBoot_UNICODE_COMB_ABOVE_RIGHT = 232,
VasEBoot_UNICODE_COMB_YPOGEGRAMMENI = 240,
/* The three values below are used when the combining nature of a character
is indicated only by its class and not by a "combining type".  */
VasEBoot_UNICODE_COMB_ME = 253,
VasEBoot_UNICODE_COMB_MC = 254,
VasEBoot_UNICODE_COMB_MN = 255,
};
/* One combining mark attached to a glyph: its code point (21 bits, enough
   for VasEBoot_UNICODE_LAST_VALID) and its combining type (8 bits, enough
   for every enum VasEBoot_comb_type value).  This is the element type of the
   combining_inline / combining_ptr arrays in struct VasEBoot_unicode_glyph.  */
struct VasEBoot_unicode_combining
{
VasEBoot_uint32_t code:21;
enum VasEBoot_comb_type type:8;
};
/* This structure describes a glyph as opposed to character.
   A glyph is a base code point plus NCOMB combining marks.  The marks are
   stored in one of two places, selected by NCOMB alone: while NCOMB does not
   exceed ARRAY_SIZE (combining_inline) they live in combining_inline;
   otherwise combining_ptr points to an array of NCOMB entries (see
   VasEBoot_unicode_get_comb and VasEBoot_unicode_destroy_glyph).  */
struct VasEBoot_unicode_glyph
{
VasEBoot_uint32_t base:23; /* minimum: 21 */
VasEBoot_uint16_t variant:9; /* minimum: 9 */
/* Bitwise OR of the VasEBoot_UNICODE_GLYPH_ATTRIBUTE_* flags.  */
VasEBoot_uint8_t attributes:5; /* minimum: 5 */
VasEBoot_uint8_t bidi_level:6; /* minimum: 6 */
enum VasEBoot_bidi_type bidi_type:5; /* minimum: 5 */
/* Number of combining marks; also selects the active union member below.  */
unsigned ncomb:8;
/* Hint by unicode subsystem how wide this character usually is.
Real width is determined by font. Set only in UTF-8 stream. */
int estimated_width:8;
/* NOTE(review): presumably the index of this glyph's base character in the
original (logical) input -- confirm against the code that fills it in.  */
VasEBoot_size_t orig_pos;
/* Anonymous union: both members share the same storage.  */
union
{
/* Inline storage: as many entries as fit in the space of one pointer.  */
struct VasEBoot_unicode_combining combining_inline[sizeof (void *)
/ sizeof (struct VasEBoot_unicode_combining)];
/* Separately allocated storage, used when the inline array is too small.  */
struct VasEBoot_unicode_combining *combining_ptr;
};
};
/* Flag bits for the 5-bit `attributes' field of struct VasEBoot_unicode_glyph.
   Resulting values: MIRROR 0x01, LEFT_JOINED 0x02, RIGHT_JOINED 0x04,
   LEFT_JOINED_EXPLICIT 0x08, RIGHT_JOINED_EXPLICIT 0x10.  */
#define VasEBoot_UNICODE_GLYPH_ATTRIBUTE_MIRROR 0x1
/* Shifting a LEFT_* flag left by this many bits yields the matching RIGHT_*
   flag.  */
#define VasEBoot_UNICODE_GLYPH_ATTRIBUTES_JOIN_LEFT_TO_RIGHT_SHIFT 1
#define VasEBoot_UNICODE_GLYPH_ATTRIBUTE_LEFT_JOINED 0x2
#define VasEBoot_UNICODE_GLYPH_ATTRIBUTE_RIGHT_JOINED \
(VasEBoot_UNICODE_GLYPH_ATTRIBUTE_LEFT_JOINED \
<< VasEBoot_UNICODE_GLYPH_ATTRIBUTES_JOIN_LEFT_TO_RIGHT_SHIFT)
/* Set iff the corresponding joining flags come from ZWJ or ZWNJ. */
#define VasEBoot_UNICODE_GLYPH_ATTRIBUTE_LEFT_JOINED_EXPLICIT 0x8
#define VasEBoot_UNICODE_GLYPH_ATTRIBUTE_RIGHT_JOINED_EXPLICIT \
(VasEBoot_UNICODE_GLYPH_ATTRIBUTE_LEFT_JOINED_EXPLICIT \
<< VasEBoot_UNICODE_GLYPH_ATTRIBUTES_JOIN_LEFT_TO_RIGHT_SHIFT)
/* Mask covering all four joining-related flags (0x1e).  */
#define VasEBoot_UNICODE_GLYPH_ATTRIBUTES_JOIN \
(VasEBoot_UNICODE_GLYPH_ATTRIBUTE_LEFT_JOINED \
| VasEBoot_UNICODE_GLYPH_ATTRIBUTE_RIGHT_JOINED \
| VasEBoot_UNICODE_GLYPH_ATTRIBUTE_LEFT_JOINED_EXPLICIT \
| VasEBoot_UNICODE_GLYPH_ATTRIBUTE_RIGHT_JOINED_EXPLICIT)
/* Named constants for individual Unicode code points and for the bounds of
   a few code point ranges, listed in ascending numeric order.
   NOTE(review): whether the *_END constants are inclusive or exclusive
   bounds cannot be told from this header -- check their users.  */
enum
{
VasEBoot_UNICODE_DOTLESS_LOWERCASE_I = 0x0131,
VasEBoot_UNICODE_DOTLESS_LOWERCASE_J = 0x0237,
VasEBoot_UNICODE_COMBINING_GRAPHEME_JOINER = 0x034f,
VasEBoot_UNICODE_HEBREW_WAW = 0x05d5,
VasEBoot_UNICODE_ARABIC_START = 0x0600,
VasEBoot_UNICODE_ARABIC_END = 0x0700,
VasEBoot_UNICODE_THAANA_ABAFILI = 0x07a6,
VasEBoot_UNICODE_THAANA_AABAAFILI = 0x07a7,
VasEBoot_UNICODE_THAANA_IBIFILI = 0x07a8,
VasEBoot_UNICODE_THAANA_EEBEEFILI = 0x07a9,
VasEBoot_UNICODE_THAANA_UBUFILI = 0x07aa,
VasEBoot_UNICODE_THAANA_OOBOOFILI = 0x07ab,
VasEBoot_UNICODE_THAANA_EBEFILI = 0x07ac,
VasEBoot_UNICODE_THAANA_EYBEYFILI = 0x07ad,
VasEBoot_UNICODE_THAANA_OBOFILI = 0x07ae,
VasEBoot_UNICODE_THAANA_OABOAFILI = 0x07af,
VasEBoot_UNICODE_THAANA_SUKUN = 0x07b0,
VasEBoot_UNICODE_ZWNJ = 0x200c,
VasEBoot_UNICODE_ZWJ = 0x200d,
VasEBoot_UNICODE_LRM = 0x200e,
VasEBoot_UNICODE_RLM = 0x200f,
VasEBoot_UNICODE_LRE = 0x202a,
VasEBoot_UNICODE_RLE = 0x202b,
VasEBoot_UNICODE_PDF = 0x202c,
VasEBoot_UNICODE_LRO = 0x202d,
VasEBoot_UNICODE_RLO = 0x202e,
VasEBoot_UNICODE_LEFTARROW = 0x2190,
VasEBoot_UNICODE_UPARROW = 0x2191,
VasEBoot_UNICODE_RIGHTARROW = 0x2192,
VasEBoot_UNICODE_DOWNARROW = 0x2193,
VasEBoot_UNICODE_UPDOWNARROW = 0x2195,
VasEBoot_UNICODE_LIGHT_HLINE = 0x2500,
VasEBoot_UNICODE_HLINE = 0x2501,
VasEBoot_UNICODE_LIGHT_VLINE = 0x2502,
VasEBoot_UNICODE_VLINE = 0x2503,
VasEBoot_UNICODE_LIGHT_CORNER_UL = 0x250c,
VasEBoot_UNICODE_CORNER_UL = 0x250f,
VasEBoot_UNICODE_LIGHT_CORNER_UR = 0x2510,
VasEBoot_UNICODE_CORNER_UR = 0x2513,
VasEBoot_UNICODE_LIGHT_CORNER_LL = 0x2514,
VasEBoot_UNICODE_CORNER_LL = 0x2517,
VasEBoot_UNICODE_LIGHT_CORNER_LR = 0x2518,
VasEBoot_UNICODE_CORNER_LR = 0x251b,
VasEBoot_UNICODE_BLACK_UP_TRIANGLE = 0x25b2,
VasEBoot_UNICODE_BLACK_RIGHT_TRIANGLE = 0x25ba,
VasEBoot_UNICODE_BLACK_DOWN_TRIANGLE = 0x25bc,
VasEBoot_UNICODE_BLACK_LEFT_TRIANGLE = 0x25c4,
VasEBoot_UNICODE_VARIATION_SELECTOR_1 = 0xfe00,
VasEBoot_UNICODE_VARIATION_SELECTOR_16 = 0xfe0f,
VasEBoot_UNICODE_TAG_START = 0xe0000,
VasEBoot_UNICODE_TAG_END = 0xe007f,
VasEBoot_UNICODE_VARIATION_SELECTOR_17 = 0xe0100,
VasEBoot_UNICODE_VARIATION_SELECTOR_256 = 0xe01ef,
/* Highest code point value; it fits in the 21-bit code point bit-fields
used throughout this header.  */
VasEBoot_UNICODE_LAST_VALID = 0x10ffff
};
/* Property tables defined in another translation unit.  Their lengths and
   terminator conventions are not visible from this header.  */
extern struct VasEBoot_unicode_compact_range VasEBoot_unicode_compact[];
extern struct VasEBoot_unicode_bidi_pair VasEBoot_unicode_bidi_pairs[];
/* NOTE(review): presumably code points below this value have their
   properties cached by the implementation -- confirm against its users.  */
#define VasEBoot_UNICODE_MAX_CACHED_CHAR 0x20000
/* Upper bound on explicit bidi embedding levels.  Unicode mandates an
   arbitrary limit.  */
#define VasEBoot_BIDI_MAX_EXPLICIT_LEVEL 61
/* A position packed into 32 bits: a 1-bit VALID flag, a 15-bit X and a
   16-bit Y.
   NOTE(review): presumably X/Y are terminal column/row coordinates and are
   meaningful only when VALID is set -- confirm against
   VasEBoot_bidi_logical_to_visual, which takes an array of these.  */
struct VasEBoot_term_pos
{
unsigned valid:1;
unsigned x:15, y:16;
};
/* Convert the LOGICAL_LEN code points at LOGICAL into glyphs and hand the
   resulting array back through VISUAL_OUT.  GETCHARWIDTH is a callback that
   receives a glyph together with GETCHARWIDTH_ARG and returns a size.
   NOTE(review): everything below is inferred from names and types only; the
   definition is not visible from this header, so confirm before relying on
   it:
   - presumably reorders text from logical to visual order and wraps lines
     at MAX_WIDTH, with the first line starting at START_WIDTH;
   - presumably *VISUAL_OUT is allocated by the callee and owned by the
     caller afterwards;
   - presumably a negative return value signals failure and a non-negative
     one is the number of glyphs produced;
   - the meaning of CODECHAR, POS and PRIMITIVE_WRAP is not evident here.  */
VasEBoot_ssize_t
VasEBoot_bidi_logical_to_visual (const VasEBoot_uint32_t *logical,
VasEBoot_size_t logical_len,
struct VasEBoot_unicode_glyph **visual_out,
VasEBoot_size_t (*getcharwidth) (const struct VasEBoot_unicode_glyph *visual, void *getcharwidth_arg),
void *getcharwidth_arg,
VasEBoot_size_t max_width,
VasEBoot_size_t start_width, VasEBoot_uint32_t codechar,
struct VasEBoot_term_pos *pos,
int primitive_wrap);
/* Return the combining type of code point C.
   NOTE(review): presumably VasEBoot_UNICODE_COMB_NONE for non-combining
   characters -- confirm against the definition.  */
enum VasEBoot_comb_type
VasEBoot_unicode_get_comb_type (VasEBoot_uint32_t c);
/* Build one glyph in OUT from the INLEN code points starting at IN.
   NOTE(review): presumably consumes a base character plus the combining
   marks that follow it and returns the number of code points consumed --
   confirm against the definition.  */
VasEBoot_size_t
VasEBoot_unicode_aglomerate_comb (const VasEBoot_uint32_t *in, VasEBoot_size_t inlen,
struct VasEBoot_unicode_glyph *out);
/* Return the array holding the combining marks of glyph IN, or NULL when
   the glyph has none.  The array has IN->ncomb entries.  It is the glyph's
   inline storage while the count fits there and the separately allocated
   combining_ptr array otherwise.  The result points into (or is owned by)
   IN and must not be freed by the caller.  */
static inline const struct VasEBoot_unicode_combining *
VasEBoot_unicode_get_comb (const struct VasEBoot_unicode_glyph *in)
{
  const unsigned count = in->ncomb;

  if (count == 0)
    return NULL;

  /* Small counts are stored directly inside the glyph.  */
  if (count <= ARRAY_SIZE (in->combining_inline))
    return in->combining_inline;

  return in->combining_ptr;
}
/* Release the combining-mark storage of GLYPH and leave it with no
   combining marks.  The separately allocated array is freed only when the
   count says one is in use.  GLYPH itself is not freed.  */
static inline void
VasEBoot_unicode_destroy_glyph (struct VasEBoot_unicode_glyph *glyph)
{
  const int heap_backed
    = glyph->ncomb > ARRAY_SIZE (glyph->combining_inline);

  if (heap_backed)
    VasEBoot_free (glyph->combining_ptr);

  /* A zero count selects the inline storage, so the stale pointer value is
     never interpreted as a pointer again.  */
  glyph->ncomb = 0;
}
/* Return a newly allocated deep copy of glyph IN, or NULL if an allocation
   fails (nothing is leaked in that case).  When IN keeps its combining
   marks in a separately allocated array, the copy gets its own array, so
   the two glyphs can be destroyed independently.  The returned glyph comes
   from VasEBoot_malloc.  */
static inline struct VasEBoot_unicode_glyph *
VasEBoot_unicode_glyph_dup (const struct VasEBoot_unicode_glyph *in)
{
  struct VasEBoot_unicode_glyph *copy;

  copy = VasEBoot_malloc (sizeof (*copy));
  if (copy == NULL)
    return NULL;

  /* Start from a bitwise copy of every field.  */
  VasEBoot_memcpy (copy, in, sizeof (*in));

  if (in->ncomb <= ARRAY_SIZE (copy->combining_inline))
    {
      /* Marks are stored inline.  */
      VasEBoot_memcpy (&copy->combining_inline, &in->combining_inline,
                       sizeof (copy->combining_inline));
      return copy;
    }

  /* The bitwise copy duplicated IN's pointer; replace it with a private
     array of the same length.  */
  copy->combining_ptr
    = VasEBoot_malloc (in->ncomb * sizeof (copy->combining_ptr[0]));
  if (copy->combining_ptr == NULL)
    {
      VasEBoot_free (copy);
      return NULL;
    }

  VasEBoot_memcpy (copy->combining_ptr, in->combining_ptr,
                   in->ncomb * sizeof (copy->combining_ptr[0]));
  return copy;
}
/* Overwrite glyph OUT with a deep copy of glyph IN.  When IN keeps its
   combining marks in a separately allocated array, OUT receives its own
   array, so the two glyphs can be destroyed independently.  Whatever OUT
   held before the call is overwritten without being freed.

   The function has no return value.  If the private combining array cannot
   be allocated, OUT is left as a consistent glyph without combining marks
   (ncomb == 0) instead of a glyph whose count promises an array that does
   not exist.  A caller that needs to detect this can compare OUT->ncomb
   with IN->ncomb afterwards.  */
static inline void
VasEBoot_unicode_set_glyph (struct VasEBoot_unicode_glyph *out,
                            const struct VasEBoot_unicode_glyph *in)
{
  /* Start from a bitwise copy of every field.  */
  VasEBoot_memcpy (out, in, sizeof (*in));

  if (in->ncomb > ARRAY_SIZE (out->combining_inline))
    {
      /* The bitwise copy duplicated IN's pointer; replace it with a private
         array of the same length.  */
      out->combining_ptr
        = VasEBoot_malloc (in->ncomb * sizeof (out->combining_ptr[0]));
      if (!out->combining_ptr)
        {
          /* Keep the count and the storage in agreement: with a NULL
             pointer and a large count, VasEBoot_unicode_get_comb would hand
             out NULL together with a non-zero number of entries.  */
          out->ncomb = 0;
          return;
        }
      VasEBoot_memcpy (out->combining_ptr, in->combining_ptr,
                       in->ncomb * sizeof (out->combining_ptr[0]));
    }
  else
    VasEBoot_memcpy (&out->combining_inline, &in->combining_inline,
                     sizeof (out->combining_inline));
}
/* Allocate a glyph with VasEBoot_zalloc and set its base code point to CODE;
   no other field is assigned here.  Returns NULL if the allocation fails.
   CODE is stored in the 23-bit `base' bit-field.  */
static inline struct VasEBoot_unicode_glyph *
VasEBoot_unicode_glyph_from_code (VasEBoot_uint32_t code)
{
  struct VasEBoot_unicode_glyph *glyph = VasEBoot_zalloc (sizeof (*glyph));

  if (glyph)
    glyph->base = code;

  return glyph;
}
/* Reinitialise the caller-provided GLYPH so that it consists of the base
   code point CODE only.  Every byte of the structure is cleared first, so
   the combining count becomes zero.  Any combining array GLYPH owned before
   the call is not freed here.  */
static inline void
VasEBoot_unicode_set_glyph_from_code (struct VasEBoot_unicode_glyph *glyph,
                                      VasEBoot_uint32_t code)
{
  /* Wipe the whole structure before setting the one non-zero field.  */
  VasEBoot_memset (glyph, 0, sizeof *glyph);

  glyph->base = code;
}
/* Map code point IN to another code point.
   NOTE(review): presumably returns the bidi-mirrored counterpart of IN, or
   IN itself when it has none -- confirm against the definition.  */
VasEBoot_uint32_t
VasEBoot_unicode_mirror_code (VasEBoot_uint32_t in);
/* Map code point IN to another code point according to ATTR.
   NOTE(review): presumably ATTR carries VasEBoot_UNICODE_GLYPH_ATTRIBUTE_*
   joining flags and the result is the matching presentation form from
   VasEBoot_unicode_arabic_shapes[] -- confirm against the definition.  */
VasEBoot_uint32_t
VasEBoot_unicode_shape_code (VasEBoot_uint32_t in, VasEBoot_uint8_t attr);
/* Return a pointer into the code point sequence bounded by END.
   NOTE(review): presumably skips from CUR past the combining marks that
   belong to the character at CUR, never going beyond END -- confirm against
   the definition.  */
const VasEBoot_uint32_t *
VasEBoot_unicode_get_comb_end (const VasEBoot_uint32_t *end,
const VasEBoot_uint32_t *cur);
#endif