/*************************************************
*    The PMS Music Typesetter - 2nd incarnation  *
*************************************************/

/* Copyright (c) Philip Hazel, 1991 - 2020 */

/* Written by Philip Hazel, starting November 1991 */
/* This file last modified: July 2020 */


/* This file contains code for reading and processing strings. */


#include "pmwhdr.h"
#include "readhdr.h"


/* Initial size, in bytes, of the buffer that string_read() allocates for a
string. The buffer is enlarged as necessary while reading. */

#define string_start_size 64


/* Set TRUE by string_check() while it is scanning the escapes in a newly read
string, and FALSE again afterwards. string_escape() tests it so that it can
reject a user-supplied \x@ sequence, which is reserved for internal use. */

static BOOL in_string_check = FALSE;

/* The characters that are recognized after \* (or \**) in a string, and a
parallel array giving, for the character at the same offset in music_escapes,
the single byte that string_escape() emits for it. The two must be kept the
same length and in the same order. */

static uschar *music_escapes = US"bsmcQq#$%><udlr";
static uschar music_escape_values[] = {
  49, 50, 51, 53, 55, 57, 37, 39, 40, 122, 121, 126, 124, 123, 125 };

/* One entry in the table of 2-character escapes. The escape field holds the
two characters packed as (first << 8) + second; the unicode field is the code
point that the escape stands for. */

typedef struct esctabstr
  {
  int escape;
  int unicode;
  }
esctabstr;

/* Table of 2-character escape sequences. Each key is (letter << 8) + accent
character. We use @ here instead of circumflex, because string_escape()
translates \x^ into \x@ when a string is first checked. This table must be kept
in ascending key order because it is searched by binary chop.

Some available accented characters are omitted until I think of a suitable
escape for them. They are characters with these accents:

  dotaccent     - dot has been used for dieresis since the start of PMW
  commaaccent   - comma has been used for cedilla ditto
  hungrumlaut   - doublequote isn't available in PMW strings
  ogonek
*/

static esctabstr esctab[] = {
  { ('A' << 8) + '\'',   0x00c1 },  /* Aacute */
  { ('A' << 8) + '-',    0x0100 },  /* Amacron */
  { ('A' << 8) + '.',    0x00c4 },  /* Adieresis */
  { ('A' << 8) + '@',    0x00c2 },  /* Acircumflex */
  { ('A' << 8) + '`',    0x00c0 },  /* Agrave */
  { ('A' << 8) + 'o',    0x00c5 },  /* Aring */
  { ('A' << 8) + 'u',    0x0102 },  /* Abreve */
  { ('A' << 8) + '~',    0x00c3 },  /* Atilde */

  { ('C' << 8) + '\'',   0x0106 },  /* Cacute */
  { ('C' << 8) + ')',    0x00a9 },  /* Copyright */
  { ('C' << 8) + ',',    0x00c7 },  /* Ccedilla */
  { ('C' << 8) + '@',    0x0108 },  /* Ccircumflex */
  { ('C' << 8) + 'v',    0x010c },  /* Ccaron */

  { ('D' << 8) + '-',    0x0110 },  /* Dcroat */
  { ('D' << 8) + 'v',    0x010e },  /* Dcaron */

  { ('E' << 8) + '\'',   0x00c9 },  /* Eacute */
  { ('E' << 8) + '-',    0x0112 },  /* Emacron */
  { ('E' << 8) + '.',    0x00cb },  /* Edieresis */
  { ('E' << 8) + '@',    0x00ca },  /* Ecircumflex */
  { ('E' << 8) + '`',    0x00c8 },  /* Egrave */
  { ('E' << 8) + 'u',    0x0114 },  /* Ebreve */
  { ('E' << 8) + 'v',    0x011a },  /* Ecaron */

  { ('G' << 8) + '@',    0x011c },  /* Gcircumflex */
  { ('G' << 8) + 'u',    0x011e },  /* Gbreve */

  { ('H' << 8) + '@',    0x0124 },  /* Hcircumflex */

  { ('I' << 8) + '\'',   0x00cd },  /* Iacute */
  { ('I' << 8) + '-',    0x012a },  /* Imacron */
  { ('I' << 8) + '.',    0x00cf },  /* Idieresis */
  { ('I' << 8) + '@',    0x00ce },  /* Icircumflex */
  { ('I' << 8) + '`',    0x00cc },  /* Igrave */
  { ('I' << 8) + 'u',    0x012C },  /* Ibreve */
  { ('I' << 8) + '~',    0x0128 },  /* Itilde */

  { ('J' << 8) + '@',    0x0134 },  /* Jcircumflex */

  { ('L' << 8) + '\'',   0x0139 },  /* Lacute */
  { ('L' << 8) + '/',    0x0141 },  /* Lslash */
  { ('L' << 8) + 'v',    0x013d },  /* Lcaron */

  { ('N' << 8) + '\'',   0x0143 },  /* Nacute */
  { ('N' << 8) + 'v',    0x0147 },  /* Ncaron */
  { ('N' << 8) + '~',    0x00d1 },  /* Ntilde */

  { ('O' << 8) + '\'',   0x00d3 },  /* Oacute */
  { ('O' << 8) + '-',    0x014c },  /* Omacron */
  { ('O' << 8) + '.',    0x00d6 },  /* Odieresis */
  { ('O' << 8) + '/',    0x00d8 },  /* Oslash */
  { ('O' << 8) + '@',    0x00d4 },  /* Ocircumflex */
  { ('O' << 8) + '`',    0x00d2 },  /* Ograve */
  { ('O' << 8) + 'u',    0x014e },  /* Obreve */
  { ('O' << 8) + '~',    0x00d5 },  /* Otilde */

  { ('R' << 8) + '\'',   0x0154 },  /* Racute */
  { ('R' << 8) + 'v',    0x0158 },  /* Rcaron */

  { ('S' << 8) + '\'',   0x015a },  /* Sacute */
  { ('S' << 8) + ',',    0x015e },  /* Scedilla */
  { ('S' << 8) + '@',    0x015c },  /* Scircumflex */
  { ('S' << 8) + 'v',    0x0160 },  /* Scaron */

  { ('T' << 8) + ',',    0x0162 },  /* Tcedilla */
  { ('T' << 8) + 'v',    0x0164 },  /* Tcaron */

  { ('U' << 8) + '\'',   0x00da },  /* Uacute */
  { ('U' << 8) + '-',    0x016a },  /* Umacron */
  { ('U' << 8) + '.',    0x00dc },  /* Udieresis */
  { ('U' << 8) + '@',    0x00db },  /* Ucircumflex */
  { ('U' << 8) + '`',    0x00d9 },  /* Ugrave */
  { ('U' << 8) + 'o',    0x016e },  /* Uring */
  { ('U' << 8) + 'u',    0x016c },  /* Ubreve */
  { ('U' << 8) + '~',    0x0168 },  /* Utilde */

  { ('W' << 8) + '@',    0x0174 },  /* Wcircumflex */

  { ('Y' << 8) + '\'',   0x00dd },  /* Yacute */
  { ('Y' << 8) + '.',    0x0178 },  /* Ydieresis */
  { ('Y' << 8) + '@',    0x0176 },  /* Ycircumflex */

  { ('Z' << 8) + '\'',   0x0179 },  /* Zacute */
  { ('Z' << 8) + 'v',    0x017d },  /* Zcaron */

  { ('a' << 8) + '\'',   0x00e1 },  /* aacute */
  { ('a' << 8) + '-',    0x0101 },  /* amacron */
  { ('a' << 8) + '.',    0x00e4 },  /* adieresis */
  { ('a' << 8) + '@',    0x00e2 },  /* acircumflex */
  { ('a' << 8) + '`',    0x00e0 },  /* agrave */
  { ('a' << 8) + 'o',    0x00e5 },  /* aring */
  { ('a' << 8) + 'u',    0x0103 },  /* abreve */
  { ('a' << 8) + '~',    0x00e3 },  /* atilde */

  { ('c' << 8) + '\'',   0x0107 },  /* cacute */
  { ('c' << 8) + ')',    0x00a9 },  /* copyright */
  { ('c' << 8) + ',',    0x00e7 },  /* ccedilla */
  { ('c' << 8) + '@',    0x0109 },  /* ccircumflex */
  { ('c' << 8) + 'v',    0x010d },  /* ccaron */

  { ('d' << 8) + '-',    0x0111 },  /* dcroat */
  { ('d' << 8) + 'v',    0x010f },  /* dcaron */

  { ('e' << 8) + '\'',   0x00e9 },  /* eacute */
  { ('e' << 8) + '-',    0x0113 },  /* emacron */
  { ('e' << 8) + '.',    0x00eb },  /* edieresis */
  { ('e' << 8) + '@',    0x00ea },  /* ecircumflex */
  { ('e' << 8) + '`',    0x00e8 },  /* egrave */
  { ('e' << 8) + 'u',    0x0115 },  /* ebreve */
  { ('e' << 8) + 'v',    0x011b },  /* ecaron */

  { ('g' << 8) + '@',    0x011d },  /* gcircumflex */
  { ('g' << 8) + 'u',    0x011f },  /* gbreve */

  { ('h' << 8) + '@',    0x0125 },  /* hcircumflex */

  { ('i' << 8) + '\'',   0x00ed },  /* iacute */
  { ('i' << 8) + '-',    0x012b },  /* imacron */
  { ('i' << 8) + '.',    0x00ef },  /* idieresis */
  { ('i' << 8) + '@',    0x00ee },  /* icircumflex */
  { ('i' << 8) + '`',    0x00ec },  /* igrave */
  { ('i' << 8) + 'u',    0x012d },  /* ibreve */
  { ('i' << 8) + '~',    0x0129 },  /* itilde */

  { ('j' << 8) + '@',    0x0135 },  /* jcircumflex */

  { ('l' << 8) + '\'',   0x013a },  /* lacute */
  { ('l' << 8) + '/',    0x0142 },  /* lslash */
  { ('l' << 8) + 'v',    0x013e },  /* lcaron */

  { ('n' << 8) + '\'',   0x0144 },  /* nacute */
  { ('n' << 8) + 'v',    0x0148 },  /* ncaron */
  { ('n' << 8) + '~',    0x00f1 },  /* ntilde */

  { ('o' << 8) + '\'',   0x00f3 },  /* oacute */
  { ('o' << 8) + '-',    0x014d },  /* omacron */
  { ('o' << 8) + '.',    0x00f6 },  /* odieresis */
  { ('o' << 8) + '/',    0x00f8 },  /* oslash */
  { ('o' << 8) + '@',    0x00f4 },  /* ocircumflex */
  { ('o' << 8) + '`',    0x00f2 },  /* ograve */
  { ('o' << 8) + 'u',    0x014f },  /* obreve */
  { ('o' << 8) + '~',    0x00f5 },  /* otilde */

  { ('r' << 8) + '\'',   0x0155 },  /* racute */
  { ('r' << 8) + 'v',    0x0159 },  /* rcaron */

  { ('s' << 8) + '\'',   0x015b },  /* sacute */
  { ('s' << 8) + ',',    0x015f },  /* scedilla */
  { ('s' << 8) + '@',    0x015d },  /* scircumflex */
  { ('s' << 8) + 'v',    0x0161 },  /* scaron */

  { ('t' << 8) + ',',    0x0163 },  /* tcedilla */
  { ('t' << 8) + 'v',    0x0165 },  /* tcaron */

  { ('u' << 8) + '\'',   0x00fa },  /* uacute */
  { ('u' << 8) + '-',    0x016b },  /* umacron */
  { ('u' << 8) + '.',    0x00fc },  /* udieresis */
  { ('u' << 8) + '@',    0x00fb },  /* ucircumflex */
  { ('u' << 8) + '`',    0x00f9 },  /* ugrave */
  { ('u' << 8) + 'o',    0x016f },  /* uring */
  { ('u' << 8) + 'u',    0x016d },  /* ubreve */
  { ('u' << 8) + '~',    0x0169 },  /* utilde */

  { ('w' << 8) + '@',    0x0175 },  /* wcircumflex */

  { ('y' << 8) + '\'',   0x00fd },  /* yacute */
  { ('y' << 8) + '.',    0x00ff },  /* ydieresis */
  { ('y' << 8) + '@',    0x0177 },  /* ycircumflex */

  { ('z' << 8) + '\'',   0x017a },  /* zacute */
  { ('z' << 8) + 'v',    0x017e },  /* zcaron */
};

/* Number of entries in esctab; the initial upper bound for the binary chop. */

static int esctabcount = sizeof(esctab)/sizeof(esctabstr);


/*************************************************
*          Check for a UTF-8 character           *
*************************************************/

/* Decide whether the byte addressed by the argument begins a well-formed UTF-8
character in a zero-terminated string. The lead byte fixes how many
continuation bytes must follow (looked up in utf8_table4); each of those must
have the form 10xx xxxx, and the shortest possible encoding must have been
used, that is, overlong sequences are rejected. Because a terminating zero is
not a valid continuation byte, the scan never runs past the end of the string.

Argument:  pointer to the first byte
Returns:   the length of the character (1 - 6) or -1 if invalid UTF-8 start
*/

static int
check_utf8(uschar *pp)
{
int lead = pp[0];
int extra;          /* Number of continuation bytes required */
int k;

/* Plain ASCII is a one-byte character; a byte of the form 10xx xxxx can only
appear inside a character, never at its start. */

if (lead < 0x80) return 1;
if (lead < 0xc0) return -1;

extra = utf8_table4[lead & 0x3f];

/* There is always at least one continuation byte. */

if ((pp[1] & 0xc0) != 0x80) return -1;

/* Reject overlong encodings. Which bits have to be non-zero depends on the
length of the sequence. */

if (extra == 1)
  {
  /* xx00 000x in the lead byte; there is nothing further to inspect */
  return ((lead & 0x3e) == 0)? -1 : 2;
  }
else if (extra == 2)
  {
  /* 1110 0000, xx0x xxxx */
  if (lead == 0xe0 && (pp[1] & 0x20) == 0) return -1;
  }
else if (extra == 3)
  {
  /* 1111 0000, xx00 xxxx */
  if (lead == 0xf0 && (pp[1] & 0x30) == 0) return -1;
  }
else if (extra == 4)
  {
  /* 1111 1000, xx00 0xxx */
  if (lead == 0xf8 && (pp[1] & 0x38) == 0) return -1;
  }
else if (extra == 5)
  {
  /* 0xfe and 0xff are never valid; otherwise 1111 1100, xx00 00xx */
  if (lead == 0xfe || lead == 0xff ||
     (lead == 0xfc && (pp[1] & 0x3c) == 0)) return -1;
  }

/* The third and subsequent bytes, if any, must all be continuation bytes. */

for (k = 2; k <= extra; k++)
  {
  if ((pp[k] & 0xc0) != 0x80) return -1;
  }

return extra + 1;
}


/*************************************************
*           Read a string of any length          *
*************************************************/

/* The string may extend over more than one line; newlines count as spaces.
Strings are expected to be in UTF-8 format, but for backwards compatibility,
any non-UTF-8 bytes are taken as single 8-bit characters and converted to
UTF-8.

The string is accumulated in a buffer obtained from store_Xget(), which is
replaced by a bigger one whenever fewer than 6 spare bytes remain. On success
the caller owns the returned buffer. If the opening quote is missing (ERR10) or
the input ends before the closing quote (ERR23), an error is given and NULL is
returned; in the latter case the partly filled buffer is freed here so that it
is not leaked.

Arguments:  none
Returns:    pointer to store containing the string, or NULL after an error
*/

uschar *
string_read(void)
{
int p = 0;                       /* Number of bytes stored so far */
int size = string_start_size;    /* Current size of the buffer */
uschar *s;

sigch();
if (read_ch != '\"')
  {
  error_moan(ERR10, "String in quotes");
  return NULL;
  }

s = store_Xget(size);
next_ch();

while (read_ch != '\"' && read_ch != EOF)
  {
  /* Handle bytes with the top bit set */

  if (read_ch > 0x7f)
    {
    uschar buffer[8];
    uschar *pp = read_chptr - 1;    /* Starting byte */
    int n = check_utf8(pp);         /* Length of UTF-8 character */

    /* This byte is not the start of a UTF-8 character; convert it to a UTF-8
    string. */

    if (n < 1)
      {
      n = misc_ord2utf8(read_ch, buffer);
      pp = buffer;
      }

    /* This is a UTF-8 character; advance the character pointer over its
    remaining bytes so that the next_ch() below moves to the next character. */

    else read_chptr += n - 1;

    /* Add the bytes to the string. */

    memcpy(s+p, pp, n);
    p += n;
    }

  /* Bytes without the top bit are always one-byte characters. */

  else s[p++] = (read_ch == '\n')? ' ' : read_ch;

  /* Ensure there's enough room for another full-length UTF-8 character and
  the terminating zero. Only the first p bytes are in use, so that is all that
  has to be carried over to the new buffer. */

  if (p >= size-6)
    {
    int increment = (size > 1024)? 1024 : size;
    uschar *ss = store_Xget(size + increment);
    memcpy(ss, s, p);
    store_free(s);
    s = ss;
    size += increment;
    }

  next_ch();
  }

s[p] = 0;

/* Unterminated string: complain, and release the buffer because the caller
gets NULL and so has no way of freeing it. */

if (read_ch == EOF)
  {
  error_moan(ERR23);
  store_free(s);
  return NULL;
  }

next_ch();
return s;
}


/*************************************************
*   Check the escapes in a string & transpose    *
*************************************************/

/* This is called for all but PostScript strings on reading, so that any errors
are given at that time. We set a flag (in_string_check) while handling escapes
to ensure that the \a^ -> \a@ fudge works correctly. Character codes are
checked for supported values, and those that are not supported in standardly
encoded fonts are converted to use the Symbol font where possible.

Another job of this function is to look for note letter transpositions in the
string and to carry them out at this time. Other escapes are re-processed
later, at output time.

A transposition is written \tX, where X is a note letter A-G, optionally
followed by #, $, or % for sharp, flat, or natural. If the string contains at
least one "\t", a new string is built in which every such item is replaced by
the transposed note letter, followed where necessary by an escape into the
music font that displays the accidental. The original string is not freed by
this function.

Argument:   the string, in dynamic store
Returns:    the string, may be modified and/or copied
*/

uschar *
string_check(uschar *s)
{
int c;
uschar *ss, *tt, *xs;

/* If there are any transposed note names in the string, we must build a new
string with the transpositions done. */

if ((tt = Ustrstr(s, "\\t")) != NULL)
  {
  int p = tt - s;
  int size = Ustrlen(s) * 2 + 14;
  ss = store_Xget(size);

  memcpy(ss, s, p);

  for (s = tt; *s != 0; s++)
    {
    int i, pitch, abspitch;
    int note, acc;

    /* One trip round this loop stores at most 12 bytes: the note letter, the
    10-byte sharp sequence, and the zero that Ustrcpy() adds after it. Make
    sure that much room exists before starting. This also guarantees that the
    terminating zero stored after the loop is inside the buffer. */

    if (p + 12 > size)
      {
      int increment = (size > 1024)? 1024 : size;
      uschar *sss = store_Xget(size + increment);
      memcpy(sss, ss, size);
      store_free(ss);
      ss = sss;
      size += increment;
      }

    /* Anything that is not \t followed by a note letter is just copied. */

    if (*s != '\\' || s[1] != 't' || s[2] < 'A' || s[2] > 'G')
      {
      ss[p++] = *s;
      continue;
      }

    /* Leave s at the last character of the item, because the loop itself
    advances by one. */

    note = s[2];
    s += 2;
    acc = ac_none;

    if (s[1] == '#') acc = ac_sharp;
    else if (s[1] == '$') acc = ac_flat;
    else if (s[1] == '%') acc = ac_natural;

    if (acc != ac_none) s++;

    pitch = 36 + read_basicpitch[note - 'A'];
    abspitch = pitch + read_accpitch[acc] - 2;

    /* If we are not in a stave, the appropriate values for the transposing
    function should have been fudged up by the heading reading code. */

    if (stave_transpose != no_transpose)
      abspitch = transpose_note(
        abspitch,    /* input absolute pitch */
        &pitch,      /* for output new pitch */
        &acc,        /* for output accidental */
        0,           /* do not force an output accidental */
        TRUE,        /* force accidental - not relevant for text */
        TRUE,        /* single note - not relevant for text */
        TRUE,        /* texttranspose */
        0);          /* tie count - not relevant for text */

    /* Retrieve a pitch within an octave, and turn it back into a note letter.
    Only the first six letters are tested; if none of them matches, i is left
    at 6, which selects 'G'. */

    pitch %= 12;
    for (i = 0; i < 6; i++)
      {
      if (pitch == read_basicpitch[i]) break;
      }
    ss[p++] = 'A' + i;

    /* Handle accidentals. Double sharps and double flats are never seen, so
    change to the appropriate enharmonic note. We should only ever get
    "sensible" double accidentals. */

    switch (acc)
      {
      case ac_sharp:   Ustrcpy(ss+p, "\\*u*#*d*<\\"); p += 10; break;
      case ac_flat:    Ustrcpy(ss+p, "\\*$*<\\"); p += 6; break;

      #ifdef NEVER
      /* This code retains a natural if that note is sharpened or flattened in
      the new key signature, but in practice that isn't what is actually seen
      in printed music, where naturals never occur in chord names. */

      case ac_natural:
      knumacc = main_keysigtable[stave_key_tp];  /* Number of accidentals */
      if (abs(knumacc) <= 7)                     /* Something odd otherwise */
        {
        uschar *nlist = (knumacc > 0)? US"FCGDAEB" : US"BEADGCF";
        for (i = 0; i < abs(knumacc); i++)
          {
          if (ss[p-1] == nlist[i]) break;
          }
        if (i < abs(knumacc))
          {
          Ustrcpy(ss+p, "\\*%*<\\");
          p += 6;
          }
        }
      break;
      #endif

      case ac_dsharp:
      if (ss[p-1] == 'G') ss[p-1] = 'A'; else ss[p-1] += 1;
      break;

      case ac_dflat:
      if (ss[p-1] == 'A') ss[p-1] = 'G'; else ss[p-1] -= 1;
      break;
      }
    }

  ss[p] = 0;
  s = ss;
  }

/* Now do an escape check on the (new) string. This gets any errors early. The
setting of in_string_check is tested in string_escape() to cause an error for
\@, which is used internally. The text that string_escape() generates is of no
interest here and is thrown away, but it has to be put somewhere.
string_escape() is not told how big its output buffer is, and a single \*...\
sequence can contain any number of music characters, so a small fixed-size
buffer could be overrun. The scratch buffer is therefore sized from the string
itself, with some slack for escapes (such as page numbers) whose output is
longer than their input. */

ss = s;                     /* The string to return */
xs = store_Xget(Ustrlen(s) + 80);

in_string_check = TRUE;
while ((c = *s++) != 0)
  {
  if (c == '\\')
    {
    int nf, r;
    s = string_escape(s, xs, &nf, &r);
    }
  }
in_string_check = FALSE;

store_free(xs);
return ss;
}


/*************************************************
*            Get an integer in a string          *
*************************************************/

/* Read an unsigned decimal number that is being used as a character code. The
pointer is updated to point past the end of the number, that is, past every
consecutive digit, however many there are. Values outside the range 32 - 255
provoke error ERR53 and are replaced by 32.

Once the value has gone above 255 it is already known to be out of range, so
further digits are skipped without being accumulated. This stops a very long
run of digits from overflowing the int.

Argument:  pointer to pointer to the first digit (updated)
Returns:   the integer, always in the range 32 - 255
*/

static int
string_integer(uschar **p)
{
uschar *s = *p;
int n = 0;

while (isdigit(*s))
  {
  if (n <= 255) n = (n * 10) + (*s - '0');
  s++;
  }

if (n < 32 || n > 255) { error_moan(ERR53); n = 32; }
*p = s;
return n;
}



/*************************************************
*             Decode string escape               *
*************************************************/

/* This function is called during the checking process when a string is first
read (to catch escape errors early), in which case in_string_check is TRUE. It
is also called again later when the string is processed for real. On entry, the
pointer is past the initial '\'; return it past the whole escape. The data
returned is:

  In ss, a string to be printed, which can be empty;
  In font, a font to change to before printing, set to
    -1 for no change; a pseudo-font is used for small caps;
  In revert, a flag saying whether to revert to the old
    font after printing - FALSE for font change escapes
    and TRUE for special character escapes.

The size of the ss buffer is not passed in. Most escapes generate only a few
bytes, but a \*...\ sequence generates one or two bytes for each music
character it contains, so the caller must supply a buffer that is big enough.

The input string may be modified: \x^ is rewritten as \x@, and after an error
the initial backslash and everything that follows the point of error are
overwritten with '/'. The returned pointer never lies beyond the terminating
zero of the input string; care is taken not to step over the zero when a
required character turns out to be missing at the end of the string.

This function is now a horrible mess. It could do with a good Spring Clean.

Arguments:
  s          the escape, after the initial '\'
  ss         where to return string data       ) see
  font       where to return font data         )  comment
  revert     where to return font revert data  )   above

Returns:     see above
*/

uschar *
string_escape(uschar *s, uschar *ss, int *font, int *revert)
{
uschar *escape_start = s - 1;    /* The introductory backslash */
uschar *error_argS = NULL;
uschar escbuffer[8];
int error_argN = 0;
int error_number = ERR54;
int unicode = -1;
int bot, top, ee;

*font = -1;
*ss = 0;
*revert = FALSE;

/* Deal with font changes */

if (Ustrncmp(s, "rm\\", 3) == 0) { *font = font_rm; return s+3; }
if (Ustrncmp(s, "it\\", 3) == 0) { *font = font_it; return s+3; }
if (Ustrncmp(s, "bf\\", 3) == 0) { *font = font_bf; return s+3; }
if (Ustrncmp(s, "bi\\", 3) == 0) { *font = font_bi; return s+3; }
if (Ustrncmp(s, "sy\\", 3) == 0) { *font = font_sy; return s+3; }
if (Ustrncmp(s, "mu\\", 3) == 0) { *font = font_mu; return s+3; }
if (Ustrncmp(s, "mf\\", 3) == 0) { *font = font_mf; return s+3; }
if (Ustrncmp(s, "sc\\", 3) == 0) { *font = font_sc; return s+3; }

/* 'x' is overloaded: if it is followed by a small number, it's a font change
to an extra font. Otherwise it is a character number, specified in hexadecimal,
which we deal with later on.

NOTE(review): a number of zero is not rejected here, so \x0\ selects
font_xx - 1. Confirm whether that is intended. */

if (*s == 'x' && isdigit(s[1]))
  {
  if (s[2] == '\\')
    {
    *font = font_xx + s[1] - '1';
    return s+3;
    }
  else if (isdigit(s[2]) && s[3] == '\\')
    {
    int n = 10*(s[1] - '0') + s[2] - '0';
    if (n <= MaxExtraFont)
      {
      *font = font_xx + n - 1;
      return s+4;
      }
    }
  }

/* If not a font change, deal with other escapes */

*revert = TRUE;

/* Deal with ligatures and double typographic quotes */

if      (Ustrncmp(s, "fi",  2) == 0) { unicode = 0xfb01; s += 2; }
else if (Ustrncmp(s, "fl",  2) == 0) { unicode = 0xfb02; s += 2; }
else if (Ustrncmp(s, "ss",  2) == 0) { unicode = 0x00df; s += 2; }
else if (Ustrncmp(s, "---", 3) == 0) { unicode = 0x2014; s += 3; }
else if (Ustrncmp(s, "--",  2) == 0) { unicode = 0x2013; s += 2; }
else if (Ustrncmp(s, "<<",  2) == 0) { unicode = 0x201C; s += 2; }
else if (Ustrncmp(s, ">>",  2) == 0) { unicode = 0x201D; s += 2; }

else switch (*s)
  {
  case 0:         /* Backslash as last character is ignored */
  break;

  case '@':       /* starts within-string comment */
  while (*(++s) != '\\' && *s != 0);
  if (*s == '\\') s++;
  break;

  case '\\':
  *ss++ = *s++;   /* \\ gives \ */
  break;

  case '\'':      /* \' gives ascii quote */
  case '`':       /* \` gives ascii grave */
  *ss++ = *s;
  s++;
  break;

  case '*':       /* \* escapes into the music font */
  if (*(++s) == '*') { *font = font_mf; s++; }
    else *font = font_mu;

  /* Loop for multiple musical escapes */

  for (;;)
    {
    uschar *p;

    /* The test for zero is essential: strchr() treats the terminating zero of
    the searched string as part of it, so looking for a zero byte "succeeds"
    with an offset that is past the end of music_escape_values, and the scan
    would then carry on beyond the end of the input string. */

    if (*s != 0 && (p = Ustrchr(music_escapes, *s)) != NULL)
      {
      int n = p - music_escapes;
      *ss++ = music_escape_values[n];
      if (*(++s) == '.' && n <= 4)     /* Allow '.' after bsmcQ */
        {
        *ss++ = 63;
        s++;
        }
      }
    else if (isdigit(*s)) ss += misc_ord2utf8(string_integer(&s), ss);

    else if (*s == 'x' && isxdigit(s[1]))
      {
      int ucode = 0;
      s++;
      while (isxdigit(*s))
        {
        if (isdigit(*s))
          ucode = (ucode << 4) | (*s - '0');
        else
          ucode = (ucode << 4) | (tolower(*s) - 'a' + 10);
        s++;
        }
      ss += misc_ord2utf8(ucode, ss);
      }
    else
      {
      error_argS = US"after \\* or \\** a music character code or number is";
      break;
      }

    if (*s == '\\') { s++; break; }

    /* Another music character must be introduced by '*' (or by "**" when in
    the font_mf variant). An unexpected character is skipped before the error
    is raised, but the terminating zero must never be skipped, because the
    error handling below scans forward from s. */

    if (*s != '*')
      {
      if (*s != 0) s++;
      error_argS = US"\"\\\" or \"*\"";
      break;
      }
    s++;

    if (*font == font_mf)
      {
      if (*s != '*')
        {
        if (*s != 0) s++;
        error_argS = US"\"*\"";
        break;
        }
      s++;
      }
    }
  break;

  /* Non-musical escapes */

  case '?': unicode = 0x00bf; s++; break;  /* Spanish question mark */

  /* Page number: \p\ always, \po\ only on odd pages, \pe\ only on even
  pages. */

  case 'p':
  if (*(++s) == '\\')
    {
    ss += sprintf(CS ss, "%d", curpage->number);
    s++;
    }
  else if (*s == 'o' && s[1] == '\\')
    {
    if ((curpage->number & 1) != 0) ss += sprintf(CS ss, "%d", curpage->number);
    s += 2;
    }
  else if (*s == 'e' && s[1] == '\\')
    {
    if ((curpage->number & 1) == 0) ss += sprintf(CS ss, "%d", curpage->number);
    s += 2;
    }
  else error_argS = US"\\p\\, \\po\\, or \\pe\\";
  break;

  /* \se\ and \so\ bracket text that is skipped on even and odd pages,
  respectively: when the current page has that parity, everything up to and
  including the next matching \e\ or \o\ (or the end of the string) is passed
  over. Otherwise, \s followed by a decimal or \sx followed by a hexadecimal
  number is a character in the Symbol font, and anything else is tried as an
  accented 's'. */

  case 's':
  if (Ustrncmp(s+1, "e\\", 2) == 0)
    {
    s += 3;
    if ((curpage->number & 1) == 0) for (;;)
      {
      while (*s != '\\' && *s != 0) s++;
      if (*s == 0) break;
      if (Ustrncmp(++s, "e\\", 2) == 0)
        {
        s += 2;
        break;
        }
      }
    }
  else if (Ustrncmp(s+1, "o\\", 2) == 0)
    {
    s += 3;
    if ((curpage->number & 1) != 0) for (;;)
      {
      while (*s != '\\' && *s != 0) s++;
      if (*s == 0) break;
      if (Ustrncmp(++s, "o\\", 2) == 0)
        {
        s += 2;
        break;
        }
      }
    }
  else if (isdigit(s[1]))
    {
    s++;
    *font = font_sy;
    *ss++ = string_integer(&s);
    if (*s == '\\') s++;
    else
      {
      if (*s != 0) s++;          /* Never step over the terminating zero */
      error_argS = US"\\";
      }
    }
  else if (s[1] == 'x' && isxdigit(s[2]))
    {
    *font = font_sy;
    unicode = 0;
    s += 2;
    while (isxdigit(*s))
      {
      if (isdigit(*s))
        unicode = (unicode << 4) | (*s - '0');
      else
        unicode = (unicode << 4) | (tolower(*s) - 'a' + 10);
      s++;
      }
    if (*s == '\\') s++;
    else
      {
      if (*s != 0) s++;          /* Never step over the terminating zero */
      error_argS = US"\\";
      }
    }
  else goto ACCENTED;
  break;

  /* The case of \x for a font change to an extra font is handled above; here
  we handle it as an escape for a Unicode character.

  NOTE(review): if the string ends immediately after the hex digits,
  error_argN is set to zero, which the test below treats as "no error", so the
  missing terminating backslash goes unreported. */

  case 'x':
  unicode = 0;
  s++;
  while (isxdigit(*s))
    {
    if (isdigit(*s))
      unicode = (unicode << 4) | (*s - '0');
    else
      unicode = (unicode << 4) | (tolower(*s) - 'a' + 10);
    s++;
    }
  if (*s != '\\')
    {
    error_number = ERR27;
    error_argN = *s;
    }
  else s++;
  break;

  /* 's' is not in this list because of \se\ and \so\. There will be a goto
  from above when \s is followed by something else. */

  case 'a':
  case 'c':
  case 'd':
  case 'e':
  case 'g':
  case 'h':
  case 'i':
  case 'j':
  case 'l':
  case 'n':
  case 'o':
  case 'r':
  case 't':
  case 'u':
  case 'w':
  case 'y':
  case 'z':
  case 'A':
  case 'C':
  case 'D':
  case 'E':
  case 'G':
  case 'H':
  case 'J':
  case 'I':
  case 'L':
  case 'N':
  case 'O':
  case 'R':
  case 'S':
  case 'T':
  case 'U':
  case 'W':
  case 'Y':
  case 'Z':

  ACCENTED:

  ee = *s << 8;

  /* There is a problem with escapes using circumflex, because this is also
  used to split up underlay strings for centering. This is dealt with as
  follows: All strings are checked as they are read. If we are in the
  string_check() function, \@ is an error, because it is not defined for the
  user to use. After that test, if we see \^ we turn it into \@. Thus, when the
  string is interpreted for escapes when actually being processed for display,
  it will see \@ and not \^. */

  if (*(++s) == '@' && in_string_check)
    {
    error_number = ERR125;
    error_argS = s - 1;
    break;
    }
  if (*s == '^') *s = '@';   /* Should only ever see when checking */

  /* Pick up a 2-character escape code. If the string ends straight after the
  letter there is no second character; leave s at the terminating zero, which
  gives a code that cannot match anything and so is reported as an error. */

  if (*s != 0) ee |= *s++;

  /* \c] == \C] is special; it gives copyright from the Symbol font */

  if (ee == ('c' << 8) + ']' || ee == ('C' << 8) + ']')
    {
    *font = font_sy;
    *ss++ = 211;
    break;
    }

  /* The rest can be handled by a table, which turns them into Unicode */

  bot = 0;
  top = esctabcount;
  while (bot < top)
    {
    int mid = (bot + top)/2;
    if (ee == esctab[mid].escape)
      {
      unicode = esctab[mid].unicode;
      break;
      }
    if (ee < esctab[mid].escape) top = mid; else bot = mid + 1;
    }

  /* Unknown escape: quote the backslash and up to two characters after it in
  the error message. The copy is taken from the start of the escape rather
  than relative to s, because s has moved on by only one character when the
  string ended early. */

  if (unicode < 0)
    {
    error_number = ERR125;
    Ustrncpy(escbuffer, escape_start, 3);
    escbuffer[3] = 0;
    error_argS = escbuffer;
    }
  break;

  /* A decimal number is allowed for a character number */

  default:
  if (isdigit(*s))
    {
    unicode = 0;
    while (isdigit(*s)) unicode = unicode * 10 + *s++ -'0';
    if (*s != '\\') error_argS = US"\"\\\""; else s++;
    }
  else
    {
    error_number = ERR40;
    error_argN = *s;
    }
  break;
  }

/* If there has been an error, output the message with an appropriate
correction so the point is at the right place for strings that are all on one
line. Then skip to the end of the string. Errors should only occur during the
reading phase when this routine is called for checking. Flatten the original
'\' character, to prevent a second error, which otherwise happens when an
underlay string is split. Also any subsequent ones. This relies on s not being
beyond the terminating zero. */

if (error_argS != NULL || error_argN != 0)
  {
  *escape_start = '/';
  while (*s) { *s++ = '/'; error_ptr_correction++; }
  error_ptr_correction += 1;
  if (error_argS != NULL) error_moan(error_number, error_argS);
  if (error_argN != 0) error_moan(error_number, error_argN);
  }

/* If there is a value in unicode, convert it to a string */

if (unicode >= 0) ss += misc_ord2utf8(unicode, ss);

/* Terminate the output string and return */

*ss = 0;
return s;
}


/*************************************************
*     Read string in stave and handle options    *
*************************************************/

/* Read a string that appears within a stave, process any '/' options that
follow it, and record the result. For an ordinary string a b_text item is added
to the stave data, preceded by a b_textX item if rotation, a halfway value, or
an offset was specified. For an underlay or overlay string (text_ul set) no
item is created here; instead a ulaypend block is inserted into the
stave_pendulay chain. Nothing is done if string_read() returns NULL.

Argument:  TRUE for a rehearsal "letter"
Returns:   nothing
*/

void
string_stavestring(BOOL rehearse)
{
BOOL hadab = FALSE;               /* Set by /a, /ao, /a<n>, /b, /bu, /b<n> */
int rotate = 0;                   /* Value given with /rot */
int halfway = 0;                  /* Value given with /h (500 if no number) */
int offset = 0;                   /* Accumulated /lc and /rc movements */
int htype = 0;                    /* 1-based position in main_htypes; 0 = none */
int size = -1;                    /* Negative means "not explicitly set" */
int flags = stave_textflags;      /* Start from the current stave defaults */
int adjustx = 0;                  /* Accumulated /l and /r movements */
int adjusty = 0;                  /* Accumulated /u and /d, or absolute level */
int fontid;
uschar *s = string_read();

if (s == NULL) return;

/* Note that two successive slashes are a caesura, not a bad string option. */

while (read_ch == '/' && *read_chptr != '/')
  {
  next_ch();
  switch(read_ch)
    {
    /* /a, /ao, or /a<n>: above the stave, optionally at the overlay level or
    at an absolute position. All other vertical-position flags are cleared. */

    case 'a':
    hadab = TRUE;
    flags &= ~(text_ul|text_fb|text_ps|text_middle|text_atulevel|text_absolute);
    flags |= text_above;
    next_ch();
    if (read_ch == 'o')
      {
      flags |= text_atulevel;
      next_ch();
      }
    else if (isdigit(read_ch))
      {
      flags |= text_absolute;
      adjusty = read_integer(TRUE);
      }
    break;

    /* /box and /bar are checked first; anything else starting with 'b' is
    treated as /b, /bu, or /b<n> (below the stave). For /b<n> the value is
    stored negated. */

    case 'b':
    next_ch();
    if (read_ch == 'o' && *read_chptr == 'x')
      {
      flags |= text_box;
      next_ch();
      next_ch();
      }
    else if (read_ch == 'a' && *read_chptr == 'r')
      {
      flags |= text_baralign;
      next_ch();
      next_ch();
      }
    else   /* /b or /bu */
      {
      hadab = TRUE;
      flags &= ~(text_ul | text_fb | text_above | text_ps |
        text_middle | text_atulevel | text_absolute);
      if (read_ch == 'u')
        {
        flags |= text_atulevel;
        next_ch();
        }
      else if (isdigit(read_ch))
        {
        flags |= text_absolute;
        adjusty = -read_integer(TRUE);
        }
      }
    break;

    /* /c and /e are mutually exclusive: setting one clears the other. */

    case 'c':
    flags &= ~text_endalign;
    flags |= text_centre;
    next_ch();
    break;

    /* /d<n>: move down. NOTE(review): read_movevalue() is called with the
    option letter still current, so it presumably skips the letter itself -
    confirm against its definition. */

    case 'd':
    adjusty -= read_movevalue();
    break;

    case 'e':
    flags &= ~text_centre;
    flags |= text_endalign;
    next_ch();
    break;

    /* /F: follow-on string. Only allowed when the previous string permitted
    it (stave_string_followOK) and never for rehearsal strings. */

    case 'F':
    if (!stave_string_followOK || rehearse) error_moan(ERR141);
      else flags |= text_followon;
    next_ch();
    break;

    /* /fb: figured bass */

    case 'f':
    next_ch();
    if (read_ch == 'b')
      {
      flags &= ~(text_ul | text_above | text_ps | text_middle | text_atulevel);
      flags |= text_fb;
      }
    else error_moan(ERR37, "/fb");
    next_ch();
    break;

    /* /h or /h<n>: halfway positioning; 500 is used if no number follows. */

    case 'h':
    next_ch();
    halfway = 500;
    if (isdigit(read_ch)) halfway = read_integer(TRUE);
    break;

    /* /lc<n> adjusts the offset; plain /l<n> adjusts the x position. */

    case 'l':
    if (*read_chptr == 'c')
      {
      next_ch();
      offset -= read_movevalue();
      }
    else adjustx -= read_movevalue();
    break;

    /* /m: middle positioning; clears the other vertical-position flags. */

    case 'm':
    flags &= ~(text_ul|text_fb|text_above|text_ps|text_atulevel|text_absolute);
    flags |= text_middle;
    next_ch();
    break;

    /* /nc and /ne cancel centring and end-alignment respectively. */

    case 'n':
    next_ch();
    if (read_ch == 'c') flags &= ~text_centre;
    else if (read_ch == 'e') flags &= ~text_endalign;
    else error_moan(ERR37, "/nc or /ne");
    next_ch();
    break;

    /* /ol: overlay, which is represented as underlay plus the "above" flag. */

    case 'o':
    next_ch();
    if (read_ch == 'l')
      {
      flags &= ~(text_fb | text_ps);
      flags |= text_ul | text_above;
      }
    else error_moan(ERR37, "/ol");
    next_ch();
    break;

    /* /ps: PostScript string */

    case 'p':
    next_ch();
    if (read_ch == 's')
      {
      flags &= ~(text_above | text_fb | text_ul | text_middle |
        text_atulevel | text_absolute);
      flags |= text_ps;
      }
    else error_moan(ERR37, "/ps");
    next_ch();
    break;

    /* The longer options /ring and /rot are tested before /rc<n> and the
    plain /r<n> movement. The next_ch() calls step over the option name. */

    case 'r':
    if (read_chptr[0] == 'i' && read_chptr[1] == 'n' && read_chptr[2] == 'g')
      {
      flags |= text_ring;
      next_ch();
      next_ch();
      next_ch();
      next_ch();
      }
    else if (read_chptr[0] == 'o' && read_chptr[1] == 't')
      {
      next_ch();
      next_ch();
      next_ch();
      read_expect_integer(&rotate, TRUE, TRUE);
      }
    else if (*read_chptr == 'c')
      {
      next_ch();
      offset += read_movevalue();
      }
    else adjustx += read_movevalue();
    break;

    /* /s<n>: font size. The number read is reduced by one before being range
    checked against MaxFontSizes; an invalid value is replaced by zero. */

    case 's':
    next_ch();
    if (read_expect_integer(&size, FALSE, FALSE))
      {
      if (--size < 0 || size >= MaxFontSizes)
        { error_moan(ERR39, MaxFontSizes); size = 0; }
      }
    break;

    /* /ts: align with the time signature */

    case 't':
    next_ch();
    if (read_ch == 's') flags |= text_timealign;
      else error_moan(ERR37, "/ts");
    next_ch();
    break;

    /* /u<n> is an upward movement; /ul selects underlay. The digit test looks
    at the character after the 'u' to tell them apart. */

    case 'u':
    if (isdigit(*read_chptr)) adjusty += read_movevalue(); else
      {
      next_ch();
      if (read_ch == 'l')
        {
        flags &= ~(text_fb | text_above | text_ps);
        flags |= text_ul;
        }
      else error_moan(ERR37, "/u<number> or /ul");
      next_ch();
      }
    break;

    default:
    error_moan(ERR37, "/a, /ao, /b, /bar, /box, /bu, /d, /e, /F, /fb, "
      "/h, /l, /m, /ol, /ps, /r, /ring, /s, /u or /ul");
    next_ch();
    break;

    /* The second/third string stuff for underlay/overlay must come last. The
    only way to get out of a case within a loop is by GOTO. */

    case '\"':
      {
      int adjust = 0;
      int size1, size2;
      int *sizeptr = &size1;       /* Which size a following /s applies to */
      uschar *s1 = string_read();  /* Repeated hyphen replacement string */
      uschar *s2 = NULL;           /* Extra string at start of continuation */
      uschar *s3 = NULL;           /* Final string at end of "hyphens" */

      /* The default size for the extra strings is the main string's explicit
      size if one was given, otherwise the underlay or overlay default. */

      if (size < 0)
        size1 = ((flags & text_above) == 0)? stave_ulsize : stave_olsize;
      else size1 = size;

      size2 = size1;

      /* Don't bother if there's been some kind of error. Otherwise, split the
      string at '|' if there is one. The final part is stored as a "third"
      extra string. */

      if (s1 != NULL)
        {
        for (s3 = s1; *s3; s3++)
          {
          if (*s3 == '|')
            {
            *s3++ = 0;
            break;
            }
          }

        /* s3 points at a terminating zero if there was no '|' or if nothing
        followed it; in either case there is no third string. */

        if (*s3 == 0) s3 = NULL;

        /* Now read options, and possibly third string, which, paradoxically,
        is stored as s2. */

        while (read_ch == '/' && *read_chptr != '/')
          {
          next_ch();
          switch (read_ch)
            {
            case 'd':
            adjust -= read_movevalue();
            break;

            /* Applies to size1 until a further string has been read, and to
            size2 thereafter. */

            case 's':
            next_ch();
            if (read_expect_integer(sizeptr, FALSE, FALSE))
              {
              if ((*sizeptr -= 1) < 0 || *sizeptr >= MaxFontSizes)
                { error_moan(ERR39, MaxFontSizes); *sizeptr = 0; }
              }
            break;

            case 'u':
            adjust += read_movevalue();
            break;

            /* Only one further string is expected; a repeat is reported via
            ERR92 but the new string still replaces the earlier one. */

            case '\"':
            if (s2 != NULL) error_moan(ERR92);
            s2 = string_read();
            sizeptr = &size2;
            break;

            default:
            error_moan(ERR37, "/d, /s, /u, or string");
            next_ch();
            break;
            }
          }

        /* Only applies to {und,ov}erlay. Set up a hyphen-type block, or point
        to an identical one. */

        if ((flags & text_ul) == 0) error_moan(ERR90); else
          {
          int font = ((flags & text_above) == 0)?
            stave_ulfont : stave_olfont;
          htypestr *h = main_htypes;
          htypestr **hh = &main_htypes;
          htype++;

          /* Search the chain for a block whose strings, adjustment, font and
          sizes all match. htype counts positions as we go, so on exit it is
          the 1-based position of the matching block, or of the new block that
          is about to be added at the end. */

          while (h != NULL)
            {
            if (Ustrcmp(h->string1, s1) == 0 &&
               ((h->string2 == NULL && s2 == NULL) ||
                (h->string2 != NULL && s2 != NULL &&
                  Ustrcmp(h->string2, s2) == 0)) &&
               ((h->string3 == NULL && s3 == NULL) ||
                (h->string3 != NULL && s3 != NULL &&
                  Ustrcmp(h->string3, s3) == 0)) &&
               h->adjust == adjust && h->font == font &&
               h->size1 == size1 && h->size2 == size2) break;
            hh = &(h->next);
            h = *hh;
            htype++;
            }

          /* No match: append a new block. hh addresses the final "next" field
          (or main_htypes itself if the chain was empty). */

          if (h == NULL)
            {
            h = store_Xget(sizeof(htypestr));
            *hh = h;
            h->next = NULL;
            h->string1 = s1;
            h->string2 = s2;
            h->string3 = s3;
            h->font = font;
            h->adjust = adjust;
            h->size1 = size1;
            h->size2 = size2;
            }
          }
        }
      }
    goto ENDSTRING;
    }

  sigch();
  }

ENDSTRING:

/* If this is a follow-on string, allow only relative positioning options.
Follow-ons are forbidden for rehearsal strings above. */

if ((flags & text_followon) != 0)
  {
  if (hadab || halfway != 0 || offset != 0 ||
       ((flags & (text_baralign|text_box|text_centre|text_endalign|text_fb|
         text_middle|text_ps|text_ring|text_timealign|text_ul)) != 0))
    {
    error_moan(ERR142,
      ((flags & text_ps) != 0)? "PostScript string" :
      ((flags & (text_box|text_ring)) != 0)? "boxed or ringed string" :
      "string with explicit positioning");
    flags &= ~text_followon;
    }
  }

/* Unless this is a PostScript string, check its escapes. If the next
significant character is double quote, the next string can be a follow-on,
unless this string is boxed or ringed. */

stave_string_followOK = FALSE;
if ((flags & text_ps) == 0)
  {
  s = string_check(s);
  if ((flags & (text_box|text_ring)) == 0)
    {
    sigch();
    stave_string_followOK = read_ch == '\"';
    }
  }

/* If the absolute flag is set and we have not had /a or /b, then add in the
default absolute position. */

if ((flags & text_absolute) != 0 && !hadab)
  adjusty += stave_textabsolute;

/* If size unset, over-ride it for underlay, overlay, or figured bass */

if (size < 0)
  {
  if ((flags & text_ul) != 0)
    size = ((flags & text_above) == 0)? stave_ulsize : stave_olsize;
      else if ((flags & text_fb) != 0) size = stave_fbsize;
        else size = stave_textsize;
  }

/* Over-ride flags if requested for rehearsal strings, and sort out the default
font for the rest if necessary. Note that for a rehearsal string all flags
accumulated from the options above are discarded. */

if (rehearse)
  {
  flags = text_rehearse | text_above;
  fontid = curmovt->font_rehearse;
  }
else
  {
  fontid = ((flags & text_ul) != 0)?
    (((flags & text_above) == 0)? stave_ulfont : stave_olfont) :
      ((flags & text_fb) != 0)? stave_fbfont : stave_textfont;
  }

/* If this is a non-{und,ov}erlay string, create the text block immediately,
preceded by an extras block if required. */

if ((flags & text_ul) == 0)
  {
  b_textstr *p;
  if (rotate != 0 || halfway != 0 || offset != 0)
    {
    b_textXstr *pp = store_getitem(b_textX);
    pp->rotate = rotate;
    pp->halfway = halfway;
    pp->offset = offset;
    if (halfway != 0 && offset != 0) error_moan(ERR140);  /* Warning */
    }
  p = store_getitem(b_text);
  p->font = fontid;
  p->size = size;
  p->htype = 0;
  p->ulevel = 0;
  p->ulen = 0;
  p->flags = flags;
  p->string = s;
  p->x = adjustx;
  p->y = adjusty;
  }

/* Otherwise, create a block on the underlay chain to hold the string until it
is all parcelled out to the succeeding notes. Give an error if rotation is
attempted. */

else
  {
  BOOL overlay = (flags & text_above) != 0;
  int count = overlay? olay_offset : 0;
  ulaypend **pp = &stave_pendulay;
  ulaypend *p = stave_pendulay;
  ulaypend *q;

  if (rotate) error_moan(ERR98, "supported", "for underlay or overlay");

  /* Find the insertion point and the level for the new block. Underlay levels
  count up from zero and overlay levels from olay_offset; count is advanced
  past each existing entry of the same kind, and the scan stops at the first
  entry whose level is greater than count. NOTE(review): this presumes the
  chain is kept in ascending level order - confirm. */

  while (p != NULL)
    {
    if (p->level > count) break;
    if (overlay)
      {
      if (p->level >= olay_offset) count++;
      }
    else if (p->level < olay_offset) count++;
    pp = &(p->next);
    p = p->next;
    }

  /* Link the new block in before p (which may be NULL for end of chain). */

  q = store_Xget(sizeof(ulaypend));
  q->next = p;
  *pp = q;

  q->level = count;
  q->size = size;
  q->htype = htype;
  q->font = fontid;
  q->flags = flags;
  q->string = s;
  q->x = adjustx;
  q->y = adjusty;
  q->halfway = halfway;
  q->offset = offset;
  }
}


/*************************************************
*              Get width of a PMW string         *
*************************************************/

/* This almost duplicates code in out_string(), and yet it seems hard to find a
tidy way of combining them. The initial font may have font_sc added for small
caps. The final font is left in string_font for use if needed, and the
accumulated height of all the measured pieces is left in font_stringheight.

The string is measured in pieces: characters are accumulated in a local buffer
which is measured (and emptied) whenever it is nearly full, whenever an escape
changes the font, and at the end of the string.

Arguments:
  s            the string
  f            the font number
  pointsize    the font size

Returns:       the width (fixed point)
*/

int
string_width(uschar *s, int f, int pointsize)
{
int yield = 0;
int yield_height = 0;
int orig_pointsize = pointsize;
fontstr *fs;
register int c;
uschar ss[256];
uschar *pp = ss;

DEBUG(("string_width() \"%s\" font %d size %d\n", s, f, pointsize));

/* A font number with font_sc added means "small caps": use the underlying
font at a reduced size. */

if (f >= font_sc)
  {
  pointsize = (pointsize * curmovt->smallcapsize) / 1000;
  f -= font_sc;
  }
fs = &(font_List[font_table[f]]);

while (*s != 0)
  {
  int nf, r;
  uschar xs[80];

  /* If the string buffer is getting full, measure the string so far. This
  leaves plenty of room for escapes (which in practice are only a few chars). */

  if (pp - ss > 240)
    {
    *pp = 0;
    yield += font_stringwidth(ss, f, pointsize);
    yield_height += font_stringheight;
    pp = ss;
    }

  /* Pick up the next character. For backwards compatibility, bytes with a
  value >= 0x80 but < 0xc0 are taken as character values. Such a byte must be
  stepped over here, just as GETCHARINC steps over a UTF-8 sequence; otherwise
  the loop would see the same byte again and never terminate. */

  if (*s > 127 && (*s & 0x40) == 0) c = *s++; else { GETCHARINC(c, s); }

  /* Until we hit the escape character, just copy the character, except that we
  must turn ` and ' into open and close quotes, and fi into the ligature, for
  fonts with standard encoding. We have to do this by character rather than
  just copying bytes because of the buffer overflow handling above. */

  if (c != '\\')
    {
    if (fs->stdencoding)
      {
      if (c == '`') c = QUOTE_LEFT;
      else if (c == '\'') c = QUOTE_RIGHT;
      else if (c == 'f' && *s == 'i' && !fs->fixedpitch && fs->hasfi)
        {
        c = CHAR_FI;
        s++;
        }
      }

    /* Characters above 127 go into the buffer in UTF-8 form. */

    if (c > 127)
      {
      uschar utf[8];
      utf[misc_ord2utf8(c, utf)] = 0;
      Ustrcpy(pp, utf);
      pp += Ustrlen(utf);
      }
    else *pp++ = c;

    continue;
    }

  /* Interpret the escape. It may return a string to print and/or a font
  change, and a flag to say whether the font change is permanent or not */

  s = string_escape(s, xs, &nf, &r);

  /* If there is a font change, first measure the string so far in the old font
  (if any). */

  if (nf >= 0)
    {
    if (pp != ss)
      {
      *pp = 0;
      yield += font_stringwidth(ss, f, pointsize);
      yield_height += font_stringheight;
      pp = ss;
      }

    /* If the new font is temporary, measure the escaped string and mark it as
    empty. Otherwise, change the current font. */

    if (r)
      {
      yield += font_stringwidth(xs, nf, pointsize);
      yield_height += font_stringheight;
      xs[0] = 0;
      }

    /* If the new font is "small caps", leave the font alone, but adjust the
    size. Otherwise reset the original size. The size test stops the small caps
    reduction being applied twice. */

    else
      {
      if (nf == font_sc)
        {
        if (pointsize == orig_pointsize)
          pointsize = (pointsize * curmovt->smallcapsize) / 1000;
        }
      else
        {
        f = nf;
        pointsize = orig_pointsize;
        fs = &(font_List[font_table[f]]);
        }
      }
    }

  /* Join the escape string onto the string so far */

  Ustrcpy(pp, xs);
  pp += Ustrlen(xs);
  }

/* Catch any pending characters at the end */

if (pp != ss)
  {
  *pp = 0;
  yield += font_stringwidth(ss, f, pointsize);
  yield_height += font_stringheight;
  }

string_font = f;                     /* stash away final font */
font_stringheight = yield_height;    /* put total height in known place */

DEBUG(("string_width() = %d\n", yield));
return yield;
}


/* End of string.c */
