unicode

#!/bin/sh
# outputs unicode glyphs for hexadecimal character values
# example invocations "unicode 2200", "unicode 2200-22f1"
# like unicode(1), but can handle astral plane characters
# © 2015 Nils Dagsson Moskopp (erlehmann), license: GPLv3+

# Require at least one argument; otherwise print the usage line and fail.
# "$#" is the POSIX-specified argument count, whereas the result of "${#*}"
# is unspecified by POSIX and therefore not safe under #!/bin/sh.
# The usage text accompanies an error exit, so it is written to stderr.
if [ "$#" -eq 0 ]; then
 printf 'usage: unicode { hex hex … | hexmin-hexmax … }\n' >&2
 exit 1
fi

# Prints the UTF-8 encoding of a code point as a sequence of octal escapes.
#
# Arguments: $1 - code point as a non-negative decimal integer without
#                 leading zeros (the form produced by $(( ... )))
# Outputs:   one "\ooo" escape per UTF-8 byte on stdout, with no trailing
#            newline; e.g. 8704 (U+2200) yields \342\210\200. The escapes
#            become real bytes once they are used inside a printf format.
# Returns:   0 on success; 1, with nothing on stdout and a message on
#            stderr, when $1 is not a decimal integer in 0..1114111
#            (U+0000..U+10FFFF).
# Globals:   codepoint (written)
codepoint_to_escape_sequence() {
  codepoint=$1
  # Reject empty, signed and non-numeric input before it reaches the
  # numeric tests and arithmetic below.
  case $codepoint in
    ''|*[!0-9]*)
      printf 'unicode: bad code point %s\n' "$codepoint" >&2
      return 1
      ;;
  esac
  # '\\%o' prints a literal backslash followed by the octal value; the
  # meaning of '\%' in a printf format is not specified by POSIX.
  if [ "$codepoint" -le 127 ]; then
    # 1 byte: 0xxxxxxx
    printf '\\%o' "$codepoint"
  elif [ "$codepoint" -le 2047 ]; then
    # 2 bytes: 110xxxxx 10xxxxxx
    printf '\\%o\\%o' \
      "$(( (codepoint >> 6) + 192 ))" \
      "$(( (codepoint & 63) + 128 ))"
  elif [ "$codepoint" -le 65535 ]; then
    # 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
    printf '\\%o\\%o\\%o' \
      "$(( (codepoint >> 12) + 224 ))" \
      "$(( ((codepoint >> 6) & 63) + 128 ))" \
      "$(( (codepoint & 63) + 128 ))"
  elif [ "$codepoint" -le 1114111 ]; then
    # 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    printf '\\%o\\%o\\%o\\%o' \
      "$(( (codepoint >> 18) + 240 ))" \
      "$(( ((codepoint >> 12) & 63) + 128 ))" \
      "$(( ((codepoint >> 6) & 63) + 128 ))" \
      "$(( (codepoint & 63) + 128 ))"
  else
    # Nothing above U+10FFFF is a Unicode code point.
    printf 'unicode: code point %s is beyond U+10FFFF\n' "$codepoint" >&2
    return 1
  fi
}

# Converts a string of hexadecimal digits to a decimal code point.
#
# Arguments: $1 - hexadecimal digits only (no 0x prefix, no sign)
# Outputs:   the decimal value followed by a newline on stdout
# Returns:   0 on success; 1, with a message on stderr, when $1 is empty,
#            contains a non-hex character, or is larger than 10FFFF
hex_to_codepoint() {
 # Only plain hex digits may reach the arithmetic expansion below; any
 # other text would be evaluated as part of an arithmetic expression.
 case $1 in
  ''|*[!0-9A-Fa-f]*)
   printf 'unicode: bad hex value %s\n' "$1" >&2
   return 1
  ;;
 esac
 # More than 8 hex digits could overflow shell arithmetic and wrap around
 # into the valid range, so they are refused before being evaluated.
 if [ "${#1}" -gt 8 ] \
  || [ "$(( 0x$1 ))" -lt 0 ] \
  || [ "$(( 0x$1 ))" -gt 1114111 ]; then
  printf 'unicode: %s is beyond U+10FFFF\n' "$1" >&2
  return 1
 fi
 printf '%s\n' "$(( 0x$1 ))"
}

# Process every command-line argument in order. An argument containing "-"
# is a range "hexmin-hexmax" and is printed as a table; any other argument
# is a single hex code point and is printed as one glyph on its own line.
#
# codepoint_to_escape_sequence (defined earlier in this file) emits "\ooo"
# octal escapes, so the generated text is deliberately used as the printf
# FORMAT: that is what turns the escapes into bytes. This is safe because
# the generated text consists only of hex digits, spaces, tabs, newlines
# and octal escapes, and therefore never contains a "%".
for ARGUMENT; do
 case $ARGUMENT in
  *-*)
   # Text before the last "-" and text after the first "-"; if there is
   # more than one "-", both pieces contain one and are rejected.
   CUR=$(hex_to_codepoint "${ARGUMENT%-*}") || exit 1
   END=$(hex_to_codepoint "${ARGUMENT#*-}") || exit 1
   if [ "$CUR" -gt "$END" ]; then
    printf 'unicode: bad range %s\n' "$ARGUMENT" >&2
    exit 1
   fi
   # COL is a tally string: its length is the number of cells in the
   # current row, and a newline is emitted after every 8th cell.
   COL=.
   printf "$(
    while :; do
     # One cell: upper-case hex value, space, glyph, tab.
     printf '%X ' "$CUR"
     codepoint_to_escape_sequence "$CUR"
     printf '\t'
     if [ "$CUR" -eq "$END" ]; then
      break
     fi
     CUR=$(( CUR + 1 ))
     case ${#COL} in
      8)
       printf '\n'
       COL=.
      ;;
      *)
       COL=$COL.
      ;;
     esac
    done
   )"'\n'
  ;;
  *)
   CUR=$(hex_to_codepoint "$ARGUMENT") || exit 1
   printf "$(
    codepoint_to_escape_sequence "$CUR"
   )"'\n'
  ;;
 esac
done