#!/usr/bin/perl

# convert Yivo transcription into my internal form, with last-minute respellings
# This Perl script is far faster than the equivalent Sed script.

# Author:  Raphael Finkel 1/1997, 12/2007

use strict;

# Lookup table for explicit vowel marks: the character that follows '^' in
# the input selects the single high byte substituted for the two-character
# sequence (see the "vowels" substitution in doFile).
	my %vowels = (
		'a' => "\x80",
		'o' => "\x81",
		'e' => "\x82",
		'i' => "\x83",
		'U' => "\x84",
		':' => "\x85",
		'A' => "\x86",
		'O' => "\x87",
		'.' => "\x88",
		';' => "\x89",
		',' => "\x90", # note: values jump from \x89 straight to \x90
		'=' => "\x91",
	);
	# When adding entries past \x91, also widen the \x80-\x91 character
	# ranges used by the regexes in doFile.

# doFile($AZ): filter STDIN to STDOUT, converting Yivo-transcribed text into
# the internal letter encoding, one line at a time.
#
#   $AZ   true to additionally rewrite each converted line into Algemeyner
#         Zhurnal spelling.
#
# Lines are classified in this order:
#   \exact            switches on line-break preservation for the rest of the
#                     input; the line itself is echoed.
#   \spell KEY TEXT   registers a last-minute respelling of KEY as TEXT; the
#                     line is consumed and not echoed.
#   other lines containing a backslash are echoed without conversion, after
#                     rewriting \english{...}, deleting '|' and deleting
#                     every \input.
#   everything else   is respelled and then run through the ordered chain of
#                     substitutions below.  The order of those substitutions
#                     matters: many rules consume the output of earlier ones.
#
# Returns nothing useful; all output goes to STDOUT.
sub doFile {
	my ($AZ) = @_; # Algemeyner Zhurnal spelling
	my %Respell;       # regex-ready key => replacement text
	my @respellOrder;  # keys of %Respell in order of first declaration
	my $inStanza = 0;  # 1 after regular line, 0 after a break
	my $addBreaks = 0; # until proven otherwise
	while (my $line = <STDIN>) {
		if ($line =~ /^\\exact\s*$/) { # we want to preserve all line breaks
			$addBreaks = 1;
			$inStanza = 0;
			print $line;
			next;
		}
		if ($line =~ /\\spell (\S+) (.*)$/) { # last-minute respelling
			my ($key, $replace) = ($1, $2);
			# The key is later interpolated as a regex; a literal '|' in it
			# must not act as alternation.
			$key =~ s/\|/\\|/g;
			# Remember declaration order: hash order is randomised per
			# process, and respellings can feed one another.  A redeclared
			# key keeps its original position but gets the new replacement.
			push @respellOrder, $key unless exists $Respell{$key};
			$Respell{$key} = $replace;
			next;
		}
		if ($line =~ /\\/) { # don't convert lines with \ char
			# except to handle \english{...}, |, and \input
			# NOTE(review): (.*) is greedy, so two \english{...} groups on one
			# line are treated as a single group -- confirm this is intended.
			$line =~ s/\\(E|e)nglish\{(.*)}%?/"\\relax {\\roman " . reverse($2) . "}"/eg;
			$line =~ s/\|//g; # prophylactic | to prevent respell of English words
			# Don't divulge other files.  A single pass is not enough:
			# deleting the inner \input from "\inp\inputut" leaves a fresh
			# \input behind, so repeat until no occurrence remains.  (This
			# only removes the literal token; it is not a full TeX sandbox.)
			1 while $line =~ s/\\input//g;
			print $line;
			next;
		}
		for my $key (@respellOrder) { # apply last-minute respellings, in declaration order
			my $replace = $Respell{$key};
			$line =~ s/\b$key\b/$replace/g;
		}
		if ($addBreaks) {
			if ($line =~ /^\s*$/) { # blank line
				$inStanza = 0;
			} elsif ($inStanza) { # nonblank, in a stanza
				print "\\\\\n"; # TeX line break before every stanza line but the first
			} else { # nonblank, first line of stanza
				$inStanza = 1;
			}
			# Leading blanks become explicit TeX spacing: first all leading
			# spaces, then all leading tabs (a space after a tab is left alone).
			while ($line =~ s/^ //) { # output TeX spacing request
				print "\\hspace*{.1in}\n";
			}
			while ($line =~ s/^\t//) { # output TeX spacing request
				print "\\hspace*{.4in}\n";
			}
		} # addBreaks
		# punctuation is mapped to single high bytes
		$line =~ s/---/\x92/g; # quotation dash
		$line =~ s/,,/\x93/g; # double comma
		$line =~ s/''/\x94/g; # double quote
		$line =~ s/"/\x94/g; # double quote
		$line =~ s/”/\x94/g; # double quote
		$line =~ s/\.\.\./\x95/g; # three dots
		# '^' plus one character selects an entry of %vowels; a character
		# with no entry makes the whole two-character sequence disappear.
		$line =~ s/\^(.)/$vowels{$1}/g; # vowels
		$line =~ s/\bfar(ey|in|ib|um)/far#$1/g; # "fareynik"
		$line =~ s/\bfarur/far#ur/g; # ur has to have a shtumer alef here
		$line =~ s/\bur(ey)/ur#$1/g; # "ureynikl"
			# maybe need a rule for geiblt, which also needs a shtumer alef?
		$line =~ s/ay/I/g; # pasakh tsvey-yud
		$line =~ s/ey/A/g; # tsvey-yud
		$line =~ s/oy/O/g; # vov-yud
		$line =~ s/u/w/g; # vov
		$line =~ s/iy/Jy/g; # khirik-yud yud
		$line =~ s/vw/vu/g; # tsvey-vov melupm-vov
		$line =~ s/wv/uv/g; # melupm-vov tsvey-vov
		$line =~ s/ww/uw/g; # melupm-vov vov
		$line =~ s/wy/Uy/g; # melupn-vov yud
		$line =~ s/wi/wJ/g; # vov khirik-yud
		$line =~ s/(^|[^\w#~|]|_)([OAIiuw])/$1#$2/g; # shtumer alef
		$line =~ s/ie/Je/g; # khirik-yud ayin
		$line =~ s/ii/JJ/g; # khirik-yud khirik-yud
		$line =~ s/([aAeIoOuw])i/$1J/g; # vowel khirik-yud
		$line =~ s/i([aAeIoOuw])/J$1/g; # khirik-yud vowel
		# J is a temporary marker: every remaining i becomes y, then J
		# becomes the final i.
		$line =~ s/i/y/g; # yod
		$line =~ s/J/i/g; # khirik-yud
		$line =~ s/yy/|yi/g; # yud khirik-yud
		$line =~ s/kh/x/g; # khof
		$line =~ s/tsh/tS/g; # tes shin
		$line =~ s/ts/c/g; # tsadik
		# $line =~ s/tz/c/g; # tsadik (deprecated input form)
		# \u upper-cases the word-final letter to mark its final form
		$line =~ s/(\w|[#'"\x80-\x91]|\|)([nxfmc])($|[^\w^#~|\x80-\x91]|_)/$1\u$2$3/g; # final letters
			# must be at least a 2-letter word
		$line =~ s/(\w[kh])It(N?)(\W|\n)/$1At$2$3/g; # xxxkayt => xxxkeyt
		$line =~ s/sh/S/g; # shin
		$line =~ s/zh/zS/g; # zayin shin
		$line =~ s/dj/dzS/g; # daled zayin shin
		$line =~ s/\|//g; # remove prophylactic  |
		$line =~ s/J/i/g; # explicit pintl-yud
		$line =~ s/U/u/g; # explicit melupm-vov
		$line =~ s/V/w/g; # explicit single vov
		$line =~ s/:s//g; # joke: shin-dot samekh
		$line =~ s/(\S)$/$1 /; # add extra space at end of non-null lines
		$line =~ s/ʼ/'/g; # change into reasonable quotes
		if ($AZ) { # convert to Algemeyner Zhurnal style
			$line =~ s/vu/v#w/g; # no melupm-vov
			$line =~ s/uv/w#v/g; # no melupm-vov
			$line =~ s/vO/v#O/g; # epenthetic shtumer alef
			$line =~ s/Ov/O#v/g; # epenthetic shtumer alef
			$line =~ s/\byi/#i/g; # respelling 'yi-'
			$line =~ s/([aeyuiAI])i/$1#y/g; # no khirik-yud
			$line =~ s/i([aeyuiAI])/y#$1/g; # but frier without #
			$line =~ s/([aeyuiAI])([ayuiAI])/$1#$2/g; # epenthetic shtumer alef
			$line =~ s/y#e/ye/g; # but uncorrect this combination
			$line =~ s/i/y/g;
			$line =~ s/I/A/g; # no pasekh-tsvey-yudn
			$line =~ s/f/P/g; # no feh-rofe
			$line =~ s/B/b/g; # no veys-rofe
			$line =~ s/Q/S/g; # no sin-dot
		} # Algemeyner Zhurnal
		print $line;
	} # each line
} # doFile

# Algemeyner Zhurnal spelling is selected by passing 'AZ' as the first
# command-line argument; anything else (or no argument) gives the default.
my $useAZ = scalar(@ARGV) > 0 && $ARGV[0] eq 'AZ';
doFile($useAZ);

# vim:ai nospell:
