#!/usr/bin/perl -w
#
# $Id: asm2htm,v 1.3 1996/12/09 06:54:13 cs Exp cs $
#------------------------------------------------------------------------------
#
# NAME
#        asm2htm
#
# SYNOPSIS
#        asm2htm asmfn [...]
#
# DESCRIPTION
#        Perl script to produce HTML files *.htm from assembly source files
#        *.asm, with the following actions:
#
#        - convert characters from IBM-PC character set to valid HTML
#          characters / character sequences
#
#        - format comments in italics
#
#        - make hyperlinks for:
#          - includes
#          - constants (equ)
#          - data      (db, dw)
#          - variables (ds)
#          - labels
#
#        - create cross reference file asm_xref.htm (with hyperlinks)
#
#        Lines starting (first column!) with ";<" in *.asm are not processed,
#        but passed directly into *.htm (to allow passing of hyperlinks to
#        other documents, e.g.)
#
# AUTHOR
#        Christian_Schultze@b.maus.de
#        (Attention: limited gateway, do not send mails exceeding 16KB/day!!)
#
# NOTES
#        Assumptions made regarding input files:
#        - file names are *.asm
#        - 8bit chars use IBP-PC character set
#        - identifiers start with an uppercase character in first column
#        - comments are ";" to end of line
#
#        As I am neither an assembly programmer nor experienced in perl,
#        I would very much appreciate any comments on this script --
#        especially bug reports/fixes and/or improvements!
#
#------------------------------------------------------------------------------

require 5.001;                # does need Perl 5!
use integer;                  # no floating point arithmetic used
use strict;                   # be strict in syntax checks
use English;                  # use verbose names for perl's special vars

$OUTPUT_AUTOFLUSH = 1;        # no buffering of output

A
my %htm;                      # mapping: *.htm file names of *.asm file names

# sanity checks:
foreach $ARG ( @ARGV ) {
    -f $ARG or die "No such file: $ARG!\n";
    m/asm$/i or die "File doesn't have *.asm extension: $ARG!\n";
    ( $htm{$ARG} = $ARG ) =~ s!^(.*)\.asm$!\L$1\E.htm!i;
    die "Cannot find HTML file name for $ARG!\n" if $htm{$ARG} eq $ARG;
}

sub htmlhead {
    my $title = @ARG;         # title for HTML is given as 1st arg
    my $me = $PROGRAM_NAME;
    $me =~ s!.*/!!;           # remove path from this script's name
    my $date = `date`;
    chomp $date;              # remove trailing NL
      "<html>\n"
    . "<!-- automatically generated by $me on $date -->\n"
    . "<head><title>$title</title></head>\n"
    . "<body>"
    . "<pre>"   # input is PREformatted
    . "<tt>\n"  # use TeleType font
    ;
}

my $htmlend = "</tt></pre></body>\n</html>\n";


sub ibmpc2html {
    # use HTML encoding for special chars,
    # assumes IBM-PC chars in source,
    # special HTML chars unknown in IBM-PC charset commented out:

    my $save = $ARG;              # to restore global $ARG
    $ARG = shift;                 # get line to operate on from parameter list

    # translate 7bit chars with special meaning in HTML:
    s/&/\&amp;/g;                 # Ampersand (CS: must be 1st!)
    s/</\&lt;/g;                  # Less than sign
    s/>/\&gt;/g;                  # Greater than sign
    s/\"/\&quot;/g;               # Double quote sign

    if ( m/[\x80-\xFF]/ ) {       # any 8bit chars? --> translate them all!
        #s/A/\&Aacute;/g;         # Capital A, acute accent
        #s/A/\&Agrave;/g;         # Capital A, grave accent
        #s/A/\&Acirc;/g;          # Capital A, circumflex accent
        #s/A/\&Atilde;/g;         # Capital A, tilde
        s/\x8F/\&Aring;/g;        # Capital A, ring
        s/\x8E/\&Auml;/g;         # Capital A, dieresis or umlaut mark
        s/\x92/\&AElig;/g;        # Capital AE dipthong (ligature)
        s/\x80/\&Ccedil;/g;       # Capital C, cedilla
        #s/E/\&Eacute;/g;         # Capital E, acute accent
        #s/E/\&Egrave;/g;         # Capital E, grave accent
        #s/E/\&Ecirc;/g;          # Capital E, circumflex accent
        #s/E/\&Euml;/g;           # Capital E, dieresis or umlaut mark
        #s/I/\&Iacute;/g;         # Capital I, acute accent
        #s/I/\&Igrave;/g;         # Capital I, grave accent
        #s/I/\&Icirc;/g;          # Capital I, circumflex accent
        #s/I/\&Iuml;/g;           # Capital I, dieresis or umlaut mark
        #s/E/\&ETH;/g;            # Capital Eth, Icelandic
        s/\xA5/\&Ntilde;/g;       # Capital N, tilde
        #s/O/\&Oacute;/g;         # Capital O, acute accent
        #s/O/\&Ograve;/g;         # Capital O, grave accent
        #s/O/\&Ocirc;/g;          # Capital O, circumflex accent
        #s/O/\&Otilde;/g;         # Capital O, tilde
        s/\x99/\&Ouml;/g;         # Capital O, dieresis or umlaut mark
        #s/O/\&Oslash;/g;         # Capital O, slash
        #s/U/\&Uacute;/g;         # Capital U, acute accent
        #s/U/\&Ugrave;/g;         # Capital U, grave accent
        #s/U/\&Ucirc;/g;          # Capital U, circumflex accent
        s/\x9A/\&Uuml;/g;         # Capital U, dieresis or umlaut mark
        #s/Y/\&Yacute;/g;         # Capital Y, acute accent
        #s/T/\&THORN;/g;          # Capital THORN, Icelandic
        s/\xE1/\&szlig;/g;        # Small sharp s, German (sz ligature)
        s/\x85/\&aacute;/g;       # Small a, acute accent
        s/\xA0/\&agrave;/g;       # Small a, grave accent
        s/\x83/\&acirc;/g;        # Small a, circumflex accent
        #s/a/\&atilde;/g;         # Small a, tilde
        s/\x84/\&auml;/g;         # Small a, dieresis or umlaut mark
        s/\x91/\&aelig;/g;        # Small ae dipthong (ligature)
        s/\x87/\&ccedil;/g;       # Small c, cedilla
        s/\x8A/\&eacute;/g;       # Small e, acute accent
        s/\x82/\&egrave;/g;       # Small e, grave accent
        s/\x88/\&ecirc;/g;        # Small e, circumflex accent
        #s/e/\&euml;/g;           # Small e, dieresis or umlaut mark
        s/\x8D/\&iacute;/g;       # Small i, acute accent
        s/\xA1/\&igrave;/g;       # Small i, grave accent
        s/\x8C/\&icirc;/g;        # Small i, circumflex accent
        s/\x8B/\&iuml;/g;         # Small i, dieresis or umlaut mark
        #s/e/\&eth;/g;            # Small eth, Icelandic
        s/\xA4/\&ntilde;/g;       # Small n, tilde
        s/\x95/\&oacute;/g;       # Small o, acute accent
        s/\xA2/\&ograve;/g;       # Small o, grave accent
        s/\x93/\&ocirc;/g;        # Small o, circumflex accent
        #s/o/\&otilde;/g;         # Small o, tilde
        s/\x94/\&ouml;/g;         # Small o, dieresis or umlaut mark
        #s/o/\&oslash;/g;         # Small o, slash
        s/\x97/\&uacute;/g;       # Small u, acute accent
        s/\xA3/\&ugrave;/g;       # Small u, grave accent
        s/\x96/\&ucirc;/g;        # Small u, circumflex accent
        s/\x81/\&uuml;/g;         # Small u, dieresis or umlaut mark
        #s/y/\&yacute;/g;         # Small y, acute accent
        #s/t/\&thorn;/g;          # Small thorn, Icelandic
        s/\x98/\&yuml;/g;         # Small y, dieresis or umlaut mark
        
        # translate PC graphics to "simple graphics":
        tr/\xB0\xB1\xB2\xDB/\#/;  # solid fill: light gray, ..., black
        tr/\xDC\xDD\xDE\xDF/\#/;  # solid half: lower, left, right, upper
        tr/\xC4/-/;               # single horizontal bar
        tr/\xCD/=/;               # double horizontal bar
        tr/\xB3\xBA/|/;           # vertical bars: single, double

        # line crossings:
        tr/\xDA\xC2\xBF/+/;       # -/|   top    left, middle, right
        tr/\xC3\xC5\xB4/+/;       #       middle
        tr/\xC0\xC1\xD9/+/;       #       bottom
        tr/\xC9\xCB\xBB/+/;       # =/||  top
        tr/\xCC\xCE\xB9/+/;       #       middle
        tr/\xC8\xCA\xBC/+/;       #       bottom
        tr/\xD5\xD1\xB8/+/;       # =/|   top
        tr/\xC6\xCE\xB5/+/;       #       middle
        tr/\xD4\xCF\xBE/+/;       #       bottom
        tr/\xD6\xD2\xB7/+/;       # -/||  top
        tr/\xC7\xD7\xB6/+/;       #       middle
        tr/\xD3\xD0\xBD/+/;       #       bottom

        tr/\x80-\xFF/?/;          # replace all remaining 8-bit chars with '?'
    }

    tr/\x00-\x1F/?/;              # replace all control chars with '?'

    my $ret = $ARG;               # remember value to return
    $ARG = $save;                 # restore global $ARG
    return $ret;
}


my %desc = ( "C"=>"Constants", "D"=>"Data", "V"=>"Variables", "l"=>"Labels" );
my %fn;      # file names for id defs.
my %line;    # line numbers for id defs.
my %kind;    # kinds of id defs. (cf. %desc)
my %access;  # strings holding fn,line pairs for all references of ids


print "=== Scanning files for identifier definitions: ===\n";

my $fn;
foreach $fn ( @ARGV ) {
    open(FN, "<$fn") or die "Cannot open $fn for reading: $ERRNO\n";
    print "  $fn:";
    my $kind;      # what kind of def. have we just found?
    my %ndef;      # count def. kinds in FN
    $ndef{"C"} = $ndef{"D"} = $ndef{"V"} = $ndef{"l"} = 0;
    my $code = 0;  # count lines containing actual code
    while ( <FN> ) {
        ++$code if m/^\s*\w/;
        next unless m/^[A-Z]/;
        chomp;     # remove trailing NL
        s/\r$//;   # remove trailing CR
        my ($id, $nextword) = split;  # get first two words from input line
        if    ( ! defined $nextword )                    { $kind = "l"; }
        elsif ( $nextword eq "equ" )                     { $kind = "C"; }
        elsif ( $nextword eq "db" or $nextword eq "dw" ) { $kind = "D"; }
        elsif ( $nextword eq "ds" )                      { $kind = "V"; }
        else                                             { $kind = "l"; }
        ++$ndef{$kind};
        $kind{$id} = $kind;
        $fn{$id} = $fn;
        $line{$id} = $NR;
    }
    print "\n    $NR lines, $code containing code\n  ";
    my $lead = " "; # what to prefix in following loop's output
    my $kind;
    foreach $kind ( sort keys %desc ) {
        print "$lead $ndef{$kind} $desc{$kind}";
        $lead = ",";
    }
    print "\n";
    close FN;
}
print "\n";


print "=== Converting files to HTML format: ===\n";

my $asmfn;
foreach $asmfn ( @ARGV ) {
    my $htmfn = $htm{$asmfn};

    open(ASMFN, "<$asmfn") or die "Cannot open $asmfn for reading: $ERRNO\n";
    open(HTMFN, ">$htmfn") or die "Cannot open $htmfn for writing: $ERRNO\n";

    print "  $htmfn:";
    print HTMFN &htmlhead($htmfn);

    my $ncmt = 0;          # count comments
    my $nincl = 0;         # count includes
    my $nlabel = 0;        # count labels set for id defs.
    my $nref = 0;          # count references (hrefs)
    my $cmt = "";          # to memorize comments found in lines
    while ( <ASMFN> ) {
        chomp;             # remove trailing NL
        s/\r$//;           # remove trailing CR

        # expand TABs:
        1 while s/\t+/' ' x (length($MATCH) * 8 - length($PREMATCH) % 8)/e;

        $ncmt += m/;/;     # comment in line?

        # no special processing for selfdefined HTML command(s)!:
        next if m/^;</;

        # remove comments from $ARG, but memorize them in $cmt:
        $cmt = s/ *;.*// ? $MATCH : "";

        $ARG = &ibmpc2html($ARG);

        # make hyperlinks for "includes":
        if ( s!\b(include\s+)(\w+)(\.asm)\b!$1<a href=\L$2\E.htm>$2$3<\/a>!i ){
            ++$nincl
            }

        else { # (IDs are never to be found in include lines!)
            # make hyperlinks and labels for references:
            my $haslabel = 0;                     # line has line no. label?
            my %processed = ();                   # ids looked for in line
            my $id;
            foreach $id ( split m/\W+/ ) {        # for all possible IDs ...
                next unless defined $fn{$id};     # is this a known $id?
                
                my $entry = "$asmfn $NR";         # register for xref:
                if ( defined $access{$id} ) { $access{$id} .= " $entry"; }
                else                        { $access{$id}  =   $entry ; }
                
                next if $processed{$id}++;        # only once for hrefs!
                
                $fn = $htm{$fn{$id}};             # get $fn of $id definition
                $fn = "" if $fn eq $htmfn;        # omit own $htmfn in hrefs
                
                # is this id's definition?:
                my $isdef = $entry eq "$fn{$id} $line{$id}";

                # make hrefs:
                $nref += s!\b$id\b!<a href=$fn#$line{$id}>$id</a>!g;
                # (to avoid self hrefs append "unless $isdef" to above line)

                # make label(s):
                $nlabel += s!^!<a name=$id></a>! if     $isdef;
                $nlabel += s!^!<a name=$NR></a>! unless $haslabel++;
            }
        }
        
        # comment memorized? --> typeset it in italics:
        $ARG .= '<i>' . &ibmpc2html($cmt) . '</i>'  if $cmt ne "";
        
    } continue {
        print HTMFN "$ARG\n";  # print current line (with all substitutions)
    }

    print HTMFN $htmlend;

    close ASMFN;
    close HTMFN;
    chmod 0644, $htmfn;

    print "\n    "
        , "$nincl includes, "
        , "$nlabel labels, "
        , "$nref references, "
        , "$ncmt comments\n";
}
print "\n";


print "=== Creating cross reference: ===\n";

my $xreffn = "asm_xref.htm";
print "  $xreffn: ";
open(FN, ">$xreffn") or die "Cannot open $fn for writing: $ERRNO\n";
print FN &htmlhead($fn);

my $kind;
foreach $kind ( sort keys %desc ) {
    print "$desc{$kind}, ";
    print FN "\n=== $desc{$kind}: ===\n";

    my $id;
    foreach $id ( sort keys %fn ) {
        next unless $kind{$id} eq $kind;
        print FN "\n<a href=$htm{$fn{$id}}#$line{$id}>$id</a>:\n";

        my $fn;
        foreach $fn ( sort @ARGV ) {
            # skip $fn if it doesn't contain a reference to $id:
            next unless defined $access{$id} and $access{$id} =~ m/\b$fn\b/;
            print FN "  <a href=$htm{$fn}>$fn</a>:";

            # make hrefs for all line numbers in $fn with refs to $id:
            my @ref = split ' ', $access{$id};
            my $rfn;    # file name of reference
            my $rline;  # line no.  of reference
            while ( $rfn=shift(@ref) and $rline=shift(@ref) ) {
                print FN " <a href=$htm{$fn}#$rline>$rline</a>" if $rfn eq $fn;
            }

            print FN "\n";
        }
    }
    print FN "\n";
}
print FN $htmlend;
close FN;
print "done.\n";
