--  IDoc - A No Frills Haskell Interface Documentation System     -*-haskell-*-
--
--  Author : Manuel M T Chakravarty
--  Created: 15 Januar 2002
--
--  Version $Revision: 1.6 $ from $Date: 2002/02/28 10:07:05 $
--
--  Copyright (c) 2002 Manuel M T Chakravarty
--
--  This file is free software; you can redistribute it and/or modify it under
--  the terms of the GNU General Public License as published by the Free
--  Software Foundation; either version 2 of the License, or (at your option)
--  any later version.
--
--  This file is distributed in the hope that it will be useful, but WITHOUT
--  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
--  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
--  more details.
--
--- DESCRIPTION ---------------------------------------------------------------
--
--  This module single-handedly implements a simple interface documentation
--  system for Haskell.  It extracts a documented interface of a Haskell
--  module guided by a small set of non-intrusive clues.  In other words, for
--  the price of a little discipline in formatting the comments in a Haskell
--  module, an interface can be automatically extracted.  The clues are
--  designed to interfere little with the verbatim presentation of the source
--  code.
--
--- DOCU ----------------------------------------------------------------------
--
--  Language: Haskell 98
--
--  * All delarations relevant for interface documentation processing need to
--    follow the layout rules (ie, they may not use explicit braces).
--
--  * The head of instance, default, data, and class declarations must be
--    in its entirety on a single line.
--
--  * All blocks of single line comments starting in the leftmost column and
--    preceding the module header where the first comment line starts with "--
--    |" are regarded as being a module comment.
--  
--  * All interface comments introducing toplevel declarations must start in
--    the leftmost column and must use the single line comment syntax.  The
--    first line of an interface comment must start either the sequence "-- |"
--    or "-- /", where the latter indicates that the documented entity is
--    exported abstractly.
--  
--  * If the lexeme "--" introducing a comment is immediately (ie, without any
--    white space inbetween) followed by one or more dash characters '-',
--    these dash characters are converted to white space.
--  
--  * Interface comments may be free standing or can be associated with a 
--    declaration.
--  
--  * A comment describing a declaration must precede the declaration and must
--    include the line immediately preceding the declaration.  We call such
--    declaration, interface declarations.
--  
--  * An interface declarations is assumed to continue up to the next line that
--    either contains white space only or contains code or a comment starting
--    in the leftmost column.
--  
--  * If to the right of the lines of an interface declaration, there are
--    single-line comments, they are assumed to be descriptions of the
--    corresponding pieces of the declaration.
--  
--  * In the body of a class definition and the right hand-side of data
--    declarations, the rules for commenting methods and constructors,
--    respectively, are the same after shifting the body of the declaration as
--    many columns to the left as the body is indented.  The only exception is
--    that in a data declaration, constructor declarations following an
--    interface comment and start with "= " or "| " may start two columns
--    further to the left than the interface comment.
--  
--  * On the top-level only interface declarations are taken into account.  In
--    local scopes (class and data declarations) all local entities are
--    collected if the containing declaration is not exported abstractly.
--    However, if the containing declaration is exported abstractly, only the
--    local entities preceded by an interface comment appear in the
--    documentation. 
--  
--  * In an interface comment, the following formatting conventions apply:
--  
--    - In the first line of the first comment block of a module, if the
--      string "-*-haskell-*-" appears, it is removed.
--  
--    - Empty lines separating two non-empty blocks introduce a paragraph
--      break. 
--  
--    - Phrases enclosed in `...' or @...@ are set verbatim in a fixed-width
--      font. 
--
--    - Headings: Comments of the form
--
--        -- |Heading
--        -- --------
--  
--      as well as those of the form
--
--        -- Heading ----    (four or more dashes at the end of the line)
--  
--      are set as section headers and comments of the form
--  
--        -- |Heading
--        -- -
--  
--    are set as subsection headers.
--  
--    - Displays:
--
--      * Stared and dashed lists are set as itemisation lists.
--  
--      * Numbered lists (with numbers in parenthesis) are set as numbered
--        lists.
--
--      * Indented text is rendered as a quote.
--  
--      * Bird-tracked text is rendered as a verbatim code display.  (Each
--        line in the paragraph needs to have a '>' symbol and in each line
--        the '>' must be followed by a space character.)
--
--      Each display must be in a separated paragraph (ie, it must be preceded
--      and followed by an empty line if it is not the start or end of a
--      comment block).  In the case of lists, the list items need to be
--      separate paragraphs.  Paragraphs following a list item that are
--      further indented than the item are regarded to be part of that item.
--  
--    - Everything else is set as vanilla text.
--
--  Usage
--  ~~~~~
--    idoc [ option... ] haskell-file
--
--  This produces an interface documentation file for `haskell-file'. 
--
--  Options:
--
--    -h, -?
--    --help
--        Dump brief usage information to stderr.
--
--    -m LANG
--    --markup-language=LANG
--        Generate output in the specified markup language (the default is
--        `html').  Currently, only `html' is supported.
--
--    -o FILE
--    --output=FILE
--        Place output in file FILE.
--
--        If `-o' is not specified, the default is to put the output into a
--        file with the same basename as the Haskell module, but with a suffix
--        indicating the used markup language.
--
--    -v,
--    --version
--        Print (to standard error) the version and copyright information of
--        the tool (before doing anything else). 
--
--- TODO ----------------------------------------------------------------------
--
--  * Need a way to format `description'-like lists in comments
--
--  * Recognise and properly render URLs
--
--  * Need to be able to add infix decls to the docu
--
--  * "-- ||" (or something else) to introduce a list of declarations, where
--    we don't want to put a "-- |" in front of every decl (see end of
--    PABase.hs)
--
--  * Maybe change the rendering of declarations with an empty comment
--
--  * Some way of merging the docu of imported modules into the current one
--    and/or to have a link in the header to imported modules that also have
--    docu.

module Main (main)
where

-- standard libraries
import Char   (isAlpha, isDigit, toUpper, isSpace, isDigit)
import List   (isPrefixOf, intersperse)
import IO     (stderr, hPutStr)
import System (ExitCode(..), getArgs, exitWith)

-- extra library
import GetOpt (ArgOrder(..), OptDescr(..), ArgDescr(..), usageInfo,
	       getOpt)


-- version information
-- -------------------

-- version number is major.minor.patchlvl; don't change the format of the
-- `versnum' line as it is `grep'ed for by a Makefile
--
idstr      = "$Id: IDoc.hs,v 1.6 2002/02/28 10:07:05 chak Exp $"
name       = "IDoc - A No Frills Haskell Interface Documentation System"
versnum    = "0.2.6"
date	   = "5 Mar 2002"
version    = name ++ ",\nversion " ++ versnum ++ ", " ++ date
copyright  = "Copyright (c) 2002 Manuel M T Chakravarty"
disclaimer = "This software is distributed under the terms \
	     \of the GNU General Public Licence.\n\
	     \NO WARRANTY WHATSOEVER IS PROVIDED. \
	     \See the details in the documentation."


-- parsing
-- -------

-- the Haskell module is parsed into a structure that contains entries for all
-- those portions of the source that are relevant for the interface
-- documentation
--
data Module = Module Comment	-- header comment
		     String	-- module name
		     [Entity]	-- interface comments and exported declarations
	    deriving Show

data Entity = 
    --
    -- a comment not associated with a declaration
    --
    EComment Comment
    --
    -- a comment with an associated declaration
    --
  | EDecl    Comment	-- comment preceding the declaration
	     Bool	-- `True' means the declaration is exported abstractly
	     Decl	-- the declaration itself
  deriving Show

-- the various declarations (all nested comments have been erased)
--
-- * although, the data type allows arbitrarily deeply nested definitions, we
--   assume that there is at most one nesting level
--
data Decl = ValDecl       [(String, String)]	-- general value declaration
	  | TypeDecl      [String]		-- type synonyms
	  | NewtypeDecl   [String]		-- newtype declarations
	  | BriefDataDecl [(String, String)]	-- brief data declaration
	  | VerbDataDecl  String [Entity]	-- verbose data declaration
	  | ClassDecl     String [Entity]	-- class declaration
	  | InstDecl      String		-- instance declaration
	  | DftDecl	  String		-- default default
	  | ForeignDecl   [(String, String)]	-- foreign declarations
	  deriving Show

-- the lines forming an interface comment with the leading `--', `-- |', and
-- `-- /' removed (the latter two have been replaced by a ` ')
--
-- * any line containing white space only is represented by an empty string
--
type Comment = [String]

-- given the file name of a module, read it
--
readModule       :: FilePath -> IO Module
readModule fpath  = do
  file <- readFile fpath
  return $ parseModule (lexLines file)

-- the actual parsing routines
--

-- a line is represented by its indentation (column of the first non-white
-- space character), a flag indicating whether is a comment line, and it's
-- contents 
--
-- * in comment lines the leading `--' are omitted, but the indentation is
--   relative to the first `-'
--
-- * tabs are expanded and nested comments converted to white space
--
type Line = (Int, Bool, String)

-- checks whether a line is comment
--
isComment             :: Line -> Bool
isComment (_, val, _)  = val

-- checks whether a line is an interface comment; ie, it is left-aligned at
-- the given column and starts with `-- |' or `-- /'
--
isIComment :: Int -> Line -> Bool
isIComment col (col', True, ' ':c:_) = col == col' && (c == '|' || c == '/')
isIComment _   _		     = False

-- checks whether a line is farther indented than a given column; empty lines
-- are accepted, too
--
isIndented                   :: Int -> Line -> Bool
isIndented _   (_   , _, "")  = True
isIndented col (col', _, _ )  = col' > col

-- checks whether a line is not farther indented than a given column; empty
-- lines are accepted, too 
--
-- * NB: *not* the opposite of `isIndented'
--
isNotIndented                   :: Int -> Line -> Bool
isNotIndented _   (_   , _, "")  = True
isNotIndented col (col', _, _ )  = col' <= col

-- checks whether a line is a declaration line that is left-aligned at the
-- given column 
--
isDecl :: Int -> Line -> Bool
isDecl col (col', False, _) = col == col'
isDecl _   _		    = False

-- turn a string into a sequence of lines
--
lexLines   :: String -> [Line]
lexLines s  = lex s 1
  where
    -- lex the whole file
    --
    lex               :: String -> Int -> [Line]
    lex []        col  = [(col, False, "")]
    lex (' ' :cs) col  = lex cs (col + 1)
    lex ('\a':cs) col  = lex cs (col + 1)
    lex ('\b':cs) col  = lex cs (col + 1)
    lex ('\t':cs) col  = lex cs (col + 8 - ((col - 1) `mod` 8))
    lex ('\f':cs) col  = (col, False, "") : lex cs 1
    lex ('\n':cs) col  = (col, False, "") : lex cs 1
    lex ('\r':cs) col  = (col, False, "") : lex cs 1
    lex ('\v':cs) col  = (col, False, "") : lex cs 1
    lex ('-' :cs) col  = case cs of 
        		   -- FIXME: currently, something like `-->' is
        		   --	     regarded to be a comment, which it isn't
			   ('-':cs) -> lexLine cs        col True
			   _	    -> lexLine ('-' :cs) col False
    lex cs        col  = lexLine cs col False
    --
    -- we found the start of the non-white space portion of a line (line
    -- consisting of white space only are reduced to an empty string; NB:
    -- comments start with non-white space, but may be all white space
    -- afterwards) 
    --
    lexLine                  :: String -> Int -> Bool -> [Line]
    lexLine cs col isComment  = 
      let
        (line, rest) = lexC cs col zeroDL
	line'        = if isComment then dashToSpace line else line
	line'2       = if all isSpace line' then "" else line'
      in
      (col, isComment, line'2) : lex rest 1
      where
        -- convert any prefix consisting of dashes only into white space
	--
        dashToSpace ""       = ""
	dashToSpace ('-':cs) = ' ' : dashToSpace cs
	dashToSpace cs       = cs
    --
    -- collect all characters up to the end of the current line
    --
    lexC              :: String -> Int -> DList Char -> (String, String)
    lexC []        _   dl  = (closeDL dl, [])
    lexC ('\a':cs) col dl  = lexC cs (col + 1) (dl `snocDL` ' ')
    lexC ('\b':cs) col dl  = lexC cs (col + 1) (dl `snocDL` ' ')
    lexC ('\t':cs) col dl  = let
			       tabs   = 8 - ((col - 1) `mod` 8)
			       spaces = openDL (replicate tabs ' ')
			     in
			     lexC cs (col + tabs) (dl `joinDL` spaces)
    lexC ('\f':cs) _   dl  = (closeDL dl, cs)
    lexC ('\n':cs) _   dl  = (closeDL dl, cs)
    lexC ('\r':cs) _   dl  = (closeDL dl, cs)
    lexC ('\v':cs) _   dl  = (closeDL dl, cs)
    lexC ('{' :cs) col dl  = 
      case cs of
	'-':cs -> lexNested cs (col + 2) (dl `joinDL` openDL "  ") 1
	_      -> lexC      cs (col + 1) (dl `snocDL` '{'        )
    lexC (c   :cs) col dl  = lexC cs (col + 1) (dl `snocDL` c)
    --
    -- lex a nested comment (it's turned into white space)
    --
    -- FIXME: currently, \n and friends are converted to space characters,
    --	      which screws the formatting
    --
    lexNested :: String -> Int -> DList Char -> Int -> (String, String)
    lexNested []        _   dl _  = (closeDL dl, [])
    lexNested ('\t':cs) col dl nl = 
      let
        tabs   = col - (col `mod` 8) + 8
	spaces = openDL (replicate tabs ' ')
      in
      lexNested cs (col + tabs) (dl `joinDL` spaces) nl
    lexNested ('{' :cs) col dl nl = 
      case cs of
	'-':cs -> lexNested cs (col + 2) (dl `joinDL` openDL "  ") (nl + 1)
	_      -> lexNested cs (col + 1) (dl `snocDL` ' '        ) nl
    lexNested ('-' :cs) col dl nl = 
      case cs of
	('}':cs) | nl == 1   -> lexC      cs (col + 1) dl'
	         | otherwise -> lexNested cs (col + 1) dl' (nl - 1)
	  where
	    dl' = (dl `joinDL` openDL "  ")
	_                    -> lexNested cs (col + 1) (dl `snocDL` ' ') nl
    lexNested (_   :cs) col dl nl = lexNested cs (col + 1) (dl `snocDL` ' ') nl

-- parse an entire Haskell module
--
parseModule   :: [Line] -> Module
parseModule s  = let
		   (header, name, rest) = parseHeader s
		 in
		 Module header name (parseBody 1 rest)

-- up to the module header, collect all blocks of left aligned comment lines
-- where the first line in the block starts with "-- |"
--
-- * successive blocks are separated by an empty line
--
parseHeader :: [Line] -> (Comment, String, [Line])
parseHeader []                              = ([], "", [])
parseHeader ((1, True , ' ':'|':com):lines) = 
  let
    inBlock l              = isComment l && isNotIndented 1 l
    strip (_, _, str)      = str
    (coms, lines')         = span inBlock lines
    (restComs, name, rest) = parseHeader lines'
  in
  ((("  " ++ com) : map strip coms) ++ ("":restComs), name, rest)
parseHeader ((_, True , _  ):lines)         = parseHeader lines
parseHeader ((_, False, str):lines) 
  | ide == "module"                         = ([], name, lines)
  | otherwise				    = parseHeader lines
  where
    (ide , rest) = firstIdent str
    (name, _   ) = firstIdent rest

-- collect all entities in the given list of lines assuming the left border to
-- be where specified
--
-- * if `col == 1' (ie, toplevel) only declarations that are preceded by an
--   interface comment are collected
--
-- * in local scopes all declarations are collected
--
parseBody        :: Int -> [Line] -> [Entity]
parseBody col ls  = 
  case collectChunk ls of
    Nothing            -> []
    Just (chunk, rest) -> parseEntity chunk : parseBody col rest
  where
    -- a chunk consists of a couple of comment lines, which maybe followed by
    -- a number of declaration lines; the first declaration line must be
    -- aligned with the comment lines, the following declaration lines are
    -- indented; however, there may be empty lines inbetween the indented lines
    --
    -- * there is one exception to the above alignment constraint: local
    --   chunks that are part of data declaration and start with "= " or "| "
    --   may start two columns further to the left
    --
    collectChunk ls = case dropWhile (not . isIFaceLine) ls of
			[]  -> Nothing
			ls' -> case span isComment ls' of
				 (com, []               )   -> Just (com, [])
				 (com, l@(col', _, s):ls) 
				   | null s || not rightCol -> Just (com, l:ls)
				   | otherwise              ->
				     let 
				      (decl, rest) = span (isIndented col) ls
				     in
				     Just (com ++ l:decl, rest)
				   where
				     rightCol = col' == col ||
						(col' == col - 2 &&
						 ("= " `isPrefixOf` s ||
						  "| " `isPrefixOf` s))
    --
    isIFaceLine l = isIComment col l || (col > 1 && isDecl col l)

-- given a bunch of lines that form one interface entity, convert it into an
-- entity description
--
parseEntity    :: [Line] -> Entity
parseEntity ls  = 
  case span isComment ls of
    (com, []  ) -> EComment $ mkComment com
    (com, decl) -> EDecl (mkComment com) isAbstract (parseDecl isAbstract decl)
      where
	isAbstract | null com  = False
		   | otherwise = let (_, _, _:c:_) = head com
				 in 
				 c == '/'	-- check the clue

-- given a non-empty list of comment lines, convert them into a `Comment'
--
-- * the first line is a clue comment from which the clue will be removed
--
-- * comments may be empty (for comments to local declarations as part of a
--   class or data declaration)
--
mkComment                :: [Line] -> Comment
mkComment ((_, _, s):ls)  = (' ' : drop 2 s) : [s | (_, _, s) <- ls]
mkComment _               = []

-- given a non-empty list of lines forming a declaration, convert it into a
-- declaration description
--
parseDecl                      :: Bool -> [Line] -> Decl
parseDecl isAbstract all@(l:_)  = 
  let 
    (_, _, str) = l
  in
  case firstIdent str of
    ("instance", _) -> InstDecl (removeSuffix "where" str)
    ("default" , _) -> DftDecl  str
    ("type"    , _) -> TypeDecl    $ normaliseLines all
    ("newtype" , _) -> NewtypeDecl $ normaliseLines (if isAbstract 
						     then pruneEqual all
						     else all)
    ("data"    , _) -> parseDataDecl  isAbstract all
    ("class"   , _) -> parseClassDecl isAbstract all
    ("foreign" , _) -> ForeignDecl $ parseAnnotatedDecl all
    _               -> ValDecl     $ parseAnnotatedDecl all

-- given a non-empty list of lines, convert them into a list of strings, where
-- the indentation of lines that are indented farther that the first line is
-- expanded into white space
--
normaliseLines                  :: [Line] -> [String]
normaliseLines ((col, _, s):ls)  = s : map (expand col) ls
  where
    expand col (col', _, s) = replicate (col' - col) ' ' ++ s

-- remove everything after the first equality sign
--
pruneEqual                                 :: [Line] -> [Line]
pruneEqual []                               = []
pruneEqual ((col, com, s):ls) | containsEq  = [(col, com, s')]
			      | otherwise   = (col, com, s ) : pruneEqual ls
  where
    (containsEq, s') = let (front, rest) = span (/= '=') s
		       in
		       if null rest then (False, s) else (True, front)

-- given a non-empty list of lines where some may end on single-line comments,
-- convert them into string pairs
--
parseAnnotatedDecl    :: [Line] -> [(String, String)]
parseAnnotatedDecl ls  = 
  let 
    (codeLines, coms) = unzip [separate l | l <- ls]
  in
  zip (normaliseLines codeLines) coms
  where
    -- separate a single-line comment from the code
    --
    separate l@(col, _, s) = case splitAt (scan s 0) s of
			       (_   , "" ) -> (l, "")
			       (code, com) -> ((col, False, code), drop 2 com)
    --
    -- scan for a single line comment (beware care of strings)
    --
    scan []        i = i
    scan ('\"':cs) i = scanStr cs (i + 1)
    scan ('-' :cs) i = case cs of
		         ('-':_) -> i
			 _	 -> scan cs (i + 1)
    scan (_   :cs) i = scan cs (i + 1)
    --
    scanStr []         i = i       -- Urgh, but we don't do syntax...
    scanStr ['\\']     i = i + 1	  -- ... checking here
    scanStr ('\\' :cs) i = scanStr (tail cs) (i + 2)
    scanStr ('\"' :cs) i = scan    cs        (i + 1)
    scanStr (_    :cs) i = scanStr cs        (i + 1)

-- parse a class declaration
--
parseClassDecl :: Bool -> [Line] -> Decl
parseClassDecl isAbstract ((_, _, s):ls) 
  | isAbstract = ClassDecl (removeSuffix "where" s) []
  | otherwise  = ClassDecl s                        (body ls)
  where
    -- NB: be sure to include empty lines
    --
    body []                      = []
    body      ((_  , _, ""):ls)  = body ls
    body (all@((col, _, _ ):_ )) = parseBody col all

-- parse a data declaration
--
parseDataDecl                                 :: Bool -> [Line] -> Decl
parseDataDecl isAbstract lines@((_, _, s):ls)  = 
  case break (== '=') s of
    (declHead, _       ) | isAbstract        -> BriefDataDecl 
						  [(declHead ++ "   ", 
						    "abstract")]
    (_       , ""      )                     -> VerbDataDecl s (body ls)
    (_       , '=':rest) | all (== ' ') rest -> VerbDataDecl s (body ls)
		         | otherwise	     -> BriefDataDecl $ 
					          parseAnnotatedDecl lines
  where
    -- NB: be sure to include empty lines
    --
    body []                      = []
    body      ((_  , _, ""):ls)  = body ls
    body (all@((col, _, _ ):_ )) = parseBody col all


-- formatting
-- ----------

data FormattingOps a = FOps {

		         -- close the document by adding the prologue and
		         -- epilogue 
			 --
		         close       :: String -> a -> String,

			 -- join two documents
			 --
			 conc        :: a -> a -> a,

			 -- turn a document into a paragraph
			 --
			 para        :: a -> a,

			 -- generates a heading
			 --
			 heading     :: Int -> a -> a,

			 -- join two documents with a bit more vertical space
			 -- than a paragraph boundary implies
			 --
			 sep         :: a -> a -> a,

			 -- produce a vertical separator
			 --
			 rule        :: a,

			 -- turn a string into a document
			 --
		         text        :: String -> a,

			 -- set a string as inline code
			 --
			 code        :: String -> a,

			 -- quoted document
			 --
			 quote	     :: a -> a,

			 -- set a string of pre-formatted code verbatim (in
			 -- general newline characters will be preserved,
			 -- except at the start and end of the string)
			 --
			 codeVerb :: String -> a,

			 -- set a list of pairs as a two-column table, which
			 -- hasn't got any padding or borders and is set in a
			 -- display 
			 --
			 twoColTable :: [(a, a)] -> a,

			 -- set a list, which is ordered if the first argument
			 -- is `True'
			 --
			 list :: Bool -> [a] -> a,

			 -- set a document in the display style appropriate for
			 -- code
			 --
			 codeBox :: a -> a,

			 -- set a document in the display style appropriate for
			 -- declaration comments
			 --
			 commentBox :: a -> a,

			 -- combine a sequence of documents into one visual
			 -- entity
			 --
			 join :: [a] -> a
		       }

-- convert a module structure into a document
--
formatModule :: FormattingOps a -> Module -> String
formatModule fops (Module header name ents)  = 
  close name $
    formatComment fops header'
    `conc`
    rule
    `conc` 
    formatEntities fops ents
  where
    FOps {close = close, conc = conc, rule = rule} = fops
    --
    header' = case header of
	        []               -> []
		(firstLine:rest) -> removeMode firstLine : rest
    --
    removeMode ""                                           = ""
    removeMode (c:cs) | "-*-haskell-*-" `isPrefixOf` (c:cs) = drop 12 cs
		      | otherwise			    = c : removeMode cs

data ParagraphKind = TextPara            -- just normal text
		   | ItemPara Bool Int   -- item in an list (True == ordered)
		   | VerbPara	         -- verbatim code display
		   deriving Eq

-- formatting a comment includes picking up all the clues and using them to
-- structure the generated document
--
formatComment :: FormattingOps a -> Comment -> a
formatComment fops@(FOps {text = text             }) []  = text ""
formatComment fops                                   com = 
  render 1 . map categorise . paras . leftAlign $ com
  where
    FOps {text = text, conc = conc, para = para, quote = quote,
	  heading = heading, list = list, code = code, 
	  codeVerb = codeVerb, codeBox = codeBox              } = fops
    --
    -- remove all empty columns to the left of the text in a block
    --
    leftAlign ls = let spaceCounts = [countSpace l | l <- ls, l /= ""]
		       indent      = if null spaceCounts 
				     then 0 
				     else minimum spaceCounts
		   in
		   map (drop indent) ls
		   where
		     countSpace = length . takeWhile isSpace
    --
    -- split a comment block into paragraphs (separated by empty lines)
    --
    paras         :: [String] -> [[String]]
    paras []       = []
    paras ("":ls)  = paras ls  -- drop all leading (and duplicate) empty lines
    paras ls       = let (ls', rest) = break (== "") ls
		     in ls' : case rest of
                                []        -> []
                                (_:rest') -> paras rest'
    --
    -- determine for a paragraph what type it is and remove all indentation
    -- and formatting symbols to it's left (in the lines following the first
    -- one, all indentation that matches the formatting symbol in the first
    -- line is removed, too); the paragraph is non-empty
    --
    categorise          :: [String] -> (Int, ParagraphKind, String)
    categorise p@(l:ls)  =
      case span isSpace l of
        (white, '*':' ':l') -> (length white, ItemPara False 2, 
			        unlines (l':ls))
        (white, '-':' ':l') -> (length white, ItemPara False 2, 
			        unlines (l':ls))
        (white, '>'    :l') -> let 
			         indent      = length white
				 (pre, rest) = unzip [splitAt (indent + 2) l 
						     | l <- p]
				 isVerb      = all (== (white ++ "> ")) pre
			       in
			       if isVerb then (indent, VerbPara, unlines rest)
				         else (indent, TextPara, unlines p   )
        (white, '('    :l') -> let 
			         indent = length white
			       in case l' of
				 d    :')':lr | isDigit d                -> 
				   (indent, ItemPara True 3, unlines (lr:ls))
				 d1:d2:')':lr | isDigit d1 && isDigit d2 -> 
				   (indent, ItemPara True 4, unlines (lr:ls))
				 _                                       ->
				   (indent, TextPara, unlines p)
        (white, _         ) -> (length white, TextPara, unlines p)
    --
    -- render a set of categorised paragraphs
    --
    -- render :: Int -> [(Int, ParagraphKind, String)] -> a
    render indent []                                       = text ""
    render indent all@((col, ItemPara isOrdered w, str):_) =
      let
        isItem (col', ItemPara isOrdered' _, str) = (col       == col' &&
						     isOrdered == isOrdered')||
						    col <  col'
        isItem (col', _                    , str) = col <  col'
	--
        (localParas, rest) = span isItem all
      in
      renderList localParas
      `conc`
      render indent rest
    render indent all@((col, VerbPara, str):_)           =
      let
        isVerb (col', kind, str) = kind == VerbPara && col == col'
	--
        (localParas, rest)       = span isVerb all
      in
      renderVerb localParas
      `conc`
      render indent rest
    render indent all@((col, TextPara, str):_)           =
      let
        isText (col', kind, str) = kind == TextPara && col == col'
	--
        (localParas, rest)       = span isText all
      in
      renderText (indent < col) localParas
      `conc`
      render indent rest
    --
    -- render a set of paragraphs that form one list display (all that have a
    -- column value identical to the first paragraph are toplevel items in the
    -- current list)
    --
    --renderList :: [(Int, ParagraphKind, String)] -> a
    renderList all@((col, ItemPara isOrdered _, _):_) = 
      list isOrdered [renderItem item | item <- items all]
      where
	-- split the sequence of paragraphs at all item boundaries (ie, by the
	-- pre-condition for `renderList' these are all paragraphs whose
	-- indentation matches that of the first)
	--
        items :: [(Int, ParagraphKind, String)] 
	      -> [[(Int, ParagraphKind, String)]]
        items []           = []
	items (para:paras) = 
	  let
	    (first, rest) = break (\(col', _, _) -> col == col') paras
	  in
	  (para:first) : items rest
    --
    -- render a non-empty list of paragraphs forming a single list item 
    --
    renderItem ((col, ItemPara _ w, s):paras) =
      let
        (white, rest) = span isSpace s
	innerIndent   = col + w + length white
      in
      textWithCode fops s `conc` render innerIndent paras
    --
    -- render a set of paragraphs that contain verbatim code
    --
    --renderVerb       :: [(Int, ParagraphKind, String)] -> a
    renderVerb paras  =
      codeBox . codeVerb . concat . intersperse "\n" $ [s | (_, _, s) <- paras]
    --
    -- render a set of paragraphs that contain code; they are quoted if the
    -- first argument is `True'
    --
    --renderText :: Bool -> [(Int, ParagraphKind, String)] -> a
    renderText doQuote paras = 
      (if doQuote then quote else id) $
        foldr conc (text "") [para $ textWithCode fops s | (_, _, s) <- paras]

-- convert all entities
--
formatEntities :: FormattingOps a -> [Entity] -> a
formatEntities      (FOps{text = text             }) []      = text ""
formatEntities fops@(FOps{conc = conc, para = para}) (e:es)  = 
  para (formatEntity fops e)
  `conc` 
  formatEntities fops es

-- format a single entity
--
-- * when a (local) declaration has no comment, we avoid the creation of the
--   comment box entirely
--
formatEntity                            :: FormattingOps a -> Entity -> a
formatEntity fops (EComment com       )  = formatComment fops com
formatEntity fops (EDecl    com _ decl)  = 
  join $
    (if null com then [] else [commentBox $ formatComment fops com]) ++
    [formatDecl fops decl]
  where
  FOps {commentBox = commentBox, join = join} = fops

-- format a declaration
--
formatDecl :: FormattingOps a -> Decl -> a
formatDecl fops (ValDecl       annDecl  ) = formatAnnDecl fops annDecl
formatDecl fops (BriefDataDecl annDecl  ) = formatAnnDecl fops annDecl
formatDecl fops (ForeignDecl   annDecl  ) = formatAnnDecl fops annDecl
formatDecl fops (TypeDecl      lines	) = formatLines   fops lines
formatDecl fops (NewtypeDecl   lines	) = formatLines   fops lines
formatDecl fops (InstDecl      line	) = formatLines   fops [line]
formatDecl fops (DftDecl       line	) = formatLines   fops [line]
formatDecl fops (VerbDataDecl  line ents) = formatNested  fops line ents
formatDecl fops (ClassDecl     line ents) = formatNested  fops line ents

-- generic formatting of a plain multi-line declaration
--
formatLines :: FormattingOps a -> [String] -> a
formatLines (FOps{codeVerb = codeVerb, codeBox = codeBox}) lines =
  codeBox . codeVerb . unlines $ lines

-- generic formatting of an annotated declaration
--
formatAnnDecl :: FormattingOps a -> [(String, String)] -> a
formatAnnDecl fops annDecl =
  codeBox $
    twoColTable 
      [(codeVerb declLine, textCom comLine) | (declLine, comLine) <- annDecl]
  where
    FOps {
      text        = text, 
      codeVerb    = codeVerb, 
      twoColTable = twoColTable,
      codeBox     = codeBox
    } = fops
    --
    textCom "" = text ""
    textCom s  = textWithCode fops $ "-- " ++ s

-- generic formatting of a nested declaration (ie, a declaration containing a
-- list of local entities)
--
formatNested :: FormattingOps a -> String -> [Entity] -> a
formatNested fops str ents =
  codeBox (codeVerb str) 
  `conc` 
  quote (foldr conc (text "") (map (formatEntity fops) ents))
  where
    FOps {
      text     = text, 
      conc     = conc,
      quote    = quote,
      codeVerb = codeVerb,
      codeBox  = codeBox
    } = fops

-- convert all text quoted as `...' or @...@ into inline code fragments
--
textWithCode :: FormattingOps a -> String -> a
textWithCode fops@(FOps {text = text}) "" = text ""
textWithCode fops                      s  
  | lvl > 0   = heading lvl (textWithCode fops headingStr)
  | otherwise = 
    let
      (first  , rest ) = break (== '`' ) s
      (codeStr, rest') = break (== '\'') rest
      foundCode        = not (null codeStr) && not (null rest')
    in
    if foundCode 
    then text first `conc` code (tail codeStr) `conc` 
	 textWithCode fops (tail rest')
    else text s
    where
      FOps {text = text, conc = conc, code = code, heading = heading} = fops
      --
      (headingStr, lvl) = p 

      p :: (String, Int)
      p =
	case lines s of
	  -- underlined heading
	  [headingStr, dashes] | all (== '-') dashes -> 
	    (headingStr, if dashes == "-" then 3 else 2)
	  -- heading with dashes at the end of the line
	  [line]			               -> 
	    let
	      (dashes, headingStr) = span (== '-') . reverse $ line
	    in
	    if length dashes >= 4 then (reverse headingStr, 2) 
				  else ("", 0)
	  -- not a heading
	  _				               -> ("", 0)


-- HTML pretty-printing
-- --------------------

-- HTML configuration constants
--
codeBoxColour, commentBoxColour, entityBgColour :: String
codeBoxColour    = "#D6E8FF"
commentBoxColour = "#FFE8D6"
entityBgColour   = "#EFEFEF"

-- the operations needed by the formatting phase to produce HTML
--
htmlFOps :: FormattingOps ShowS
htmlFOps  = FOps {
	      close       = htmlClose,
	      conc	  = htmlConc,
	      para	  = htmlPara,
	      heading	  = htmlHeading,
	      sep	  = htmlSep,
	      rule	  = htmlRule,
	      text        = htmlText,
	      code        = htmlCode,
	      quote	  = htmlQuote,
	      codeVerb    = htmlCodeVerb,
	      twoColTable = htmlTwoColTable,
	      list	  = htmlList,
	      codeBox     = htmlCodeBox,
	      commentBox  = htmlCommentBox,
	      join        = htmlJoin
	    }

-- put the document tags around the argument and convert into a plain string
--
htmlClose          :: String -> ShowS -> String
htmlClose name doc  = (htmlTag "html" [] $ head . showChar '\n' . body) ""
  where
    head = htmlTag "head" [] $
	       showString "<META HTTP-EQUIV=\"Content-Type\" \
			  \CONTENT=\"text/html; charset=ISO-8859-1\">\n"
	     . htmlTag "title" [] (showString name)
           . showChar '\n'
    body = htmlTag "body" [("BGCOLOR", "FFFFFF")] $
	       htmlTag "h1"    [] title
	     . showChar '\n'
	     . htmlTag "p"     [] doc
	     . htmlTag "hr"    [] id
	     . htmlTag "small" [] footer
    --
    title  = showString $ "Interface of <code>" ++ name ++ "</code>"
    footer = showString $ "This document was generated by \"" ++
			  version ++ "\"."

-- concatenate two pieces of HTML
--
htmlConc     :: ShowS -> ShowS -> ShowS
htmlConc a b  = a . showChar '\n' . b

-- wrap a document into <p> tags
--
htmlPara     :: ShowS -> ShowS
htmlPara doc  = htmlTag "p" [] doc

-- set a heading
--
htmlHeading     :: Int -> ShowS -> ShowS
htmlHeading n h  = htmlTag ('h':show n) [] h

-- put some vertical space between two pieces of HTML
--
htmlSep         :: ShowS -> ShowS -> ShowS
htmlSep top bot  =   
    top 
  . showChar '\n'
  . htmlTag "font" [("size", "-2")] (htmlTag "p"  [] (showString "&nbsp;"))
  . showChar '\n'
  . bot

-- generate a <hr> tag
--
htmlRule :: ShowS
htmlRule  = htmlTag "p" [] $ showString "<hr>"

-- escape all characters that have a special meaning in HTML
--
htmlText :: String -> ShowS
htmlText ""       = id
htmlText ('<':cs) = showString "&lt;"  . htmlText cs
htmlText ('>':cs) = showString "&gt;"  . htmlText cs
htmlText ('&':cs) = showString "&amp;" . htmlText cs
htmlText (c  :cs) = showChar   c       . htmlText cs

-- set a string using the <code> tag
--
htmlCode   :: String -> ShowS
htmlCode s  = htmlTag "code" [] (htmlText s)

-- set a string using the <code> tag
--
htmlQuote :: ShowS -> ShowS
htmlQuote  = htmlTag "blockquote" []

-- pre-formatted code set verbatim
--
-- * trailing newlines are removed to avoid superfluous vertical space between
--   the current and following elements
--
htmlCodeVerb   :: String -> ShowS
htmlCodeVerb s  = 
    showChar '\n'
  . htmlTag "pre" [] (showString (clean s))
  where
    clean = reverse . dropWhile (== '\n') . reverse

-- two column table formatted as a display without any borders or padding
--
htmlTwoColTable       :: [(ShowS, ShowS)] -> ShowS
htmlTwoColTable lines  = 
  htmlTag "table" [("border", "0"), ("cellpadding", "0")] $
    foldr (.) id 
      [htmlTag "tr" [] $
           showChar '\n' . htmlTag "td" [] col1 
	 . showChar '\n' . htmlTag "td" [] col2
      | (col1, col2) <- lines]

-- ordered and unordered lists
--
htmlList                 :: Bool -> [ShowS] -> ShowS
htmlList isOrdered items  =
  htmlTag (if isOrdered then "ol" else "ul") [] $
    foldr htmlConc id (map (htmlTag "li" []) items)

-- colour box for code displays
--
htmlCodeBox   :: ShowS -> ShowS
htmlCodeBox s  =
  htmlTag "table" [("border", "0"), ("bgcolor", codeBoxColour), 
		   ("width", "100%"), ("cellpadding", "6"),
		   ("cellspacing", "0")] $ 
    htmlTag "tr" [] $
      htmlTag "td" [] $ s

-- colour box for pre-declaration comments
--
htmlCommentBox   :: ShowS -> ShowS
htmlCommentBox s  =
  htmlTag "table" [("border", "0"), ("bgcolor", commentBoxColour), 
		   ("width", "100%"), ("cellpadding", "6"),
		   ("cellspacing", "0")] $ 
    htmlTag "tr" [] $
      htmlTag "td" [] $ s

-- render a set of documents as rows in an invisible table
--
htmlJoin      :: [ShowS] -> ShowS
htmlJoin docs  =
  htmlTag "table" [("border", "0"), ("bgcolor", entityBgColour),
		   ("width", "100%"), ("cellpadding", "0"), 
		   ("cellspacing", "0")] $ 
    foldr (.) id 
      [htmlTag "tr" [] $ htmlTag "td" [] doc | doc <- docs]

-- given the tag name and an association list of attribute keys and values,
-- wrap a document into an open-end tag pair of the specified sort
--
htmlTag                :: String -> [(String, String)] -> ShowS -> ShowS
htmlTag name attrs doc  =
    showChar   '<'
  . showString name
  . showAttrs  attrs
  . showChar   '>'
  . doc
  . showString "</"
  . showString name
  . showChar   '>'
  where
    showAttrs []               = id
    showAttrs ((key, val):kvs) =   showChar   ' '
				 . showString key
				 . showChar   '='
				 . shows      val  -- to have it quoted
				 . showAttrs  kvs


-- main control
-- ------------

-- option handling
--

-- header is output in case of help, before the descriptions of the options;
-- errTrailer is output after an error message
--
header :: String
header  = version ++ "\n" ++ copyright ++ "\n" ++ disclaimer
          ++ "\n\nUsage: idoc [ option... ] haskell-file\n"

trailer, errTrailer :: String
trailer    = "\n\
	     \Currently supported markup languages: HTML.\n\n\
	     \Refer to <http://www.cse.unsw.edu.au/~chak/haskell/idoc/> \
	     \for more information.\n"
errTrailer = "Try the option `--help' on its own for more information.\n"

-- supported option types
--
data Flag = Help	        -- print brief usage information
	  | Markup  MarkupLang  -- markup language to use
	  | Output  String      -- file where the generated file should go
	  | Version	        -- print version information on stderr
	  | Error   String      -- error occured during processing of options
	  deriving Eq

-- the currently supported markup languages
--
data MarkupLang = HTML
		deriving Eq

-- file suffix depending on the used markup language
--
idocSuffix      :: MarkupLang -> String
idocSuffix HTML  = ".html"

-- option description suitable for `GetOpt'
--
options :: [OptDescr Flag]
options  = [
  Option ['h', '?'] 
	 ["help"] 
	 (NoArg Help) 
	 "brief help (the present message)",
  Option ['m'] 
	 ["markup-language"] 
	 (ReqArg markupArg "LANG") 
	 "generate output in markup language LANG",
  Option ['o'] 
	 ["output"] 
	 (ReqArg Output "FILE") 
	 "write resulting interface description to FILE",
  Option ['v'] 
	 ["version"] 
	 (NoArg Version) 
	 "show version information"]

-- convert argument of `MarkupLang' option
--
markupArg     :: String -> Flag
markupArg arg  = case map toUpper arg of
	           "HTML" -> Markup HTML
		   _      -> Error "Illegal markup language."

-- the main routine
--
main :: IO ()
main  =
  do
    args <- getArgs
    case normaliseOpts $ getOpt RequireOrder options args of
      ([Help]   , []   , []  ) -> help
      ([Version], []   , []  ) -> putStrLn version
      (opts     , [arg], []  ) -> do
				    config <- processOpts opts arg
				    genIDoc config
      (_        , _    , errs) -> raiseErrs errs
  where
    -- help options may not be mixed with other arguments and normal
    -- invocation must always supply exactly one file name
    --
    normaliseOpts ([Help]   , [], [])		= ([Help]   , [], [])
    normaliseOpts ([Version], [], [])		= ([Version], [], [])
    normaliseOpts (opts, args, errs)
      | not (null errs')			= ([], [], errs')
      | Help `elem` opts
	&& (length opts > 1 || not (null args)) = ([], [], [helpErr])
      | length args /= 1			= ([], [], [fileErr])
      | otherwise				= (opts, args, [])
      where
	errs'   = errs ++ [msg | Error msg <- opts]
	helpErr = "Help options may not be mixed with other arguments\n"
	fileErr = "You must supply exactly one filename\n"

-- emit help message
--
help :: IO ()
help  = do
	  putStr $ usageInfo header options
	  putStr trailer

-- output error message
--
raiseErrs      :: [String] -> IO a
raiseErrs errs = do
  		   hPutStr stderr (concat errs)
		   hPutStr stderr errTrailer
		   exitWith $ ExitFailure 1

-- descriptor of the configuration information for one run of the tool
--
data Config = Config {
	        inputFile  :: FilePath,
		outputFile :: FilePath,
	        markupLang :: MarkupLang
	      }

-- process the command line options and compute the configuration for this run
--
-- * `Help', `Error', and `Version' won't occur in the flag list
--
processOpts             :: [Flag] -> FilePath -> IO Config
processOpts flags fname  =
  let
    dftConfig = Config {
		  inputFile  = fname,
		  outputFile = fname,
		  markupLang = HTML
		}
  in
  process dftConfig False flags
  where
    process config outputFileSet (Markup lang :flags) = 
      process (config {markupLang = lang}) outputFileSet flags
    process config outputFileSet (Version     :flags) =
      do
        putStrLn version
	process config outputFileSet flags
    process config _             (Output fname:flags) = 
      process (config {outputFile = fname}) True flags
    process config outputFileSet []                   =
      return $
        if outputFileSet 
        then config
	else let
	    suffix = idocSuffix (markupLang config)
	  in
	  config {outputFile = replaceSuffix (outputFile config) suffix}

-- generate the interface documentation for the given configuration
--
genIDoc        :: Config -> IO ()
genIDoc config  = do
  mod <- readModule inputFile
  let fops = case markupLang of
	       HTML -> htmlFOps
      idoc = formatModule fops mod
  writeFile outputFile idoc
  where
    Config {
      inputFile  = inputFile,
      outputFile = outputFile,
      markupLang = markupLang
    } = config


-- auxilliary routines
-- -------------------

-- Difference lists
--

-- a difference list is a function that given a list returns the original
-- contents of the difference list prepended at the given list
--
type DList a = [a] -> [a]

-- open a list for use as a difference list
--
openDL :: [a] -> DList a
openDL  = (++)

-- create a difference list containing no elements
--
zeroDL :: DList a
zeroDL  = id

-- create difference list with given single element
--
unitDL :: a -> DList a
unitDL  = (:)

-- append a single element at a difference list
--
snocDL      :: DList a -> a -> DList a
snocDL dl x  = \l -> dl (x:l)

-- appending difference lists
--
joinDL :: DList a -> DList a -> DList a
joinDL  = (.)

-- closing a difference list into a normal list
--
closeDL :: DList a -> [a]
closeDL  = ($[])

-- string manipulation
--

-- eat all white space at the start of a string
--
eat          :: String -> String
eat []        = []
eat (' ':cs)  = eat cs
eat cs        = cs

-- if the first string appears as a suffix of the second (only followed by
-- white space), drop it
--
removeSuffix		 :: String -> String -> String
removeSuffix suffix str   = let
			      rts = eat . reverse $ str
			    in
			    if reverse suffix `isPrefixOf` rts 
			    then reverse (drop (length suffix) rts)
			    else str

-- get the first identifier of a string
--
-- * returns the empty string if the initial non-white space is not an
--   identifier
--
firstIdent :: String -> (String, String)
firstIdent  = break nonIdentChar . eat
	      where
	        nonIdentChar c = not (isAlpha c || isDigit c 
				      || c == '_' || c == '\'')

-- replace the suffix of a filename by a given suffix
--
replaceSuffix           :: FilePath -> String -> FilePath
replaceSuffix fname suf  = let
			     base = case break (== '.') (reverse fname) of
				      (_, '.':esab) -> reverse esab
				      _		    -> fname
			   in
			   base ++ suf