scdoc2html

Abstract

The scdoc2html script converts the SuperCollider documentation from .rtf-files to a set of HTML-pages which can be viewed with any web-browser. This is useful especially on Linux, where the original .rtf-files are hard to read in Emacs.

Note that the binary executable "unrtf" is required on the system. (On Debian, there is a “unrtf” package that can quickly be installed via apt-get / dselect.)

Also the binary of "wget" is used to download the SuperCollider logo from the internet. (Again easy to install through the “wget”-Debian-package.) The use of wget is easy to disable if unwanted (see function getLogoImage).
/*****************************************************************************
  scdoc2html 0.3
  Generates HTML pages from the SuperCollider RTF-online documentation.

  This program generates HTML pages from the SuperCollider3 online help
  files so that they can be read in a web browser on Linux, which is far
  better than using Emacs (which displays RTF files with almost no
  formatting, making them very hard to read).
  The result is not perfect, e.g. some links remain unresolved. If this
  program is useful to more people, maybe it makes sense to develop both
  this program and the RTF-files in a way that they "converge" and
  allow generating fully valid HTML documentation.
  Note: unrtf is a quality-bottleneck, because it generates horrible
  HTML. But if needed, it should be easy to replace it by a different 
  rtf2html converter.

  The binary executable "unrtf" is required on the system.
  Also the binary of "wget" is used to download the SuperCollider logo
  from the internet. This is, however, easy to disable if unwanted (see
  function getLogoImage).

  To run manually from within Emacs:
      - "C-c <"   clear post buffer
      - "C-x h"   select all
      - "C-c C-x" evaluate region

  Bugs: - Sometimes terminates too early with a "Primitive '_FileWrite'"
          error, so may need to be restarted several times until
          it finally runs. (Like an old car...)
        - Several links in the result are not correctly resolved.

  Changes:
  v 0.3, 2005-04-08: Workaround for missing curly braces after unrtf-
                     conversion. The conversion process has changed:
                     Conversion from rtf to html is done via one
                     temporary file now (configurable via tmpFile),
                     all other conversions operate on strings only,
                     no temporary files are created between first html-
                     conversion and html-post-processing. Memory usage
                     may have increased a lot, all generated html will
                     be aggregated in memory before written to disk.
                     Some files that were not generated by previous versions 
                     are now correctly handled, because output-filenames are 
                     now converted to a normalized representation without
                     special characters (this had affected e.g. the
                     conversions of files '**.rtf', '<=.rtf' etc.)
                     Useless CVS-subdirectories in the output are no longer
                     created.
                     Also added to-page-top links to be generated in the 
                     alphabetical-index page at each letter entry.
  v 0.2, 2005-03-19: Replaced the crappy implementations of ls() and md()
                     with cleaner versions. Runs noticeably faster and more 
                     reliably now.
                     Also separated system-dependent parts (calls to external
                     binaries) from the main SC code.
                     Added a navigation header to the alphabetic index page.
  v 0.1, 2005-03-17: Initial version.

  Written by Jens Gulden.
******************************************************************************/


// --- configuration ----------------------------------------------------------

// Root directory of the RTF help files; convert() starts listing here.
var inDir = "/usr/src/SuperCollider3/build/Help";
// Directory that receives index.html, the stylesheet, the toc pages and all converted pages.
var outDir ="/d/docs/SuperCollider/html";
// Scratch file: rtf2html writes each RTF document here and points unrtf at it.
var tmpFile = "/tmp/rtf2html.tmp";
// URL that getLogoImage hands to wget.
var logoURL = "http://supercollider.sourceforge.net/images/title.gif";
// NOTE(review): delayTime and delayMax are not referenced anywhere in the code shown here — possibly leftovers.
var delayTime = 100000; // loop counter for dirty waits
var delayMax = 100; // maximum of wait loops before breaking
// rtf2html allocates a String of this size for every document it converts.
var maxHtmlSize = 1024 * 512; // maximum buffer size for converted html


// --- variables --------------------------------------------------------------

// functions:
// Each name below is assigned a function further down. Declaring them up
// front lets the function bodies refer to one another and to themselves
// (several of them are recursive).
var ls;
var md;
var convert;
var htmlConvert;
var removeTags;
var findIndex; // NOTE(review): never assigned in the code shown here
var countOccurrence;
var repeat;
var replace;
var stylesheet;
var getLogoImage;
var rtf2html;
var encodeFilename;
var encodeUrl;

// attributes:
// NOTE(review): infile, outfile, in, out and lsCount are not used at script
// level in the code shown here (convert declares its own local infile).
// NOTE(review): the code below also uses a variable named f that is not
// declared here; presumably it resolves to the interpreter variable f.
var infile;
var outfile;
var in;
var out;
var allTopics = Dictionary.new; // assoc. topic -> rel. filename
var allFiles = Dictionary.new; // assoc. rel. filename -> html-content
var sortedTopics; // topic names in sorted order, used for the alphabetic index
var lsCount = 0;
var html; // accumulates the alphabetic index page
var list; // accumulates the per-letter link list of the alphabetic index
var firstChar; // upper-cased first character of the topic currently indexed
var prevFirstChar; // first character of the most recent index heading
var usedChars; // characters for which an index heading was emitted
var depth; // number of "/" in the relative filename of the page being post-processed
var rel; // relative path prefix from that page back to the output root



// *** external binaries interface functions (possibly system-dependent) ******


// --- function rtf2html(rtf: String) : String --------------------------------
//
// Converts an RTF document to HTML by running the external "unrtf" binary.
// The RTF text is stored in tmpFile, unrtf is started on that file, and
// whatever it prints is read into a buffer of maxHtmlSize characters.
// That buffer is the return value.
rtf2html = { arg rtf;
    var scratch;
    var buffer;
    var unrtf;

    // unrtf wants a file name, so the RTF string has to go to disk first
    scratch = File(tmpFile, "w");
    scratch.write(rtf);
    scratch.close;

    buffer = String.newClear(maxHtmlSize);

    unrtf = Pipe.new("unrtf " ++ tmpFile , "r");
    unrtf.read(buffer);
    unrtf.close;

    buffer;
};


// --- function getLogoImage(targetFile : String) : void ----------------------
//
// Fetches the SuperCollider logo and stores it as targetFile.
// This implementation lets wget download logoURL; copying the image from a
// local file would be an equally valid way to provide it.
getLogoImage = { arg targetFile;
    var command = "wget " ++ logoURL ++ " -O " ++ targetFile;

    command.unixCmd;
};


// --- function md(dir : String) : void ---------------------------------------
//
// Creates the directory dir by running "mkdir" through a pipe.
// If the directory is already there, mkdir merely prints a warning.
md = { arg dir;
    var mkdir = Pipe("mkdir " ++ dir, "r");

    // reading from the pipe is a dummy; it makes sure mkdir has finished
    // by the time close returns
    mkdir.getLine;
    mkdir.close;
};



// *** functions **************************************************************


// --- function ls(dir : String) : String[] -----------------------------------
//
// Returns the entries of directory dir, obtained by matching the
// pattern "<dir>/*" against the filesystem.
ls = { arg dir;
    var pattern = dir ++ "/*";

    pathMatch(pattern);
};


// --- function stylesheet(rel : String) : String -----------------------------
//
// Builds the <link> tag that pulls in sc3doc.css. rel is the path prefix that
// leads from the page containing the tag to the directory holding the css file.
stylesheet = { arg rel;
    var tag;

    tag = "<link href='" ++ rel;
    tag ++ "/sc3doc.css' type='text/css' rel='stylesheet'>\n";
};


// --- function encodeFilename(filename : String) : String --------------------
//
// Returns filename with every character outside the set of harmless
// characters (letters, digits and "/_-.#") replaced by "%" followed by the
// two-digit hexadecimal ASCII code of that character.
encodeFilename = { arg filename;
    var harmless = "/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-.#";
    var encoded = "";

    filename.do({ arg ch;
        if ( harmless.includes(ch), {
            encoded = encoded ++ ch;
        },{
            encoded = encoded ++ "%" ++ ch.ascii.asHexString(2);
        });
    });
    encoded;
};


// --- encodeUrl(filename : String) : String ----------------------------------
//
// Makes a name produced by encodeFilename usable inside an HTML link by
// turning every '%' into '%25'. This is not a complete URL-encoding.
encodeUrl = { arg filename;
    var encoded = "";

    filename.do({ arg ch;
        encoded = encoded ++ ch;
        if ( ch == $%, {
            encoded = encoded ++ "25";
        });
    });
    encoded;
};


// --- function htmlConvert(html : String, rel : String) : String -------------
//
// Replaces underlined words which are known to be help topics with appropriate
// links to the corresponding page.
//
// html: the page to process.
// rel:  path prefix leading from that page to the output root; it is put in
//       front of the (encodeUrl-encoded) relative filename of the linked topic.
// Returns the processed page. Underlined text that is not a key of allTopics
// is kept unchanged, including its <u>...</u> tags.
//
// The closing </u> is searched for behind the opening <u>, and an opening
// <u> that is never closed ends the processing with the remaining text left
// as it is. The page is handled in a loop, so the number of underlined
// passages does not affect the call depth.
htmlConvert = { arg html, rel;
    var result = "";
    var rest = html;
    var pos;
    var endpos;
    var topic;
    var file;
    var searching = true;

    while ( { searching }, {
        pos = rest.find("<u>");
        endpos = nil;
        if ( pos != nil, {
            // only a </u> located after the opening tag can belong to it
            endpos = rest.find("</u>", offset: pos + 3);
        });
        if ( endpos == nil, {
            // no further complete <u>...</u> pair: keep the remainder as it is
            result = result ++ rest;
            searching = false;
        },{
            topic = rest.copyRange(pos + 3, endpos - 1);
            file = allTopics.at(topic);
            if ( file != nil, {
                // known topic: the underline becomes a link
                result = result ++ rest.copyRange(0, pos - 1)
                         ++ "<a href='" ++ rel ++ encodeUrl.value(file) ++ "'>" ++ topic ++ "</a>";
            },{
                // unknown topic: keep everything up to and including </u>
                result = result ++ rest.copyRange(0, endpos + 3);
            });
            rest = rest.copyToEnd(endpos + 4);
        });
    });
    result;
};


// --- function removeTags(html : String, tagname : String) : String ----------
//
// Removes start tags named tagname (with any attributes) and the matching
// number of "</tagname>" end tags from the HTML; the text between the tags
// is kept.
//
// html:    the markup to clean.
// tagname: tag name without angle brackets, e.g. "span".
// Returns the cleaned markup.
//
// The pieces are joined with ++, so no blank is inserted where a tag was
// taken out. A start tag that lacks its closing ">" ends the processing and
// is left in the result unchanged.
removeTags = { arg html, tagname;
    var openTag = "<" ++ tagname;
    var closeTag = "</" ++ tagname ++ ">";
    var result = "";
    var rest = html;
    var pos;
    var endpos;
    var searching = true;

    while ( { searching }, {
        pos = rest.find(openTag);
        if ( pos == nil, {
            searching = false;
        },{
            // everything in front of the start tag is final
            result = result ++ rest.copyFromStart(pos - 1);
            rest = rest.copyToEnd(pos);
            endpos = rest.find(">");
            if ( endpos == nil, {
                // malformed start tag: nothing sensible can be removed
                searching = false;
            },{
                rest = rest.copyToEnd(endpos + 1);
                // now remove the next end tag; the text in front of it stays in
                // rest so that nested start tags inside it are still found
                pos = rest.find(closeTag);
                if ( pos != nil, {
                    rest = rest.copyFromStart(pos - 1) ++ rest.copyToEnd(pos + closeTag.size);
                });
            });
        });
    });
    result ++ rest;
};


// --- function countOccurrence(s : String, pattern : String) : int -----------
//
// Counts how often pattern occurs in s. After each hit the search continues
// behind that hit, so overlapping occurrences are counted once.
countOccurrence = { arg s, pattern;
    var hits = 0;
    var rest;
    var pos;

    rest = s;
    pos = rest.find(pattern);
    while ( { pos != nil }, {
        hits = hits + 1;
        rest = rest.copyToEnd(pos + pattern.size);
        pos = rest.find(pattern);
    });
    hits;
};


// --- function repeat(s : String, i : int) : String --------------------------
//
// Returns s concatenated i times.
// For i == 0 and for negative i the result is the empty string.
repeat = { arg s, i;
    var result = "";

    // Integer-do runs zero times for counts below one
    i.do({
        result = result ++ s;
    });
    result;
};


// --- function replace(s : String, search : String, repl : String) : String --
//
// Returns s with every occurrence of search substituted by repl.
// Text inserted as replacement is not searched again.
replace = { arg s, search, repl;
    var result = "";
    var rest;
    var p;

    rest = s;
    p = rest.find(search);
    while ( { p != nil }, {
        result = result ++ rest.copyFromStart(p - 1) ++ repl;
        rest = rest.copyToEnd(p + search.size);
        p = rest.find(search);
    });
    result ++ rest;
};


// *** main program ***********************************************************


// create index.html
// The page is a two-column frameset: the left frame ('toc') starts with the
// root table of contents, the right frame ('main') with Help.help.html.
// The adjacent string literals below form one string, which is then
// written to <outDir>/index.html.
md.value(outDir); // create directory
"<html><head><title>SC3 documentation</title></head>\n"
"<frameset cols='250,*'>\n"
"<frame name='toc' src='toc_root.html'>\n"
"<frame name='main' src='Help.help.html'>\n"
"</frameset>\n"
"</html>\n"
.write(outDir ++ "/index.html");


// create stylesheet
// The adjacent string literals below form the complete content of
// <outDir>/sc3doc.css, the file that the tags built by stylesheet() refer to.
// The class names match the class attributes used in the generated toc pages
// (toctitle, topnavi, filenavi, dirnavi) and in the alphabetic index
// (fullindex, fullindexNavi, indextop).
// NOTE(review): the lengths (5, 12, 30, 10) are written without a unit.
"/* Stylesheet for HTML SC3-documentation, very simple. */\n"
"\n"
"body {\n"
"  font-family: Arial,Helvetica;\n"
"}\n"
".toctitle {\n"
"  margin-bottom: 5;\n"
"}\n" 
".topnavi {\n"
"  font-size: 12;\n"
"}\n" 
".topnaviTitle {\n"
"  font-size: 12;\n"
"  font-weight: bold;\n"
"}\n" 
"table.topnavi {\n"
"  padding: 0;\n"
"  margin: 0;\n"
"}\n" 
".filenavi {\n"
"}\n" 
".dirnavi {\n"
"  font-style: italic\n"
"}\n" 
"body.fullindex {\n"
"  font-family: Arial,Helvetica;\n"
"}\n" 
"table.fullindex {\n"
"  margin-top: 30;\n"
"  margin-bottom: 10;\n"
"  padding: 0;\n"
"}\n" 
"h1.fullindex {\n"
"}\n" 
"h2.fullindex {\n"
"}\n" 
"a.fullindex {\n"
"}\n"
"p.fullindexNavi {\n"
"    font-weight: bold;\n"
"}\n"
"a.fullindexNavi {\n"
"    font-weight: normal;\n"
"}\n"
"a.indextop {\n"
"  font-size: 12;\n"
"}\n"
.write(outDir ++ "/sc3doc.css");


// create converted pages and toc for one directory, then recurse into subdirs
//
// dir: directory to process, relative to inDir: "" for the root, otherwise
//      a path starting with "/".
// Returns true if a toc page for the directory was written, false otherwise
// (nothing listed, nothing convertible found, or the toc file could not be
// opened).
//
// For every entry whose name contains ".rtf" the file is read, converted with
// rtf2html and remembered in allTopics / allFiles; the pages themselves are
// not written here. Every other entry is treated as a possible subdirectory
// and handed to a recursive call. All toc pages are written directly into
// outDir as toc_<name>.html.
convert = { arg dir; // (dir is either "" (root) or starts with "/" (although relative))
    var dirname;
    var dirlabel = "";
    var outdirname;
    var list;
    var infilename;
    var infile;
    var rtf;
    var html;
    var relfilename;
    var filenameNoSuffix;
    var tocfilename;
    var extpos;
    var fn;
    var toc;
    var tocDirs;
    var tocCount = 0;
    var tocname;
    var topic;
    var d;
    var cmd;
    var r;

    dirname = inDir ++ dir;
    if ( dir != "", {
        dirlabel = dir.copyToEnd(1); // dir without its leading "/"
    });
//list = Array.with("A", "B", "C");
    list = ls.value(dirname);

    if ( (list.size > 0), {
        outdirname = outDir ++ dir;

        // prepare toc
        toc = "<html><head>" ++ stylesheet.value(".") ++ "</head>"
              "<body><h5 class='toctitle'>SC3 documentation</h5>"
              "<table class='topnavi' width='100%'><tr>"
              "<td class='topnavi'><a href='full_index.html' target='main'>Index</a></td>"
              "<td align='center'><h5 class='toctitle'>" ++ dirlabel ++ "</h5></td>";

        // every toc except the root one gets a link back to the root toc
        if ( dir != "", {
            toc = toc ++ "<td align='right' class='topnavi'><a href='toc_root.html'>Top</a></td>";
    	},{ // else
            //nop
    	});
        toc = toc ++ "</tr></table><p>";
        tocDirs = ""; // links to sub-directory tocs, appended after the file links

        postln("processing directory " ++ dirname ++ " -> " ++ outdirname);
        md.value(outdirname);

        list.do({ arg filename;
            if (filename==nil, {
                filename = "";
            });
            // reduce the listed path to its last component and escape blanks and parentheses in it
            filename = PathName(filename).fileName.escapeChar($ ).escapeChar($().escapeChar($));
            if ( ((filename!="") && (filename!=outDir)), {

                //fn = filename.splitext;
                extpos = filename.find(".rtf");

                if ( (extpos != nil), { // .rtf
                    // copyRange is inclusive, so the "." in front of "rtf" is kept
                    filenameNoSuffix = filename.copyRange(0, extpos);

                    // convert rtf to html
                    infilename = dirname ++ "/" ++ filename;
                    infile = File(infilename, "r");
                    if ( infile.isOpen , {
                        rtf = String.readNew(infile);
                        infile.close;

                        // workaround: replace all curly braces with special tokens (would get eaten by unrtf)
                        rtf = replace.value( replace.value( rtf, "\\{" , "#CB"++"OPEN#") , "\\}", "#CB"++"CLOSE#" ); // (split string constants to keep this code input-compatible with itself)

                        // convert string from rtf to html
                        html = rtf2html.value(rtf);

                        // workaround: replace back all curly braces 
                        html = replace.value( replace.value( html, "#CB"++"OPEN#", "{") , "#CB"++"CLOSE#", "}" );

                        // topic name: file name without the trailing "." ...
                        topic = filenameNoSuffix.copyRange(0, filenameNoSuffix.size-2);

                        // ... and without a trailing ".help"
                        if ( topic.endsWith(".help") , {
                            topic = topic.copyFromStart(topic.size-6);
                        });

                        relfilename = encodeFilename.value( dir ++ "/" ++ filenameNoSuffix ++ "html" );

                        toc = toc + "<a class='filenavi' href='." ++ encodeUrl.value(relfilename) ++"' target='main'>" ++ topic ++ "</a><br>";
                        // remember names for later substitution of underlines with links

                        allTopics = allTopics.put(topic, relfilename); // assoc. topic - > rel. filename
                        allFiles = allFiles.put(relfilename, html); // assoc. rel. filename -> html-content
                        tocCount = tocCount + 1;
					});
                },{ // else
                        // not an .rtf file: try it as a sub-directory, unless it is a CVS directory,
                        // carries the name of the current directory, or is the output directory
                        if ( ((filename != "CVS") && (filename != PathName(dir).fileName) && (PathName( inDir ++ "/" ++ dir ++ "/" ++ filename ).fullPath != PathName( outDir ).fullPath)) , {
                    r = convert.value(dir ++ "/" ++ filename); // recursion into subdir
                        // link the sub-directory only if a toc page was written for it
                        if ( (r == true), {
                            tocDirs = tocDirs ++ "<a class='dirnavi' href='toc_" ++ filename ++ ".html'>" ++ filename ++ "</a><br>\n";
                            tocCount = tocCount + 1;
                        });
                    });
                });
            });
        });

        if (tocCount != 0, {
            toc = toc + "\n</p><p>\n" ++ tocDirs ++ "</p>"
                        "<br><br><br><br><br><center>"
                        "<img src='img/title.gif' border='0' width='195' height='34'><!--orig:389,67-->"
                        "</center></body></html>";
            if ( dir!="", {
                tocname = PathName(dir).fileName;
            },{ // else
                tocname = "root";
            });
            tocfilename = outDir ++ "/" ++ "toc_" ++ tocname ++ ".html";
            // NOTE(review): f is not declared in this function or among the script
            // variables shown; presumably it resolves to the interpreter variable f.
            f = File(tocfilename, "w");
            if ( f.isOpen, {
                f.write(toc);
                f.close;
                true;
            },{
                false;
            });
        },{ // else
            // was not a directory, or empty
            "ignoring ".post; dir.postln;
            false;
        });
    },{
        false;
    });
};
convert.value(""); // start with root dir and continue recursively


// Post-process every converted page and write it to disk.
// For each entry of allFiles (relative filename -> html) the stylesheet tag
// is inserted behind "<head>", the <span> tags produced by unrtf are taken
// out, underlined topic names are turned into links where the topic is
// known, and the result is stored below outDir.
allFiles.keysValuesDo({ arg file, content;
    var page = content;
    var headPos;
    var target = outDir ++ file;
    var sink;

    // relative path prefix leading from this page back to the output root
    depth = countOccurrence.value(file, "/");
    rel = repeat.value("./.", depth-1) ++ ".";

    // stylesheet reference goes right behind the head start tag
    headPos = page.find("<head>");
    if ( headPos != nil, {
        page = page.copyFromStart(headPos+6) ++ "\n" ++ stylesheet.value(rel) ++ "\n" ++ page.copyToEnd(headPos+7);
    });

    // strip the "<span ...>" clutter first, then resolve the topic links
    page = htmlConvert.value(removeTags.value(page, "span"), rel);
    page = "<!-- generated by scdoc2html -->\n" ++ page;

    sink = File(target, "w");
    if ( sink.isOpen.not , {
        ("could not write file '"++target++"', ignoring").postln;
    },{
        sink.write(page);
        sink.close;
    });
});


// generate alphabetic index page
// The page consists of a navigation line with the letters A-Z (letters that
// have entries are links), followed by all topics in sorted order. Each time
// the upper-cased first character changes (and is not below 'A') a heading
// with a named anchor and a "top" link is started.
// The named anchors are closed right away so that they do not wrap the
// links that follow them.
html = "<html><head>" ++ stylesheet.value(".") ++ "</head>"
       "<body class='fullindex'><a name='top'></a><h1 class='fullindex'>Alphabetic Index</h1>";
sortedTopics = allTopics.keys.asList.sort;
prevFirstChar = 0;
firstChar = 0;
usedChars = List.new;
list = "";
sortedTopics.do({ arg topic;
    var file;
    file = allTopics.at(topic);
    // a topic with an empty name has no first character and starts no heading
    if ( topic.size > 0, {
        firstChar = topic[0].toUpper;
        if ((firstChar != prevFirstChar) && (firstChar >= $A), {
            list = list + "<a name='" ++ firstChar ++ "'></a><table width='100%' class='fullindex'><tr><td><h2 class='fullindex'>" ++ firstChar ++ "</h2></td><td align='right'><a href='#top' class='indextop'>top</a></td></tr></table>\n";
            prevFirstChar = firstChar;
            usedChars.add(firstChar);
        });
    });
    list = list ++ "<a class='fullindex' href='." ++ encodeUrl.value(file) ++ "'>" ++ topic ++ "</a><br>\n";
});
// navigation line: every letter is shown, only those with entries are linked
html = html + "<p class='fullindexNavi'>";
"ABCDEFGHIJKLMNOPQRSTUVWXYZ".do({ arg ch;
    html = html ++ " &nbsp; &nbsp; ";
    if ( usedChars.includes(ch), {
        html = html + "<a href='#" ++ ch ++ "' class='fullindexNavi'>" ++ ch ++ "</a>";
    },{ // else
        html = html + ch;
    });
});
html = html + "</p>";
html = html 
       ++ list 
       ++ "<br><br><center><a href='http://supercollider.sourceforge.net/' target='_blank'>"
       "<img src='img/title.gif' border='0'></a></center></body></html>";
html.write(outDir++"/full_index.html");


// get logo from http://supercollider.sourceforge.net/images/title.gif
// (comment this out if you do not have wget or are offline, the image can easily be copied to its place manually)
// The toc pages and the alphabetic index refer to the image as img/title.gif,
// so it is stored in an img directory below outDir.
md.value( outDir++"/img" );
getLogoImage.value(outDir ++ "/img/title.gif");

"All done.".postln;
References

Link to this Page

Code Pool last edited on 12 May 2013 at 11:30 pm by g225142013.adsl.alicedsl.de