#!/usr/bin/python
#
"""HTMLPP - Generate HTML like as CPP.
Synopsis: htmlpp [-f] [-n] [-I dir]... foo.txt...
This script generates foo.html when foo.txt is newer than it.
Options:
-f: force generation (rebuild even if foo.html is up to date)
-n: do not validate html
-I dir: specify include dir (for template files)
Example:
htmlpp index.txt
htmlpp -f -n index.txt
Contents of file:
>command arguments
HTML
Commands:
>include "filename" [``localvars'']
>define var value
>undef var
>if exp
>ifdef var
>ifndef var
>else [//comment]
>endif [//comment]
>csvtable "filename" [[start]..[end]] [``escape'']
>textpre "filename"
># comment_string
<$NAME> in HTML is a variable.
``>>>command'' is same as ``>command''.
"""
#
# Copyright (c) 2005-2007 Satoshi Fukutomi .
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
import csv
import os
import re
import sys
import cgi
import xml.dom.minidom
import xml.parsers.expat
#
# Configuration
#
# Default directories searched for template files; main() appends these
# after any directories given with -I.
include_dir = [
    '.',
    './template',
    '../template',
]
# Parsed CSV rows, keyed by the resolved CSV file path, so a file used by
# several >csvtable commands is only read once.
csv_cache = dict()
class HTML:
    """Partial HTML produced from one preprocessor source file.

    An instance wraps a single text file (the top-level source or an
    included template), the variables defined so far, the list of files
    already visited (used for include-loop detection) and the directories
    searched for included files.
    """

    # Patterns shared by the methods below, compiled once.
    _RE_VAR_REF = re.compile(r"<\$([A-Za-z_][A-Za-z_0-9]*)>")
    _RE_IDENT = re.compile(r"^([A-Za-z_][A-Za-z_0-9]*)$")
    _RE_QUOTED = re.compile(r"^['\"](.*)['\"]$")
    # One quoted token: the opening quote does not reappear before the end.
    _RE_ONE_LITERAL = re.compile(r"^(['\"])(?:(?!\1).)*\1$")
    _RE_INT = re.compile(r"^[-+]?\d+$")
    _RE_BINARY = re.compile(r"(\S+)\s*([=!<>]+)\s*(\S+)")
    # The argument group is non-greedy so that a trailing "\r" of a CRLF
    # line ends up in the line-ending part, not in the argument.
    _RE_COMMAND = re.compile(r"^>+([a-z#]+)\s+(.*?)[\r\n]*$")
    _IF_COMMANDS = ('if', 'ifdef', 'ifndef')

    def __init__(self, txt, incdir=None, var=None, loop=None):
        """Create partial HTML for the text file *txt*.

        incdir: directories searched for included files; the directory
                of *txt* itself is always searched first.
        var:    initial variables (copied, the caller's dict is untouched).
        loop:   paths already visited (copied).
        """
        self.txt = txt
        self.var = {}
        self.var.update(var or {})
        self.loop = list(loop or [])
        self.incdir = [os.path.dirname(txt)] + list(incdir or [])

    @staticmethod
    def _escape(text):
        """Escape &, < and > for HTML.

        Same replacements as cgi.escape(text) without quote escaping;
        done locally because cgi.escape does not exist on current Python 3.
        """
        text = text.replace("&", "&amp;")
        text = text.replace("<", "&lt;")
        return text.replace(">", "&gt;")

    def eval_var(self, line):
        """Expand variables in a line of HTML.

        Every <$NAME> is replaced by the value of NAME, or by an empty
        string when NAME is not defined.  Substituted values are not
        scanned again.
        """
        return self._RE_VAR_REF.sub(
            lambda m: self.var.get(m.group(1), ""), line)

    def eval_exp(self, exp):
        """Evaluate the expression of an >if command.

        Exp:
            operand
            operand[0] operator operand[1]
        operand: integer, quoted string or variable name
        operator: ==, !=, <, >, <=, or >=

        A lone variable is true when it is defined and is neither "0" nor
        empty; a lone quoted string is true unless it is ""; a lone integer
        is true unless it is 0.  In a comparison an undefined variable
        counts as "0", and operands are compared as integers only when both
        look like integers.  Exits with a syntax error when the expression
        cannot be parsed; an unknown operator evaluates to False.
        """
        m_exp = self._RE_BINARY.search(exp)
        m_var = self._RE_IDENT.search(exp)
        if m_var is not None:
            return self.var.get(m_var.group(1), "") not in ("0", "")
        # The loose quote pattern also matches '"a" == "b"' as a whole, so
        # when the text parses as a comparison it is only a bare literal if
        # it really is one single quoted token.
        if self._RE_QUOTED.search(exp) and \
                (m_exp is None or self._RE_ONE_LITERAL.search(exp)):
            return exp != '""'
        if self._RE_INT.search(exp):
            return int(exp) != 0
        if m_exp is None:
            sys.exit("syntax error(exp)\n")

        left, operator, right = m_exp.groups()
        operand = [left, right]
        isint = [False, False]
        for i in (0, 1):
            quote = self._RE_QUOTED.search(operand[i])
            if quote is not None:
                operand[i] = quote.group(1)
            elif self._RE_IDENT.search(operand[i]):
                operand[i] = self.var.get(operand[i], "0")
                isint[i] = self._RE_INT.search(operand[i]) is not None
            elif self._RE_INT.search(operand[i]):
                isint[i] = True
            else:
                sys.exit("syntax error(exp): %s\n" % exp)
        if isint[0] and isint[1]:
            operand = [int(operand[0]), int(operand[1])]

        if operator == "==":
            return operand[0] == operand[1]
        elif operator == "!=":
            return operand[0] != operand[1]
        elif operator == "<":
            return operand[0] < operand[1]
        elif operator == ">":
            return operand[0] > operand[1]
        elif operator == "<=":
            return operand[0] <= operand[1]
        elif operator == ">=":
            return operand[0] >= operand[1]
        # Unknown operator (e.g. "="): treated as a false condition.
        return False

    def do_include(self, fileobj, arg):
        """Return an HTML object for the header file (template) in *arg*.

        The new object starts with copies of this object's variables and
        visited-file list.  Exits when *arg* is not a quoted file name or
        the file cannot be used (not found, or include loop).
        """
        quote = re.search(r"^['\"<](.*)['\">]", arg)
        if quote is None:
            sys.exit("syntax error(include)")
        header = self.header_file(quote.group(1))
        if not header:
            sys.exit('%s: file not found' % quote.group(1))
        return HTML(header, incdir=self.incdir, var=self.var, loop=self.loop)

    def do_define(self, arg):
        """Define a variable: ``name value``; <$NAME> in value is expanded."""
        exp = re.search(r"^([A-Za-z_][A-Za-z_0-9]*)\s+(.*)", arg)
        if exp is None:
            sys.exit("syntax error(define)\n")
        self.var[exp.group(1)] = self.eval_var(exp.group(2))

    def do_undef(self, arg):
        """Undefine a variable; undefining an unknown name is not an error."""
        exp = re.search(r"^([A-Za-z_][A-Za-z_0-9]*)", arg)
        if exp is None:
            sys.exit("syntax error(undef)\n")
        self.var.pop(exp.group(1), None)

    def do_if(self, fileobj, arg, mode=""):
        """Process a conditional block and return the HTML it produces.

            >if exp
            >ifdef var
            >ifndef var
            >else
            >endif

        *mode* is the command name ("if", "ifdef" or "ifndef"); the block
        body is read from *fileobj* up to the matching >endif.
        """
        if mode == "if":
            flag = self.eval_exp(arg)
        elif mode == "ifdef":
            flag = arg in self.var
        elif mode == "ifndef":
            flag = arg not in self.var
        else:
            sys.exit("syntax error(exp): " + mode + "\n")
        return self.read(fileobj, ignore=not flag)

    def do_csvtable(self, arg):
        """Convert CSV to HTML table rows.

            >csvtable "filename" [[start]..[end]] [``escape'']
            >csvtable "filename" row_index [``escape'']

        start/end/row_index are 1-based row numbers, given as integers or
        as names of variables holding integers.  With ``escape`` the cell
        text is HTML-escaped and newlines inside a cell become line breaks.
        Parsed files are kept in the module-level csv_cache.
        """
        m_quote = re.search(r"^[\"'<](.*)[\"'>]\s*", arg)
        if m_quote is None:
            sys.exit("syntax error(csvtable)\n")
        name = self.header_file(m_quote.group(1), False)
        if name in csv_cache:
            rows = csv_cache[name]
        else:
            with open(name) as csvfile:
                rows = list(csv.reader(csvfile))
            csv_cache[name] = rows

        rest = arg[m_quote.end():]
        ran = [0, None]
        m_range = re.search(r"([^.\s]*)\.\.([^.\s]*)", rest)
        m_index = re.search(r"([^.\s]*)", rest)
        tmp = []
        if m_range is not None:
            tmp = m_range.groups()
        elif m_index is not None:
            # A single index selects exactly that row.
            tmp = (m_index.group(1), m_index.group(1))
        if tmp:
            for i in (0, 1):
                if tmp[i] == "":
                    pass
                elif self._RE_INT.search(tmp[i]):
                    ran[i] = int(tmp[i])
                elif (tmp[i] in self.var) and \
                        self._RE_INT.search(self.var[tmp[i]]):
                    ran[i] = int(self.var[tmp[i]])
        (start, end) = ran
        # Slicing builds a new list, so the cached rows stay intact.
        if (start != 0) and (end is not None):
            rows = rows[start-1:end]
        elif start != 0:
            rows = rows[start-1:]
        elif end is not None:
            rows = rows[0:end]

        escape = bool(re.search(r"(.*\.\.\S*)?\s*escape", rest))

        # NOTE(review): the markup in the literals below was lost from the
        # copy of the source under review (the literals were unterminated);
        # <tr>/<td>/<br /> is a reconstruction -- confirm against upstream.
        parts = []
        for row in rows:
            parts.append(" <tr>\n")
            for cell in row:
                if escape:
                    cell = self._escape(cell).replace("\n", "<br />")
                parts.append("  <td>%s</td>\n" % cell)
            parts.append(" </tr>\n")
        return "".join(parts)

    def do_textpre(self, arg):
        """Convert a text file to an HTML <pre>...</pre> element.

            >textpre "filename"

        The text is HTML-escaped and trailing line breaks are removed.
        """
        m_quote = re.search(r"^[\"'<](.*)[\"'>]\s*", arg)
        if m_quote is None:
            sys.exit("syntax error(textpre)\n")
        name = self.header_file(m_quote.group(1), False)
        with open(name) as text:
            buf = "".join([self._escape(line) for line in text])
        buf = buf.rstrip("\r\n")
        # NOTE(review): the <pre> tags were lost from the copy of the source
        # under review; reconstructed from the docstring -- confirm.
        return "<pre>%s</pre>\n" % buf

    def _skip_block(self, fileobj):
        """Consume an ignored conditional block without evaluating it.

        Called just after a >if/>ifdef/>ifndef line has been read inside an
        ignored region; reads up to and including the matching >endif.
        Nothing is evaluated, so commands in the skipped block have no
        side effects.
        """
        depth = 1
        for line in fileobj:
            command = self._RE_COMMAND.search(line)
            if command is None:
                continue
            cmd = command.group(1)
            if cmd in self._IF_COMMANDS:
                depth += 1
            elif cmd == "endif":
                depth -= 1
                if depth == 0:
                    return

    def read(self, fileobj, ignore=False):
        """Read text of header file (template) and return the HTML.

        Synopsis:
            >command arguments
            HTML
        Commands:
            >include "filename" [``localvars'']
            >define var value
            >undef var
            >if exp
            >ifdef var
            >ifndef var
            >else [//comment]
            >endif [//comment]
            >csvtable "filename" [[start]..[end]] [``escape'']
            >textpre "filename"
            ># comment_string
        <$NAME> in HTML is a variable.

        Reading stops at >endif (the file is left open for the caller that
        is processing the enclosing block) or at end of file (the file is
        then closed).  While *ignore* is true, lines produce no output and
        commands are not executed; >else toggles *ignore*.
        """
        parts = []
        for line in fileobj:
            command = self._RE_COMMAND.search(line)
            if command is None:
                if not ignore:
                    parts.append(self.eval_var(line))
                continue
            cmd, arg = command.groups()
            if cmd == "endif":
                break
            elif cmd == "else":
                ignore = not ignore
            elif ignore:
                # A nested conditional in an ignored region must be skipped
                # as a whole; evaluating it would run its >define/>include.
                if cmd in self._IF_COMMANDS:
                    self._skip_block(fileobj)
            elif cmd == "include":
                html = self.do_include(fileobj, arg)
                parts.append(html.read_file())
                # Unless "localvars" is given, variables defined by the
                # included file become visible here.
                if not re.search("^['\"<](.*)['\">].*localvars", arg):
                    self.var = html.var
            elif cmd == "define":
                self.do_define(arg)
            elif cmd == "undef":
                self.do_undef(arg)
            elif cmd in self._IF_COMMANDS:
                parts.append(self.do_if(fileobj, arg, mode=cmd))
            elif cmd == "csvtable":
                parts.append(self.do_csvtable(arg))
            elif cmd == "textpre":
                parts.append(self.do_textpre(arg))
            elif cmd == "#":
                pass
            else:
                sys.exit("syntax error(command)\n")
        else:
            fileobj.close()
        return "".join(parts)

    def read_file(self):
        """Process this object's own text file and return the HTML."""
        # read() closes the file at end of input; the with-block also
        # covers the paths where it returns or exits early.
        with open(self.txt) as fileobj:
            return self.read(fileobj)

    def header_file(self, head, check_loop=True):
        """Search header file (template) and return its path.

        The file is looked up in each directory of self.incdir in order.
        Exits when it is not found.  When *check_loop* is true and the file
        has already been visited, a message is printed and '' is returned.
        Otherwise the path is recorded as visited and returned.
        """
        path = ""
        for directory in self.incdir:
            candidate = os.path.join(directory, head)
            if os.path.isfile(candidate):
                path = candidate
                break
        if path == "":
            sys.exit(" %s: not found\n" % head)
        elif check_loop and (path in self.loop):
            print(' %s: loop' % path)
            return ''
        else:
            self.loop.append(path)
            return path
# End of HTML
def generate_html(txt, html, incdir, force=False):
    """Generate the HTML file *html* from the source file *txt*.

    txt:    path of the preprocessor source file.
    html:   path of the HTML file to write.
    incdir: list of directories searched for included template files.
    force:  regenerate even when *html* is newer than *txt*.

    Returns the generated HTML text, or None when nothing was generated
    because *html* is newer than *txt* and *force* is false.
    """
    date_txt = os.path.getmtime(txt)
    # A missing output file counts as infinitely old.
    date_html = os.path.getmtime(html) if os.path.isfile(html) else 0
    if (not force) and (date_txt < date_html):
        return None
    htmldata = HTML(txt, incdir=incdir).read_file()
    # The with-block closes the output even when the write fails.
    with open(html, "w") as f_html:
        f_html.write(htmldata)
    return htmldata
def validate(htmldata):
    '''Validate HTML.

    Now it only checks valid XML or not.

    When the document starts with an XML declaration that names an
    encoding, the declaration is rewritten to utf-8 and a byte-string body
    is transcoded from the declared encoding to UTF-8 before parsing.

    Returns True when the document parses as XML.  Returns False, after
    printing the error, when it does not parse, when the body cannot be
    decoded, or when the declared encoding is unknown.
    '''
    import codecs  # local: only needed to check the declared encoding

    try:
        found = re.search(r'^\s*<\?xml[^<>]*\?>', htmldata)
        if found:
            xmlhead = htmldata[:found.end()]
            xmlbody = htmldata[found.end():]
            found = re.search(r'encoding=["\'](.*?)["\']', xmlhead)
            if found:
                encoding = found.group(1)
                xmlhead = '%sencoding="%s"%s' % \
                    (xmlhead[:found.start()],
                     'utf-8',
                     xmlhead[found.end():])
                if isinstance(xmlbody, bytes):
                    # Byte string (the normal case on Python 2).
                    xmlbody = xmlbody.decode(encoding).encode('utf-8')
                else:
                    # Already-decoded text needs no transcoding, but an
                    # unknown encoding name must still be reported.
                    codecs.lookup(encoding)
                htmldata = xmlhead + xmlbody
        xml.dom.minidom.parseString(htmldata)
        return True
    except (xml.parsers.expat.ExpatError,
            UnicodeDecodeError, LookupError) as e:
        print(e)
        return False
def main(argv=None):
    """Command-line entry point.

    Synopsis: htmlpp [-f] [-n] [-I dir]... foo.txt...

    argv: argument list without the program name; defaults to
          sys.argv[1:].  The list passed in is not modified.

    For each source file the output name is the source name with its
    extension replaced by ".html".  Prints "<file>: OK", "<file>: pass"
    (validation disabled with -n) or "<file>: stop" for each file that was
    generated, and exits with status 1 at the first file that fails
    validation.  Exits with the usage message when no file is given or
    when -I has no directory.
    """
    usage = "htmlpp [-f] [-n] [-I dir]... foo.txt..."
    args = list(sys.argv[1:] if argv is None else argv)
    txtfile = []
    idir = []
    flag_force = False
    flag_validate = True
    while args:
        opt = args.pop(0)
        if opt == "--":
            # Everything after "--" is a file name; stop option parsing so
            # the names are not parsed a second time.
            txtfile += args
            break
        elif opt == "-I":
            if not args:
                sys.exit(usage)
            idir.append(args.pop(0))
        elif opt == "-f":
            flag_force = True
        elif opt == "-n":
            flag_validate = False
        else:
            txtfile.append(opt)
    if not txtfile:
        sys.exit(usage)
    incdir = idir + include_dir
    for txt in txtfile:
        # splitext only touches the extension of the last path component,
        # so "README" or "./dir/file" cannot collapse to the input name or
        # to ".html".
        html = os.path.splitext(txt)[0] + ".html"
        if html == txt:
            # Writing the output would overwrite the source file.
            sys.exit("%s: input and output are the same file" % txt)
        htmldata = generate_html(txt, html, incdir, force=flag_force)
        if htmldata is not None:
            sys.stdout.write('%s: ' % txt)
            if not flag_validate:
                print('pass')
            elif validate(htmldata):
                print('OK')
            else:
                print('stop')
                sys.exit(1)
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()