Python HTML / XML
HTML fra en enkel tekstfil
Programmet leser inn en enkel tekstfil og plasserer innholdet i en pre-tag på en minimalistisk HTML-side. HTML-siden ligger som en tekst i programmet.
Input fil:
frej1.txt
Output fil:
frej1.html
Python koden:
"""
This is a module that reads a text file, wraps it in a very simple
HTML skeleton and produces an HTML page.
"""
import os
from xml.sax.saxutils import escape

#------------------------
# HTML-fragments

# Skeleton of the generated page; the single %s receives the file text.
HTML_PAGE = """<html>
<head>
<title>a page</title>
</head>
<body>
<h1> Farbror Frej:</h1>
<pre>
%s
</pre>
</body>
</html>
"""

#-----------------------
# filenames
infile = 'c:\\web\\dw\\pyex\\frej1.txt'
outfile = os.path.splitext(infile)[0] + '.html'


#------------------------
# Read / write text files
def getTextFile(filename):
    """Return the whole content of filename, or None if it cannot be read.

    A failure is reported on standard output instead of being raised.
    """
    try:
        # 'with' closes the file even when read() fails.
        with open(filename, 'r') as source:
            return source.read()
    except (IOError, OSError, UnicodeError):
        print('Trouble reading: ' + filename)
        return None


def storeTextFile(filename, txt):
    """Write txt to filename, replacing any existing content.

    A failure is reported on standard output instead of being raised.
    """
    try:
        with open(filename, 'w') as target:
            target.write(txt)
    except (IOError, OSError, UnicodeError):
        print('Trouble writing to: ' + filename)


#------------------------
# do the job
def doit(source=None, target=None):
    """Wrap the text of one file in HTML_PAGE and store the result.

    source -- text file to read; defaults to the module-level infile.
    target -- HTML file to write; defaults to the module-level outfile
              when source is omitted, otherwise to source with its
              extension replaced by '.html'.

    Nothing is written when the source cannot be read.
    """
    if source is None:
        source = infile
        if target is None:
            target = outfile
    elif target is None:
        target = os.path.splitext(source)[0] + '.html'

    txt = getTextFile(source)
    if txt is not None:
        # Escape &, < and > so the text is shown literally inside <pre>
        # instead of being interpreted as markup.
        storeTextFile(target, HTML_PAGE % escape(txt))


doit()
Transformasjon: CSV-XML
Modulen boktoxml gjør noen av de grunnleggende operasjonene som inngår i å lage en XML-fil fra en kommaseparert fil:
- åpner og leser en fil på en sivilisert måte
- splitter innholdet opp i linjer, og forkaster meningsløse linjer
- splitter hver linje i kommaseparerte deler
- bruker delene til å produsere XML-elementer
- skriver alt tilbake til fil på en sivilisert måte
Input fil: bokliste.txt
Output fil: bokliste.xml
(hvis nettleseren din tåler det)
Pythonkoden:
"""
Transform a comma-separated (CSV) file to XML.

Input data as lines:
title,author,publisher,year,isbn,pages,course,category,comment
"""
import os
from xml.sax.saxutils import escape

#----------------------------
# XML-skeletons

# a template for a xml-fragment (one book)
XMLFragment = """
<book isbn="%s" pages="%s">
<title>%s</title>
<course>%s</course>
<category>%s</category>
<author>%s</author>
<publisher>%s</publisher>
<year>%s</year>
<comment>%s</comment>
</book>
"""

# a template for a complete xml-file
XMLFile = """<?xml version="1.0" encoding="ISO-8859-1"?>
<booklist>
%s
</booklist>
"""


#------------------------
# Read / write text files
def getTextFile(filename):
    """Return the whole content of filename, or None if it cannot be read.

    A failure is reported on standard output instead of being raised.
    """
    try:
        # 'with' closes the file even when read() fails.
        with open(filename, 'r') as source:
            return source.read()
    except (IOError, OSError, UnicodeError):
        print('Trouble reading: ' + filename)
        return None


def storeTextFile(filename, txt):
    """Write txt to filename, replacing any existing content.

    A failure is reported on standard output instead of being raised.
    """
    try:
        with open(filename, 'w') as target:
            target.write(txt)
    except (IOError, OSError, UnicodeError):
        print('Trouble writing to: ' + filename)


#--------------------------------
# produce and save XML
def _xmlValue(raw):
    """Return raw trimmed and escaped for use as XML text or attribute."""
    # '"' is escaped as well because isbn and pages end up inside
    # double-quoted attributes.
    return escape(raw.strip(), {'"': '&quot;'})


def makeXML(filename='c:\\web\\dw\\pyex\\bokliste.txt'):
    """Convert the book list in filename to an XML file next to it.

    The output gets the same name with the extension replaced by '.xml'.
    Blank lines, lines shorter than two characters and lines starting
    with '//' are skipped. Lines that do not have exactly nine
    comma-separated fields are reported and skipped. Nothing is written
    when the input is missing or empty.
    """
    text = getTextFile(filename)
    # getTextFile returns None on failure; treat that like an empty file.
    if not text:
        return

    fragments = []
    for line in text.split('\n'):
        # Keep the stripped line: strip() does not modify in place.
        line = line.strip()
        if len(line) < 2 or line.startswith('//'):
            continue
        pcs = line.split(',')
        if len(pcs) != 9:
            print('ignore: ' + line)
            continue
        (title, author, publisher, year, isbn,
         pages, course, category, comment) = [_xmlValue(p) for p in pcs]
        fragments.append(XMLFragment % (isbn, pages, title, course, category,
                                        author, publisher, year, comment))

    # splitext never maps the input name onto itself, so the source file
    # cannot be overwritten when it does not end in '.txt'.
    storeTextFile(os.path.splitext(filename)[0] + '.xml',
                  XMLFile % ''.join(fragments))


makeXML()
Transformasjon: CSV-HTML
Modulen boktohtml tar den samme tekst-fila som i eksempelet ovenfor og transformerer den til en htmlfil som viser en liste av bøker med forfatter
Input fil: bokliste.txt
Output fil: bokliste.html
Python koden:
"""
Transform a comma-separated (CSV) file to HTML.

Input data as lines:
title,author,publisher,year,isbn,pages,course,category,comment
"""
import os
from xml.sax.saxutils import escape

#----------------------------
# HTML-skeletons

# a template for a complete html-file
HTMLFile = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta HTTP-EQUIV="Content-Type" content="text/html; charset=iso-8859-1">
<title>books</title>
<style>
li{margin-top:10;}
.fat{font-weight:bold; color:red}
</style>
</head>
<body>
<h1>Bokliste</h1>
<ul>
%s
</ul>
</body>
</html>
"""

# a template for a html-fragment, one book (title and author)
HTMLFragment = """
<li>
<div class="fat">%s</div>
<div>%s</div>
</li>
"""


#------------------------
# Read / write text files
def getTextFile(filename):
    """Return the whole content of filename, or None if it cannot be read.

    A failure is reported on standard output instead of being raised.
    """
    try:
        # 'with' closes the file even when read() fails.
        with open(filename, 'r') as source:
            return source.read()
    except (IOError, OSError, UnicodeError):
        print('Trouble reading: ' + filename)
        return None


def storeTextFile(filename, txt):
    """Write txt to filename, replacing any existing content.

    A failure is reported on standard output instead of being raised.
    """
    try:
        with open(filename, 'w') as target:
            target.write(txt)
    except (IOError, OSError, UnicodeError):
        print('Trouble writing to: ' + filename)


#--------------------------------
# produce and save HTML
def makeHTML(filename='c:\\web\\dw\\pyex\\bokliste.txt'):
    """Convert the book list in filename to an HTML list of title/author.

    The output gets the same name with the extension replaced by '.html'.
    Blank lines, lines shorter than two characters and lines starting
    with '//' are skipped. Lines that do not have exactly nine
    comma-separated fields are reported and skipped. Nothing is written
    when the input is missing or empty.
    """
    text = getTextFile(filename)
    if not text:
        return

    items = []
    for line in text.split('\n'):
        # Keep the stripped line: strip() does not modify in place.
        line = line.strip()
        # drop too short lines and comment lines
        if len(line) < 2 or line.startswith('//'):
            continue
        pcs = line.split(',')
        # acceptable ?
        if len(pcs) != 9:
            print('ignoring: ' + line)
            continue
        # Escape &, < and > so field text cannot break the markup.
        title = escape(pcs[0].strip())
        author = escape(pcs[1].strip())
        items.append(HTMLFragment % (title, author))

    # splitext never maps the input name onto itself, so the source file
    # cannot be overwritten when it does not end in '.txt'.
    storeTextFile(os.path.splitext(filename)[0] + '.html',
                  HTMLFile % ''.join(items))


makeHTML()
Transformasjon: XML-HTML
Modulen bokxmltohtml tar xml-fila og transformerer den til html.
Input fil: bokliste.xml
(hvis nettleseren din tåler det)
Output fil: boklistefromxml.html
Pythonkode:
"""
Produce a HTML-file from a XML-file

Input:
<booklist>
  <book isbn="txt" pages="txt">
    <title>txt</title>
    <course>txt</course>
    <category>txt</category>
    <author>txt</author>
    <publisher>txt</publisher>
    <year>txt</year>
    <comment>txt</comment>
  </book>
  ...
</booklist>

Usage: produceHTML(filename)
"""
import io
import os
import xml.dom.minidom
from xml.sax.saxutils import escape

#--------------------------------------
# skeleton for HTML-file
HTMLFile = u"""<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<META http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<title>bokliste</title>
<!-- produced by python -->
</head>
<body>
<h2>Litteraturliste</h2>
%s
</body>
</html>
"""

# Child elements of <book> that are rendered as one <div> each, in order.
_DIV_TAGS = ('course', 'category', 'author', 'publisher', 'year', 'comment')


#------------------------
# Read / write text files
def getTextFile(filename):
    """Return the raw bytes of filename, or None if it cannot be read.

    The content is left undecoded so the XML parser can honour the
    encoding named in the XML declaration. A failure is reported on
    standard output instead of being raised.
    """
    try:
        with open(filename, 'rb') as source:
            return source.read()
    except (IOError, OSError):
        print('Trouble reading: ' + filename)
        return None


def storeTextFile(filename, txt):
    """Write the text txt to filename encoded as ISO-8859-1.

    Characters that ISO-8859-1 cannot represent are written as numeric
    character references, which are valid in both HTML and XML.
    A failure is reported on standard output instead of being raised.
    """
    try:
        # Encode once here, at the file boundary, rather than per node.
        with io.open(filename, 'w', encoding='ISO-8859-1',
                     errors='xmlcharrefreplace') as target:
            target.write(txt)
    except (IOError, OSError, UnicodeError):
        print('Trouble writing to: ' + filename)


#------------------------------------
# collect all text in a node
def getText(nodelist):
    """Return the concatenated data of all text nodes in nodelist."""
    return u''.join([node.data for node in nodelist
                     if node.nodeType == node.TEXT_NODE])


def getStrippedText(nodelist):
    """Like getText, but with each text node stripped of surrounding
    whitespace before it is appended."""
    return u''.join([node.data.strip() for node in nodelist
                     if node.nodeType == node.TEXT_NODE])


#----------------------------------------
# Produce a fragment for one book
def _elementText(book, tag):
    """Return the HTML-escaped text of the first tag element below book,
    or an empty string when there is no such element."""
    found = book.getElementsByTagName(tag)
    if not found:
        return u''
    return escape(getText(found[0].childNodes))


def makeBook(q):
    """Return the HTML fragment for the <book> element q.

    The fragment is a <h3> with the title, a <div> with the isbn
    attribute, and one <div> for each tag in _DIV_TAGS.
    """
    pieces = [u'<h3>%s</h3>\n' % _elementText(q, 'title'),
              u'<div>isbn:%s</div>\n' % escape(q.getAttribute('isbn'))]
    for tag in _DIV_TAGS:
        pieces.append(u'<div>%s</div>\n' % _elementText(q, tag))
    return u''.join(pieces)


#----------------------------------
# Produce the entire file and save it
def produceHTML(infile='c:\\web\\dw\\pyex\\bokliste.xml'):
    """Render every <book> in the XML file infile to an HTML file.

    The output name is infile with its extension replaced by
    'fromxml.html'. Nothing is written when infile cannot be read.
    A file that is not well-formed XML makes the parser raise.
    """
    # load xml-file
    document = getTextFile(infile)
    if document is None:
        return
    # and establish DOM-tree
    dom = xml.dom.minidom.parseString(document)
    try:
        # doing the books
        body = u''.join([makeBook(b)
                         for b in dom.getElementsByTagName('book')])
    finally:
        # clean up, also when a book could not be rendered
        dom.unlink()
    # write the file
    storeTextFile(os.path.splitext(infile)[0] + 'fromxml.html',
                  HTMLFile % body)


produceHTML()
Du kan sammenligne denne Pythonkoden med en XSLT-fil som gjør samme jobben:
<?xml version="1.0" encoding="ISO-8859-1"?>
<!-- Transforms a booklist document (booklist/book elements) into a page
     with one heading and a few lines of details per book. -->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- Serialise as XML with an XHTML 1.0 Strict doctype, ISO-8859-1 encoded. -->
<!-- NOTE(review): the result elements carry no XHTML namespace although
     the doctype is XHTML Strict; confirm whether that is intended. -->
<xsl:output method="xml" omit-xml-declaration="no" indent="yes" doctype-public="-//W3C//DTD XHTML 1.0 Strict//EN" doctype-system="http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd" encoding="ISO-8859-1"/>
<!-- Document root: emit the page skeleton and process every book. -->
<xsl:template match="/">
<html>
<head>
<title>bokliste</title>
</head>
<body>
<h1>Litteratur</h1>
<xsl:apply-templates select="booklist/book"/>
</body>
</html>
</xsl:template>
<!-- One book: title as heading, then author, publisher and year, the isbn
     attribute and the comment. course, category and pages are not output. -->
<xsl:template match="book">
<h3><xsl:value-of select="title"/></h3>
<div><xsl:value-of select="author"/></div>
<div><xsl:value-of select="publisher"/>, <xsl:value-of select="year"/></div>
<div>Isbn: <xsl:value-of select="@isbn"/></div>
<div class="kommentar"><xsl:value-of select="comment"/></div>
</xsl:template>
</xsl:stylesheet>