Eksempler
>Data om land
Land i verden
Lese CSV
|
Lese XML
|
Lese og parse XML med SAX
|
Lese CSV til objekter
|
CSV eller XML til objekter
Lese CSV
Vi leser inn CSV-fila, og rapporterer land fordelt på verdensdel.
_readcsv.py
""" Read land.txt Report countries sorted on continent """ #------------------------ # helper to read a file def getTextFile(filename): try: file=open(filename,'r') res=file.read() file.close() return res except: print 'Trouble reading: '+filename return None #------------------------------- # continets [code,fullname,collected countries] continents=[['EU','Europa',[]], ['AS','Asia',[]], ['NA', 'North America',[]], ['AN','Antarctica',[]], ['SA','South america',[]], ['OC','Oceania',[]], ['','No region',[]]] #-------------------------------- # wherever you place the file: T=getTextFile('..\\..\\commondata\\geonames\\land.txt') if T!=None: lines=T.split('\n') for line in lines: parts=line.split('\t') if len(parts)!=12: continue (iso2,iso3,isonum,fips,country,capital, area,population,continent,language,money,gid)=parts for c in continents: if c[0]==continent: c[2].append( country) #-------------------- # report, collected in a string, and ready for filewrite result='' for c in continents: result+= c[1]+'\n' for co in sorted(c[2]): result+='\t'+co+'\n' print result
Lese XML
Vi leser inn XML-fila, og rapporterer land fordelt på verdensdel.
_readxml.py
"""
Read land.xml
Report countries sorted on continent

The script runs unchanged on Python 2.6+ and Python 3.
"""
import xml.dom.minidom

#------------------------
# helper to read a file
def getTextFile(filename):
    """Return the complete content of filename, or None if it cannot be read.

    A message is printed when reading fails. The file is always closed,
    also when read() itself fails.
    """
    try:
        with open(filename, 'r') as infile:
            return infile.read()
    except (IOError, OSError, UnicodeError):
        # Only I/O and decoding problems are expected here; anything else
        # (e.g. KeyboardInterrupt) must not be swallowed.
        print('Trouble reading: ' + filename)
        return None

#-----------------------
# helper to collect all text in a node
def getText(nodelist):
    """Return the concatenated data of all text nodes in nodelist.

    On Python 2 the result is an ISO-8859-1 encoded byte string, as
    before. On Python 3 the text is returned as str, since bytes and str
    cannot be concatenated there.
    """
    text = ''.join([node.data for node in nodelist
                    if node.nodeType == node.TEXT_NODE])
    if str is bytes:
        # Python 2 only: keep the original byte string result.
        text = text.encode('ISO-8859-1')
    return text

#-----------------------
# helper to read the text of a named child element
def childText(node, tagname):
    """Return the text of the first tagname element below node,
    or '' when there is no such element."""
    found = node.getElementsByTagName(tagname)
    if not found:
        return ''
    return getText(found[0].childNodes)

#-------------------------------
# continents [code,fullname,collected countries]
# 'AF' was missing, so every African country was silently dropped.
# It is added last among the real continents to keep the existing order.
continents = [
    ['EU', 'Europa', []],
    ['AS', 'Asia', []],
    ['NA', 'North America', []],
    ['AN', 'Antarctica', []],
    ['SA', 'South america', []],
    ['OC', 'Oceania', []],
    ['AF', 'Africa', []],
    ['', 'No region', []],
]

#-------------------------------
# helper to distribute the countries of an XML document
def collectCountries(xmlText, continents):
    """Append each country name in xmlText to the matching continent entry.

    Every <country> element contributes the text of its <countryName>
    child, filed under the code found in its <continent> child. A country
    without a <continent> element is filed under the '' code. Countries
    whose code is not present in continents are ignored.
    """
    bucketByCode = dict((entry[0], entry[2]) for entry in continents)
    dom = xml.dom.minidom.parseString(xmlText)
    for node in dom.getElementsByTagName('country'):
        bucket = bucketByCode.get(childText(node, 'continent'))
        if bucket is not None:
            bucket.append(childText(node, 'countryName'))

#-------------------------------
# helper to build the report
def buildReport(continents):
    """Return the report: one line per continent, followed by its
    countries in sorted order, each on a tab-indented line."""
    pieces = []
    for entry in continents:
        pieces.append(entry[1] + '\n')
        for name in sorted(entry[2]):
            pieces.append('\t' + name + '\n')
    return ''.join(pieces)

#--------------------------------
# wherever you place the file:
T = getTextFile('..\\..\\commondata\\geonames\\land.xml')
if T is not None:
    collectCountries(T, continents)

#--------------------
# report, collected in a string, and ready for filewrite
result = buildReport(continents)
print(result)
Lese og parse XML med SAX
Vi leser inn XML-fila, og rapporterer land fordelt på verdensdel.
_saxxml.py
"""
Read land.xml
Parse with SAX
Report countries sorted on continent

The script runs unchanged on Python 2.6+ and Python 3.
"""
import xml.parsers.expat

#------------------------
# helper to read a file
def getTextFile(filename):
    """Return the complete content of filename, or None if it cannot be read.

    A message is printed when reading fails. The file is always closed,
    also when read() itself fails.
    """
    try:
        with open(filename, 'r') as infile:
            return infile.read()
    except (IOError, OSError, UnicodeError):
        # Only I/O and decoding problems are expected here; anything else
        # (e.g. KeyboardInterrupt) must not be swallowed.
        print('Trouble reading: ' + filename)
        return None

#-------------------------------
# continents [code,fullname,collected countries]
# 'AF' was missing, so every African country was silently dropped.
# It is added last among the real continents to keep the existing order.
continents = [
    ['EU', 'Europa', []],
    ['AS', 'Asia', []],
    ['NA', 'North America', []],
    ['AN', 'Antarctica', []],
    ['SA', 'South america', []],
    ['OC', 'Oceania', []],
    ['AF', 'Africa', []],
    ['', 'No region', []],
]

#-------------------------------------------------------
# 2 flags to remember type of element
# 2 globals to store wanted data
# 3 primitive handler functions used by saxparser
country = ''
continent = ''
countryFlag = False
continentFlag = False

#---------------------
# opening an element
def start_element(name, attrs):
    """Handle a start tag: clear collected text and raise the matching flag.

    A new <country> clears both stored values, so a record that lacks a
    <continent> cannot inherit the continent of the previous record.
    """
    global country, continent, countryFlag, continentFlag
    if name == 'country':
        country = ''
        continent = ''
    elif name == 'countryName':
        countryFlag = True
        country = ''
    elif name == 'continent':
        continentFlag = True
        continent = ''

#---------------------
# closing an element
def end_element(name):
    """Handle an end tag: lower both flags, and when a <country> closes,
    store the collected name under the collected continent code."""
    global countryFlag, continentFlag
    countryFlag = False
    continentFlag = False
    if name == 'country':
        for c in continents:
            if c[0] == continent:
                c[2].append(country)

#---------------------
# content of an element
def char_data(data):
    """Handle character data inside the element that is currently flagged.

    The parser may deliver the text of one element in several calls, so
    the pieces are appended; plain assignment would keep only the last
    piece.
    """
    global country, continent
    if continentFlag:
        continent += data
    elif countryFlag:
        country += data

#-------------------------------
# helper to build the report
def buildReport(continents):
    """Return the report: one line per continent, followed by its
    countries in sorted order, each on a tab-indented line."""
    pieces = []
    for entry in continents:
        pieces.append(entry[1] + '\n')
        for name in sorted(entry[2]):
            pieces.append('\t' + name + '\n')
    return ''.join(pieces)

#--------------------------------
# wherever you place the file:
p = xml.parsers.expat.ParserCreate()
if hasattr(p, 'returns_unicode'):
    # The attribute exists on Python 2 only; there it asks for byte strings.
    p.returns_unicode = 0
# tell parser about our callbacks
p.StartElementHandler = start_element
p.EndElementHandler = end_element
p.CharacterDataHandler = char_data
T = getTextFile('..\\..\\commondata\\geonames\\land.xml')
if T is not None:
    p.Parse(T)

#--------------------
# report, collected in a string, and ready for filewrite
result = buildReport(continents)
print(result)
Lese CSV til objekter
Vi leser inn csv-fila, ordner i objektliste og rapporterer land fordelt på verdensdel.
_csv2obj.py
""" Read land.txt build list of objects Report countries sorted on continent """ #------------------------ # defining a class class Country: def __init__(self, name,continent): self.name=name self.continent=continent #------------------------ # helper to read a file def getTextFile(filename): try: file=open(filename,'r') res=file.read() file.close() return res except: print 'Trouble reading: '+filename return None #------------------------------- # continets [code,fullname,collected countries] continents=[['EU','Europa',[]], ['AS','Asia',[]], ['NA', 'North America',[]], ['AN','Antarctica',[]], ['SA','South america',[]], ['OC','Oceania',[]], ['','No region',[]]] #-------------------------------- # wherever you place the file: T=getTextFile('..\\..\\commondata\\geonames\\land.txt') allCountries=[] if T!=None: lines=T.split('\n') for line in lines: parts=line.split('\t') if len(parts)!=12: continue allCountries.append(Country(parts[4],parts[8])) #-------------------- # report, collected in a string, and ready for filewrite result='' for c in continents: result+= c[1]+'\n' for co in allCountries: if co.continent==c[0]: result+='\t'+co.name+'\n' print result
CSV eller XML til objekter
Skisse av en løsning som skal kunne handtere begge formatene, ordne dem i en objektliste og rapportere land fordelt på verdensdel.
_ant2obj.py
"""
Read land.txt or land.xml
build list of objects
Report countries sorted on continent

The format is chosen from the file name: '.xml' is parsed as XML,
anything else as tab separated text.
The script runs unchanged on Python 2.6+ and Python 3.
"""
import xml.dom.minidom

#-----------------------
# helper to collect all text in a node
def getText(nodelist):
    """Return the concatenated data of all text nodes in nodelist.

    On Python 2 the result is an ISO-8859-1 encoded byte string, as
    before. On Python 3 the text is returned as str, since bytes and str
    cannot be concatenated there.
    """
    text = ''.join([node.data for node in nodelist
                    if node.nodeType == node.TEXT_NODE])
    if str is bytes:
        # Python 2 only: keep the original byte string result.
        text = text.encode('ISO-8859-1')
    return text

#------------------------
# defining a class
class Country:
    """A country with its name and the code of its continent.

    Build it from clist, the 12 fields of a line in the tab separated
    file, or from cnode, a <country> DOM element. clist wins if both are
    given. With neither, a placeholder country without continent is made.
    """

    def __init__(self, clist=None, cnode=None):
        if clist is not None:
            self.name = clist[4]
            self.continent = clist[8]
        elif cnode is not None:
            cnn = cnode.getElementsByTagName('countryName')[0]
            self.name = getText(cnn.childNodes)
            ccc = cnode.getElementsByTagName('continent')[0]
            self.continent = getText(ccc.childNodes)
        else:
            self.name = 'langvekkistan'
            self.continent = ''

#------------------------
# helper to read a file
def getTextFile(filename):
    """Return the complete content of filename, or None if it cannot be read.

    A message is printed when reading fails. The file is always closed,
    also when read() itself fails.
    """
    try:
        with open(filename, 'r') as infile:
            return infile.read()
    except (IOError, OSError, UnicodeError):
        # Only I/O and decoding problems are expected here; anything else
        # (e.g. KeyboardInterrupt) must not be swallowed.
        print('Trouble reading: ' + filename)
        return None

#-------------------------------
# continents [code,fullname,collected countries]
# 'AF' was missing, so every African country was silently dropped.
# It is added last among the real continents to keep the existing order.
continents = [
    ['EU', 'Europa', []],
    ['AS', 'Asia', []],
    ['NA', 'North America', []],
    ['AN', 'Antarctica', []],
    ['SA', 'South america', []],
    ['OC', 'Oceania', []],
    ['AF', 'Africa', []],
    ['', 'No region', []],
]

#-------------------------------
# helpers to turn file content into objects
def countriesFromCsv(text):
    """Return one Country per line in text that has exactly 12 tab
    separated fields; other lines are skipped."""
    found = []
    for line in text.split('\n'):
        parts = line.split('\t')
        if len(parts) != 12:
            continue
        found.append(Country(clist=parts))
    return found

def countriesFromXml(text):
    """Return one Country per <country> element in the XML document."""
    dom = xml.dom.minidom.parseString(text)
    return [Country(cnode=node)
            for node in dom.getElementsByTagName('country')]

def parseCountries(text, filename):
    """Return the countries in text, using filename to pick the format."""
    if filename.lower().endswith('.xml'):
        return countriesFromXml(text)
    return countriesFromCsv(text)

#-------------------------------
# helper to build the report
def buildReport(continents, allCountries):
    """Return the report: one line per continent, followed by the
    countries of that continent in list order, each on a tab-indented
    line."""
    pieces = []
    for entry in continents:
        pieces.append(entry[1] + '\n')
        for co in allCountries:
            if co.continent == entry[0]:
                pieces.append('\t' + co.name + '\n')
    return ''.join(pieces)

#--------------------------------
# wherever you place the file; use land.txt here to read the
# tab separated version instead:
DATAFILE = '..\\..\\commondata\\geonames\\land.xml'
allCountries = []
T = getTextFile(DATAFILE)
if T is not None:
    allCountries = parseCountries(T, DATAFILE)

#--------------------
# report, collected in a string, and ready for filewrite
result = buildReport(continents, allCountries)
print(result)