Eksempler
>Data om land
Land i verden
Lese CSV
|
Lese XML
|
Lese og parse XML med SAX
|
Lese CSV til objekter
|
CSV eller XML til objekter
Lese CSV
Vi leser inn CSV-fila, og rapporterer land fordelt på verdensdel.
_readcsv.py
"""
Read land.txt
Report countries sorted on continent
"""
#------------------------
# helper to read a file
def getTextFile(filename):
    """Return the whole content of the file *filename*, or None on failure.

    The file is opened in text mode ('r').  When it cannot be opened or
    read, a message naming the file is printed and None is returned, so
    callers have to check the result before using it.
    """
    try:
        # 'with' closes the handle even when read() raises.
        with open(filename, 'r') as infile:
            return infile.read()
    except (IOError, OSError):
        # Only I/O failures are reported here; unrelated exceptions
        # (KeyboardInterrupt, programming errors) are no longer swallowed.
        # Single-argument print(...) prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('Trouble reading: ' + filename)
        return None
#-------------------------------
# continents [code,fullname,collected countries]
# The codes are compared with the continent field of each record.
# GeoNames also uses 'AF' (Africa); without that entry every African
# country was silently left out of the report.
continents=[['EU','Europa',[]],
            ['AS','Asia',[]],
            ['NA', 'North America',[]],
            ['AN','Antarctica',[]],
            ['SA','South america',[]],
            ['OC','Oceania',[]],
            ['AF','Africa',[]],
            ['','No region',[]]]
#--------------------------------
# wherever you place the file:
T=getTextFile('..\\..\\commondata\\geonames\\land.txt')
if T is not None:
    lines=T.split('\n')
    for line in lines:
        parts=line.split('\t')
        # only lines with exactly 12 tab separated fields are records;
        # everything else (e.g. the empty string after the last newline)
        # is skipped
        if len(parts)!=12:
            continue
        # the unpacking doubles as documentation of the column order;
        # only country and continent are used below
        (iso2,iso3,isonum,fips,country,capital,
         area,population,continent,language,money,gid)=parts
        for c in continents:
            if c[0]==continent:
                c[2].append(country)
#--------------------
# report, collected in a string, and ready for filewrite
# (pieces are gathered in a list and joined once instead of growing the
#  string with += for every country)
report_parts=[]
for c in continents:
    report_parts.append(c[1]+'\n')
    for co in sorted(c[2]):
        report_parts.append('\t'+co+'\n')
result=''.join(report_parts)
print(result)
Lese XML
Vi leser inn XML-fila, og rapporterer land fordelt på verdensdel.
_readxml.py
import xml.dom.minidom
"""
Read land.xml
Report countries sorted on continent
"""
#------------------------
# helper to read a file
def getTextFile(filename):
    """Return the whole content of the file *filename*, or None on failure.

    The file is opened in text mode ('r').  When it cannot be opened or
    read, a message naming the file is printed and None is returned, so
    callers have to check the result before using it.
    """
    try:
        # 'with' closes the handle even when read() raises.
        with open(filename, 'r') as infile:
            return infile.read()
    except (IOError, OSError):
        # Only I/O failures are reported here; unrelated exceptions
        # (KeyboardInterrupt, programming errors) are no longer swallowed.
        # Single-argument print(...) prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('Trouble reading: ' + filename)
        return None
#-----------------------
# helper to collect all text in a node
def getText(nodelist):
    """Return the text held directly by the nodes in *nodelist*.

    Only nodes whose nodeType equals TEXT_NODE contribute; element
    children are not descended into.  Each text piece is encoded to
    ISO-8859-1 and the pieces are concatenated in order, so the result is
    a byte string (a plain str on Python 2).  An empty node list gives an
    empty string.

    Raises UnicodeEncodeError when a character cannot be represented in
    ISO-8859-1.
    """
    # join once instead of growing the string with += per node
    return b''.join(node.data.encode('ISO-8859-1')
                    for node in nodelist
                    if node.nodeType == node.TEXT_NODE)
#-------------------------------
# continents [code,fullname,collected countries]
# The codes are compared with the text of each <continent> element.
# GeoNames also uses 'AF' (Africa); without that entry every African
# country was silently left out of the report.
continents=[['EU','Europa',[]],
            ['AS','Asia',[]],
            ['NA', 'North America',[]],
            ['AN','Antarctica',[]],
            ['SA','South america',[]],
            ['OC','Oceania',[]],
            ['AF','Africa',[]],
            ['','No region',[]]]
#--------------------------------
# wherever you place the file:
T=getTextFile('..\\..\\commondata\\geonames\\land.xml')
if T is not None:
    dom=xml.dom.minidom.parseString(T)
    countries=dom.getElementsByTagName('country')
    for co in countries:
        # text of the first <countryName> / <continent> below this <country>
        country=getText(co.getElementsByTagName('countryName')[0].childNodes)
        continent=getText(co.getElementsByTagName('continent')[0].childNodes)
        for c in continents:
            if c[0]==continent:
                c[2].append(country)
#--------------------
# report, collected in a string, and ready for filewrite
# (pieces are gathered in a list and joined once instead of growing the
#  string with += for every country)
report_parts=[]
for c in continents:
    report_parts.append(c[1]+'\n')
    for co in sorted(c[2]):
        report_parts.append('\t'+co+'\n')
result=''.join(report_parts)
print(result)
Lese og parse XML med SAX
Vi leser inn XML-fila, og rapporterer land fordelt på verdensdel.
_saxxml.py
import xml.parsers.expat
"""
Read land.xml
Parse with SAX
Report countries sorted on continent
"""
#------------------------
# helper to read a file
def getTextFile(filename):
    """Return the whole content of the file *filename*, or None on failure.

    The file is opened in text mode ('r').  When it cannot be opened or
    read, a message naming the file is printed and None is returned, so
    callers have to check the result before using it.
    """
    try:
        # 'with' closes the handle even when read() raises.
        with open(filename, 'r') as infile:
            return infile.read()
    except (IOError, OSError):
        # Only I/O failures are reported here; unrelated exceptions
        # (KeyboardInterrupt, programming errors) are no longer swallowed.
        # Single-argument print(...) prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('Trouble reading: ' + filename)
        return None
#-------------------------------
# continents [code,fullname,collected countries]
# The codes are compared with the text of each <continent> element.
# GeoNames also uses 'AF' (Africa); without that entry every African
# country was silently left out of the report.
continents=[['EU','Europa',[]],
            ['AS','Asia',[]],
            ['NA', 'North America',[]],
            ['AN','Antarctica',[]],
            ['SA','South america',[]],
            ['OC','Oceania',[]],
            ['AF','Africa',[]],
            ['','No region',[]]]
#-------------------------------------------------------
# 2 flags to remember type of element
# 2 globals to store wanted data
# 3 primitive handler functions used by saxparser
country=''            # latest text seen inside a countryName element
continent=''          # latest text seen inside a continent element
countryFlag=False     # True while the parser is inside countryName
continentFlag=False   # True while the parser is inside continent
#---------------------
# opening an element
def start_element(name, attrs):
    """Start-tag callback: note which of the two wanted elements opened.

    Raises countryFlag for 'countryName' and continentFlag for
    'continent'; any other element name leaves both flags untouched.
    *attrs* is accepted because the parser passes it, but it is not used.
    """
    global countryFlag, continentFlag
    if name=='countryName':
        countryFlag=True
        return
    if name=='continent':
        continentFlag=True
#---------------------
# closing an element
def end_element(name):
    """End-tag callback: drop both flags and file a finished country.

    Every closing tag resets countryFlag and continentFlag.  When the tag
    is 'country', the currently stored country name is appended to each
    entry of continents whose code equals the stored continent code.
    """
    global countryFlag, continentFlag
    countryFlag=continentFlag=False
    if name!='country':
        return
    # country, continent and continents are only read here
    for entry in continents:
        if entry[0]==continent:
            entry[2].append(country)
#---------------------
# content of an element
def char_data(data):
    """Character-data callback: remember text of the element being read.

    While continentFlag is set the text replaces the stored continent;
    otherwise, while countryFlag is set, it replaces the stored country.
    Text seen with neither flag set is ignored.
    """
    global country, continent
    if continentFlag:
        continent=data
        return
    if countryFlag:
        country=data
#--------------------------------
# wherever you place the file:
p = xml.parsers.expat.ParserCreate()
# Python 2 expat setting: hand byte strings, not unicode objects, to the
# callbacks
p.returns_unicode = 0
# expat may report one run of text in several CharacterDataHandler calls,
# and char_data keeps only the last piece it receives; buffering makes
# the parser deliver contiguous text in one call whenever possible
p.buffer_text = True
# tell parser about our callbacks
p.StartElementHandler = start_element
p.EndElementHandler = end_element
p.CharacterDataHandler = char_data
T=getTextFile('..\\..\\commondata\\geonames\\land.xml')
if T is not None:
    p.Parse(T)
#--------------------
# report, collected in a string, and ready for filewrite
# (pieces are gathered in a list and joined once instead of growing the
#  string with += for every country)
report_parts=[]
for c in continents:
    report_parts.append(c[1]+'\n')
    for co in sorted(c[2]):
        report_parts.append('\t'+co+'\n')
result=''.join(report_parts)
print(result)
Lese CSV til objekter
Vi leser inn CSV-fila, ordner landene i en objektliste og rapporterer land fordelt på verdensdel.
_csv2obj.py
"""
Read land.txt
build list of objects
Report countries sorted on continent
"""
#------------------------
# defining a class
class Country:
    """Plain record holding a country name and its continent code."""
    def __init__(self, name,continent):
        # both values are stored unchanged as public attributes
        self.continent=continent
        self.name=name
#------------------------
# helper to read a file
def getTextFile(filename):
    """Return the whole content of the file *filename*, or None on failure.

    The file is opened in text mode ('r').  When it cannot be opened or
    read, a message naming the file is printed and None is returned, so
    callers have to check the result before using it.
    """
    try:
        # 'with' closes the handle even when read() raises.
        with open(filename, 'r') as infile:
            return infile.read()
    except (IOError, OSError):
        # Only I/O failures are reported here; unrelated exceptions
        # (KeyboardInterrupt, programming errors) are no longer swallowed.
        # Single-argument print(...) prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('Trouble reading: ' + filename)
        return None
#-------------------------------
# continents [code,fullname,collected countries]
# The codes are compared with Country.continent in the report below (the
# third item of each entry is not used by this script).
# GeoNames also uses 'AF' (Africa); without that entry every African
# country was silently left out of the report.
continents=[['EU','Europa',[]],
            ['AS','Asia',[]],
            ['NA', 'North America',[]],
            ['AN','Antarctica',[]],
            ['SA','South america',[]],
            ['OC','Oceania',[]],
            ['AF','Africa',[]],
            ['','No region',[]]]
#--------------------------------
# wherever you place the file:
T=getTextFile('..\\..\\commondata\\geonames\\land.txt')
allCountries=[]
if T is not None:
    lines=T.split('\n')
    for line in lines:
        parts=line.split('\t')
        # only lines with exactly 12 tab separated fields are records
        if len(parts)!=12:
            continue
        # field 4 is the country name, field 8 the continent code
        allCountries.append(Country(parts[4],parts[8]))
#--------------------
# report, collected in a string, and ready for filewrite
# Countries are listed per continent in the order they were read.
# (pieces are gathered in a list and joined once instead of growing the
#  string with += for every country)
report_parts=[]
for c in continents:
    report_parts.append(c[1]+'\n')
    for co in allCountries:
        if co.continent==c[0]:
            report_parts.append('\t'+co.name+'\n')
result=''.join(report_parts)
print(result)
CSV eller XML til objekter
Skisse av en løsning som skal kunne handtere begge formatene, ordne landene i en objektliste og rapportere land fordelt på verdensdel.
_ant2obj.py
import xml.dom.minidom
"""
Read land.txt or land.xml
build list of objects
Report countries sorted on continent
"""
#------------------------
# defining a class
class Country:
    """A country name together with the code of its continent.

    The object can be built from either of two sources:

    clist -- sequence of fields from one record of the text file; item 4
             becomes the name and item 8 the continent code
    cnode -- DOM element for one country; the text of its first
             'countryName' and 'continent' descendants is collected with
             getText()

    clist takes precedence when both are given.  With neither, a
    placeholder country with an empty continent code is created.
    """
    def __init__(self, clist=None,cnode=None):
        # identity tests: None is the "not supplied" marker, and an empty
        # list must still count as supplied
        if clist is not None:
            self.name=clist[4]
            self.continent=clist[8]
        elif cnode is not None:
            cnn=cnode.getElementsByTagName('countryName')[0]
            self.name=getText(cnn.childNodes)
            ccc=cnode.getElementsByTagName('continent')[0]
            self.continent=getText(ccc.childNodes)
        else:
            self.name='langvekkistan'
            self.continent=''
#------------------------
# helper to read a file
def getTextFile(filename):
    """Return the whole content of the file *filename*, or None on failure.

    The file is opened in text mode ('r').  When it cannot be opened or
    read, a message naming the file is printed and None is returned, so
    callers have to check the result before using it.
    """
    try:
        # 'with' closes the handle even when read() raises.
        with open(filename, 'r') as infile:
            return infile.read()
    except (IOError, OSError):
        # Only I/O failures are reported here; unrelated exceptions
        # (KeyboardInterrupt, programming errors) are no longer swallowed.
        # Single-argument print(...) prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('Trouble reading: ' + filename)
        return None
#-----------------------
# helper to collect all text in a node
def getText(nodelist):
    """Return the text held directly by the nodes in *nodelist*.

    Only nodes whose nodeType equals TEXT_NODE contribute; element
    children are not descended into.  Each text piece is encoded to
    ISO-8859-1 and the pieces are concatenated in order, so the result is
    a byte string (a plain str on Python 2).  An empty node list gives an
    empty string.

    Raises UnicodeEncodeError when a character cannot be represented in
    ISO-8859-1.
    """
    # join once instead of growing the string with += per node
    return b''.join(node.data.encode('ISO-8859-1')
                    for node in nodelist
                    if node.nodeType == node.TEXT_NODE)
#-------------------------------
# continents [code,fullname,collected countries]
# The codes are compared with Country.continent in the report below (the
# third item of each entry is not used by this script).
# GeoNames also uses 'AF' (Africa); without that entry every African
# country was silently left out of the report.
continents=[['EU','Europa',[]],
            ['AS','Asia',[]],
            ['NA', 'North America',[]],
            ['AN','Antarctica',[]],
            ['SA','South america',[]],
            ['OC','Oceania',[]],
            ['AF','Africa',[]],
            ['','No region',[]]]
#--------------------------------
allCountries=[]
# Exactly one of the two sections below should be active; the other is
# kept commented out as the alternative input format.
#------ this section-----
##T=getTextFile('..\\..\\commondata\\geonames\\land.txt')
##if T is not None:
##    lines=T.split('\n')
##    for line in lines:
##        parts=line.split('\t')
##        if len(parts)!=12:
##            continue
##        allCountries.append(Country(clist=parts))
#------ or this section-----
T=getTextFile('..\\..\\commondata\\geonames\\land.xml')
if T is not None:
    dom=xml.dom.minidom.parseString(T)
    countries=dom.getElementsByTagName('country')
    for co in countries:
        allCountries.append(Country(cnode=co))
#--------------------
# report, collected in a string, and ready for filewrite
# Countries are listed per continent in the order they were read.
# (pieces are gathered in a list and joined once instead of growing the
#  string with += for every country)
report_parts=[]
for c in continents:
    report_parts.append(c[1]+'\n')
    for co in allCountries:
        if co.continent==c[0]:
            report_parts.append('\t'+co.name+'\n')
result=''.join(report_parts)
print(result)