--- dict-gazetteer2k-1.0.0.orig/countieswriter.py
+++ dict-gazetteer2k-1.0.0/countieswriter.py
@@ -0,0 +1,83 @@
+# Build the 'gazetteer2k-counties' dictionary from county2k.txt, a file of
+# fixed-width county records read from the current directory.
+print "Loading states."
+from states import statedict
+print "Processing."
+from dictdlib import DictWriter
+import re
+
+def withstate(city, state):
+    """Return the two headwords for a place: "name" and "name, state"."""
+    return [city, city + ", " + state]
+
+# Splits a trailing entity type off a full county name.  Compiled once here
+# rather than looked up again for every input record.
+typesuffix = re.compile('^(.+) (County|Municipio|city|Parish|Census Area|Borough|Municipality)$')
+
+rfile = open("county2k.txt", "rt")
+writer = DictWriter('gazetteer2k-counties',
+                    'http://www.census.gov/geo/www/gazetteer/places2k.html',
+                    'U.S. Gazetteer Counties (2000)',
+                    """The original data is available from:
+
+http://www.census.gov/ftp/pub/tiger/tms/gazetteer/county2k.txt
+http://www.census.gov/ftp/pub/tiger/tms/gazetteer/zips.txt
+
+  The original U.S. Gazetteer Place and Zipcode Files
+  are provided by the U.S. Census Bureau and are in
+  the Public Domain."""
+                    )
+
+for line in rfile.xreadlines():
+    # Each field occupies a fixed column range of the record.
+    line = line.strip()
+    stateabbr = line[0:2]
+    statefips = line[2:4]
+    name = line[7:71].strip()
+    population = int(line[71:80])
+    housingunits = int(line[80:89])
+    landarea_m = float(line[89:103])
+    waterarea_m = float(line[103:117])
+    landarea_mi = float(line[117:129])
+    waterarea_mi = float(line[129:141])
+    lat = float(line[141:151])
+    lon = float(line[151:162])
+
+    indexwords = []
+
+    match = typesuffix.search(name)
+    if match:
+        # Index the name without its type suffix as well as the full name.
+        countyname = match.group(1)
+        kind = match.group(2)
+        indexwords.extend(withstate(countyname, stateabbr))
+    else:
+        countyname = name
+        kind = 'County'
+
+    indexwords.extend(withstate(name, stateabbr))
+
+    defstr = "%s -- U.S. %s in %s\n" % \
+             (countyname, kind, statedict[stateabbr])
+    defstr += "   Population (2000):    %d\n" % population
+    defstr += "   Housing Units (2000): %d\n" % housingunits
+    # The *_m areas are divided by 1000000 to print them as sq. km.
+    defstr += "   Land area (2000):     %f sq. miles (%f sq. km)\n" % \
+              (landarea_mi, landarea_m / 1000000)
+    defstr += "   Water area (2000):    %f sq. miles (%f sq. km)\n" % \
+              (waterarea_mi, waterarea_m / 1000000)
+    defstr += "   Total area (2000):    %f sq. miles (%f sq. km)\n" % \
+              (landarea_mi + waterarea_mi,
+               landarea_m / 1000000 + waterarea_m / 1000000)
+    defstr += "   Located within:       %s (%s), FIPS %s\n" % \
+              (statedict[stateabbr], stateabbr, statefips)
+    # "cond and a or b" is safe here because 'N' and 'E' are never false.
+    defstr += "   Location:             %f %s, %f %s\n" % ( \
+        abs(lat), lat > 0 and 'N' or 'S',
+        abs(lon), lon > 0 and 'E' or 'W')
+    defstr += "   Headwords:\n"
+    for hw in indexwords:
+        defstr += "    %s\n" % hw
+    writer.writeentry(defstr, indexwords)
+writer.finish()
+rfile.close()
--- dict-gazetteer2k-1.0.0.orig/placeswriter.py
+++ dict-gazetteer2k-1.0.0/placeswriter.py
@@ -0,0 +1,123 @@
+# Build the 'gazetteer2k-places' dictionary from places2k.txt, a file of
+# fixed-width place records read from the current directory.
+print "Loading states."
+from states import statedict
+print "Loading zips."
+from zips import zipcodesdict
+print "Processing."
+from dictdlib import DictWriter
+
+import re
+
+# Both patterns are compiled once here rather than per input record.
+parenthetical = re.compile(r'\s*\(.+\)\s*')
+typesuffix = re.compile('^(.+) (city|town|village|CDP|urbana|comunidad|borough|municipality)$')
+
+rfile = open("places2k.txt", "rt")
+writer = DictWriter('gazetteer2k-places',
+                    'http://www.census.gov/geo/www/gazetteer/places2k.html',
+                    'U.S. Gazetteer Places (2000)',
+                    """The original data is available from:
+
+http://www.census.gov/ftp/pub/tiger/tms/gazetteer/places2k.txt
+http://www.census.gov/ftp/pub/tiger/tms/gazetteer/zips.txt
+
+  The original U.S. Gazetteer Place and Zipcode Files
+  are provided by the U.S. Census Bureau and are in
+  the Public Domain."""
+                    )
+
+for line in rfile.xreadlines():
+    # Each field occupies a fixed column range of the record.
+    line = line.strip()
+    stateabbr = line[0:2]
+    statefips = line[2:4]
+    placefips = line[4:9]
+    name = line[9:73].strip()
+    population = int(line[73:82])
+    housingunits = int(line[82:91])
+    landarea_m = float(line[91:105])
+    waterarea_m = float(line[105:119])
+    landarea_mi = float(line[119:131])
+    waterarea_mi = float(line[131:143])
+    lat = float(line[143:153])
+    lon = float(line[153:164])
+
+    indexwords = []
+
+    # Convert name to type.  Some icky special cases.
+
+    # Convert "Indianapolis city (balance)" to "Indianapolis city"
+    name = parenthetical.sub(' ', name).strip()
+
+    for splitspecial in ['-', ',']:
+        pieces = name.split(splitspecial)
+        if len(pieces) == 2:
+            # Index "Nashville-Davidson (balance)" under "Nashville"
+            # and "Nashville, TN" as well as the full thing.
+            # Same for "Lynchburg, Moore County"
+            left = pieces[0]
+            indexwords.append(left)
+            indexwords.append(left + ", " + stateabbr)
+
+    match = typesuffix.search(name)
+    if not match:
+        cityname = name
+        kind = 'unknown location type'
+    else:
+        cityname = match.group(1)
+        kind = match.group(2)
+
+    if kind == 'CDP':
+        kind = 'Census Designated Place'
+
+    indexwords.append(cityname)
+    indexwords.append(cityname + ", " + stateabbr)
+
+    defstr = "%s, %s -- U.S. %s in %s\n" % \
+             (cityname, stateabbr, kind, statedict[stateabbr])
+    defstr += "   Population (2000):    %d\n" % population
+    defstr += "   Housing Units (2000): %d\n" % housingunits
+    # The *_m areas are divided by 1000000 to print them as sq. km.
+    defstr += "   Land area (2000):     %f sq. miles (%f sq. km)\n" % \
+              (landarea_mi, landarea_m / 1000000)
+    defstr += "   Water area (2000):    %f sq. miles (%f sq. km)\n" % \
+              (waterarea_mi, waterarea_m / 1000000)
+    defstr += "   Total area (2000):    %f sq. miles (%f sq. km)\n" % \
+              (landarea_mi + waterarea_mi,
+               landarea_m / 1000000 + waterarea_m / 1000000)
+    defstr += "   FIPS code:            %s\n" % placefips
+    defstr += "   Located within:       %s (%s), FIPS %s\n" % \
+              (statedict[stateabbr], stateabbr, statefips)
+    # "cond and a or b" is safe here because 'N' and 'E' are never false.
+    defstr += "   Location:             %f %s, %f %s\n" % ( \
+        abs(lat), lat > 0 and 'N' or 'S',
+        abs(lon), lon > 0 and 'E' or 'W')
+
+    # Try the most recently added headwords first.  Reverse a copy:
+    # reversing indexwords itself would also flip the order of the
+    # headwords listed and written below.
+    zipcodes = []
+    zcsearchlist = indexwords[:]
+    zcsearchlist.reverse()
+    for zctry in zcsearchlist:
+        zctry = zctry.upper()
+        if zipcodesdict.has_key(zctry):
+            zipcodes = zipcodesdict[zctry]
+            break
+
+    defstr += "   ZIP Codes (1990):    "
+    zipcount = 0
+    for zipcode in zipcodes:
+        # Wrap onto an indented continuation line after every six codes.
+        if zipcount and zipcount % 6 == 0:
+            defstr += "\n                        "
+        zipcount += 1
+        defstr += " " + zipcode
+    defstr += "\n   Note: some ZIP codes may be omitted esp. for suburbs.\n"
+    defstr += "   Headwords:\n"
+    for hw in indexwords:
+        defstr += "    %s\n" % hw
+    writer.writeentry(defstr, indexwords)
+writer.finish()
+rfile.close()
--- dict-gazetteer2k-1.0.0.orig/states.py
+++ dict-gazetteer2k-1.0.0/states.py
@@ -0,0 +1,18 @@
+# Builds statedict, mapping the first field of each two-field "a:b" line of
+# the miscfiles postal-code list to the second field.
+from gzip import GzipFile
+
+statedict = {}
+
+fh = GzipFile("/usr/share/doc/miscfiles/na.postalcodes.gz", "rb")
+
+for rawline in fh.readlines():
+    # Skip lines that start with '#'.
+    if rawline[0] == '#':
+        continue
+    fields = rawline.strip().split(":")
+    # Only lines with exactly two colon-separated fields are entries.
+    if len(fields) == 2:
+        code, fullname = fields
+        # setdefault keeps the first value seen for a repeated key.
+        statedict.setdefault(code, fullname)
--- dict-gazetteer2k-1.0.0.orig/zcta.py
+++ dict-gazetteer2k-1.0.0/zcta.py
@@ -0,0 +1,29 @@
+# Builds zctanumdict, mapping each five-digit code parsed from the name field
+# of a fixed-width zcta5.txt record to a dict of that record's fields.
+import dictdlib, dictclient, re
+
+rfile = open("zcta5.txt", "rt")
+zctanumdict = {}
+
+for record in rfile.xreadlines():
+    record = record.strip()
+    label = record[2:66].strip()
+    # Skip records whose name does not begin with five digits and a space.
+    found = re.search('^(\d\d\d\d\d )', label)
+    if not found:
+        continue
+    # int() tolerates the trailing space captured by the pattern.
+    zipnum = int(found.group(1))
+    zctanumdict[zipnum] = {
+        'stateabbr': record[0:2],
+        'name': label,
+        'zipcode': zipnum,
+        'population': int(record[66:75]),
+        'housingunits': int(record[75:84]),
+        'landarea_m': float(record[84:98]),
+        'waterarea_m': float(record[98:112]),
+        'landarea_mi': float(record[112:124]),
+        'waterarea_mi': float(record[124:136]),
+        'lat': float(record[136:146]),
+        'long': float(record[146:157]),
+    }
--- dict-gazetteer2k-1.0.0.orig/zips.py
+++ dict-gazetteer2k-1.0.0/zips.py
@@ -0,0 +1,20 @@
+# Builds two lookups from the comma-separated records of zips.txt:
+#   zipcodesdict     "city, state" -> list of ZIP code strings
+#   zipcodesnumdict  int(ZIP code) -> "city, state"
+import dictdlib, dictclient
+from states import statedict
+
+rfile = open("zips.txt", "rt")
+
+zipcodesdict = {}
+zipcodesnumdict = {}
+
+for record in rfile.xreadlines():
+    # Only the second to fourth fields are used; the rest are ignored.
+    fips, zipcode, state, city, junk = record.strip().split(",", 4)
+    zipcode, state, city = [dictclient.dequote(field)
+                            for field in (zipcode, state, city)]
+    indexval = "%s, %s" % (city, state)
+    # setdefault creates the list the first time a place is seen.
+    zipcodesdict.setdefault(indexval, []).append(zipcode)
+    zipcodesnumdict[int(zipcode)] = indexval
--- dict-gazetteer2k-1.0.0.orig/zipswriter.py
+++ dict-gazetteer2k-1.0.0/zipswriter.py
@@ -0,0 +1,91 @@
+# Build the 'gazetteer2k-zips' dictionary: one entry per ZIP code found in
+# either the zips (1990) or the ZCTA (2000) lookup tables.
+print "Loading states."
+from states import statedict
+print "Loading zips."
+from zips import zipcodesnumdict, zipcodesdict
+print "Loading ZCTA."
+from zcta import zctanumdict
+print "Processing."
+from dictdlib import DictWriter
+import re
+
+def withstate(city, state):
+    """Return the two headwords for a place: "name" and "name, state"."""
+    return [city, city + ", " + state]
+
+def uniq(list):
+    """Return list with runs of equal adjacent items collapsed to one.
+
+    Only adjacent duplicates are removed, so sort the list first to drop
+    every duplicate.  Lists shorter than two items are returned as-is.
+    """
+    if len(list) < 2:
+        return list
+    retval = [ list[0] ]
+    for item in list[1:]:
+        if retval[-1] != item:
+            retval.append(item)
+    return retval
+
+writer = DictWriter('gazetteer2k-zips',
+                    'http://www.census.gov/geo/www/gazetteer/places2k.html',
+                    'U.S. Gazetteer Zip Code Tabulation Areas (2000)',
+                    """The original data is available from:
+
+http://www.census.gov/ftp/pub/tiger/tms/gazetteer/zcta5.txt
+http://www.census.gov/ftp/pub/tiger/tms/gazetteer/zips.txt
+
+  The original U.S. Gazetteer Place and Zipcode Files
+  are provided by the U.S. Census Bureau and are in
+  the Public Domain."""
+                    )
+
+# Merge the keys of both tables; sorting makes duplicates adjacent so
+# that uniq() can drop them.
+biglist = zipcodesnumdict.keys() + zctanumdict.keys()
+biglist.sort()
+biglist = uniq(biglist)
+
+for zipcode in biglist:
+    indexwords = ["%05d" % zipcode]
+    defstr = "%05d -- U.S. ZIP code\n" % zipcode
+
+    if zipcodesnumdict.has_key(zipcode):
+        name = zipcodesnumdict[zipcode]
+        city, stateabbr = name.split(",")
+        stateabbr = stateabbr.strip()
+        state = statedict[stateabbr]
+        defstr += "   Municipality (1990):  %s\n" % (city + ", " + state)
+        defstr += "   All ZIPs for this municipality (1990):"
+        zipcount = 0
+        for allzipcode in zipcodesdict[name]:
+            # Start a fresh indented line before every group of six codes,
+            # including the first, so the codes begin below the label.
+            if zipcount % 6 == 0:
+                defstr += "\n                        "
+            zipcount += 1
+            defstr += " " + allzipcode
+        defstr += "\n"
+
+    if zctanumdict.has_key(zipcode):
+        e = zctanumdict[zipcode]
+        defstr += "   Population (2000):    %d\n" % e['population']
+        defstr += "   Housing Units (2000): %d\n" % e['housingunits']
+        # The *_m areas are divided by 1000000 to print them as sq. km.
+        defstr += "   Land area (2000):     %f sq. miles (%f sq. km)\n" % \
+                  (e['landarea_mi'], e['landarea_m'] / 1000000)
+        defstr += "   Water area (2000):    %f sq. miles (%f sq. km)\n" % \
+                  (e['waterarea_mi'], e['waterarea_m'] / 1000000)
+        defstr += "   Total area (2000):    %f sq. miles (%f sq. km)\n" % \
+                  (e['landarea_mi'] + e['waterarea_mi'],
+                   e['landarea_m'] / 1000000 + e['waterarea_m'] / 1000000)
+        defstr += "   Located within:       %s (%s)\n" % \
+                  (statedict[e['stateabbr']], e['stateabbr'])
+        # "cond and a or b" is safe here because 'N' and 'E' are never false.
+        defstr += "   Location:             %f %s, %f %s\n" % ( \
+            abs(e['lat']), e['lat'] > 0 and 'N' or 'S',
+            abs(e['long']), e['long'] > 0 and 'E' or 'W')
+
+    writer.writeentry(defstr, indexwords)
+writer.finish()
