diff --git a/lib/validate.py b/lib/validate.py
new file mode 100644
--- /dev/null
+++ b/lib/validate.py
@@ -0,0 +1,50 @@
+"""Checks that decide whether an address interpolation line is kept."""
+from math import cos, radians
+from osgeo import ogr, osr
+
+# Meters per degree of latitude.
+METERS_PER_DEGREE_LAT = 111132
+
+
+def validate_one_line(fields):
+    """Return True if the interpolation line should be kept, else False.
+
+    A line is rejected, and the reason printed, when either housenumber
+    is negative or when the line covers more than one housenumber but its
+    geometry is shorter than 10 meters.
+
+    fields -- mapping with the keys 'from' and 'to' (values accepted by
+              int()) and 'geometry' (a WKT line, see
+              length_of_wkt_line_in_meters).
+    """
+    number_from = int(fields['from'])
+    number_to = int(fields['to'])
+    if number_from < 0 or number_to < 0:
+        print('Negative housenumber, skipping')
+        return False
+
+    number_range = abs(number_from - number_to)
+    length = length_of_wkt_line_in_meters(fields['geometry'])
+    if number_range > 0 and length < 10:
+        print('Interpolation less than 10 meters, skipping')
+        return False
+
+    return True
+
+
+def length_of_wkt_line_in_meters(wkt_line):
+    """Return the approximate length in meters of a WKT line.
+
+    The coordinates are treated as degrees with Y being the latitude.
+    The length in degrees is scaled by the mean of the meters-per-degree
+    values for latitude and for longitude at the centroid, so the result
+    is an estimate, not an exact geodesic length.
+    """
+    line = ogr.CreateGeometryFromWkt(wkt_line)
+
+    # One degree of longitude shrinks with the cosine of the latitude.
+    center_lat = line.Centroid().GetY()
+    meters_per_degree_lon = METERS_PER_DEGREE_LAT * cos(radians(center_lat))
+
+    length_degrees = line.Length()
+    return length_degrees * (METERS_PER_DEGREE_LAT + meters_per_degree_lon) / 2
diff --git a/tests/test_validate.py b/tests/test_validate.py
new file mode 100644
--- /dev/null
+++ b/tests/test_validate.py
@@ -0,0 +1,36 @@
+from lib.validate import validate_one_line, length_of_wkt_line_in_meters
+
+
+def test_validate_one_line():
+    line = {
+        'from': '53',
+        'to': '99',
+        'interpolation': 'odd',
+        'street': 'Pope Rd',
+        'city': 'Middlesex',
+        'state': 'MA',
+        'postcode': '01720',
+        'geometry': 'LINESTRING(-71.407890 42.480238,-71.407870 42.480264,-71.407563 42.480689,-71.407061 42.481394,-71.406843 42.481731,-71.406309 42.482496,-71.406032 42.482864,-71.405533 42.483570,-71.405220 42.483994,-71.404987 42.484331,-71.404723 42.484835,-71.404551 42.485166,-71.404495 42.485385)'
+    }
+    assert validate_one_line(line) is True
+
+    # A negative housenumber is rejected.
+    line['from'] = '-1'
+    assert validate_one_line(line) is False
+    line['from'] = '53'
+
+    # A zero-length geometry spanning several housenumbers is rejected.
+    line['geometry'] = "LINESTRING(-64.937000 18.344883,-64.937000 18.344883)"
+    assert validate_one_line(line) is False
+
+
+def test_length_of_wkt_line_in_meters():
+    # Both points are identical, so the line has no length.
+    wkt_line = "LINESTRING(-64.937000 18.344883,-64.937000 18.344883)"
+    assert length_of_wkt_line_in_meters(wkt_line) == 0.0
+
+    wkt_line = "LINESTRING(-71.196131 42.409367,-71.196170 42.409260)"
+    assert round(length_of_wkt_line_in_meters(wkt_line)) == 11
+
+    wkt_line = "LINESTRING(-64.937000 18.344883,-64.936982 18.344751,-64.936960 18.344663,-64.936949 18.344617)"
+    assert round(length_of_wkt_line_in_meters(wkt_line)) == 29
diff --git a/tiger_address_convert.py b/tiger_address_convert.py
index bdc6c57..3750f08 100755
--- a/tiger_address_convert.py
+++ b/tiger_address_convert.py
@@ -15,6 +15,7 @@
 
 from lib.parse import parse_shp_for_geom_and_tags
 from lib.convert import addressways, compile_nodelist, compile_waylist
+from lib.validate import validate_one_line
 
 def shape_to_csv(shp_filename, csv_filename):
     """
@@ -46,7 +47,9 @@ def shape_to_csv(shp_filename, csv_filename):
     with open(csv_filename, 'w', encoding="utf8") as csv_file:
         csv_writer = csv.DictWriter(csv_file, delimiter=';', fieldnames=fieldnames)
         csv_writer.writeheader()
-        csv_writer.writerows(csv_lines)
+        for csv_line in csv_lines:
+            if validate_one_line(csv_line):
+                csv_writer.writerow(csv_line)
 
 if len(sys.argv) < 3:
     print("%s input.shp output.csv" % sys.argv[0])