forked from SEMCOG/semcog_urbansim
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdataset.py
112 lines (93 loc) · 5.45 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import warnings
import numpy as np
import orca
import pandas as pd
from urbansim.utils import misc
import assumptions
# Ignore pandas' PerformanceWarning (referenced here through pd.io.pytables).
warnings.filterwarnings('ignore', category=pd.io.pytables.PerformanceWarning)

# Register each base table from the "store" injectable under its own name.
# The injectable lookup does not depend on the table being registered, so it
# is done once, before the loop, instead of once per table.
store = orca.get_injectable("store")
for name in ['persons', 'parcels', 'zones', 'semmcds', 'counties', 'employment_sectors',
             'building_sqft_per_job',
             'annual_relocation_rates_for_households',
             'annual_relocation_rates_for_jobs', 'annual_employment_control_totals',
             'travel_data', 'zoning', 'large_areas', 'building_types', 'land_use_types',
             'workers_labor_participation_rates', 'workers_employment_rates_by_large_area_age',
             'workers_employment_rates_by_large_area',
             'transit_stops', 'crime_rates', 'schools', 'poi',
             'group_quarters', 'group_quarters_control_totals',
             'annual_household_control_totals',
             'events_addition', 'events_deletion', 'refiner_events', 'income_growth_rates']:
    orca.add_table(name, store[name])
# Tables that come from CSV files rather than the store:
# (orca table name, CSV path, column used as the index or None for the default index)
for _table_name, _csv_path, _index_col in (
    ("remi_pop_total", "data/remi_hhpop_bylarge.csv", 'large_area_id'),
    ('target_vacancies', "data/target_vacancies.csv", None),
    ('demolition_rates', "data/DEMOLITION_RATES.csv", 'city_id'),
    ('extreme_hu_controls', "data/extreme_hu_controls.csv", 'b_city_id'),
):
    orca.add_table(_table_name, pd.read_csv(_csv_path, index_col=_index_col))
@orca.table(cache=True)
def buildings(store, parcels):
    """Build the ``buildings`` table from ``store['buildings']``.

    Steps, in order:

    * clamp negative ``improvement_value`` to 0;
    * derive ``sqft_price_nonres`` and ``sqft_price_res`` from
      ``improvement_value``, resetting values above 1000 or below 0 to 0;
    * replace every remaining NaN in the frame with 0;
    * register the ``max_building_id`` injectable (10000000);
    * set ``hu_filter`` to 1 on a random sample of buildings that have
      residential units and are not referenced by any household
      (90% per city for the listed city ids, 50% for every other city);
    * attach ``large_area_id`` looked up from the parcels table and drop
      buildings for which that lookup is null.

    Parameters
    ----------
    store : object indexable by table name ('buildings' and 'households').
    parcels : parcels table wrapper; its ``.local`` frame is used.

    Returns
    -------
    pandas.DataFrame
    """
    bldgs = store['buildings']

    # Todo: combine two sqft prices into one and set non use sqft price to 0
    bldgs.loc[bldgs.improvement_value < 0, 'improvement_value'] = 0

    # Non-residential price per sqft; out-of-range values are reset to 0.
    bldgs['sqft_price_nonres'] = bldgs.improvement_value * 1.0 / 0.7 / bldgs.non_residential_sqft
    nonres_out_of_range = (bldgs.sqft_price_nonres > 1000) | (bldgs.sqft_price_nonres < 0)
    bldgs.loc[nonres_out_of_range, 'sqft_price_nonres'] = 0

    # Residential price per sqft; out-of-range values are reset to 0.
    residential_sqft = bldgs.sqft_per_unit.astype(int) * bldgs.residential_units
    bldgs['sqft_price_res'] = bldgs.improvement_value * 1.25 / 0.7 / residential_sqft
    res_out_of_range = (bldgs.sqft_price_res > 1000) | (bldgs.sqft_price_res < 0)
    bldgs.loc[res_out_of_range, 'sqft_price_res'] = 0

    # NaNs left over (e.g. from 0 / 0 above) become 0 across the whole frame.
    bldgs.fillna(0, inplace=True)

    orca.add_injectable("max_building_id", 10000000)

    # hu_filter: flag a random share of residential buildings that no
    # household currently points at.
    bldgs['hu_filter'] = 0
    high_share_cities = [551, 1155, 1100, 3130, 6020, 6040]
    candidates = bldgs[bldgs.residential_units > 0]
    occupied_building_ids = store['households'].building_id
    candidates = candidates[~candidates.index.isin(occupied_building_ids)]
    for city_id in candidates.b_city_id.unique():
        if city_id in high_share_cities:
            share = 0.9
        else:
            share = 0.5
        in_city = candidates[candidates.b_city_id == city_id]
        flagged = in_city.sample(frac=share, replace=False)
        bldgs.loc[flagged.index.values, 'hu_filter'] = 1

    # Drop buildings whose parcel lookup gives no large_area_id.
    # NOTE(review): the original comment described this as removing buildings
    # on parcels with null (x, y) positions; confirm that is what a null
    # large_area_id stands for.
    parcel_frame = parcels.local
    bldgs['large_area_id'] = misc.reindex(parcel_frame.large_area_id, bldgs.parcel_id)
    return bldgs[bldgs.large_area_id.notnull()]
@orca.table(cache=True)
def households(store, buildings):
    """Build the ``households`` table from ``store['households']``.

    Households whose ``building_id`` is -1, and then any whose
    ``building_id`` is not among the buildings in the listed large areas,
    are given a building id drawn at random (``np.random.choice``) from
    those buildings.  ``large_area_id`` is then copied from the assigned
    building, the index is named ``household_id`` and NaNs are filled with 0.

    Parameters
    ----------
    store : object indexable by table name ('households').
    buildings : buildings table wrapper providing ``to_frame``.

    Returns
    -------
    pandas.DataFrame
    """
    hh = store['households']

    # Buildings restricted to the listed large_area_id values.
    bldg_areas = buildings.to_frame(['large_area_id'])
    in_listed_area = bldg_areas.large_area_id.isin(
        {161.0, 3.0, 5.0, 125.0, 99.0, 115.0, 147.0, 93.0})
    bldg_areas = bldg_areas[in_listed_area]
    candidate_ids = bldg_areas.index.values

    # First pass: households explicitly marked as unplaced (-1).
    unplaced = hh.building_id == -1
    hh.loc[unplaced, 'building_id'] = np.random.choice(candidate_ids, unplaced.sum())

    # Second pass: households pointing at a building outside the candidates.
    not_in_candidates = ~np.in1d(hh.building_id, candidate_ids)
    hh.loc[not_in_candidates, 'building_id'] = np.random.choice(candidate_ids,
                                                                not_in_candidates.sum())

    hh['large_area_id'] = misc.reindex(bldg_areas.large_area_id, hh.building_id)
    hh.index.name = 'household_id'
    return hh.fillna(0)
@orca.table(cache=True)
def jobs(store, buildings):
    """Build the ``jobs`` table from ``store['jobs']``.

    Jobs whose ``building_id`` is -1, and then any whose ``building_id`` is
    not among the buildings in the listed large areas, are given a building
    id drawn at random (``np.random.choice``) from those buildings.
    ``large_area_id`` is then copied from the assigned building and NaNs are
    filled with 0.

    Parameters
    ----------
    store : object indexable by table name ('jobs').
    buildings : buildings table wrapper providing ``to_frame``.

    Returns
    -------
    pandas.DataFrame
    """
    job_df = store['jobs']

    # Buildings restricted to the listed large_area_id values.
    bldg_areas = buildings.to_frame(['large_area_id'])
    in_listed_area = bldg_areas.large_area_id.isin(
        {161.0, 3.0, 5.0, 125.0, 99.0, 115.0, 147.0, 93.0})
    bldg_areas = bldg_areas[in_listed_area]
    candidate_ids = bldg_areas.index.values

    # First pass: jobs explicitly marked as unplaced (-1).
    unplaced = job_df.building_id == -1
    job_df.loc[unplaced, 'building_id'] = np.random.choice(candidate_ids, unplaced.sum())

    # Second pass: jobs pointing at a building outside the candidates.
    not_in_candidates = ~np.in1d(job_df.building_id, candidate_ids)
    job_df.loc[not_in_candidates, 'building_id'] = np.random.choice(candidate_ids,
                                                                    not_in_candidates.sum())

    job_df['large_area_id'] = misc.reindex(bldg_areas.large_area_id, job_df.building_id)
    return job_df.fillna(0)
@orca.table(cache=True)
def base_job_space(buildings):
    """Return the buildings' ``jobs_non_home_based`` values as a one-column
    frame whose column is named ``base_job_space``."""
    non_home_based = buildings.jobs_non_home_based
    return non_home_based.to_frame("base_job_space")
# Placeholder node tables: each is registered as its own empty DataFrame and
# only lasts until accessibility runs.
for node_tbl in ('nodes', 'nodes_walk', 'nodes_drv'):
    orca.add_table(node_tbl, pd.DataFrame())
# this specifies the relationships between tables
# Each row: (cast table, onto table, onto-table column matched against the
# cast table's index).
for _cast, _onto, _onto_col in (
    ('nodes_walk', 'buildings', 'nodeid_walk'),
    ('nodes_walk', 'parcels', 'nodeid_walk'),
    ('nodes_drv', 'buildings', 'nodeid_drv'),
    ('nodes_drv', 'parcels', 'nodeid_drv'),
    ('parcels', 'buildings', 'parcel_id'),
    ('buildings', 'households', 'building_id'),
    ('buildings', 'jobs', 'building_id'),
    ('households', 'persons', 'household_id'),
    ('building_types', 'buildings', 'building_type_id'),
    ('zones', 'parcels', 'zone_id'),
):
    orca.broadcast(_cast, _onto, cast_index=True, onto_on=_onto_col)

# schools goes the other way round: its parcel_id column is matched against
# the parcels index.
orca.broadcast('schools', 'parcels', cast_on='parcel_id', onto_index=True)