Source code for dict_builder.dict_builder_functions

"""
Module defines functions to save (using pickling) the GTFS information in the form of dictionaries.
This is done for easy/faster data lookup.
"""

import os
import pickle

import pandas as pd
from tqdm import tqdm


def build_save_route_by_stop(stop_times_file, NETWORK_NAME: str) -> dict:
    """
    This function saves a dictionary to provide easy access to all the routes passing through a stop_id.

    Args:
        stop_times_file (pandas.dataframe): stop_times.txt file in GTFS.
        NETWORK_NAME (str): name of the network

    Returns:
        route_by_stop_dict (dict): keys: stop_id, values: list of routes passing through the stop_id. Format-> dict[stop_id] = [route_id]
    """
    print("building routes_by_stop")
    stops_by_route = stop_times_file.drop_duplicates(subset=['route_id', 'stop_sequence'])[
        ['stop_id', 'route_id']].groupby('stop_id')
    route_by_stop_dict = {id: list(routes.route_id) for id, routes in stops_by_route}
    with open(f'./dict_builder/{NETWORK_NAME}/routes_by_stop.pkl', 'wb') as pickle_file:
        pickle.dump(route_by_stop_dict, pickle_file)
    print("routes_by_stop done")
    return route_by_stop_dict
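
# A minimal usage sketch for build_save_route_by_stop. The CSV path and the network
# name are placeholders; the stop_times dataframe is assumed to be preprocessed so
# that it carries 'stop_id', 'route_id' and 'stop_sequence' columns, which is what
# the function reads.
def _example_routes_by_stop():
    stop_times = pd.read_csv('./GTFS/SAMPLE_NETWORK/stop_times.csv')  # hypothetical path
    routes_by_stop = build_save_route_by_stop(stop_times, 'SAMPLE_NETWORK')
    some_stop = next(iter(routes_by_stop))
    print(routes_by_stop[some_stop])  # e.g. [21, 33]: route ids serving this stop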

def build_save_stops_dict(stop_times_file, trips_file, NETWORK_NAME: str) -> dict:
    """
    This function saves a dictionary to provide easy access to all the stops in a route.

    Args:
        stop_times_file (pandas.dataframe): stop_times.txt file in GTFS.
        trips_file (pandas.dataframe): trips.txt file in GTFS.
        NETWORK_NAME (str): name of the network

    Returns:
        stops_dict (dict): keys: route_id, values: list of stop ids in the route_id. Format-> dict[route_id] = [stop_id]
    """
    print("building stops dict")
    path = f'./dict_builder/{NETWORK_NAME}/'
    if not os.path.exists(path):
        os.makedirs(path)
    trips_group = stop_times_file.groupby("trip_id")
    # Drop all trips whose arrival times are not sorted in increasing order
    trips_with_correct_timestamps = [id for id, trip in tqdm(trips_group) if
                                     list(trip.arrival_time) == list(trip.arrival_time.sort_values())]
    if len(trips_with_correct_timestamps) != len(trips_file):
        print("Incorrect time sequence in stop_times file")
    stop_times = stop_times_file[stop_times_file["trip_id"].isin(trips_with_correct_timestamps)]
    route_groups = stop_times.drop_duplicates(subset=['route_id', 'stop_sequence'])[
        ['stop_id', 'route_id', 'stop_sequence']].groupby('route_id')
    stops_dict = {id: routes.sort_values(by='stop_sequence')['stop_id'].to_list() for id, routes in route_groups}
    with open(f'./dict_builder/{NETWORK_NAME}/stops_dict_pkl.pkl', 'wb') as pickle_file:
        pickle.dump(stops_dict, pickle_file)
    print("stops_dict done")
    return stops_dict
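
# A minimal sketch of what build_save_stops_dict produces: each route_id mapped
# to its stop ids in travel order. Paths and the network name are placeholders.
def _example_stops_dict():
    stop_times = pd.read_csv('./GTFS/SAMPLE_NETWORK/stop_times.csv')  # hypothetical path
    trips = pd.read_csv('./GTFS/SAMPLE_NETWORK/trips.csv')            # hypothetical path
    stops_dict = build_save_stops_dict(stop_times, trips, 'SAMPLE_NETWORK')
    some_route = next(iter(stops_dict))
    print(stops_dict[some_route])  # e.g. [4, 17, 25]: stop ids in travel order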

def build_save_stopstimes_dict(stop_times_file, trips_file, NETWORK_NAME: str) -> dict:
    """
    This function saves a dictionary to provide easy access to all the trips passing along a route id.
    Trips are sorted in increasing order of departure time. A trip is a list of tuples of the form
    (stop id, arrival time).

    Args:
        stop_times_file (pandas.dataframe): stop_times.txt file in GTFS.
        trips_file (pandas.dataframe): trips.txt file in GTFS.
        NETWORK_NAME (str): name of the network

    Returns:
        stoptimes_dict (dict): keys: route ID, values: list of trips in increasing order of start time.
        Format-> dict[route_ID] = [trip_1, trip_2] where trip_1 = [(stop id, arrival time), (stop id, arrival time)]
    """
    print("building stoptimes dict")
    stop_times_file.arrival_time = pd.to_datetime(stop_times_file.arrival_time)
    route_group = stop_times_file.groupby("route_id")
    stoptimes_dict = {r_id: [] for r_id, _ in route_group}
    for r_id, route in tqdm(route_group):
        trip_group = route.groupby("trip_id")
        # Collect trip start points and sort them by departure time
        temp = route[route.stop_sequence == 0][["trip_id", "arrival_time"]].sort_values(by=["arrival_time"])
        for trip_id in temp["trip_id"]:
            # Append trips in order of their departure time
            trip = trip_group.get_group(trip_id).sort_values(by=["stop_sequence"])
            stoptimes_dict[r_id].append(list(zip(trip.stop_id, trip.arrival_time)))
    with open(f'./dict_builder/{NETWORK_NAME}/stoptimes_dict_pkl.pkl', 'wb') as pickle_file:
        pickle.dump(stoptimes_dict, pickle_file)
    print("stoptimes dict done")
    return stoptimes_dict
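
# A sketch of reading the saved stoptimes dictionary back from disk; the pickle
# path mirrors the one written above, and the network name is a placeholder.
def _example_read_stoptimes():
    with open('./dict_builder/SAMPLE_NETWORK/stoptimes_dict_pkl.pkl', 'rb') as f:
        stoptimes_dict = pickle.load(f)
    some_route = next(iter(stoptimes_dict))
    earliest_trip = stoptimes_dict[some_route][0]  # trips are sorted by departure time
    print(earliest_trip[0])                        # (stop id, arrival time) at the first stop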

def build_save_footpath_dict(transfers_file, NETWORK_NAME: str) -> dict:
    """
    This function saves a dictionary to provide easy access to all the footpaths through a stop id.

    Args:
        transfers_file (pandas.dataframe): dataframe with transfers (footpath) details.
        NETWORK_NAME (str): name of the network

    Returns:
        footpath_dict (dict): keys: from stop_id, values: list of tuples of the form (to stop id, footpath duration). Format-> dict[stop_id] = [(stop_id, footpath_duration)]
    """
    print("building footpath dict..")
    footpath_dict = {}
    g = transfers_file.groupby("from_stop_id")
    for from_stop, details in tqdm(g):
        footpath_dict[from_stop] = []
        for _, row in details.iterrows():
            footpath_dict[from_stop].append(
                (row.to_stop_id, pd.to_timedelta(float(row.min_transfer_time), unit='seconds')))
    with open(f'./dict_builder/{NETWORK_NAME}/transfers_dict_full.pkl', 'wb') as pickle_file:
        pickle.dump(footpath_dict, pickle_file)
    print("transfers_dict done")
    return footpath_dict
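
# A sketch of querying footpaths reachable on foot from a stop. The transfers
# dataframe needs 'from_stop_id', 'to_stop_id' and 'min_transfer_time' columns,
# as in GTFS transfers.txt; the path and network name are placeholders.
def _example_footpaths():
    transfers = pd.read_csv('./GTFS/SAMPLE_NETWORK/transfers.txt')  # hypothetical path
    footpath_dict = build_save_footpath_dict(transfers, 'SAMPLE_NETWORK')
    some_stop = next(iter(footpath_dict))
    for to_stop, duration in footpath_dict[some_stop]:
        print(some_stop, '->', to_stop, duration)  # duration is a pandas Timedelta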

def build_stop_idx_in_route(stop_times_file, NETWORK_NAME: str) -> dict:
    """
    This function saves a dictionary to provide easy access to the index of a stop in a route.

    Args:
        stop_times_file (pandas.dataframe): stop_times.txt file in GTFS.
        NETWORK_NAME (str): name of the network

    Returns:
        idx_by_route_stop_dict (dict): keys: (route id, stop id), values: stop index. Format-> dict[(route id, stop id)] = stop index in route
    """
    pandas_group = stop_times_file.groupby(["route_id", "stop_id"])
    idx_by_route_stop = {route_stop_pair: details.stop_sequence.iloc[0] for route_stop_pair, details in pandas_group}
    with open(f'./dict_builder/{NETWORK_NAME}/idx_by_route_stop.pkl', 'wb') as pickle_file:
        pickle.dump(idx_by_route_stop, pickle_file)
    print("idx_by_route_stop done")
    return idx_by_route_stop
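
# A sketch of an index lookup: given a (route id, stop id) pair, the dictionary
# returns that stop's position (its stop_sequence) along the route. The network
# name is a placeholder; stop_times is any dataframe accepted by the builder.
def _example_stop_index(stop_times):
    idx_by_route_stop = build_stop_idx_in_route(stop_times, 'SAMPLE_NETWORK')
    (some_route, some_stop), idx = next(iter(idx_by_route_stop.items()))
    print(f"stop {some_stop} is at index {idx} on route {some_route}")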

def build_routesindx_by_stop_dict(NETWORK_NAME: str) -> dict:
    """
    This function saves a dictionary mapping each stop id to the routes serving it, paired with the stop's index in each route.
    It reads the previously saved stops_dict and routes_by_stop pickles.

    Args:
        NETWORK_NAME (str): name of the network

    Returns:
        routesindx_by_stop_dict (dict): keys: stop id, values: [(route_id, stop index), (route_id, stop index)]
    """
    with open(f'./dict_builder/{NETWORK_NAME}/stops_dict_pkl.pkl', 'rb') as file:
        stops_dict = pickle.load(file)
    with open(f'./dict_builder/{NETWORK_NAME}/routes_by_stop.pkl', 'rb') as file:
        routes_by_stop_dict = pickle.load(file)
    routesindx_by_stop_dict = {stop: list(zip(listofroutes, [stops_dict[x].index(stop) for x in listofroutes]))
                               for stop, listofroutes in routes_by_stop_dict.items()}
    with open(f'./dict_builder/{NETWORK_NAME}/routesindx_by_stop.pkl', 'wb') as pickle_file:
        pickle.dump(routesindx_by_stop_dict, pickle_file)
    print("routesindx_by_stop_dict done")
    return routesindx_by_stop_dict
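
# A sketch of the combined lookup: build_routesindx_by_stop_dict only needs the
# pickles saved by the builders above, so it can be run on its own afterwards.
# The network name is a placeholder.
def _example_routesindx_by_stop():
    routesindx_by_stop = build_routesindx_by_stop_dict('SAMPLE_NETWORK')
    some_stop = next(iter(routesindx_by_stop))
    print(routesindx_by_stop[some_stop])  # e.g. [(route_id, stop index), ...]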