Module preparation

A module for the preparation of paths (e.g., path to SUMO, path to network) and data (e.g., prior OD estimates, observed traffic measurements).

@author: Qing-Long Lu (qinglong.lu@tum.de)

Expand source code
# -*- coding: utf-8 -*-
"""
A module for the preparation of paths (e.g., path to SUMO, path to network) and data (e.g., prior OD estimates, observed traffic measurements).

@author: Qing-Long Lu (qinglong.lu@tum.de)
"""
import pandas as pd
import numpy as np
import os
import shutil

class DataPreparation(object):
    def __init__(self, paths, sumo_var):
        '''
        Initialize a DataPreparation object, with `paths` (a dict of paths to the network, SUMO, scenario and cache) 
        and `sumo_var` (a dict of SUMO simulation configuration) as necessary attributes.

        Both dicts are stored by reference; `load_path` later adds the entries
        `'od_prior'`, `'truedata'`, `'beginSimTime'` and `'endSimTime'` to `sumo_var`.

        Parameters
        ----------
        paths : dict
            A dict of paths to SUMO, network, measurements, demand and cache, including:
                <table>
                    <thead>
                        <tr><th align="left">Variable</th><th align="left">Description</th></tr>
                    </thead>
                    <tbody>
                        <tr><td>'sumo'</td><td>path to the SUMO installation location (not read by this class).</td></tr>
                        <tr><td>'network'</td><td>path to the SUMO network files.</td></tr>
                        <tr><td>'demand'</td><td>path to the prior OD estimates in the [O-format (VISUM/VISSIM)](https://sumo.dlr.de/docs/Demand/Importing_O/D_Matrices.html).</td></tr>
                        <tr><td>'measurements'</td><td>path to the true traffic measurements (in `.csv` format).</td></tr>
                        <tr><td>'cache'</td><td>path to cache folder.</td></tr>
                    </tbody>
                </table>
        sumo_var : dict
            A dict of SUMO simulation setups, including:
                <table>
                    <thead>
                        <tr><th align="left">Variable</th><th align="left">Description</th></tr>
                    </thead>
                    <tbody>
                        <tr><td>'network'</td><td>name of the network file.</td></tr>
                        <tr><td>'tazname'</td><td>name of the traffic analysis zone (TAZ) file.</td></tr>
                        <tr><td>'add_file'</td><td>name of the additional file, which includes the detectors information.</td></tr>
                        <tr><td>'starttime'</td><td>when the simulation should start, as a clock value `H.MM` (e.g. 5.30 for 05:30).</td></tr>
                        <tr><td>'endtime'</td><td>when the simulation should stop, as a clock value `H.MM`.</td></tr>
                        <tr><td>'objective'</td><td>indicate the traffic measurements to use, 'counts' or 'time'.</td></tr>
                        <tr><td>'interval'</td><td>calibration interval (in common with the resolution of traffic measurements).</td></tr>
                    </tbody>
                </table>

        Returns
        -------
        None.

        '''
        self.paths = paths
        self.sumo_var = sumo_var

    def load_path(self):
        '''
        Copy necessary files to the cache folder. Simulations and calibration 
        are carried out in this folder so that the intermediate data generated by SUMO
        does not mess up the original network and scenario folders.

        Besides copying, the following entries are written to `self.sumo_var`:
        `'od_prior'` and `'truedata'` (sorted names of the regular files found in
        the demand and measurements folders) and `'beginSimTime'` / `'endSimTime'`
        (simulation begin and end in seconds, stored as strings).

        All files are copied flat into the cache folder, so a file name that occurs
        in more than one source folder is overwritten by the later copy.

        Returns
        -------
        None.

        '''
        def list_files(folder):
            # Keep regular files only; shutil.copyfile cannot copy sub-folders.
            return sorted(
                name for name in os.listdir(folder)
                if os.path.isfile(os.path.join(folder, name))
            )

        def clock_to_seconds(clock):
            # Clock values are written as H.MM: the integer part holds the hours
            # and the fractional part times 100 holds the minutes.
            value = float(clock)
            hours = float(np.floor(value))
            seconds = hours * 60 * 60 + 100 * (value - hours) * 60
            # Rounding removes binary floating-point noise before truncating;
            # without it 5.10 becomes 18599 instead of 18600 seconds.
            return int(round(seconds, 6))

        paths = self.paths
        sumo_var = self.sumo_var

        sumo_var['od_prior'] = list_files(paths['demand'])
        sumo_var['truedata'] = list_files(paths['measurements'])

        # makedirs also creates missing parent folders and tolerates an
        # already existing cache folder.
        os.makedirs(paths['cache'], exist_ok=True)

        sumo_var['beginSimTime'] = str(clock_to_seconds(sumo_var['starttime']))
        # The simulation end is pushed 500 seconds past the requested end time.
        sumo_var['endSimTime'] = str(clock_to_seconds(sumo_var['endtime']) + 500)

        # Everything SUMO needs is gathered in the cache folder, so that all
        # outputs end up there and the original folders stay untouched.
        to_copy = [(paths['network'], sumo_var[key])
                   for key in ('network', 'tazname', 'add_file')]
        to_copy += [(paths['demand'], name) for name in sumo_var['od_prior']]
        to_copy += [(paths['measurements'], name) for name in sumo_var['truedata']]
        for folder, name in to_copy:
            # os.path.join works with and without a trailing separator on the folder.
            shutil.copyfile(os.path.join(folder, name),
                            os.path.join(paths['cache'], name))

    def load_data(self):
        '''
        Load the prior OD estimates and the traffic measurements from the cache folder.

        The file names are taken from `self.sumo_var['truedata']` and
        `self.sumo_var['od_prior']` (both are filled by `load_path`).

        Returns
        -------
        data : pandas.DataFrame
            Measurements of all files in `self.sumo_var['truedata']`, concatenated
            column-wise. For the objective `'counts'` the index is the first CSV
            column, the columns are labelled with the interval start values and
            rows that contain only zeros are dropped. For `'time'` the index is
            the tuple of the first two CSV columns and the columns are labelled
            with the whole numbers from start to end. Empty for any other objective.
        od_prior : pandas.DataFrame
            The first two columns of the last OD file read (presumably origin
            and destination zone; assumes all OD files list the same pairs in
            the same order), followed by the third column of every OD file,
            labelled with the interval start values.

        Raises
        ------
        ValueError
            If `self.sumo_var['od_prior']` is empty, or if the number of loaded
            columns does not match the number of column labels.

        '''
        start = int(float(self.sumo_var["starttime"]))
        end = int(float(self.sumo_var['endtime']))
        cols = list(range(start, end))
        cols_inter = list(np.arange(start, end, self.sumo_var['interval']))
        cache = self.paths["cache"]
        objective = self.sumo_var['objective']

        # Frames are collected first and concatenated once.
        frames = []
        if objective == 'counts':
            for name in self.sumo_var["truedata"]:
                frame = pd.read_csv(os.path.join(cache, name), header=None)
                frames.append(frame.set_index(0))
        elif objective == 'time':
            for name in self.sumo_var["truedata"]:
                frame = pd.read_csv(os.path.join(cache, name), header=None)
                # The first two columns together identify a row.
                frame['label'] = list(zip(frame.iloc[:, 0], frame.iloc[:, 1]))
                frame.drop([0, 1], axis=1, inplace=True)
                frame.set_index('label', inplace=True)
                frames.append(frame)
        data = pd.concat(frames, axis=1) if frames else pd.DataFrame()

        if objective == 'counts':
            data.columns = cols_inter
            # Keep only rows with at least one non-zero value.
            data = data.loc[(data != 0).any(axis=1)]
        elif objective == 'time':
            # NOTE(review): travel-time columns are labelled with a step of 1
            # rather than with `interval` -- confirm this is intended.
            data.columns = cols

        # prior od estimates
        od_files = self.sumo_var["od_prior"]
        if not od_files:
            raise ValueError("no prior OD files listed in sumo_var['od_prior']")
        od_frames = [
            # The first five lines of each file are skipped as header.
            pd.read_csv(os.path.join(cache, name), sep=r'\s+', header=None, skiprows=5)
            for name in od_files
        ]
        od_prior = pd.concat([frame.iloc[:, 2] for frame in od_frames], axis=1)
        od_prior.columns = cols_inter
        od_prior = pd.concat([od_frames[-1].iloc[:, :2], od_prior], axis=1)
        return data, od_prior


        

Classes

class DataPreparation (paths, sumo_var)

Initialize a DataPreparation object, with paths (a dict of paths to the network, SUMO, scenario and cache) and sumo_var (a dict of SUMO simulation configuration) as necessary attributes.

Parameters

paths : dict
A dict of paths to SUMO, network, measurements, demand and cache, including:
Variable Description
'sumo' path to the SUMO installation location.
'network' path to the SUMO network files.
'demand' path to the prior OD estimates in the O-format (VISUM/VISSIM).
'measurements' path to the true traffic measurements (in .csv format).
'cache' path to cache folder.
sumo_var : dict
A dict of SUMO simulation setups, including:
Variable Description
'network' name of the network file.
'tazname' name of the traffic analysis zone (TAZ) file.
'add_file' name of the additional file, which includes the detectors information.
'starttime' when the simulation should start.
'endtime' when the simulation should stop.
'objective' indicate the traffic measurements to use, 'counts' or 'time'.
'interval' calibration interval (in common with the resolution of traffic measurements).

Returns

None.

Expand source code
class DataPreparation(object):
    def __init__(self, paths, sumo_var):
        '''
        Initialize a DataPreparation object, with `paths` (a dict of paths to the network, SUMO, scenario and cache) 
        and `sumo_var` (a dict of SUMO simulation configuration) as necessary attributes.

        Both dicts are stored by reference; `load_path` later adds the entries
        `'od_prior'`, `'truedata'`, `'beginSimTime'` and `'endSimTime'` to `sumo_var`.

        Parameters
        ----------
        paths : dict
            A dict of paths to SUMO, network, measurements, demand and cache, including:
                <table>
                    <thead>
                        <tr><th align="left">Variable</th><th align="left">Description</th></tr>
                    </thead>
                    <tbody>
                        <tr><td>'sumo'</td><td>path to the SUMO installation location (not read by this class).</td></tr>
                        <tr><td>'network'</td><td>path to the SUMO network files.</td></tr>
                        <tr><td>'demand'</td><td>path to the prior OD estimates in the [O-format (VISUM/VISSIM)](https://sumo.dlr.de/docs/Demand/Importing_O/D_Matrices.html).</td></tr>
                        <tr><td>'measurements'</td><td>path to the true traffic measurements (in `.csv` format).</td></tr>
                        <tr><td>'cache'</td><td>path to cache folder.</td></tr>
                    </tbody>
                </table>
        sumo_var : dict
            A dict of SUMO simulation setups, including:
                <table>
                    <thead>
                        <tr><th align="left">Variable</th><th align="left">Description</th></tr>
                    </thead>
                    <tbody>
                        <tr><td>'network'</td><td>name of the network file.</td></tr>
                        <tr><td>'tazname'</td><td>name of the traffic analysis zone (TAZ) file.</td></tr>
                        <tr><td>'add_file'</td><td>name of the additional file, which includes the detectors information.</td></tr>
                        <tr><td>'starttime'</td><td>when the simulation should start, as a clock value `H.MM` (e.g. 5.30 for 05:30).</td></tr>
                        <tr><td>'endtime'</td><td>when the simulation should stop, as a clock value `H.MM`.</td></tr>
                        <tr><td>'objective'</td><td>indicate the traffic measurements to use, 'counts' or 'time'.</td></tr>
                        <tr><td>'interval'</td><td>calibration interval (in common with the resolution of traffic measurements).</td></tr>
                    </tbody>
                </table>

        Returns
        -------
        None.

        '''
        self.paths = paths
        self.sumo_var = sumo_var

    def load_path(self):
        '''
        Copy necessary files to the cache folder. Simulations and calibration 
        are carried out in this folder so that the intermediate data generated by SUMO
        does not mess up the original network and scenario folders.

        Besides copying, the following entries are written to `self.sumo_var`:
        `'od_prior'` and `'truedata'` (sorted names of the regular files found in
        the demand and measurements folders) and `'beginSimTime'` / `'endSimTime'`
        (simulation begin and end in seconds, stored as strings).

        All files are copied flat into the cache folder, so a file name that occurs
        in more than one source folder is overwritten by the later copy.

        Returns
        -------
        None.

        '''
        def list_files(folder):
            # Keep regular files only; shutil.copyfile cannot copy sub-folders.
            return sorted(
                name for name in os.listdir(folder)
                if os.path.isfile(os.path.join(folder, name))
            )

        def clock_to_seconds(clock):
            # Clock values are written as H.MM: the integer part holds the hours
            # and the fractional part times 100 holds the minutes.
            value = float(clock)
            hours = float(np.floor(value))
            seconds = hours * 60 * 60 + 100 * (value - hours) * 60
            # Rounding removes binary floating-point noise before truncating;
            # without it 5.10 becomes 18599 instead of 18600 seconds.
            return int(round(seconds, 6))

        paths = self.paths
        sumo_var = self.sumo_var

        sumo_var['od_prior'] = list_files(paths['demand'])
        sumo_var['truedata'] = list_files(paths['measurements'])

        # makedirs also creates missing parent folders and tolerates an
        # already existing cache folder.
        os.makedirs(paths['cache'], exist_ok=True)

        sumo_var['beginSimTime'] = str(clock_to_seconds(sumo_var['starttime']))
        # The simulation end is pushed 500 seconds past the requested end time.
        sumo_var['endSimTime'] = str(clock_to_seconds(sumo_var['endtime']) + 500)

        # Everything SUMO needs is gathered in the cache folder, so that all
        # outputs end up there and the original folders stay untouched.
        to_copy = [(paths['network'], sumo_var[key])
                   for key in ('network', 'tazname', 'add_file')]
        to_copy += [(paths['demand'], name) for name in sumo_var['od_prior']]
        to_copy += [(paths['measurements'], name) for name in sumo_var['truedata']]
        for folder, name in to_copy:
            # os.path.join works with and without a trailing separator on the folder.
            shutil.copyfile(os.path.join(folder, name),
                            os.path.join(paths['cache'], name))

    def load_data(self):
        '''
        Load the prior OD estimates and the traffic measurements from the cache folder.

        The file names are taken from `self.sumo_var['truedata']` and
        `self.sumo_var['od_prior']` (both are filled by `load_path`).

        Returns
        -------
        data : pandas.DataFrame
            Measurements of all files in `self.sumo_var['truedata']`, concatenated
            column-wise. For the objective `'counts'` the index is the first CSV
            column, the columns are labelled with the interval start values and
            rows that contain only zeros are dropped. For `'time'` the index is
            the tuple of the first two CSV columns and the columns are labelled
            with the whole numbers from start to end. Empty for any other objective.
        od_prior : pandas.DataFrame
            The first two columns of the last OD file read (presumably origin
            and destination zone; assumes all OD files list the same pairs in
            the same order), followed by the third column of every OD file,
            labelled with the interval start values.

        Raises
        ------
        ValueError
            If `self.sumo_var['od_prior']` is empty, or if the number of loaded
            columns does not match the number of column labels.

        '''
        start = int(float(self.sumo_var["starttime"]))
        end = int(float(self.sumo_var['endtime']))
        cols = list(range(start, end))
        cols_inter = list(np.arange(start, end, self.sumo_var['interval']))
        cache = self.paths["cache"]
        objective = self.sumo_var['objective']

        # Frames are collected first and concatenated once.
        frames = []
        if objective == 'counts':
            for name in self.sumo_var["truedata"]:
                frame = pd.read_csv(os.path.join(cache, name), header=None)
                frames.append(frame.set_index(0))
        elif objective == 'time':
            for name in self.sumo_var["truedata"]:
                frame = pd.read_csv(os.path.join(cache, name), header=None)
                # The first two columns together identify a row.
                frame['label'] = list(zip(frame.iloc[:, 0], frame.iloc[:, 1]))
                frame.drop([0, 1], axis=1, inplace=True)
                frame.set_index('label', inplace=True)
                frames.append(frame)
        data = pd.concat(frames, axis=1) if frames else pd.DataFrame()

        if objective == 'counts':
            data.columns = cols_inter
            # Keep only rows with at least one non-zero value.
            data = data.loc[(data != 0).any(axis=1)]
        elif objective == 'time':
            # NOTE(review): travel-time columns are labelled with a step of 1
            # rather than with `interval` -- confirm this is intended.
            data.columns = cols

        # prior od estimates
        od_files = self.sumo_var["od_prior"]
        if not od_files:
            raise ValueError("no prior OD files listed in sumo_var['od_prior']")
        od_frames = [
            # The first five lines of each file are skipped as header.
            pd.read_csv(os.path.join(cache, name), sep=r'\s+', header=None, skiprows=5)
            for name in od_files
        ]
        od_prior = pd.concat([frame.iloc[:, 2] for frame in od_frames], axis=1)
        od_prior.columns = cols_inter
        od_prior = pd.concat([od_frames[-1].iloc[:, :2], od_prior], axis=1)
        return data, od_prior

Methods

def load_data(self)

Load the od_prior estimates and traffic measurements.

Returns

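data : pandas.DataFrame
Traffic measurements of all measurement files, concatenated column-wise; for 'counts', rows that contain only zeros are dropped.
od_prior : pandas.DataFrame
The first two columns of the OD files followed by one demand column per prior OD file.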
Expand source code
def load_data(self):
    '''
    Load the prior OD estimates and the traffic measurements from the cache folder.

    The file names are taken from `self.sumo_var['truedata']` and
    `self.sumo_var['od_prior']` (both are filled by `load_path`).

    Returns
    -------
    data : pandas.DataFrame
        Measurements of all files in `self.sumo_var['truedata']`, concatenated
        column-wise. For the objective `'counts'` the index is the first CSV
        column, the columns are labelled with the interval start values and
        rows that contain only zeros are dropped. For `'time'` the index is
        the tuple of the first two CSV columns and the columns are labelled
        with the whole numbers from start to end. Empty for any other objective.
    od_prior : pandas.DataFrame
        The first two columns of the last OD file read (presumably origin
        and destination zone; assumes all OD files list the same pairs in
        the same order), followed by the third column of every OD file,
        labelled with the interval start values.

    Raises
    ------
    ValueError
        If `self.sumo_var['od_prior']` is empty, or if the number of loaded
        columns does not match the number of column labels.

    '''
    start = int(float(self.sumo_var["starttime"]))
    end = int(float(self.sumo_var['endtime']))
    cols = list(range(start, end))
    cols_inter = list(np.arange(start, end, self.sumo_var['interval']))
    cache = self.paths["cache"]
    objective = self.sumo_var['objective']

    # Frames are collected first and concatenated once.
    frames = []
    if objective == 'counts':
        for name in self.sumo_var["truedata"]:
            frame = pd.read_csv(os.path.join(cache, name), header=None)
            frames.append(frame.set_index(0))
    elif objective == 'time':
        for name in self.sumo_var["truedata"]:
            frame = pd.read_csv(os.path.join(cache, name), header=None)
            # The first two columns together identify a row.
            frame['label'] = list(zip(frame.iloc[:, 0], frame.iloc[:, 1]))
            frame.drop([0, 1], axis=1, inplace=True)
            frame.set_index('label', inplace=True)
            frames.append(frame)
    data = pd.concat(frames, axis=1) if frames else pd.DataFrame()

    if objective == 'counts':
        data.columns = cols_inter
        # Keep only rows with at least one non-zero value.
        data = data.loc[(data != 0).any(axis=1)]
    elif objective == 'time':
        # NOTE(review): travel-time columns are labelled with a step of 1
        # rather than with `interval` -- confirm this is intended.
        data.columns = cols

    # prior od estimates
    od_files = self.sumo_var["od_prior"]
    if not od_files:
        raise ValueError("no prior OD files listed in sumo_var['od_prior']")
    od_frames = [
        # The first five lines of each file are skipped as header.
        pd.read_csv(os.path.join(cache, name), sep=r'\s+', header=None, skiprows=5)
        for name in od_files
    ]
    od_prior = pd.concat([frame.iloc[:, 2] for frame in od_frames], axis=1)
    od_prior.columns = cols_inter
    od_prior = pd.concat([od_frames[-1].iloc[:, :2], od_prior], axis=1)
    return data, od_prior
def load_path(self)

Copy necessary files to the cache folder. Simulations and calibration are carried out in this folder so that the intermediate data generated by SUMO does not mess up the original network and scenario folders.

Returns

None.

Expand source code
def load_path(self):
    '''
    Copy necessary files to the cache folder. Simulations and calibration 
    are carried out in this folder so that the intermediate data generated by SUMO
    does not mess up the original network and scenario folders.

    Besides copying, the following entries are written to `self.sumo_var`:
    `'od_prior'` and `'truedata'` (sorted names of the regular files found in
    the demand and measurements folders) and `'beginSimTime'` / `'endSimTime'`
    (simulation begin and end in seconds, stored as strings).

    All files are copied flat into the cache folder, so a file name that occurs
    in more than one source folder is overwritten by the later copy.

    Returns
    -------
    None.

    '''
    def list_files(folder):
        # Keep regular files only; shutil.copyfile cannot copy sub-folders.
        return sorted(
            name for name in os.listdir(folder)
            if os.path.isfile(os.path.join(folder, name))
        )

    def clock_to_seconds(clock):
        # Clock values are written as H.MM: the integer part holds the hours
        # and the fractional part times 100 holds the minutes.
        value = float(clock)
        hours = float(np.floor(value))
        seconds = hours * 60 * 60 + 100 * (value - hours) * 60
        # Rounding removes binary floating-point noise before truncating;
        # without it 5.10 becomes 18599 instead of 18600 seconds.
        return int(round(seconds, 6))

    paths = self.paths
    sumo_var = self.sumo_var

    sumo_var['od_prior'] = list_files(paths['demand'])
    sumo_var['truedata'] = list_files(paths['measurements'])

    # makedirs also creates missing parent folders and tolerates an
    # already existing cache folder.
    os.makedirs(paths['cache'], exist_ok=True)

    sumo_var['beginSimTime'] = str(clock_to_seconds(sumo_var['starttime']))
    # The simulation end is pushed 500 seconds past the requested end time.
    sumo_var['endSimTime'] = str(clock_to_seconds(sumo_var['endtime']) + 500)

    # Everything SUMO needs is gathered in the cache folder, so that all
    # outputs end up there and the original folders stay untouched.
    to_copy = [(paths['network'], sumo_var[key])
               for key in ('network', 'tazname', 'add_file')]
    to_copy += [(paths['demand'], name) for name in sumo_var['od_prior']]
    to_copy += [(paths['measurements'], name) for name in sumo_var['truedata']]
    for folder, name in to_copy:
        # os.path.join works with and without a trailing separator on the folder.
        shutil.copyfile(os.path.join(folder, name),
                        os.path.join(paths['cache'], name))