Source code for tlseparation.utility.shortpath

# Copyright (c) 2017-2019, Matheus Boni Vicari, TLSeparation Project
# All rights reserved.
#
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.


__author__ = "Matheus Boni Vicari"
__copyright__ = "Copyright 2017-2019, TLSeparation Project"
__credits__ = ["Matheus Boni Vicari"]
__license__ = "GPL3"
__version__ = "1.3.2"
__maintainer__ = "Matheus Boni Vicari"
__email__ = "matheus.boni.vicari@gmail.com"
__status__ = "Development"

import networkx as nx
import numpy as np
from sklearn.neighbors import NearestNeighbors


[docs]def array_to_graph(arr, base_id, kpairs, knn, nbrs_threshold,
                   nbrs_threshold_step, graph_threshold=np.inf):

    """
    Converts a numpy.array of points coordinates into a Weighted BiDirectional
    NetworkX Graph.
    This funcions uses a NearestNeighbor search to determine points adajency.
    The NNsearch results are used to select pairs of points (or nodes) that
    have a common edge.


    Parameters
    ----------
    arr : array
        n-dimensional array of points.
    base_id : int
        Index of base id (root) in the graph.
    kpairs : int
        Number of points around each point in arr to select in order to
        build edges.
    knn : int
        Number of neighbors to search around each point in the neighborhood
        phase. The higher the better (careful, it's  memory intensive).
    nbrs_threshold : float
        Maximum valid distance between neighbors points.
    nbrs_threshold_step : float
        Distance increment used in the final phase of edges generation. It's
        used to make sure that in the end, every point in arr will be
        translated to nodes in the graph.
    graph_threshold : float
        Maximum distance between pairs of nodes (edge distance) accepted in
        the graph generation.

    Returns
    -------
    G : networkx graph
        Graph containing all points in 'arr' as nodes.

    """

    # Initializing graph.
    G = nx.Graph()

    # Generating array of all indices from 'arr' and all indices to process
    # 'idx'.
    idx_base = np.arange(arr.shape[0], dtype=int)
    idx = np.arange(arr.shape[0], dtype=int)

    # Initializing NearestNeighbors search and searching for all 'knn'
    # neighboring points arround each point in 'arr'.
    nbrs = NearestNeighbors(n_neighbors=knn, metric='euclidean',
                            leaf_size=15, n_jobs=-1).fit(arr)
    distances, indices = nbrs.kneighbors(arr)
    indices = indices.astype(int)

    # Initializing variables for current ids being processed (current_idx)
    # and all ids already processed (processed_idx).
    current_idx = [base_id]
    processed_idx = [base_id]

    # Looping while there are still indices (idx) left to process.
    while idx.shape[0] > 0:

        # If current_idx is a list containing several indices.
        if len(current_idx) > 0:

            # Selecting NearestNeighbors indices and distances for current
            # indices being processed.
            nn = indices[current_idx]
            dd = distances[current_idx]

            # Masking out indices already contained in processed_idx.
            mask1 = np.in1d(nn, processed_idx, invert=True).reshape(nn.shape)

            # Initializing temporary list of nearest neighbors. This list
            # is latter used to accumulate points that will be added to
            # processed points list.
            nntemp = []

            # Looping over current indices's set of nn points and selecting
            # knn points that hasn't been added/processed yet (mask1).
            for i, (n, d, g) in enumerate(zip(nn, dd, current_idx)):
                nn_idx = n[mask1[i]][0:kpairs+1]
                dd_idx = d[mask1[i]][0:kpairs+1]
                nntemp.append(nn_idx)

                # Adding current knn selected points as nodes to graph G.
                add_nodes(G, g, nn_idx, dd_idx, graph_threshold)

            # Obtaining an unique array of points currently being processed.
            current_idx = np.unique([t2 for t1 in nntemp for t2 in t1])

        # If current_idx is an empty list.
        elif len(current_idx) == 0:

            # Getting NearestNeighbors indices and distance for all indices
            # that remain to be processed.
            idx2 = indices[idx]
            dist2 = distances[idx]

            # Masking indices in idx2 that have already been processed. The
            # idea is to connect remaining points to existing graph nodes.
            mask1 = np.in1d(idx2, processed_idx).reshape(idx2.shape)
            # Masking neighboring points that are withing threshold distance.
            mask2 = dist2 < nbrs_threshold
            # mask1 AND mask2. This will mask only indices that are part of
            # the graph and within threshold distance.
            mask = np.logical_and(mask1, mask2)

            # Getting unique array of indices that match the criteria from
            # mask1 and mask2.
            temp_idx = np.unique(np.where(mask)[0])
            # Assigns remaining indices (idx) matched in temp_idx to
            # current_idx.
            current_idx = idx[temp_idx]

            # Selecting NearestNeighbors indices and distances for current
            # indices being processed.
            nn = indices[current_idx]
            dd = distances[current_idx]

            # Masking points in nn that have already been processed.
            # This is the oposite approach as above, where points that are
            # still not in the graph are desired. Now, to make sure the
            # continuity of the graph is kept, join current remaining indices
            # to indices already in G.
            mask = np.in1d(nn, processed_idx, invert=True).reshape(nn.shape)

            # Initializing temporary list of nearest neighbors. This list
            # is latter used to accumulate points that will be added to
            # processed points list.
            nntemp = []

            # Looping over current indices's set of nn points and selecting
            # knn points that have alreay been added/processed (mask).
            # Also, to ensure continuity over next iteration, select another
            # kpairs points from indices that haven't been processed (~mask).
            for i, (n, d, g) in enumerate(zip(nn, dd, current_idx)):
                nn_idx = n[mask[i]][0:kpairs+1]
                dd_idx = d[mask[i]][0:kpairs+1]

                # Adding current knn selected points as nodes to graph G.
                add_nodes(G, g, nn_idx, dd_idx, graph_threshold)

                nn_idx = n[~mask[i]][0:kpairs+1]
                dd_idx = d[~mask[i]][0:kpairs+1]

                # Adding current knn selected points as nodes to graph G.
                add_nodes(G, g, nn_idx, dd_idx, graph_threshold)

            # Check if current_idx is still empty. If so, increase the
            # nbrs_threshold to try to include more points in the next
            # iteration.
            if len(current_idx) == 0:
                nbrs_threshold += nbrs_threshold_step

        # Appending current_idx to processed_idx.
        processed_idx = np.append(processed_idx, current_idx)
        processed_idx = np.unique(processed_idx).astype(int)

        # Generating list of remaining proints to process.
        idx = idx_base[np.in1d(idx_base, processed_idx, invert=True)]

    return G


[docs]def extract_path_info(G, base_id, return_path=True):

    """
    Extracts shortest path information from a NetworkX graph.

    Parameters
    ----------
    G : networkx graph
        NetworkX graph object from which to extract the information.
    base_id : int
        Base (root) node id to calculate the shortest path for all other
        nodes.
    return_path : boolean
        Option to select if function should output path list for every node
        in G to base_id.

    Returns
    -------
    nodes_ids : list
        Indices of all nodes in graph G.
    distance : list
        Shortest path distance (accumulated) from all nodes in G to base_id
        node.
    path_list : dict
        Dictionary of nodes that comprises the path of every node in G to
        base_id node.

    """

    # Calculating the shortest path
    shortpath = nx.single_source_dijkstra_path_length(G, base_id)

    # Obtaining the node coordinates and their respective distance from
    # the base point.
    nodes_ids = shortpath.keys()
    distance = shortpath.values()

    # Checking if the function should also return the paths of each node and
    # if so, generating the path list and returning it.
    if return_path is True:
        path_list = nx.single_source_dijkstra_path(G, base_id)
        return nodes_ids, distance, path_list

    elif return_path is False:
        return nodes_ids, distance


[docs]def add_nodes(G, base_node, indices, distance, threshold):

    """
    Adds a set of nodes and weighted edges based on pairs of indices
    between base_node and all entries in indices. Each node pair shares an
    edge with weight equal to the distance between both nodes.

    Parameters
    ----------
    G : networkx graph
        NetworkX graph object to which all nodes/edges will be added.
    base_node : int
        Base node's id to be added. All other nodes will be paired with
        base_node to form different edges.
    indices : list or array
        Set of nodes indices to be paired with base_node.
    distance : list or array
        Set of distances between all nodes in 'indices' and base_node.
    threshold : float
        Edge distance threshold. All edges with distance larger than
        'threshold' will not be added to G.

    """

    for c in np.arange(len(indices)):
        if distance[c] <= threshold:
            # If the distance between vertices is less than a given
            # threshold, add edge (i[0], i[c]) to Graph.
            G.add_weighted_edges_from([(base_node, indices[c],
                                        distance[c])])