Source code for gerrychain.partition.assignment

from collections import defaultdict
from collections.abc import Mapping
from typing import DefaultDict, Dict, Optional, Set, Type, Union

import pandas

from ..graph import Graph



[docs]
class Assignment(Mapping):
    """
    An assignment of nodes into parts.

    The goal of :class:`Assignment` is to provide an interface that mirrors a
    dictionary (what we have been using for assigning nodes to districts) while making it
    convenient/cheap to access the set of nodes in each part.

    An :class:`Assignment` has a ``parts`` property that is a dictionary of the form
    ``{part: <frozenset of nodes in part>}``.
    """

    __slots__ = ["parts", "mapping"]

    def __init__(self, parts: Dict, mapping: Optional[Dict] = None, validate: bool = True) -> None:
        """
        :param parts: Dictionary mapping partition assignments frozensets of nodes.
        :type parts: Dict
        :param mapping: Dictionary mapping nodes to partition assignments.
            Default is None.
        :type mapping: Optional[Dict], optional
        :param validate: Whether to validate the assignment. Default is True.
        :type validate: bool, optional

        :returns: None

        :raises ValueError: if the keys of ``parts`` are not unique
        :raises TypeError: if the values of ``parts`` are not frozensets
        """

        if validate:
            number_of_keys = sum(len(keys) for keys in parts.values())
            number_of_unique_keys = len(set().union(*parts.values()))
            if number_of_keys != number_of_unique_keys:
                raise ValueError("Keys must have unique assignments.")
            if not all(isinstance(keys, frozenset) for keys in parts.values()):
                raise TypeError("Level sets must be frozensets")
        self.parts = parts

        if not mapping:
            self.mapping = {}
            for part, nodes in self.parts.items():
                for node in nodes:
                    self.mapping[node] = part
        else:
            self.mapping = mapping

    def __repr__(self):
        return "<Assignment [{} keys, {} parts]>".format(len(self), len(self.parts))

    def __iter__(self):
        return self.keys()

    def __len__(self):
        return sum(len(keys) for keys in self.parts.values())

    def __getitem__(self, node):
        return self.mapping[node]


[docs]
    def copy(self):
        """
        Returns a copy of the assignment.
        Does not duplicate the frozensets of nodes, just the parts dictionary.
        """
        return Assignment(self.parts.copy(), self.mapping.copy(), validate=False)



[docs]
    def update_flows(self, flows):
        """
        Update the assignment for some nodes using the given flows.
        """
        # frm: Update the assignment of nodes to partitions by adding
        #       all of the new nodes and removing all of the old nodes
        #       as represented in the flows (dict keyed by district (part)
        #       of nodes flowing "in" and "out" for that district).
        #
        #       Also, reset the mapping of node to partition (self.mapping)
        #       to reassign each node to its new partition.
        #
        for part, flow in flows.items():
            # Union between frozenset and set returns an object whose type
            # matches the object on the left, which here is a frozenset
            self.parts[part] = (self.parts[part] - flow["out"]) | flow["in"]

            for node in flow["in"]:
                self.mapping[node] = part



[docs]
    def items(self):
        """
        Iterate over ``(node, part)`` tuples, where ``node`` is assigned to ``part``.
        """
        yield from self.mapping.items()



[docs]
    def keys(self):
        yield from self.mapping.keys()



[docs]
    def values(self):
        yield from self.mapping.values()



[docs]
    def update_parts(self, new_parts: Dict) -> None:
        """
        Update some parts of the assignment. Does not check that every node is
        still assigned to a part.

        :param new_parts: dictionary mapping (some) parts to their new sets or
            frozensets of nodes
        :type new_parts: Dict

        :returns: None
        """
        for part, nodes in new_parts.items():
            self.parts[part] = frozenset(nodes)

            for node in nodes:
                self.mapping[node] = part



[docs]
    def to_series(self) -> pandas.Series:
        """
        :returns: The assignment as a :class:`pandas.Series`.
        :rtype: pandas.Series
        """
        groups = [pandas.Series(data=part, index=nodes) for part, nodes in self.parts.items()]
        return pandas.concat(groups)



[docs]
    def to_dict(self) -> Dict:
        """
        :returns: The assignment as a ``{node: part}`` dictionary.
        :rtype: Dict
        """
        return self.mapping



[docs]
    @classmethod
    def from_dict(cls, assignment: Dict) -> "Assignment":
        """
        Create an :class:`Assignment` from a dictionary. This is probably the method you want
        to use to create a new assignment.

        This also works for :class:`pandas.Series`.

        :param assignment: dictionary mapping nodes to partition assignments
        :type assignment: Dict

        :returns: A new instance of :class:`Assignment` with the same assignments as the
            passed-in dictionary.
        :rtype: Assignment
        """

        # frm: TODO: Refactoring:  Clean up from_dict().
        #
        # A couple of things:
        #  * It uses a routine, level_sets(), which is only ever used here, so
        #    why bother having a separate routine.  All it does is convert a dict
        #    mapping node_ids to parts into a dict mapping parts into sets of
        #    node_ids.  Why not just have that code here inline?
        #
        #  * Also, the constructor for Assignment explicitly allows for the caller
        #    to pass in a "mapping" of node_id to part, which we have right here.
        #    Why don't we pass it in and save having to recompute it?
        #

        parts = {part: frozenset(keys) for part, keys in level_sets(assignment).items()}

        return cls(parts)



[docs]
    def new_assignment_convert_old_node_ids_to_new_node_ids(
        self, node_id_mapping: Dict
    ) -> "Assignment":
        """
        Create a new Assignment object from the one passed in, where the node_ids are changed
        according to the node_id_mapping from old node_ids to new node_ids.

        This routine was motivated by the fact that node_ids are changed when converting from an
        NetworkX based graph to a RustworkX based graph.  An Assignment based on the node_ids in
        the NetworkX based graph would need to be changed to use the new node_ids - the new
        Asignment would be semantically equivalent - just converted to use the new node_ids in
        the RX based graph.

        The node_id_mapping is of the form {old_node_id: new_node_id}
        """

        # Dict of the form: {node_id: part_id}
        old_assignment_mapping = self.mapping

        # convert old_node_ids to new_node_ids, keeping part IDs the same
        new_assignment_mapping = {
            node_id_mapping[old_node_id]: part
            for old_node_id, part in old_assignment_mapping.items()
        }
        # Now upate the parts dict that has a frozenset of all the nodes in each part (district)
        new_parts = {}
        for cur_node_id, cur_part in new_assignment_mapping.items():
            if cur_part not in new_parts:
                new_parts[cur_part] = set()
            new_parts[cur_part].add(cur_node_id)
        for cur_part, set_of_nodes in new_parts.items():
            new_parts[cur_part] = frozenset(set_of_nodes)

        #  pandas.Series(data=part, index=nodes) for part, nodes in self.parts.items()

        new_assignment = Assignment(new_parts, new_assignment_mapping)

        return new_assignment





[docs]
def get_assignment(
    part_assignment: Union[str, Dict, Assignment], graph: Optional[Graph] = None
) -> Assignment:
    """
    Either extracts an :class:`Assignment` object from the input graph
    using the provided key or attempts to convert part_assignment into
    an :class:`Assignment` object.

    :param part_assignment: A node attribute key, dictionary, or
        :class:`Assignment` object corresponding to the desired assignment.
    :type part_assignment: str
    :param graph: The graph from which to extract the assignment.
        Default is None.
    :type graph: Optional[Graph], optional

    :returns: An :class:`Assignment` object containing the assignment
        corresponding to the part_assignment input
    :rtype: Assignment

    :raises TypeError: If the part_assignment is a string and the graph
        is not provided.
    :raises TypeError: If the part_assignment is not a string or dictionary.
    """

    # frm: TODO: Refactoring:  Think about whether to split this into two functions.  AT
    #               present, it does different things based on whether
    #               the "part_assignment" parameter is a string, a dict,
    #               or an assignment.  Probably not worth the trouble (possible
    #               legacy issues), but I just can't get used to the Python habit
    #               of weak typing...

    if isinstance(part_assignment, str):
        # Extract an assignment using the named node attribute
        if graph is None:
            raise TypeError(
                "You must provide a graph when using a node attribute for the part_assignment"
            )
        return Assignment.from_dict(
            {node: graph.node_data(node)[part_assignment] for node in graph}
        )
    # Check if assignment is a dict or a mapping type
    elif callable(getattr(part_assignment, "items", None)):
        return Assignment.from_dict(part_assignment)
    elif isinstance(part_assignment, Assignment):
        return part_assignment
    else:
        raise TypeError("Assignment must be a dict or a node attribute key")




[docs]
def level_sets(mapping: Dict, container: Type[Set] = set) -> DefaultDict:
    """
    Inverts a dictionary. ``{key: value}`` becomes
    ``{value: <container of keys that map to value>}``.

    :param mapping: A dictionary to invert. Keys and values can be of any type.
    :type mapping: Dict
    :param container: A container type used to collect keys that map to the same value.
        By default, the container type is ``set``.
    :type container: Type[Set], optional

    :return: A dictionary where each key is a value from the original dictionary,
        and the corresponding value is a container (by default, a set) of keys from
        the original dictionary that mapped to this value.
    :rtype: DefaultDict

    Example usage::

    .. code_block:: python

        >>> level_sets({'a': 1, 'b': 1, 'c': 2})
        defaultdict(<class 'set'>, {1: {'a', 'b'}, 2: {'c'}})
    """
    sets: Dict = defaultdict(container)
    for source, target in mapping.items():
        sets[target].add(source)
    return sets