Source code for gerrychain.proposals.tree_proposals

import random
from functools import partial
from inspect import signature
from typing import Callable, Dict, Optional, Union

from gerrychain.partition import Partition

from ..tree import (
    ReselectException,
    bipartition_tree,
    bipartition_tree_random,
    bipartition_tree_random_with_num_cuts,
    epsilon_tree_bipartition,
    find_balanced_edge_cuts_memoization,
    uniform_spanning_tree,
)


# frm: only used in this file
[docs] class MetagraphError(Exception): """ Raised when the partition we are trying to split is a low degree node in the metagraph. """ pass
# frm: only used in this file
[docs] class ValueWarning(UserWarning): """ Raised whe a particular value is technically valid, but may cause issues with the algorithm. """ pass
[docs] def recom( partition: Partition, pop_col: str, pop_target: Union[int, float], epsilon: float, node_repeats: int = 1, region_surcharge: Optional[Dict] = None, method: Callable = bipartition_tree, ) -> Partition: """ ReCom (short for ReCombination) is a Markov Chain Monte Carlo (MCMC) algorithm used for redistricting. At each step of the algorithm, a pair of adjacent districts is selected at random and merged into a single district. The region is then split into two new districts by generating a spanning tree using the Kruskal/Karger algorithm and cutting an edge at random. The edge is checked to ensure that it separates the region into two new districts that are population balanced, and, if not, a new edge is selected at random and the process is repeated. Example usage: .. code-block:: python from functools import partial from gerrychain import MarkovChain from gerrychain.proposals import recom # ...define constraints, accept, partition, total_steps here... # Ideal population: pop_target = sum(partition["population"].values()) / len(partition) proposal = partial( recom, pop_col="POP10", pop_target=pop_target, epsilon=.05, node_repeats=10 ) chain = MarkovChain(proposal, constraints, accept, partition, total_steps) :param partition: The initial partition. :type partition: Partition :param pop_col: The name of the population column. :type pop_col: str :param pop_target: The target population for each district. :type pop_target: Union[int,float] :param epsilon: The epsilon value for population deviation as a percentage of the target population. :type epsilon: float :param node_repeats: The number of times to repeat the bipartitioning step. Default is 1. :type node_repeats: int, optional :param region_surcharge: The surcharge dictionary for the graph used for region-aware partitioning of the grid. Default is None. :type region_surcharge: Optional[Dict], optional :param method: The method used for bipartitioning the tree. Default is :func:`~gerrychain.tree.bipartition_tree`. :type method: Callable, optional :returns: The new partition resulting from the ReCom algorithm. print("bipartition_tree: updating restarts and attempts") :rtype: Partition """ bad_district_pairs = set() n_parts = len(partition) tot_pairs = n_parts * (n_parts - 1) / 2 # n choose 2 # Try to add the region aware in if the method accepts the surcharge dictionary if "region_surcharge" in signature(method).parameters: method = partial(method, region_surcharge=region_surcharge) while len(bad_district_pairs) < tot_pairs: # frm: In no particular order, try to merge and then split pairs of districts # that have a cut_edge - meaning that they are adjacent, until you either # find one that can be split, or you have tried all possible pairs # of adjacent districts... try: # frm: TODO: Refactoring: see if there is some way to avoid a while True loop... while True: edge = random.choice(tuple(partition["cut_edges"])) # Need to sort the tuple so that the order is consistent # in the bad_district_pairs set parts_to_merge = [ partition.assignment.mapping[edge[0]], partition.assignment.mapping[edge[1]], ] parts_to_merge.sort() if tuple(parts_to_merge) not in bad_district_pairs: break # frm: Note that the vertical bar operator merges the two sets into one set. subgraph_nodes = partition.parts[parts_to_merge[0]] | partition.parts[parts_to_merge[1]] flips = epsilon_tree_bipartition( partition.graph.subgraph(subgraph_nodes), parts_to_merge, pop_col=pop_col, pop_target=pop_target, epsilon=epsilon, node_repeats=node_repeats, method=method, ) break except Exception as e: if isinstance(e, ReselectException): # frm: Add this pair to list of pairs that did not work... bad_district_pairs.add(tuple(parts_to_merge)) continue else: raise if len(bad_district_pairs) == tot_pairs: raise MetagraphError( f"Bipartitioning failed for all {tot_pairs} district pairs." f"Consider rerunning the chain with a different random seed." ) return partition.flip(flips)
[docs] def reversible_recom( partition: Partition, pop_col: str, pop_target: Union[int, float], epsilon: float, balance_edge_fn: Callable = find_balanced_edge_cuts_memoization, M: int = 1, repeat_until_valid: bool = False, choice: Callable = random.choice, ) -> Partition: """ Reversible ReCom algorithm for redistricting. This function performs the reversible ReCom algorithm, which is a Markov Chain Monte Carlo (MCMC) algorithm used for redistricting. For more information, see the paper "Spanning Tree Methods for Sampling Graph Partitions" by Cannon, et al. (2022) at https://arxiv.org/abs/2210.01401 :param partition: The initial partition. :type partition: Partition :param pop_col: The name of the population column. :type pop_col: str :param pop_target: The target population for each district. :type pop_target: Union[int,float] :param epsilon: The epsilon value for population deviation as a percentage of the target population. :type epsilon: float :param balance_edge_fn: The balance edge function. Default is find_balanced_edge_cuts_memoization. :type balance_edge_fn: Callable, optional frm: it returns a list of Cuts - a named tuple defined in tree.py :param M: The maximum number of balance edges. Default is 1. :type M: int, optional :param repeat_until_valid: Flag indicating whether to repeat until a valid partition is found. Default is False. :type repeat_until_valid: bool, optional :param choice: The choice function for selecting a random element. Default is random.choice. :type choice: Callable, optional :returns: The new partition resulting from the reversible ReCom algorithm. :rtype: Partition """ def dist_pair_edges(part, a, b): # frm: Find all edges that cross from district a into district b return set( e for e in part.graph.edges if ( (part.assignment.mapping[e[0]] == a and part.assignment.mapping[e[1]] == b) or (part.assignment.mapping[e[0]] == b and part.assignment.mapping[e[1]] == a) ) ) def bounded_balance_edge_fn(*args, **kwargs): cuts = balance_edge_fn(*args, **kwargs) if len(cuts) > M: raise ReversibilityError( f"Found {len(cuts)} balance edges, " f"but the upper bound is {M}." ) return cuts """ frm: Original Code: bipartition_tree_random_reversible = partial( _bipartition_tree_random_all, repeat_until_valid=repeat_until_valid, spanning_tree_fn=uniform_spanning_tree, balance_edge_fn=bounded_balance_edge_fn, ) I deemed this code to be evil, if only because it used an internal tree.py routine _bipartition_tree_random_all(). This internal routine returns a set of Cut objects which otherwise never appear outside tree.py, so this just adds complexity. The only reason the original code used _bipartition_tree_random_all() instead of just using bipartition_tree_random() is that it needs to know how many possible new districts there are. So, I created a new function in tree.py that does EXACTLY what bipartition_tree_random() does but which also returns the number of possible new districts. """ bipartition_tree_random_reversible = partial( bipartition_tree_random_with_num_cuts, repeat_until_valid=repeat_until_valid, spanning_tree_fn=uniform_spanning_tree, balance_edge_fn=bounded_balance_edge_fn, ) parts = sorted(list(partition.parts.keys())) dist_pairs = [] for out_part in parts: for in_part in parts: dist_pairs.append((out_part, in_part)) # frm: TODO: Code: ???: Grok why this code considers pairs that are the same part... # # For instance, if there are only two parts (districts), then this code will # produce four pairs: (0,0), (0,1), (1,0), (1,1). The code below tests # to see if there is any adjacency, but there will never be adjacency between # the same part (district). Why not just prune out all pairs that have the # same two values and save an interation of the entire chain? # # Stated differently, is there any value in doing an entire chain iteration # when we randomly select the same part (district) to merge with itself??? # # A similar issue comes up if there are no pair_edges (below). We waste # an entire iteration in that case too - which seems kind of dumb... # random_pair = random.choice(dist_pairs) pair_edges = dist_pair_edges(partition, *random_pair) if random_pair[0] == random_pair[1] or not pair_edges: return partition # self-loop: no adjacency # frm: TODO: Code: ???: Grok why it is OK to return the partition unchanged as the next step. # # This runs the risk of running an entire chain without ever changing the partition. # I assume that the logic is that there is deliberate randomness introduced each time, # so eventually, if it is possible, the chain will get started, but it seems like there # should be some kind of check to see if it doesn't ever get started, so that the # user can have a clue about what is going on... edge = random.choice(list(pair_edges)) parts_to_merge = ( partition.assignment.mapping[edge[0]], partition.assignment.mapping[edge[1]], ) # Remember node_ids from which subgraph was created - we will need them below subgraph_nodes = partition.parts[parts_to_merge[0]] | partition.parts[parts_to_merge[1]] # frm: Note: This code has changed to make sure we don't access subgraph node_ids. # The former code saved the subgraph and used its nodes to compute # the remaining_nodes, but this doesn't work with RX, because the # node_ids for the subgraph are different from those in the parent graph. # The solution is to just remember the parent node_ids that were used # to create the subgraph, and to move the subgraph call in as an actual # parameter, so that after the call there is no way to reference it. # # Going forward, this should be a coding style - only invoke Graph.subgraph() # as an actual parameter so that there is no way to inadvertently access # the subgraph's node_ids afterwards. # result = bipartition_tree_random_reversible( partition.graph.subgraph(subgraph_nodes), pop_col=pop_col, pop_target=pop_target, epsilon=epsilon, ) if not result: return partition # self-loop: no balance edge num_possible_districts, nodes = result remaining_nodes = subgraph_nodes - set(nodes) # Note: Clever way to create a single dictionary from # two dictionaries - the ** operator unpacks each dictionary # and then they get merged into a new dictionary. flips = { **{node: parts_to_merge[0] for node in nodes}, **{node: parts_to_merge[1] for node in remaining_nodes}, } new_part = partition.flip(flips) seam_length = len(dist_pair_edges(new_part, *random_pair)) prob = num_possible_districts / (M * seam_length) if prob > 1: raise ReversibilityError( f"Found {len(result) if result is not None else 0} balance edges, but " f"the upper bound (with seam length 1) is {M}." ) if random.random() < prob: return new_part return partition # self-loop
# frm TODO: Refactoring: I do not think that ReCom() is ever called. Note that it # only defines a constructor and a __call__() which would allow # you to call the recom() function by creating a ReCom object and then # "calling" that object - why not just call the recom function? # # ...confused... # # My guess is that someone started writing this code thinking that # a class would make sense but then realized that the only use # was to call the recom() function but never went back to remove # the class. In short, I think that we should probably remove the # class and just keep the function... # # What Peter said in a PR: # # Another bit of legacy code. I am also not sure why this exists. Seems like # there were plans for this and then it got dropped when someone graduated #
[docs] class ReCom: """ ReCom (short for ReCombination) is a class that represents a ReCom proposal for redistricting. It is used to create new partitions by recombining existing districts while maintaining population balance. """ def __init__( self, pop_col: str, ideal_pop: Union[int, float], epsilon: float, method: Callable = bipartition_tree_random, ): """ :param pop_col: The name of the column in the partition that contains the population data. :type pop_col: str :param ideal_pop: The ideal population for each district. :type ideal_pop: Union[int,float] :param epsilon: The epsilon value for population deviation as a percentage of the target population. :type epsilon: float :param method: The method used for bipartitioning the tree. Defaults to `bipartition_tree_random`. :type method: function, optional """ self.pop_col = pop_col self.ideal_pop = ideal_pop self.epsilon = epsilon self.method = method def __call__(self, partition: Partition): return recom(partition, self.pop_col, self.ideal_pop, self.epsilon, method=self.method)
[docs] class ReversibilityError(Exception): """Raised when the cut edge upper bound is violated.""" def __init__(self, msg): self.message = msg