"""
Simple function for building ensembles of iterators that
represent disjoint partitions of an overall Cartesian product.
"""
from __future__ import annotations
from typing import Tuple, Optional, Collection, Sequence, Iterable
import doctest
import collections.abc as collections_abc # Avoid collision with function argument.
import itertools
from parts import parts
[docs]def products(
*collections: Tuple[Collection, ...],
number: Optional[int] = None
) -> Sequence[Iterable]:
"""
Accept zero or more :obj:`~collections.abc.Collection` instances as arguments
and return a :obj:`~collections.abc.Sequence` of the specified number of disjoint
subsets of the `Cartesian product <https://en.wikipedia.org/wiki/Cartesian_product>`__
of the supplied :obj:`~collections.abc.Collection` instances. Each subset is
represented as an :obj:`~collections.abc.Iterable` and the union of the disjoint
subsets is equal to the overall Cartesian product.
:param collections: Zero or more arguments that represent the
factor sets of the Cartesian product.
:param number: Number of disjoint subsets to return.
>>> ss = products(range(1, 3), {'a', 'b'}, (False, True), number=3)
>>> for s in sorted([sorted(list(s)) for s in ss]):
... for t in s:
... print(t)
(1, 'a', False)
(1, 'a', True)
(1, 'b', False)
(1, 'b', True)
(2, 'a', False)
(2, 'a', True)
(2, 'b', False)
(2, 'b', True)
Two additional basic examples are presented below.
>>> (x, y, z) = ([1, 2], ['a', 'b'], [True, False])
>>> [list(s) for s in products(x, y, number=2)]
[[(1, 'a'), (1, 'b')], [(2, 'a'), (2, 'b')]]
>>> for s in [list(s) for s in products(x, y, z, number=2)]:
... print(s)
[(1, 'a', True), (1, 'a', False), (1, 'b', True), (1, 'b', False)]
[(2, 'a', True), (2, 'a', False), (2, 'b', True), (2, 'b', False)]
By default (if the ``number`` argument is not assigned a value), the number
of disjoint subsets is one. Note that the union of the disjoint subsets is
equivalent to the output of the :obj:`itertools.product` function.
>>> p = itertools.product([1, 2], {'a', 'b'}, (True, False))
>>> ss = products([1, 2], {'a', 'b'}, (True, False))
>>> list(p) == list(list(ss)[0])
True
If no sets are specified, the Cartesian product consists of a single empty
tuple. If there is one set, the Cartesian product consists of a set of
one-element tuples. In both cases, a list of disjoint subsets is returned
as in all other cases (even though the number of disjoint subsets may be
one).
>>> list(list(products())[0])
[()]
>>> list(list(products([1, 2]))[0])
[(1,), (2,)]
It is possible to confirm that the returned subsets are disjoint, and that
the union of the disjoint subsets is the Cartesian product.
>>> (x, y, z) = ([1, 2], ['a', 'b'], [True, False])
>>> ss = [set(s) for s in products(x, y, z, x, y, z, number=5)]
>>> set([len(ss[i] & ss[j]) for i in range(5) for j in range(5) if i != j])
{0}
>>> s = ss[0] | ss[1] | ss[2] | ss[3] | ss[4]
>>> s == set(itertools.product(x, y, z, x, y, z))
True
>>> len(products(*[[1, 2, 3]]*1000, number=5))
5
>>> ls = [len(products(*[[1, 2]]*1000, number=n)) for n in range(1, 100)]
>>> ls == list(range(1, 100))
True
If the requested number of disjoint subsets exceeds the number of elements in
Cartesian product, the number of disjoint subsets will be equivalent to the
number of elements in the Cartesian product.
>>> len(products([1, 2], ['a', 'b'], number=10))
4
Any attempt to apply this function to arguments that have unsupported types
raises an exception.
>>> products([1, 2], number='abc')
Traceback (most recent call last):
...
TypeError: number of disjoint subsets must be an integer
>>> products((i for i in range(3)), number=2)
Traceback (most recent call last):
...
TypeError: arguments must be collections
>>> products([1, 2], number=0)
Traceback (most recent call last):
...
ValueError: number of disjoint subsets must be a positive integer
>>> products([1, 2], number=0)
Traceback (most recent call last):
...
ValueError: number of disjoint subsets must be a positive integer
"""
factors = list(collections) # Ensure factor sets are reusable and their quantity is known.
if not all(isinstance(factor, collections_abc.Collection) for factor in factors):
raise TypeError('arguments must be collections')
if number is not None:
if not isinstance(number, int):
raise TypeError('number of disjoint subsets must be an integer')
if number < 1:
raise ValueError('number of disjoint subsets must be a positive integer')
# If no target number of disjoint subsets has been supplied, simply
# return a single product. Note that this function is not equivalent to
# :obj:`itertools.product` because this function always returns a sequence
# of iterables (the subsets). In this case, there is only one element
# in that sequence.
if number is None or number == 1:
return [itertools.product(*factors)]
# Determine the product of which prefix of arguments to break up
# based on the target number of disjoint subsets.
number_ = 1
index = len(factors)
for (i, a) in enumerate(factors):
number_ = number_ * len(a)
if number_ >= number:
index = min(len(factors), i + 1)
break
# Create an iterable for each prefix.
prefixes = list(parts(list(itertools.product(*factors[0: index])), number))
# For each prefix, create an iterable for that prefix by concatenating
# elements of the prefix to elements of suffix product.
def generator(prefix):
for p in prefix:
for s in itertools.product(*factors[index:]):
yield p + s # Concatenation of tuples.
return [generator(prefix) for prefix in prefixes]
if __name__ == '__main__':
doctest.testmod() # pragma: no cover