diff --git a/.gitignore b/.gitignore index dc99556..16b6138 100644 --- a/.gitignore +++ b/.gitignore @@ -111,6 +111,8 @@ venv.bak/ # cython files dwave/samplers/greedy/*.cpp dwave/samplers/greedy/*.html +dwave/samplers/random/*.cpp +dwave/samplers/random/*.html dwave/samplers/sa/*.cpp dwave/samplers/sa/*.html dwave/samplers/tabu/*.cpp diff --git a/README.rst b/README.rst index ccace4e..df1da01 100644 --- a/README.rst +++ b/README.rst @@ -24,6 +24,7 @@ or locally on your CPU. *dwave-samplers* implements the following classical algorithms for solving :term:`binary quadratic model`\ s (BQM): +* Random: a sampler that draws uniform random samples. * `Simulated Annealing`_: a probabilistic heuristic for optimization and approximate Boltzmann sampling well suited to finding good solutions of large problems. * `Steepest Descent`_: a discrete analogue of gradient descent, often used in @@ -31,6 +32,25 @@ or locally on your CPU. * `Tabu`_: a heuristic that employs local search with methods to escape local minima. * `Tree Decomposition`_: an exact solver for problems with low treewidth. +Random +====== + +Random samplers provide a useful baseline performance comparison. The variable +assignments in each sample are chosen by a coin flip. + +>>> from dwave.samplers import RandomSampler +>>> sampler = RandomSampler() + +Create a random binary quadratic model. + +>>> import dimod +>>> bqm = dimod.generators.gnp_random_bqm(100, .5, 'BINARY') + +Get the best 5 sample found in .1 seconds. + +>>> sampleset = sampler.sample(bqm, time_limit=.1, max_num_samples=5) +>>> num_reads = sampleset.info['num_reads'] # the total number of samples generated + Simulated Annealing =================== diff --git a/docs/reference.rst b/docs/reference.rst index 6cb5ad7..827708b 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -4,6 +4,34 @@ Reference Documentation .. currentmodule:: dwave.samplers +Random +======= + +RandomSampler +------------- + +.. autoclass:: RandomSampler + +Attributes +~~~~~~~~~~ + +.. autosummary:: + :toctree: generated/ + + ~RandomSampler.parameters + ~RandomSampler.properties + +Methods +~~~~~~~ + +.. autosummary:: + :toctree: generated/ + + ~RandomSampler.sample + ~RandomSampler.sample_ising + ~RandomSampler.sample_qubo + + Simulated Annealing =================== diff --git a/dwave/samplers/__init__.py b/dwave/samplers/__init__.py index c67ffb9..1951b5a 100644 --- a/dwave/samplers/__init__.py +++ b/dwave/samplers/__init__.py @@ -16,6 +16,8 @@ from dwave.samplers.greedy import * +from dwave.samplers.random import * + from dwave.samplers.sa import * from dwave.samplers.tabu import * diff --git a/dwave/samplers/random/__init__.py b/dwave/samplers/random/__init__.py new file mode 100644 index 0000000..612204d --- /dev/null +++ b/dwave/samplers/random/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2022 D-Wave Systems Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dwave.samplers.random.sampler import * diff --git a/dwave/samplers/random/cyrandom.pyx b/dwave/samplers/random/cyrandom.pyx new file mode 100644 index 0000000..9cb3caa --- /dev/null +++ b/dwave/samplers/random/cyrandom.pyx @@ -0,0 +1,163 @@ +# distutils: language = c++ +# cython: language_level = 3 + +# Copyright 2022 D-Wave Systems Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cimport cython + +from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer +from libcpp.algorithm cimport make_heap, push_heap, pop_heap +from libcpp.utility cimport pair +from libcpp.vector cimport vector + +import dimod +cimport dimod +import numpy as np +cimport numpy as np +cimport numpy.random + +# chrono is not included in Cython's libcpp. So we do it more manually +cdef extern from *: + """ + #include + + double realtime_clock() { + auto t = std::chrono::high_resolution_clock::now(); + return std::chrono::duration(t.time_since_epoch()).count(); + } + """ + double realtime_clock() + + +# it would be nicer to use a struct, but OSX throws segfaults when sorting +# Cython-created structs. We should retest that when we switch to Cython 3. +ctypedef pair[np.float64_t, vector[np.int8_t]] state_t + + +cdef state_t get_sample(dimod.cyBQM_float64 cybqm, + numpy.random.bitgen_t* bitgen, + bint is_spin = False, + ): + # developer note: there is a bunch of potential optimization here + cdef state_t state + + # generate the sample + state.second.reserve(cybqm.num_variables()) + cdef Py_ssize_t i + for i in range(cybqm.num_variables()): + state.second.push_back(bitgen.next_uint32(bitgen.state) % 2) + + if is_spin: + # go back through and convert to spin + for i in range(state.second.size()): + state.second[i] = 2 * state.second[i] - 1 + + state.first = cybqm.data().energy(state.second.begin()) + + return state + + +@cython.boundscheck(False) +@cython.wraparound(False) +def sample(object bqm, + Py_ssize_t num_reads, + np.float64_t time_limit, + Py_ssize_t max_num_samples, + object seed): + + cdef Py_ssize_t i, j # counters for use later + + # Get Cython access to the BQM. We could template to avoid the copy, + # but honestly everyone just uses float64 anyway so... + cdef dimod.cyBQM_float64 cybqm = dimod.as_bqm(bqm, dtype=float).data + cdef bint is_spin = bqm.vartype is dimod.SPIN + + if num_reads <= 0: + raise ValueError("num_reads must be positive") + if time_limit <= 0: + raise ValueError("time_limit must be positive") + if max_num_samples <= 0: + raise ValueError("max_num_samples must be positive") + + # Get Cython access to the rng + rng = np.random.default_rng(seed) + cdef numpy.random.bitgen_t *bitgen + cdef const char *capsule_name = "BitGenerator" + capsule = rng.bit_generator.capsule + if not PyCapsule_IsValid(capsule, capsule_name): + raise ValueError("Invalid pointer to anon_func_state") + bitgen = PyCapsule_GetPointer(capsule, capsule_name) + + # ok, time to start sampling! + cdef double sampling_start_time = realtime_clock() + cdef double sampling_stop_time = sampling_start_time + time_limit + + # fill out the population + cdef vector[state_t] samples + for i in range(max_num_samples): + samples.push_back(get_sample(cybqm, bitgen, is_spin)) + + if samples.size() >= num_reads: + break + if realtime_clock() >= sampling_stop_time: + break + + cdef Py_ssize_t num_drawn = samples.size() + + # determine if we need to keep going + if samples.size() < num_reads and realtime_clock() < sampling_stop_time: + # at this point, we want to stop growing our samples vector, so + # we turn it into a heap + make_heap(samples.begin(), samples.end()) + + # we're never going to change size, so we stop testing the num_reads + # condition. It was up to the caller to ensure that max_num_samples >= num_reads + # if they want to terminate on num_reads + while realtime_clock() < sampling_stop_time: + samples.push_back(get_sample(cybqm, bitgen, is_spin)) + push_heap(samples.begin(), samples.end()) + pop_heap(samples.begin(), samples.end()) + samples.pop_back() + + num_drawn += 1 + + # sampling done! + # time to construct the return objects + + record = np.rec.array( + np.empty(samples.size(), + dtype=[('sample', np.int8, (bqm.num_variables,)), + ('energy', float), + ('num_occurrences', int)])) + + record['num_occurrences'][:] = 1 + + cdef np.float64_t[:] energies_view = record['energy'] + for i in range(samples.size()): + energies_view[i] = samples[i].first + + cdef np.int8_t[:, :] sample_view = record['sample'] + for i in range(samples.size()): + for j in range(cybqm.num_variables()): + sample_view[i, j] = samples[i].second[j] + + sampleset = dimod.SampleSet(record, bqm.variables, info=dict(), vartype=bqm.vartype) + + sampleset.info.update( + num_reads=num_drawn, + # todo: timing data + ) + + return sampleset diff --git a/dwave/samplers/random/sampler.py b/dwave/samplers/random/sampler.py new file mode 100644 index 0000000..1844c8e --- /dev/null +++ b/dwave/samplers/random/sampler.py @@ -0,0 +1,160 @@ +# Copyright 2022 D-Wave Systems Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import sys +import typing + +import dimod +import numpy as np + +from dwave.samplers.random.cyrandom import sample + + +__all__ = ['RandomSampler'] + + +class RandomSampler(dimod.Sampler): + """A random sampler, useful as a performance baseline and for testing. + + Examples: + + >>> from dwave.samplers import RandomSampler + >>> sampler = RandomSampler() + + Create a random binary quadratic model. + + >>> import dimod + >>> bqm = dimod.generators.gnp_random_bqm(100, .5, 'BINARY') + + Get 20 random samples. + + >>> sampleset = sampler.sample(bqm, num_reads=20) + + Get the best 5 sample found in .1 seconds + + >>> sampleset = sampler.sample(bqm, time_limit=.1, max_num_samples=5) + + """ + + parameters: typing.Mapping[str, typing.List] = dict( + num_reads=[], + time_limit=[], + max_num_samples=[], + seed=[], + ) + """Keyword arguments accepted by the sampling methods. + + Examples: + + >>> from dwave.samplers import RandomSampler + >>> sampler = RandomSampler() + >>> sampler.parameters + {'num_reads': [], 'time_limit': [], 'max_num_samples': [], 'seed': []} + + """ + + properties: typing.Mapping[str, typing.Any] = dict( + ) + """Information about the solver. Empty. + + Examples: + + >>> from dwave.samplers import RandomSampler + >>> sampler = RandomSampler() + >>> sampler.properties + {} + + """ + + def sample(self, + bqm: dimod.BinaryQuadraticModel, + *, + num_reads: typing.Optional[int] = None, + time_limit: typing.Optional[typing.Union[float, datetime.timedelta]] = None, + max_num_samples: int = 1000, + seed: typing.Union[None, int, np.random.Generator] = None, + **kwargs, + ) -> dimod.SampleSet: + """Return random samples for a binary quadratic model. + + Args: + bqm: Binary quadratic model to be sampled from. + + num_reads: + The maximum number of random samples to be drawn. + If neither ``num_reads`` nor ``time_limit`` are provided, + ``num_reads`` is set to 1. + If ``time_limit`` is provided, there is no limit imposed on + the number of reads. + + time_limit: + The maximum run time in seconds. + Only the time to generate the samples, calculate the energies, + and maintain the population is counted. + + max_num_samples: + The maximum number of samples returned by the sampler. + This limits the memory usage for small problems are large + ``time_limits``. + Ignored when ``num_reads`` is set. + + seed: + Seed for the random number generator. + Passed to :func:`numpy.random.default_rng()`. + + Returns: + A sample set. + Some additional information is provided in the + :attr:`~dimod.SampleSet.info` dictionary: + + * **num_reads**: The total number of samples generated. + + """ + + # we could count this towards preprocesing time but IMO it's fine to + # skip for simplicity. + self.remove_unknown_kwargs(**kwargs) + + # default case + if num_reads is None and time_limit is None: + num_reads = 1 + time_limit = float('inf') + + if num_reads is None: + # we know that time_limit was set + num_reads = sys.maxsize + elif num_reads <= 0: + raise ValueError("if given, num_reads must be a positive integer") + else: + # it was given, so max_num_samples is ignored + max_num_samples = num_reads + + if time_limit is None: + # num_reads is specified + time_limit = float('inf') + elif isinstance(time_limit, datetime.timedelta): + time_limit = time_limit.total_seconds() + elif time_limit <= 0: + raise ValueError("if given, time_limit must be positive") + + if max_num_samples <= 0: + raise ValueError("max_num_samples must be a positive integer") + + return sample(bqm, + num_reads=num_reads, + time_limit=time_limit, + max_num_samples=max_num_samples, + seed=seed, + ) diff --git a/pyproject.toml b/pyproject.toml index ae3057b..19095ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,9 +3,9 @@ requires = [ "setuptools>=46.4.0", # PEP-420 support, PEP-517/518 support, setup.cfg attr: support "wheel>=0.30.0", # limited python api support "cython>=0.29.24,<3.0", - 'dimod==0.11.2', - 'numpy==1.17.3;python_version<"3.8"', # oldest supported by dimod - 'oldest-supported-numpy;python_version>="3.8"', + 'dimod==0.11.3', + 'numpy==1.19.0;python_version<"3.9"', # C API for numpy.random + 'oldest-supported-numpy;python_version>="3.9"', ] build-backend = "setuptools.build_meta" diff --git a/releasenotes/notes/random-sampler-a5bacc2b7761222f.yaml b/releasenotes/notes/random-sampler-a5bacc2b7761222f.yaml new file mode 100644 index 0000000..423e56f --- /dev/null +++ b/releasenotes/notes/random-sampler-a5bacc2b7761222f.yaml @@ -0,0 +1,3 @@ +--- +features: + - Add ``RandomSampler`` which generates samples randomly. diff --git a/requirements.txt b/requirements.txt index 6602ce1..80a91e0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ cython==0.29.28 -dimod==0.11.1 +dimod==0.11.3 numpy==1.21.6 reno==3.3.0 # for changelog setuptools>=46.4.0 # to support setup.cfg getting __version__ diff --git a/setup.cfg b/setup.cfg index 7700246..b9f8fb9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -29,6 +29,7 @@ classifiers = packages = dwave.samplers dwave.samplers.greedy + dwave.samplers.random dwave.samplers.sa dwave.samplers.tabu dwave.samplers.tree diff --git a/setup.py b/setup.py index 7b4cf4e..24a7dcb 100644 --- a/setup.py +++ b/setup.py @@ -52,6 +52,7 @@ def build_extensions(self): cmdclass={'build_ext': build_ext_with_args}, ext_modules=cythonize( ['dwave/samplers/greedy/descent.pyx', + 'dwave/samplers/random/*.pyx', 'dwave/samplers/sa/*.pyx', 'dwave/samplers/tabu/tabu_search.pyx', 'dwave/samplers/tree/*.pyx', diff --git a/tests/test_random_sampler.py b/tests/test_random_sampler.py new file mode 100644 index 0000000..6717c1d --- /dev/null +++ b/tests/test_random_sampler.py @@ -0,0 +1,97 @@ +# Copyright 2022 D-Wave Systems Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import time +import unittest + +import dimod +import dimod.testing + +from dwave.samplers import RandomSampler + + +@dimod.testing.load_sampler_bqm_tests(RandomSampler) +class TestRandomSampler(unittest.TestCase): + def test_initialization(self): + sampler = RandomSampler() + + dimod.testing.assert_sampler_api(sampler) + + self.assertEqual(sampler.properties, {}) + + def test_energies(self): + bqm = dimod.BinaryQuadraticModel({0: 0.0, 1: 0.0, 2: 0.0}, + {(0, 1): -1.0, (1, 2): 1.0, (0, 2): 1.0}, + 1.0, + dimod.SPIN) + sampler = RandomSampler() + response = sampler.sample(bqm, num_reads=10) + self.assertEqual(len(response), 10) + + dimod.testing.assert_response_energies(response, bqm) + + def test_kwargs(self): + bqm = dimod.BinaryQuadraticModel({}, {}, 0.0, dimod.SPIN) + with self.assertWarns(dimod.exceptions.SamplerUnknownArgWarning): + RandomSampler().sample(bqm, a=5, b=2) + + def test_time_limit(self): + t = time.time() + + bqm = dimod.BinaryQuadraticModel({0: 0.0, 1: 0.0, 2: 0.0}, + {(0, 1): -1.0, (1, 2): 1.0, (0, 2): 1.0}, + 1.0, + dimod.SPIN) + + t = time.perf_counter() + sampleset = RandomSampler().sample(bqm, time_limit=.02, max_num_samples=10) + runtime = time.perf_counter() - t + + self.assertTrue(.01 < runtime < .04) + + dimod.testing.assert_sampleset_energies(sampleset, bqm) + self.assertEqual(len(sampleset), 10) + self.assertGreater(sampleset.info['num_reads'], 10) # should be much much bigger + + def test_time_limit_datetime(self): + t = time.time() + + bqm = dimod.BinaryQuadraticModel({0: 0.0, 1: 0.0, 2: 0.0}, + {(0, 1): -1.0, (1, 2): 1.0, (0, 2): 1.0}, + 1.0, + dimod.SPIN) + + t = time.perf_counter() + sampleset = RandomSampler().sample( + bqm, time_limit=datetime.timedelta(minutes=1)/600) + runtime = time.perf_counter() - t + + self.assertTrue(.05 < runtime < .15) + + def test_time_limit_quality(self): + # get a linear BQM + bqm = dimod.BQM('BINARY') + for v in range(32): + bqm.set_linear(v, 1 << v) + + max_num_samples = 100 + # pick a time limit that should produce many more draws than reads + sampleset = RandomSampler().sample(bqm, max_num_samples=max_num_samples, + time_limit=.01) + num_drawn = sampleset.info['num_reads'] + + # solutions should all be in range [0, (2 << 32) * (max_num_samples / num_draws)] + self.assertTrue( + (sampleset.record.energy < (2 << 32) * (max_num_samples / num_drawn) * 1.25).all())