Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 64 additions & 22 deletions mkl_random/mklrand.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2189,17 +2189,17 @@ cdef class _MKLRandomState:

if size is not None:
if (np.prod(size) == 0):
return np.empty(size, dtype=np.dtype(dtype))
return np.empty(size, dtype=np.dtype(_dtype))

lowbnd, highbnd, randfunc = self._choose_randint_type(dtype)
lowbnd, highbnd, randfunc = self._choose_randint_type(_dtype)

if low < lowbnd:
raise ValueError(
f"low is out of bounds for {np.dtype(dtype).name}"
f"low is out of bounds for {np.dtype(_dtype).name}"
)
if high > highbnd:
raise ValueError(
f"high is out of bounds for {np.dtype(dtype).name}"
f"high is out of bounds for {np.dtype(_dtype).name}"
)
if low >= high:
raise ValueError("low >= high")
Expand Down Expand Up @@ -6303,29 +6303,55 @@ cdef class _MKLRandomState:
array([100, 0])

"""
cdef cnp.npy_intp d
cdef cnp.npy_intp d, sz, niter
cdef cnp.ndarray parr "arrayObject_parr", mnarr "arrayObject_mnarr"
cdef double *pix
cdef int *mnix
cdef cnp.npy_intp sz

d = len(pvals)
parr = <cnp.ndarray>cnp.PyArray_ContiguousFromObject(
pvals, cnp.NPY_DOUBLE, 1, 1
cdef long ni

parr = <cnp.ndarray>cnp.PyArray_FROMANY(
pvals,
cnp.NPY_DOUBLE,
0,
1,
cnp.NPY_ARRAY_ALIGNED | cnp.NPY_ARRAY_C_CONTIGUOUS
)
if cnp.PyArray_NDIM(parr) == 0:
raise TypeError("pvals must be a 1-d sequence")
d = cnp.PyArray_SIZE(parr)
pix = <double*>cnp.PyArray_DATA(parr)

if kahan_sum(pix, d-1) > (1.0 + 1e-12):
raise ValueError("sum(pvals[:-1]) > 1.0")

if (
not np.all(np.greater_equal(parr, 0))
or not np.all(np.less_equal(parr, 1))
):
raise ValueError("pvals < 0, pvals > 1 or pvals is NaN")

if d and kahan_sum(pix, d - 1) > (1.0 + 1e-12):
# When floating, but not float dtype, and close, improve the error
# 1.0001 works for float16 and float32
if (isinstance(pvals, np.ndarray)
and np.issubdtype(pvals.dtype, np.floating)
and pvals.dtype != float
and pvals.sum() < 1.0001):
msg = ("sum(pvals[:-1].astype(np.float64)) > 1.0. The pvals "
"array is cast to 64-bit floating point prior to "
"checking the sum. Precision changes when casting may "
"cause problems even if the sum of the original pvals "
"is valid.")
else:
msg = "sum(pvals[:-1]) > 1.0"
raise ValueError(msg)
shape = _shape_from_size(size, d)
multin = np.zeros(shape, np.int32)

mnarr = <cnp.ndarray>multin
mnix = <int*>cnp.PyArray_DATA(mnarr)
sz = cnp.PyArray_SIZE(mnarr)

irk_multinomial_vec(self.internal_state, sz // d, mnix, n, d, pix)
ni = n
if (ni < 0):
raise ValueError("n < 0")
# numpy#20483: Avoids divide by 0
niter = sz // d if d else 0
irk_multinomial_vec(self.internal_state, niter, mnix, n, d, pix)

return multin

Expand Down Expand Up @@ -6614,11 +6640,27 @@ cdef class _MKLRandomState:

"""
if isinstance(x, (int, np.integer)):
arr = np.arange(x)
else:
arr = np.array(x)
self.shuffle(arr)
return arr
# keep using long as the default here (main numpy switched to intp)
arr = np.arange(x, dtype=np.result_type(x, np.long))
self.shuffle(arr)
return arr

arr = np.asarray(x)
if arr.ndim < 1:
raise IndexError("x must be an integer or at least 1-dimensional")

# shuffle has fast-path for 1-d
if arr.ndim == 1:
# Return a copy if same memory
if np.may_share_memory(arr, x):
arr = np.array(arr)
self.shuffle(arr)
return arr

# Shuffle index array, dtype to ensure fast path
idx = np.arange(arr.shape[0], dtype=np.intp)
self.shuffle(idx)
return arr[idx]


cdef class MKLRandomState(_MKLRandomState):
Expand Down
7 changes: 4 additions & 3 deletions mkl_random/tests/test_random.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ class RandIntData(NamedTuple):
def randint():
rfunc_method = rnd.randint
integral_dtypes = [
np.bool_,
np.bool,
np.int8,
np.uint8,
np.int16,
Expand Down Expand Up @@ -323,8 +323,9 @@ def test_randint_respect_dtype_singleton(randint):
assert_equal(sample.dtype, np.dtype(dt))

for dt in (bool, int):
lbnd = 0 if dt is bool else np.iinfo(np.dtype(dt)).min
ubnd = 2 if dt is bool else np.iinfo(np.dtype(dt)).max + 1
# The legacy rng uses "long" as the default integer:
lbnd = 0 if dt is bool else np.iinfo("long").min
ubnd = 2 if dt is bool else np.iinfo("long").max + 1

# gh-7284: Ensure that we get Python data types
sample = randint.rfunc(lbnd, ubnd, dtype=dt)
Expand Down
176 changes: 176 additions & 0 deletions mkl_random/tests/third_party/test_numpy_random.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# This file includes tests from numpy.random module:
# https://github.com/numpy/numpy/blob/main/numpy/random/tests/test_random.py

import sys
import warnings

import numpy as np
Expand Down Expand Up @@ -1128,3 +1129,178 @@ def test_three_arg_funcs(self):

out = func(argOne, argTwo[0], argThree)
assert_equal(out.shape, tgtShape)


class TestRegression:
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be better to move them into test_regression.py to keep file alignment with NumPy?
Probably it would be easier to maintain later when we need to refresh them with the latest NumPy changes.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's not a bad idea, yeah

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

interestingly, I didn't realize that these tests were already vendored into mkl_random/tests/test_regression.py

maybe it would be best in the scope of this PR to just update those tests and attribute them to NumPy


def test_VonMises_range(self):
# Make sure generated random variables are in [-pi, pi].
# Regression test for ticket #986.
for mu in np.linspace(-7.0, 7.0, 5):
r = mkl_random.vonmises(mu, 1, 50)
assert_(np.all(r > -np.pi) and np.all(r <= np.pi))

def test_hypergeometric_range(self):
# Test for ticket #921
assert_(np.all(mkl_random.hypergeometric(3, 18, 11, size=10) < 4))
assert_(np.all(mkl_random.hypergeometric(18, 3, 11, size=10) > 0))

# Test for ticket #5623
args = [
(2**20 - 2, 2**20 - 2, 2**20 - 2), # Check for 32-bit systems
]
for arg in args:
assert_(mkl_random.hypergeometric(*arg) > 0)

def test_logseries_convergence(self):
# Test for ticket #923
N = 1000
mkl_random.seed(0)
rvsn = mkl_random.logseries(0.8, size=N)
# these two frequency counts should be close to theoretical
# numbers with this large sample
# theoretical large N result is 0.49706795
freq = np.sum(rvsn == 1) / N
msg = f"Frequency was {freq:f}, should be > 0.45"
assert_(freq > 0.45, msg)
# theoretical large N result is 0.19882718
freq = np.sum(rvsn == 2) / N
msg = f"Frequency was {freq:f}, should be < 0.23"
assert_(freq < 0.23, msg)

def test_shuffle_mixed_dimension(self):
# Test for trac ticket #2074
# only check that shuffle does not raise an error
for t in [
[1, 2, 3, None],
[(1, 1), (2, 2), (3, 3), None],
[1, (2, 2), (3, 3), None],
[(1, 1), 2, 3, None],
]:
shuffled = list(t)
mkl_random.shuffle(shuffled)

def test_call_within_randomstate(self):
# Check that custom RandomState does not call into global state
m = mkl_random.RandomState()
m.seed(1234)
res = m.choice(10, size=10, p=np.ones(10) / 10.0)
for i in range(3):
mkl_random.seed(i)
m.seed(1234)
# If m.state is not honored, the result will change
assert_array_equal(m.choice(10, size=10, p=np.ones(10) / 10.0), res)

def test_multivariate_normal_size_types(self):
# Test for multivariate_normal issue with 'size' argument.
# Check that the multivariate_normal size argument can be a
# numpy integer.
mkl_random.multivariate_normal([0], [[0]], size=1)
mkl_random.multivariate_normal([0], [[0]], size=np.int_(1))
mkl_random.multivariate_normal([0], [[0]], size=np.int64(1))

def test_beta_small_parameters(self):
# Test that beta with small a and b parameters does not produce
# NaNs due to roundoff errors causing 0 / 0, gh-5851
mkl_random.seed(1234567890)
x = mkl_random.beta(0.0001, 0.0001, size=100)
assert_(not np.any(np.isnan(x)), "Nans in mkl_random.beta")

def test_choice_sum_of_probs_tolerance(self):
# The sum of probs should be 1.0 with some tolerance.
# For low precision dtypes the tolerance was too tight.
# See numpy github issue 6123.
mkl_random.seed(1234)
a = [1, 2, 3]
counts = [4, 4, 2]
for dt in np.float16, np.float32, np.float64:
probs = np.array(counts, dtype=dt) / sum(counts)
c = mkl_random.choice(a, p=probs)
assert_(c in a)
assert_raises(ValueError, mkl_random.choice, a, p=probs * 0.9)

def test_shuffle_of_array_of_different_length_strings(self):
# Test that permuting an array of different length strings
# will not cause a segfault on garbage collection
# Tests gh-7710
mkl_random.seed(1234)

a = np.array(["a", "a" * 1000])

for _ in range(100):
mkl_random.shuffle(a)

# Force Garbage Collection - should not segfault.
import gc

gc.collect()

def test_shuffle_of_array_of_objects(self):
# Test that permuting an array of objects will not cause
# a segfault on garbage collection.
# See gh-7719
mkl_random.seed(1234)
a = np.array([np.arange(1), np.arange(4)], dtype=object)

for _ in range(1000):
mkl_random.shuffle(a)

# Force Garbage Collection - should not segfault.
import gc

gc.collect()

def test_permutation_subclass(self):
class N(np.ndarray):
pass

rng = mkl_random.RandomState()
orig = np.arange(3).view(N)
rng.permutation(orig)
assert_array_equal(orig, np.arange(3).view(N))

class M:
a = np.arange(5)

def __array__(self, dtype=None, copy=None):
return self.a

m = M()
rng.permutation(m)
assert_array_equal(m.__array__(), np.arange(5))

def test_warns_byteorder(self):
# GH 13159
other_byteord_dt = "<i4" if sys.byteorder == "big" else ">i4"
with pytest.deprecated_call(match="non-native byteorder is not"):
mkl_random.randint(0, 200, size=10, dtype=other_byteord_dt)

def test_named_argument_initialization(self):
# GH 13669
rs1 = mkl_random.RandomState(123456789)
rs2 = mkl_random.RandomState(seed=123456789)
assert rs1.randint(0, 100) == rs2.randint(0, 100)

def test_choice_return_dtype(self):
# GH 9867, now long since the NumPy default changed.
c = mkl_random.choice(10, p=[0.1] * 10, size=2)
assert c.dtype == np.dtype(np.long)
c = mkl_random.choice(10, p=[0.1] * 10, replace=False, size=2)
assert c.dtype == np.dtype(np.long)
c = mkl_random.choice(10, size=2)
assert c.dtype == np.dtype(np.long)
c = mkl_random.choice(10, replace=False, size=2)
assert c.dtype == np.dtype(np.long)


def test_multinomial_empty():
# gh-20483
# Ensure that empty p-vals are correctly handled
assert mkl_random.multinomial(10, []).shape == (0,)
assert mkl_random.multinomial(3, [], size=(7, 5, 3)).shape == (7, 5, 3, 0)


def test_multinomial_1d_pval():
# gh-20483
with pytest.raises(TypeError, match="pvals must be a 1-d"):
mkl_random.multinomial(10, 0.3)
Loading