Skip to content

Commit 560f4a6

Browse files
committed
Added functions from CFAPyX
1 parent e936ad6 commit 560f4a6

File tree

1 file changed

+155
-5
lines changed

1 file changed

+155
-5
lines changed

arraypartition/partition.py

Lines changed: 155 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,7 @@ def _identical_extents(old, new, dshape):
487487
(ostop == nstop) and \
488488
(ostep == nstep)
489489

490-
def _get_chunk_space(chunk_shape, shape):
490+
def get_chunk_space(chunk_shape, shape):
491491
"""
492492
Derive the chunk space and shape given the user-provided ``chunks`` option.
493493
Chunk space is the number of chunks in each dimension which presents like an array
@@ -512,10 +512,9 @@ def _get_chunk_space(chunk_shape, shape):
512512
513513
"""
514514

515-
q = tuple([int(i/j) for i, j in zip(shape, chunk_shape)])
516-
return q
515+
return tuple([int(i/j) for i, j in zip(shape, chunk_shape)])
517516

518-
def _get_chunk_shape(chunks, shape, dims, chunk_limits=True):
517+
def get_chunk_shape(chunks, shape, dims, chunk_limits=True):
519518
chunk_shape = [i for i in shape]
520519

521520
for dim in chunks.keys():
@@ -525,7 +524,7 @@ def _get_chunk_shape(chunks, shape, dims, chunk_limits=True):
525524
if d == dim:
526525
idim = x
527526

528-
if idim == None:
527+
if not idim:
529528
raise ValueError(
530529
f"Requested chunking across dimension '{dim}'"
531530
f"but only '{dims}' present in the dataset"
@@ -539,3 +538,154 @@ def _get_chunk_shape(chunks, shape, dims, chunk_limits=True):
539538
chunk_shape[idim] = max(chunk_size, min_size)
540539

541540
return tuple(chunk_shape)
541+
542+
def get_chunk_positions(chunk_space):
    """
    Enumerate the position of every chunk within the chunk space.

    :param chunk_space: (tuple) The number of chunks along each dimension.

    :returns: A list of coordinate tuples, one per chunk, covering every
        combination of indices from ``0`` up to (but excluding) the chunk
        count of each dimension. Coordinates are produced in the order given
        by ``itertools.product`` (the last dimension varies fastest).
    """
    # Every axis starts at zero, so the Cartesian product of plain ranges
    # yields the coordinates directly; no explicit origin is required.
    return list(product(*(range(n_chunks) for n_chunks in chunk_space)))
552+
553+
def get_chunk_extent(position, shape, chunk_space):
    """
    Determine the extent, in array space, of the chunk at ``position``.

    :param position: (tuple) Index of the chunk along each dimension of the
        chunk space.

    :param shape: (tuple) Size of the array along each dimension.

    :param chunk_space: (tuple) Number of chunks along each dimension.

    :returns: A list holding one ``slice(start, stop)`` per dimension of
        ``position``.
    """
    extent = []
    for dim, pos_index in enumerate(position):
        shape_size = shape[dim]
        space_size = chunk_space[dim]

        # Chunk boundaries sit at pos * shape / space, rounded down. Integer
        # floor division keeps this exact; multiplying by a float ratio can
        # land fractionally below a whole number and truncate to the wrong
        # index.
        start = int((pos_index * shape_size) // space_size)
        stop = int(((pos_index + 1) * shape_size) // space_size)

        extent.append(slice(start, stop))
    return extent
567+
568+
def get_dask_chunks(
        array_space,
        fragment_space,
        extent,
        dtype,
        explicit_shapes=None):
    """
    Build the ``chunks`` specification handed to Dask when creating a Dask Array:
    for every dimension, the sizes of the fragments along that dimension.
    Copied from cf-python version 3.14.0 onwards.

    :param array_space: (tuple) The shape of the array in ``array space``.

    :param fragment_space: (tuple) The shape of the array in ``fragment space``.

    :param extent: (dict) The global extent of each fragment, keyed by its position
        in fragment space; each value is indexed by dimension to obtain a slice
        in array space.

    :param dtype: (obj) The datatype for this variable.

    :param explicit_shapes: (tuple) Set of shapes to apply to the fragments - currently
        not implemented outside this function. May be ``None``, a string, a number,
        a dict keyed by dimension index, or a sequence with one entry per dimension.
        It is only consulted for dimensions spanned by a single fragment.

    :returns: A tuple of the chunk sizes along each dimension.

    :raises ValueError: If ``explicit_shapes`` is a sequence whose length differs
        from the number of array dimensions.
    """

    from numbers import Number
    from dask.array.core import normalize_chunks

    ndim = len(array_space)
    sizes_by_dim = []
    split_dims = []

    for axis, count in enumerate(fragment_space):
        if count == 1:
            # A single fragment spans this dimension: leave a placeholder
            # that is replaced from ``explicit_shapes`` further down.
            sizes_by_dim.append(None)
            continue

        # Walk along this axis only, holding every other axis at fragment 0.
        cursor = [0] * ndim
        widths = []
        for frag in range(count):
            cursor[axis] = frag
            span = extent[tuple(cursor)][axis]
            widths.append(span.stop - span.start)

        sizes_by_dim.append(tuple(widths))
        split_dims.append(axis)

    # Decide how an unfragmented dimension obtains its chunk size. The lookup
    # is wrapped in a function so it is only evaluated for dimensions that
    # actually need it.
    if explicit_shapes is None or isinstance(explicit_shapes, (str, Number)):
        def unsplit_size(axis):
            return explicit_shapes
    elif isinstance(explicit_shapes, dict):
        def unsplit_size(axis):
            return explicit_shapes.get(axis, "auto")
    else:
        # explicit_shapes is a sequence
        if len(explicit_shapes) != ndim:
            raise ValueError(
                f"Wrong number of 'explicit_shapes' elements in {explicit_shapes}: "
                f"Got {len(explicit_shapes)}, expected {ndim}"
            )

        def unsplit_size(axis):
            return explicit_shapes[axis]

    chunk_spec = [
        sizes if axis in split_dims else unsplit_size(axis)
        for axis, sizes in enumerate(sizes_by_dim)
    ]

    return normalize_chunks(chunk_spec, shape=array_space, dtype=dtype)
640+
641+
def combine_slices(shape, extent, newslice):
    """
    Combine existing ``extent`` attribute with a new set of slices.

    :param shape: (tuple) Size of each dimension; used as the ``stop`` value of
        any slice that does not define one.

    :param extent: (list) The slices already recorded for each dimension. When
        empty, ``newslice`` is returned untouched; otherwise its entries are
        replaced in place and the same object is returned.

    :param newslice: (tuple) A set of slices to apply to the data
        'Super-Lazily', i.e the slices will be combined with existing information
        and applied later in the process.

    :returns: The combined set of slices.

    :raises ValueError: If ``newslice`` and ``shape`` differ in length.

    :raises IndexError: If a new slice selects more elements than the existing
        slice for that dimension provides.
    """

    if len(newslice) != len(shape):
        raise ValueError(
            "Compute chain broken - dimensions have been reduced already."
        )

    def combine_sliced_dim(old, new, dim):
        """Express ``new`` applied on top of ``old`` as a single slice."""

        ostart = old.start or 0
        ostop = old.stop or shape[dim]
        ostep = old.step or 1

        osize = (ostop - ostart)/ostep

        nstart = new.start or 0
        nstop = new.stop or shape[dim]
        nstep = new.step or 1

        nsize = (nstop - nstart)/nstep

        if nsize > osize:
            raise IndexError(
                f'Attempted to slice dimension "{dim}" with new slice "({nstart},{nstop},{nstep})" '
                f'but the dimension size is limited to {osize}.'
            )

        start = ostart + ostep*nstart
        step = ostep * nstep
        # The new slice covers (nstop - nstart) positions of the already-sliced
        # axis, and each of those positions is ``ostep`` apart in the original
        # axis. Scaling by the combined ``step`` instead would apply ``nstep``
        # twice and overshoot whenever nstep != 1.
        stop = start + ostep * (nstop - nstart)

        return slice(start, stop, step)

    if not extent:
        return newslice

    for dim in range(len(newslice)):
        # Dimensions whose slice is unchanged keep their existing entry.
        if not _identical_extents(extent[dim], newslice[dim], shape[dim]):
            extent[dim] = combine_sliced_dim(extent[dim], newslice[dim], dim)
    return extent

0 commit comments

Comments
 (0)