@@ -487,7 +487,7 @@ def _identical_extents(old, new, dshape):
487487 (ostop == nstop ) and \
488488 (ostep == nstep )
489489
490- def _get_chunk_space (chunk_shape , shape ):
490+ def get_chunk_space (chunk_shape , shape ):
491491 """
492492 Derive the chunk space and shape given the user-provided ``chunks`` option.
493493 Chunk space is the number of chunks in each dimension which presents like an array
@@ -512,10 +512,9 @@ def _get_chunk_space(chunk_shape, shape):
512512
513513 """
514514
515- q = tuple ([int (i / j ) for i , j in zip (shape , chunk_shape )])
516- return q
515+ return tuple ([int (i / j ) for i , j in zip (shape , chunk_shape )])
517516
518- def _get_chunk_shape (chunks , shape , dims , chunk_limits = True ):
517+ def get_chunk_shape (chunks , shape , dims , chunk_limits = True ):
519518 chunk_shape = [i for i in shape ]
520519
521520 for dim in chunks .keys ():
@@ -525,7 +524,7 @@ def _get_chunk_shape(chunks, shape, dims, chunk_limits=True):
525524 if d == dim :
526525 idim = x
527526
528- if idim == None :
527+ if not idim :
529528 raise ValueError (
530529 f"Requested chunking across dimension '{ dim } '"
531530 f"but only '{ dims } ' present in the dataset"
@@ -539,3 +538,154 @@ def _get_chunk_shape(chunks, shape, dims, chunk_limits=True):
539538 chunk_shape [idim ] = max (chunk_size , min_size )
540539
541540 return tuple (chunk_shape )
541+
def get_chunk_positions(chunk_space):
    """
    Enumerate the position of every chunk in ``chunk space``.

    :param chunk_space: (tuple) The number of chunks along each dimension.

    :returns: A list of index tuples, one per chunk, in the order produced by
        ``itertools.product`` (the last dimension varies fastest). An empty
        ``chunk_space`` yields a single empty position ``[()]``.
    """
    # Each dimension contributes the indices 0..n-1; the cartesian product of
    # those ranges visits every chunk exactly once.
    return list(product(*(range(n_chunks) for n_chunks in chunk_space)))
552+
def get_chunk_extent(position, shape, chunk_space):
    """
    Derive the extent of a single chunk within the full array.

    :param position: (tuple) The index of the chunk in ``chunk space``.

    :param shape: (tuple) The shape of the full array.

    :param chunk_space: (tuple) The number of chunks along each dimension.

    :returns: A list containing one ``slice(start, stop)`` per dimension of
        ``position``. When a dimension does not divide evenly, boundaries are
        rounded down, so consecutive chunks stay contiguous and the last chunk
        always stops at the dimension size.
    """
    # Boundaries are computed with integer floor division. Scaling by the
    # float ratio ``size / n_chunks`` can land a hair below an exact integer,
    # and truncating that value would shift the boundary down by one.
    return [
        slice(
            int((index * size) // n_chunks),
            int(((index + 1) * size) // n_chunks),
        )
        for index, size, n_chunks in zip(position, shape, chunk_space)
    ]
567+
def get_dask_chunks(
        array_space,
        fragment_space,
        extent,
        dtype,
        explicit_shapes=None):
    """
    Build the ``chunks`` specification handed to Dask when creating a Dask Array: the fragment sizes
    along every fragmented dimension, normalised by ``dask.array.core.normalize_chunks``.
    Copied from cf-python version 3.14.0 onwards.

    :param array_space: (tuple) The shape of the array in ``array space``.

    :param fragment_space: (tuple) The shape of the array in ``fragment space``, i.e. the number
        of fragments along each dimension.

    :param extent: (dict) The global extent of each fragment, looked up by the fragment's position
        tuple in fragment space; each entry provides one ``start``/``stop`` range per dimension.

    :param dtype: (obj) The datatype for this variable.

    :param explicit_shapes: Chunking to use for dimensions spanned by a single fragment. May be ``None``,
        a string or a number (applied to every such dimension), a dict keyed by dimension index
        (missing keys default to ``"auto"``), or a sequence with one entry per dimension.

    :returns: A tuple of the chunk sizes along each dimension.

    :raises ValueError: If ``explicit_shapes`` is a sequence whose length differs from the number of dimensions.
    """

    from numbers import Number
    from dask.array.core import normalize_chunks

    ndim = len(array_space)

    # One entry per dimension: a tuple of fragment sizes when the dimension is
    # fragmented, or a None placeholder (filled in below) when a single
    # fragment spans it.
    sizes_by_dim = []
    fragmented_axes = []

    for axis, n_fragments in enumerate(fragment_space):
        if n_fragments == 1:
            sizes_by_dim.append(None)
            continue

        # Walk along this axis only, holding every other axis at fragment 0.
        locator = [0] * ndim
        sizes = []
        for fragment in range(n_fragments):
            locator[axis] = fragment
            span = extent[tuple(locator)][axis]
            sizes.append(span.stop - span.start)

        sizes_by_dim.append(tuple(sizes))
        fragmented_axes.append(axis)

    ## Decide how an unfragmented dimension picks its chunking.

    if explicit_shapes is None or isinstance(explicit_shapes, (str, Number)):
        # A single value (or None) applies to every unfragmented dimension.
        def unfragmented(axis):
            return explicit_shapes
    elif isinstance(explicit_shapes, dict):
        def unfragmented(axis):
            return explicit_shapes.get(axis, "auto")
    else:
        # explicit_shapes is a sequence with one entry per dimension.
        if len(explicit_shapes) != ndim:
            raise ValueError(
                f"Wrong number of 'explicit_shapes' elements in {explicit_shapes}: "
                f"Got {len(explicit_shapes)}, expected {ndim}"
            )

        def unfragmented(axis):
            return explicit_shapes[axis]

    chunk_spec = [
        sizes if axis in fragmented_axes else unfragmented(axis)
        for axis, sizes in enumerate(sizes_by_dim)
    ]

    return normalize_chunks(chunk_spec, shape=array_space, dtype=dtype)
640+
def combine_slices(shape, extent, newslice):
    """
    Combine existing ``extent`` attribute with a new set of slices.

    :param shape: (tuple) The size of each dimension; used as the default ``stop``
        for any slice that does not give one.

    :param extent: (list) The slices already applied, one per dimension. If empty,
        ``newslice`` is returned unchanged; otherwise the list is updated in place
        and returned.

    :param newslice: (tuple) A set of slices to apply to the data
        'Super-Lazily', i.e. the slices will be combined with existing information
        and applied later in the process.

    :returns: The combined set of slices.

    :raises ValueError: If ``newslice`` does not have one entry per dimension of ``shape``.

    :raises IndexError: If a new slice spans more elements than the existing slice
        for that dimension.
    """

    if len(newslice) != len(shape):
        raise ValueError(
            "Compute chain broken - dimensions have been reduced already."
        )

    def combine_sliced_dim(old, new, dim):
        """Express ``new`` (relative to the ``old`` selection) as one slice of the original dimension."""

        ostart = old.start or 0
        ostop = old.stop or shape[dim]
        ostep = old.step or 1

        osize = (ostop - ostart) / ostep

        nstart = new.start or 0
        nstop = new.stop or shape[dim]
        nstep = new.step or 1

        nsize = (nstop - nstart) / nstep

        if nsize > osize:
            raise IndexError(
                f'Attempted to slice dimension "{dim}" with new slice "({nstart},{nstop},{nstep})" '
                f'but the dimension size is limited to {osize}.'
            )

        # Every index of the new slice counts elements of the old selection,
        # which are ``ostep`` apart in the original dimension.
        start = ostart + ostep * nstart
        step = ostep * nstep
        # The new slice covers (nstop - nstart) old-selection elements, so the
        # stop is that many *old* steps past the start. Scaling by the
        # combined step would overshoot by a factor of ``nstep``.
        stop = start + ostep * (nstop - nstart)

        return slice(start, stop, step)

    if not extent:
        # Nothing applied yet: the new slices are the whole story.
        return newslice

    for dim, new in enumerate(newslice):
        # Re-applying an identical slice is a no-op, so only combine when the
        # new slice differs from the one already recorded.
        if not _identical_extents(extent[dim], new, shape[dim]):
            extent[dim] = combine_sliced_dim(extent[dim], new, dim)
    return extent
0 commit comments