22 changes: 11 additions & 11 deletions docs/src/literate/falsesharing/falsesharing.jl
@@ -30,14 +30,14 @@ data = rand(1_000_000 * nthreads());
 #
 # A common, manual implementation of this idea might look like this:

-using OhMyThreads: @spawn, index_chunks
+using OhMyThreads: @spawn, chunks

 function parallel_sum_falsesharing(data; nchunks = nthreads())
     psums = zeros(eltype(data), nchunks)
-    @sync for (c, idcs) in enumerate(index_chunks(data; n = nchunks))
+    @sync for (i, chunk) in enumerate(chunks(data; n = nchunks))
         @spawn begin
-            for i in idcs
-                psums[c] += data[i]
+            for x in chunk
+                psums[i] += x
             end
         end
     end
@@ -102,13 +102,13 @@ nthreads()

 function parallel_sum_tasklocal(data; nchunks = nthreads())
     psums = zeros(eltype(data), nchunks)
-    @sync for (c, idcs) in enumerate(index_chunks(data; n = nchunks))
+    @sync for (i, chunk) in enumerate(chunks(data; n = nchunks))
         @spawn begin
             local s = zero(eltype(data))
-            for i in idcs
-                s += data[i]
+            for x in chunk
+                s += x
             end
-            psums[c] = s
+            psums[i] = s
         end
     end
     return sum(psums)
@@ -131,8 +131,8 @@ end
 # using `map` and reusing the built-in (sequential) `sum` function on each parallel task:

 function parallel_sum_map(data; nchunks = nthreads())
-    ts = map(index_chunks(data, n = nchunks)) do idcs
-        @spawn @views sum(data[idcs])
+    ts = map(chunks(data, n = nchunks)) do chunk
+        @spawn sum(chunk)
     end
     return sum(fetch.(ts))
 end
@@ -141,7 +141,7 @@ end
 @btime parallel_sum_map($data);

 # This implementation is conceptually
-# clearer in that there is no explicit modification of shared state, i.e. no `pums[c] = s`,
+# clearer in that there is no explicit modification of shared state, i.e. no `psums[i] = s`,
 # anywhere at all. We can't run into false sharing if we don't modify shared state 😉.
 #
 # Note that since we use the built-in `sum` function, which is highly optimized, we might see
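For readers skimming the diff: the task-local pattern this file teaches can be sketched without OhMyThreads at all, using only `Base.Threads`. The chunking helper and function name below are illustrative stand-ins, not part of this PR:

```julia
using Base.Threads: @spawn, nthreads

# Illustrative stand-in for OhMyThreads' chunking: split 1:n into
# `nchunks` contiguous index ranges of roughly equal length.
even_chunks(n, nchunks) =
    [(round(Int, (i - 1) * n / nchunks) + 1):round(Int, i * n / nchunks) for i in 1:nchunks]

# Each task accumulates into its own local `s` and touches shared state
# only once, via `fetch`, so no false sharing can occur.
function parallel_sum_tasklocal_sketch(data; nchunks = nthreads())
    tasks = map(even_chunks(length(data), nchunks)) do idcs
        @spawn begin
            s = zero(eltype(data))
            for i in idcs
                s += data[i]
            end
            s
        end
    end
    return sum(fetch.(tasks))
end
```

`parallel_sum_tasklocal_sketch(rand(10^6))` should agree with `sum` up to floating-point reordering.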
16 changes: 8 additions & 8 deletions docs/src/literate/mc/mc.jl
@@ -79,16 +79,16 @@ using OhMyThreads: StaticScheduler

 # ## Manual parallelization
 #
-# First, using the `index_chunks` function, we divide the iteration interval `1:N` into
+# First, using the `chunks` function, we divide the iteration interval `1:N` into
 # `nthreads()` parts. Then, we apply a regular (sequential) `map` to spawn a Julia task
 # per chunk. Each task will locally and independently perform a sequential Monte Carlo
 # simulation. Finally, we fetch the results and compute the average estimate for $\pi$.

-using OhMyThreads: @spawn, index_chunks
+using OhMyThreads: @spawn, chunks

 function mc_parallel_manual(N; nchunks = nthreads())
-    tasks = map(index_chunks(1:N; n = nchunks)) do idcs
-        @spawn mc(length(idcs))
+    tasks = map(chunks(1:N; n = nchunks)) do chunk
+        @spawn mc(length(chunk))
     end
     pi = sum(fetch, tasks) / nchunks
     return pi
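The sequential kernel `mc` is referenced but not defined in this hunk. Judging from the `rand()^2 + rand()^2 < 1.0` benchmark kernel further down, it presumably looks something like the following sketch (the body here is an assumption for illustration, not code from the PR):

```julia
# Hypothetical sequential Monte Carlo kernel: draw N points in the unit
# square and count those inside the quarter circle; the hit fraction
# estimates pi/4, so multiplying by 4 estimates pi.
function mc(N)
    hits = 0
    for _ in 1:N
        hits += rand()^2 + rand()^2 < 1.0
    end
    return 4 * hits / N
end
```

With a definition like this, each task's `mc(length(chunk))` is already a pi estimate, so `sum(fetch, tasks) / nchunks` averages the per-chunk estimates into one.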
@@ -101,13 +101,13 @@ mc_parallel_manual(N)
 @btime mc_parallel_manual($N) samples=10 evals=3;

 # It is faster than `mc_parallel` above because the task-local computation
-# `mc(length(idcs))` is faster than the implicit task-local computation within
+# `mc(length(chunk))` is faster than the implicit task-local computation within
 # `tmapreduce` (which itself is a `mapreduce`).

-idcs = first(index_chunks(1:N; n = nthreads()))
+chunk = first(chunks(1:N; n = nthreads()))

-@btime mapreduce($+, $idcs) do i
+@btime mapreduce($+, $chunk) do i
     rand()^2 + rand()^2 < 1.0
 end samples=10 evals=3;

-@btime mc($(length(idcs))) samples=10 evals=3;
+@btime mc($(length(chunk))) samples=10 evals=3;
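A note on the rename for context: in ChunkSplitters-style APIs, `index_chunks` yields index ranges into the collection while `chunks` yields the corresponding element views, which is why the loop bodies in this diff change from `data[i]` to `x`. A hand-rolled illustration of that difference (the even two-way split and the names are assumptions, not the library itself):

```julia
data = collect(1:10)

# Index-based chunking (old code): ranges of indices into `data`.
index_style = [1:5, 6:10]

# Element-based chunking (new code): views of `data` itself.
element_style = [view(data, r) for r in index_style]

sum(length, index_style)   # 10 indices covered in total
sum(sum, element_style)    # 55, equal to sum(data)
```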