Skip to content

Commit 420f718

Browse files
committed
Thread utility methods
1 parent f8a633b commit 420f718

File tree

13 files changed

+101
-110
lines changed

13 files changed

+101
-110
lines changed

src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ pub mod util {
2323
pub mod parse;
2424
pub mod point;
2525
pub mod slice;
26+
pub mod thread;
2627
}
2728

2829
/// # Help Santa by solving puzzles to fix the weather machine's snow function.

src/util/thread.rs

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
//! Utility methods to spawn a number of
2+
//! [scoped](https://doc.rust-lang.org/stable/std/thread/fn.scope.html)
3+
//! threads equal to the number of cores on the machine. Unlike normal threads, scoped threads
4+
//! can borrow data from their environment.
5+
use std::thread::*;
6+
7+
/// Spawn `n` scoped threads, where `n` is the available parallelism.
8+
pub fn spawn<F, T>(f: F)
9+
where
10+
F: FnOnce() -> T + Copy + Send,
11+
T: Send,
12+
{
13+
scope(|scope| {
14+
for _ in 0..threads() {
15+
scope.spawn(f);
16+
}
17+
});
18+
}
19+
20+
/// Splits `items` into batches, one per thread. Items are assigned in a round robin fashion,
21+
/// to achieve a crude load balacing in case some items are more complex to process than others.
22+
pub fn spawn_batches<F, T, U>(mut items: Vec<U>, f: F)
23+
where
24+
F: FnOnce(Vec<U>) -> T + Copy + Send,
25+
T: Send,
26+
U: Send,
27+
{
28+
let threads = threads();
29+
let mut batches: Vec<_> = (0..threads).map(|_| Vec::new()).collect();
30+
let mut index = 0;
31+
32+
// Round robin items over each thread.
33+
while let Some(next) = items.pop() {
34+
batches[index % threads].push(next);
35+
index += 1;
36+
}
37+
38+
scope(|scope| {
39+
for batch in batches {
40+
scope.spawn(move || f(batch));
41+
}
42+
});
43+
}
44+
45+
/// Number of worker threads to use.
///
/// Uses the system's available parallelism, falling back to a single thread on
/// platforms where it cannot be determined instead of panicking. Always returns
/// at least 1.
fn threads() -> usize {
    available_parallelism().map_or(1, |n| n.get())
}

src/year2015/day04.rs

+6-8
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
//! [`MD5`]: crate::util::md5
1919
//! [`format!`]: std::format
2020
use crate::util::md5::*;
21+
use crate::util::thread::*;
2122
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
22-
use std::thread;
2323

2424
pub struct Shared {
2525
prefix: String,
@@ -45,13 +45,11 @@ pub fn parse(input: &str) -> Shared {
4545
}
4646

4747
// Use as many cores as possible to parallelize the remaining search.
48-
thread::scope(|scope| {
49-
for _ in 0..thread::available_parallelism().unwrap().get() {
50-
#[cfg(not(feature = "simd"))]
51-
scope.spawn(|| worker(&shared));
52-
#[cfg(feature = "simd")]
53-
scope.spawn(|| simd::worker(&shared));
54-
}
48+
spawn(|| {
49+
#[cfg(not(feature = "simd"))]
50+
worker(&shared);
51+
#[cfg(feature = "simd")]
52+
simd::worker(&shared);
5553
});
5654

5755
shared

src/year2016/day05.rs

+6-8
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
//!
66
//! [`Year 2015 Day 4`]: crate::year2015::day04
77
use crate::util::md5::*;
8+
use crate::util::thread::*;
89
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
910
use std::sync::Mutex;
10-
use std::thread;
1111

1212
struct Shared {
1313
prefix: String,
@@ -35,13 +35,11 @@ pub fn parse(input: &str) -> Vec<u32> {
3535
}
3636

3737
// Use as many cores as possible to parallelize the remaining search.
38-
thread::scope(|scope| {
39-
for _ in 0..thread::available_parallelism().unwrap().get() {
40-
#[cfg(not(feature = "simd"))]
41-
scope.spawn(|| worker(&shared, &mutex));
42-
#[cfg(feature = "simd")]
43-
scope.spawn(|| simd::worker(&shared, &mutex));
44-
}
38+
spawn(|| {
39+
#[cfg(not(feature = "simd"))]
40+
worker(&shared, &mutex);
41+
#[cfg(feature = "simd")]
42+
simd::worker(&shared, &mutex);
4543
});
4644

4745
let mut found = mutex.into_inner().unwrap().found;

src/year2016/day14.rs

+2-6
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33
//! Brute force slog through all possible keys, parallelized as much as possible. An optimization
44
//! for part two is a quick method to convert `u32` to 8 ASCII digits.
55
use crate::util::md5::*;
6+
use crate::util::thread::*;
67
use std::collections::{BTreeMap, BTreeSet};
78
use std::sync::atomic::{AtomicBool, AtomicI32, Ordering};
89
use std::sync::Mutex;
9-
use std::thread;
1010

1111
/// Atomics can be safely shared between threads.
1212
struct Shared<'a> {
@@ -44,11 +44,7 @@ fn generate_pad(input: &str, part_two: bool) -> i32 {
4444
let mutex = Mutex::new(exclusive);
4545

4646
// Use as many cores as possible to parallelize the search.
47-
thread::scope(|scope| {
48-
for _ in 0..thread::available_parallelism().unwrap().get() {
49-
scope.spawn(|| worker(&shared, &mutex, part_two));
50-
}
51-
});
47+
spawn(|| worker(&shared, &mutex, part_two));
5248

5349
let exclusive = mutex.into_inner().unwrap();
5450
*exclusive.found.iter().nth(63).unwrap()

src/year2017/day14.rs

+2-6
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
//!
66
//! [`Day 10`]: crate::year2017::day10
77
//! [`Day 12`]: crate::year2017::day12
8+
use crate::util::thread::*;
89
use std::sync::atomic::{AtomicUsize, Ordering};
910
use std::sync::Mutex;
10-
use std::thread;
1111

1212
/// Atomics can be safely shared between threads.
1313
pub struct Shared {
@@ -27,11 +27,7 @@ pub fn parse(input: &str) -> Vec<u8> {
2727
let mutex = Mutex::new(exclusive);
2828

2929
// Use as many cores as possible to parallelize the hashing.
30-
thread::scope(|scope| {
31-
for _ in 0..thread::available_parallelism().unwrap().get() {
32-
scope.spawn(|| worker(&shared, &mutex));
33-
}
34-
});
30+
spawn(|| worker(&shared, &mutex));
3531

3632
mutex.into_inner().unwrap().grid
3733
}

src/year2018/day11.rs

+21-30
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
//! This makes the total complexity `O(n³)`, however the calculation for each size is independent
77
//! so we can parallelize over multiple threads.
88
use crate::util::parse::*;
9+
use crate::util::thread::*;
910
use std::sync::Mutex;
10-
use std::thread;
1111

1212
pub struct Result {
1313
x: usize,
@@ -38,36 +38,15 @@ pub fn parse(input: &str) -> Vec<Result> {
3838
}
3939

4040
// Use as many cores as possible to parallelize the search.
41-
let threads = thread::available_parallelism().unwrap().get();
41+
// Smaller sizes take more time so keep batches roughly the same effort so that some
42+
// threads are not finishing too soon and waiting idle, while others are still busy.
43+
// For example if there are 4 cores, then they will be assigned sizes:
44+
// * 1, 5, 9, ..
45+
// * 2, 6, 10, ..
46+
// * 3, 7, 11, ..
47+
// * 4, 8, 12, ..
4248
let mutex = Mutex::new(Vec::new());
43-
44-
thread::scope(|scope| {
45-
for i in 0..threads {
46-
// Shadow references in local variables so that they can be moved into closure.
47-
let sat = &sat;
48-
let mutex = &mutex;
49-
50-
// Smaller sizes take more time so keep batches roughly the same effort so that some
51-
// threads are not finishing too soon and waiting idle, while others are still busy.
52-
// For example if there are 4 cores, then they will be assigned sizes:
53-
// * 1, 5, 9, ..
54-
// * 2, 6, 10, ..
55-
// * 3, 7, 11, ..
56-
// * 4, 8, 12, ..
57-
scope.spawn(move || {
58-
let batch: Vec<_> = (1 + i..301)
59-
.step_by(threads)
60-
.map(|size| {
61-
let (power, x, y) = square(sat, size);
62-
Result { x, y, size, power }
63-
})
64-
.collect();
65-
66-
mutex.lock().unwrap().extend(batch);
67-
});
68-
}
69-
});
70-
49+
spawn_batches((1..301).collect(), |batch| worker(batch, &sat, &mutex));
7150
mutex.into_inner().unwrap()
7251
}
7352

@@ -81,6 +60,18 @@ pub fn part2(input: &[Result]) -> String {
8160
format!("{x},{y},{size}")
8261
}
8362

63+
fn worker(batch: Vec<usize>, sat: &[i32], mutex: &Mutex<Vec<Result>>) {
64+
let result: Vec<_> = batch
65+
.into_iter()
66+
.map(|size| {
67+
let (power, x, y) = square(sat, size);
68+
Result { x, y, size, power }
69+
})
70+
.collect();
71+
72+
mutex.lock().unwrap().extend(result);
73+
}
74+
8475
/// Find the (x,y) coordinates and max power for a square of the specified size.
8576
fn square(sat: &[i32], size: usize) -> (i32, usize, usize) {
8677
let mut max_power = i32::MIN;

src/year2018/day15.rs

+2-6
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,9 @@
7878
//! Choosing the first intersection in reading order the Elf correctly moves left.
7979
use crate::util::grid::*;
8080
use crate::util::point::*;
81+
use crate::util::thread::*;
8182
use std::sync::atomic::{AtomicBool, AtomicI32, Ordering};
8283
use std::sync::mpsc::{channel, Sender};
83-
use std::thread;
8484

8585
const READING_ORDER: [Point; 4] = [UP, LEFT, RIGHT, DOWN];
8686

@@ -149,11 +149,7 @@ pub fn part2(input: &Input) -> i32 {
149149
let shared = Shared { done: AtomicBool::new(false), elf_attack_power: AtomicI32::new(4), tx };
150150

151151
// Use as many cores as possible to parallelize the search.
152-
thread::scope(|scope| {
153-
for _ in 0..thread::available_parallelism().unwrap().get() {
154-
scope.spawn(|| worker(input, &shared));
155-
}
156-
});
152+
spawn(|| worker(input, &shared));
157153

158154
// Hang up the channel.
159155
drop(shared.tx);

src/year2018/day24.rs

+2-6
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99
//! [`Day 15`]: crate::year2018::day15
1010
use crate::util::hash::*;
1111
use crate::util::parse::*;
12+
use crate::util::thread::*;
1213
use std::sync::atomic::{AtomicBool, AtomicI32, Ordering};
1314
use std::sync::mpsc::{channel, Sender};
14-
use std::thread;
1515

1616
pub struct Input {
1717
immune: Vec<Group>,
@@ -99,11 +99,7 @@ pub fn part2(input: &Input) -> i32 {
9999
let shared = Shared { done: AtomicBool::new(false), boost: AtomicI32::new(1), tx };
100100

101101
// Use as many cores as possible to parallelize the search.
102-
thread::scope(|scope| {
103-
for _ in 0..thread::available_parallelism().unwrap().get() {
104-
scope.spawn(|| worker(input, &shared));
105-
}
106-
});
102+
spawn(|| worker(input, &shared));
107103

108104
// Hang up the channel.
109105
drop(shared.tx);

src/year2021/day18.rs

+4-14
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@
2727
//! The root node is stored at index 0. For a node at index `i` its left child is at index
2828
//! `2i + 1`, right child at index `2i + 2` and parent at index `i / 2`. As leaf nodes are
2929
//! always greater than or equal to zero, `-1` is used as a special sentinel value for non-leaf nodes.
30+
use crate::util::thread::*;
3031
use std::sync::Mutex;
31-
use std::thread;
3232

3333
type Snailfish = [i32; 63];
3434

@@ -83,20 +83,10 @@ pub fn part2(input: &[Snailfish]) -> i32 {
8383
}
8484
}
8585

86-
// Break the work into roughly equally size batches.
87-
let threads = thread::available_parallelism().unwrap().get();
88-
let size = pairs.len().div_ceil(threads);
89-
let batches: Vec<_> = pairs.chunks(size).collect();
90-
91-
// Use as many cores as possible to parallelize the calculation.
86+
// Use as many cores as possible to parallelize the calculation,
87+
// breaking the work into roughly equally size batches.
9288
let mutex = Mutex::new(0);
93-
94-
thread::scope(|scope| {
95-
for batch in batches {
96-
scope.spawn(|| worker(batch, &mutex));
97-
}
98-
});
99-
89+
spawn_batches(pairs, |batch| worker(&batch, &mutex));
10090
mutex.into_inner().unwrap()
10191
}
10292

src/year2022/day11.rs

+2-6
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@
4242
//!
4343
//! [`iter_unsigned`]: ParseOps::iter_unsigned
4444
use crate::util::parse::*;
45+
use crate::util::thread::*;
4546
use std::sync::Mutex;
46-
use std::thread;
4747

4848
pub struct Monkey {
4949
items: Vec<u64>,
@@ -130,11 +130,7 @@ fn parallel(monkeys: &[Monkey], pairs: Vec<Pair>) -> Business {
130130
let mutex = Mutex::new(exclusive);
131131

132132
// Use as many cores as possible to parallelize the calculation.
133-
thread::scope(|scope| {
134-
for _ in 0..thread::available_parallelism().unwrap().get() {
135-
scope.spawn(|| worker(monkeys, &mutex));
136-
}
137-
});
133+
spawn(|| worker(monkeys, &mutex));
138134

139135
mutex.into_inner().unwrap().business
140136
}

src/year2023/day12.rs

+4-14
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,8 @@
119119
//! This is equivalent to the prefix sum approach described above but a little clearer to
120120
//! understand however slower to calculate.
121121
use crate::util::parse::*;
122+
use crate::util::thread::*;
122123
use std::sync::atomic::{AtomicU64, Ordering};
123-
use std::thread;
124124

125125
type Spring<'a> = (&'a [u8], Vec<usize>);
126126

@@ -141,20 +141,10 @@ pub fn part1(input: &[Spring<'_>]) -> u64 {
141141
}
142142

143143
pub fn part2(input: &[Spring<'_>]) -> u64 {
144-
// Break the work into roughly equally size batches.
145-
let threads = thread::available_parallelism().unwrap().get();
146-
let size = input.len().div_ceil(threads);
147-
let batches: Vec<_> = input.chunks(size).collect();
148-
149-
// Use as many cores as possible to parallelize the calculation.
144+
// Use as many cores as possible to parallelize the calculation,
145+
// breaking the work into roughly equally size batches.
150146
let shared = AtomicU64::new(0);
151-
152-
thread::scope(|scope| {
153-
for batch in batches {
154-
scope.spawn(|| shared.fetch_add(solve(batch, 5), Ordering::Relaxed));
155-
}
156-
});
157-
147+
spawn_batches(input.to_vec(), |batch| shared.fetch_add(solve(&batch, 5), Ordering::Relaxed));
158148
shared.load(Ordering::Relaxed)
159149
}
160150

0 commit comments

Comments
 (0)