|
2 | 2 | // then cut and paste the following lines to see how it works
|
3 | 3 | // To exit jshell type /exit
|
4 | 4 |
|
| 5 | +// # Stream |
| 6 | +// A stream is an API that defines a query on a source of values. |
| 7 | +// It's an abstraction of loops over the values using a declarative API |
| 8 | +// (what result you want and not how to compute it). |
| 9 | + |
| 10 | +// For example, to count the number of persons with a name starting with 'E', |
| 11 | +// one can write |
| 12 | +var names = List.of("Evan", "Helen", "Ebo"); |
| 13 | +var sum = 0; |
| 14 | +for(var name: names) { |
| 15 | + if (name.startsWith("E")) { |
| 16 | + sum++; |
| 17 | + } |
| 18 | +} |
| 19 | +System.out.println(sum); |
| 20 | + |
| 21 | +// But using a stream, it's simpler |
| 22 | +var names = List.of("Evan", "Helen", "Ebo"); |
| 23 | +var sum = names.stream().filter(name -> name.startsWith("E")).count(); |
| 24 | + |
| 25 | + |
| 26 | +// ## Sources |
| 27 | +// There are several ways to create a stream depending on the source |
| 28 | +// (The following examples are using `count()` to compute the number of values, |
| 29 | +// we will see later that the API is richer) |
| 30 | + |
| 31 | +// - stream of values |
| 32 | +var empty = Stream.empty(); |
| 33 | +var one = Stream.of(4); |
| 34 | +var many = Stream.of("hello", "stream"); |
| 35 | +System.out.println("empty count " + empty.count()); |
| 36 | +System.out.println("one count " + one.count()); |
| 37 | +System.out.println("many count " + many.count()); |
| 38 | + |
| 39 | +// - stream from a collection |
| 40 | +var listStream = List.of(1, 2, 3, 4).stream(); |
| 41 | +var mapStream = Map.of("bob", 3, "ana", 7).keySet().stream(); |
| 42 | +System.out.println("list count " + listStream.count()); |
| 43 | +System.out.println("map keys count " + mapStream.count()); |
| 44 | + |
| 45 | +// - stream from a range |
| 46 | +var range = IntStream.range(0, 10); |
| 47 | +System.out.println("range count " + range.count()); |
| 48 | + |
| 49 | + |
| 50 | +// ## Primitive version |
| 51 | +// Streams are represented by several classes, `java.util.stream.Stream` for a stream of objects and |
| 52 | +// `IntStream`, `DoubleStream` and `LongStream` for a stream of ints, doubles and longs. |
| 53 | +Stream<String> many = Stream.of("hello", "stream"); |
| 54 | +IntStream ints = IntStream.range(0, 10); |
| 55 | + |
| 56 | +// Using specialized classes for the numeric types: |
| 57 | +// - avoid boxing, an IntStream is more efficient than a Stream<Integer> |
| 58 | +// - `boxed()` converts to a Stream of wrapper objects |
| 59 | +// - offer supplementary numeric methods like, `min`, `max`, `sum()`, etc. |
| 60 | + |
| 61 | +// To sum the values in the range [0, 10[ |
| 62 | +var range = IntStream.range(0, 10); |
| 63 | +System.out.println("range sum: " + range.sum()); |
| 64 | + |
| 65 | + |
| 66 | +// ## Filter, map and flatMap |
| 67 | +// The main transformation methods are `filter`, `map` and `flatMap`. |
| 68 | + |
| 69 | +// ### filtering |
| 70 | +// `filter()` takes a function as parameter and keeps in the stream the values |
| 71 | +// for which the function returns true. |
| 72 | + |
| 73 | +record Employee(String name, int age) { } |
| 74 | +var employees = List.of(new Employee("bob", 55), new Employee("Ana", 32)); |
| 75 | +var youngCount = employees.stream().filter(e -> e.age() < 30).count(); |
| 76 | +System.out.println(youngCount); |
| 77 | + |
| 78 | +// ### mapping |
| 79 | +// `map()` transforms a value to another value |
| 80 | +record Employee(String name, int age) { } |
| 81 | +var employees = List.of(new Employee("Bob", 55), new Employee("Ana", 32)); |
| 82 | +var array = employees.stream().map(Employee::name).toArray(); |
| 83 | +System.out.println(Arrays.toString(array)); |
| 84 | + |
| 85 | +// `map()` has variations (`mapToInt`, `mapToLong`, etc) to transform to numeric streams |
| 86 | +record Employee(String name, int age) { } |
| 87 | +var employees = List.of(new Employee("bob", 55), new Employee("Ana", 32)); |
| 88 | +var average = employees.stream().mapToInt(Employee::age).average(); |
| 89 | +System.out.println(average); |
| 90 | + |
| 91 | +// ### flatMap |
| 92 | +// `flatMap()` transforms one value to 0 to _n_ values (here each friend becomes the stream of its pets) |
| 93 | +record Friend(String name, List<String> pets) { } |
| 94 | +var friends = List.of(new Friend("Bob", List.of()), |
| 95 | + new Friend("Ana", List.of("dog", "cat")), |
| 96 | + new Friend("Uno", List.of("rabbit")) |
| 97 | + ); |
| 98 | +System.out.println(friends.stream().flatMap(friend -> friend.pets().stream()).count()); |
| 99 | + |
| 100 | +// Like map, flatMap also has variations for numeric streams (`flatMapToInt`, `flatMapToLong`, etc) |
| 101 | +record Friend(String name, List<Integer> kidAges) { } |
| 102 | +var friends = List.of(new Friend("Bob", List.of(1, 3)), |
| 103 | + new Friend("Ana", List.of(15, 17)), |
| 104 | + new Friend("Uno", List.of()) |
| 105 | + ); |
| 106 | +System.out.println(friends.stream().flatMapToInt(friend -> friend.kidAges().stream().mapToInt(x -> x)).average()); |
| 107 | + |
| 108 | +// You can notice that while there is a method boxed() to transform a numeric stream to an object stream, |
| 109 | +// there is no method unboxToInt() equivalent because `mapToInt()` can be used instead. |
| 110 | + |
| 111 | +// ### flatMap is a generalization of filter and map |
| 112 | +// `filter()` result in a stream with 0 or 1 value, `map()` result in a stream with one transformed value so |
| 113 | +// both can be simulated with `flatMap()`. |
| 114 | + |
| 115 | +// for example, these calls to `filter()` and `map()` |
| 116 | +record Employee(String name, int age) { } |
| 117 | +var employees = List.of(new Employee("Bob", 55), new Employee("Ana", 32)); |
| 118 | +System.out.println(employees.stream().filter(e -> e.age() < 30).count()); |
| 119 | +System.out.println(Arrays.toString(employees.stream().map(Employee::name).toArray())); |
| 120 | + |
| 121 | +// can be simulated with `flatMap()`: an empty stream drops the value, a one-value stream keeps or transforms it |
| 122 | +System.out.println(employees.stream().flatMap(e -> (e.age() < 30)? Stream.of(e): Stream.empty()).count()); |
| 123 | +System.out.println(Arrays.toString(employees.stream().flatMap(e -> Stream.of(e.name())).toArray())); |
| 124 | + |
| 125 | +// While flatMap can simulate filter and map, they are implemented in a more efficient way |
| 126 | + |
| 127 | + |
| 128 | +// ## Distinct, sorted, min and max |
| 129 | +// Like in SQL, you can ask to filter values to only have distinct values or sort |
| 130 | +// the value with a comparator. |
| 131 | +// `distinct()` and `sorted()` are operations that require storing all the values |
| 132 | +// in an intermediate collection so they are not cheap. |
| 133 | + |
| 134 | +// `distinct()` asks for unique values |
| 135 | +System.out.println(IntStream.range(0, 10).map(x -> x / 2).distinct().count()); |
| 136 | + |
| 137 | +// `sorted()` asks to sort the values with a comparator |
| 138 | +record Employee(String name, int age) { } |
| 139 | +var employees = List.of(new Employee("Bob", 55), new Employee("Ana", 32)); |
| 140 | +var youngest = employees.stream().sorted(Comparator.comparingInt(Employee::age)).findFirst(); |
| 141 | +System.out.println(youngest); |
| 142 | + |
| 143 | +// The Stream API also provides `min()` and `max()` that are more efficient than sorting |
| 144 | +// all the values if you just want the minimum or the maximum |
| 145 | +var youngest = employees.stream().min(Comparator.comparingInt(Employee::age)); |
| 146 | +System.out.println(youngest); |
| 147 | +var oldest = employees.stream().max(Comparator.comparingInt(Employee::age)); |
| 148 | +System.out.println(oldest); |
| 149 | + |
| 150 | + |
| 151 | +// ## ForEach, reduce, collect and toArray |
| 152 | + |
| 153 | +// ### forEach |
| 154 | +// Takes a consumer as parameter that is called for each value of the stream. |
| 155 | +// This method is not used often because if you want to do a side effect on collection |
| 156 | +// using the method `collect` is easier. |
| 157 | +record Point(int x, int y) { } |
| 158 | +var points = List.of( |
| 159 | + new Point(1, 2), new Point(2, 5), new Point(3, -1)); |
| 160 | +points.stream().filter(p -> p.x() <= 2).forEach(System.out::println); |
| 161 | + |
| 162 | +// Note: collections (`java.util.Collection`) already have a method `forEach()`, |
| 163 | +// so no need to create a stream if you don't want to do a transformation on the |
| 164 | +// elements of the collection |
| 165 | +record Point(int x, int y) { } |
| 166 | +var points = List.of(new Point(2, 5)); |
| 167 | +points.stream().forEach(System.out::println); // stupid ! |
| 168 | +points.forEach(System.out::println); // better |
| 169 | + |
| 170 | +// ### reduce |
| 171 | +// Reduce allows to reduce all the values of a stream to only one result |
| 172 | +// by applying iteratively the same accumulator function on each value of the stream. |
| 173 | + |
| 174 | +// There are two forms of reduce |
| 175 | +// - reduce the stream values |
| 176 | +// - reduce using projected values |
| 177 | + |
| 178 | +// reduce with the stream values: add the points component by component (x with x, y with y) |
| 179 | +record Point(int x, int y) { } |
| 180 | +var points = List.of( |
| 181 | + new Point(1, 2), new Point(2, 5), new Point(3, -1)); |
| 182 | +var sum = points.stream().reduce((p1, p2) -> new Point(p1.x() + p2.x(), p1.y() + p2.y())); |
| 183 | +System.out.println("sum " + sum); |
| 184 | + |
| 185 | +// You can notice that the result is an `Optional` because if the stream is empty, `reduce` |
| 186 | +// has no result to return |
| 187 | + |
| 188 | +// reduce using projected values |
| 189 | +record Point(int x, int y) { } |
| 190 | +var points = List.of( |
| 191 | + new Point(1, 2), new Point(2, 5), new Point(3, -1)); |
| 192 | +var sumX = points.stream().reduce(0, (acc, p) -> acc + p.x(), Integer::sum); |
| 193 | +System.out.println("sumX " + sumX); |
| 194 | + |
| 195 | +// The first argument is the initial accumulator value, this is also the value |
| 196 | +// returned if the stream is empty so this variation of reduce doesn't return an `Optional`. |
| 197 | +// The last argument of `reduce()` is only used in parallel to aggregate the values |
| 198 | +// processed on different threads (see below for more info on parallel streams). |
| 199 | + |
| 200 | +// ### collect |
| 201 | +// reduce works well when the result is one value but not well when the result |
| 202 | +// is a list, a map or any data structures because collections are mutable |
| 203 | +// in Java. |
| 204 | +// For that, there is another mechanism, called `collect` that takes |
| 205 | +// a `Collector` as parameter and is tailored to create, mutate, merge and |
| 206 | +// optionally makes unmodifiable any mutable collections. |
| 207 | + |
| 208 | +// This section contains only a small number of examples because there is a following chapter |
| 209 | +// dedicated to collectors. |
| 210 | + |
| 211 | +// While you can create your own collector by implementing the interface `Collector`, |
| 212 | +// there are already more than 20 collectors available in the class `Collectors` |
| 213 | +import java.util.stream.Collectors; |
| 214 | + |
| 215 | +// `toList()`: gather all values to a list |
| 216 | +var names = List.of("Bob", "Ana", "Elvis", "Emma", "Josh"); |
| 217 | +var endsWithA = names.stream().filter(name -> name.endsWith("a")).collect(Collectors.toList()); |
| 218 | +System.out.println(endsWithA); |
| 219 | + |
| 220 | +// `toUnmodifiableList()`: gather all values to an unmodifiable list |
| 221 | +var names = List.of("Bob", "Ana", "Elvis", "Emma", "Josh"); |
| 222 | +var uppercases = names.stream().map(String::toUpperCase).collect(Collectors.toUnmodifiableList()); |
| 223 | +System.out.println(uppercases); |
| 224 | + |
| 225 | +// `toMap()`: gather all values to a map |
| 226 | +var names = List.of("Bob", "Ana", "Elvis", "Emma", "Josh"); |
| 227 | +var uppercaseMap = names.stream().collect(Collectors.toMap(name -> name, String::toUpperCase)); |
| 228 | +System.out.println(uppercaseMap); |
| 229 | + |
| 230 | +// `joining()`: gather all strings to one string |
| 231 | +var names = List.of("Bob", "Ana", "Elvis", "Emma", "Josh"); |
| 232 | +var asString = names.stream().collect(Collectors.joining(", ")); |
| 233 | +System.out.println(asString); |
| 234 | + |
| 235 | +// `groupingBy()`: gather all values into a map of list of values |
| 236 | +var names = List.of("Bob", "Ana", "Elvis", "Emma", "Josh"); |
| 237 | +var nameByLength = names.stream().collect(Collectors.groupingBy(String::length)); |
| 238 | +System.out.println(nameByLength); |
| 239 | + |
| 240 | + |
| 241 | +// ### toArray |
| 242 | +// Because in Java, arrays are typed at runtime, there are two ways to create an array |
| 243 | +// from a stream |
| 244 | +// - as an array of Object |
| 245 | +// - as an array of a specific type |
| 246 | + |
| 247 | +// As an array Object |
| 248 | +var names = List.of(14, 67, 32, 78); |
| 249 | +Object[] array = names.stream().toArray(); |
| 250 | +System.out.println(Arrays.toString(array)); |
| 251 | + |
| 252 | +// As an array of a specific type, passing the constructor as argument |
| 253 | +var names = List.of(14, 67, 32, 78); |
| 254 | +Integer[] array = names.stream().toArray(Integer[]::new); |
| 255 | +System.out.println(Arrays.toString(array)); |
| 256 | + |
| 257 | +// Note that you can provide an array type with a more specific type |
| 258 | +// because the VM will do a runtime check when the values are inserted |
| 259 | +// The following example compiles but throws an ArrayStoreException at runtime |
| 260 | +List<Object> names = List.<Object>of(14, 67, 32, "boom !"); |
| 261 | +Integer[] array = names.stream().toArray(Integer[]::new); |
| 262 | + |
| 263 | + |
| 264 | +// ## Infinite Stream |
| 265 | +// Streams can be infinite (like you can create an infinite loop), |
| 266 | +// and you have shortcut methods to stop the loop. |
| 267 | + |
| 268 | +// generate an infinite number of random values between [0, 10[ as String |
| 269 | +var random = new Random(0); |
| 270 | +var stream = Stream.generate(() -> "" + random.nextInt(10)); |
| 271 | +System.out.println(stream.limit(5).collect(Collectors.toList())); |
| 272 | + |
| 273 | +// iterate over all the power of two values |
| 274 | +var stream = IntStream.iterate(1, x -> x* 2); |
| 275 | +System.out.println(stream.limit(5).boxed().collect(Collectors.toList())); |
| 276 | + |
| 277 | +// ### Shortcut method `limit()` |
| 278 | +var sum = IntStream.iterate(1, x -> x* 2).limit(10).sum(); |
| 279 | +System.out.println("sum: " + sum); |
| 280 | + |
| 281 | +// ### Shortcut method `dropWhile()` |
| 282 | +// All lines after the one that starts with `#` have a length greater than 10 |
| 283 | +var text = """ |
| 284 | + # a line |
| 285 | + # another one |
| 286 | + a line that doesn't start with # |
| 287 | + """; |
| 288 | +var result = text.lines().dropWhile(l -> l.startsWith("#")).allMatch(s -> s.length() > 10); |
| 289 | +System.out.println(result); |
| 290 | + |
| 291 | +// ### Shortcut method `takeWhile()` |
| 292 | +// Find the first words that have a length less than 5 |
| 293 | +var list = List.of("foo", "bar", "baz", "whizzzz", "bob"); |
| 294 | +var result = list.stream().takeWhile(s -> s.length() < 5).collect(Collectors.joining(", ")); |
| 295 | +System.out.println(result); |
| 296 | + |
| 297 | + |
| 298 | +// ## Sequential vs parallel API |
| 299 | +// By default streams are executed sequentially in the same thread (think CPU core if you don't know what a thread is). |
| 300 | +// You can ask to split the processing of the stream on several threads using `.parallel()` |
| 301 | +// Using `.parallel()` is usually slower because you have to first distribute the calculation |
| 302 | +// and at the end gather the results from several threads. As a rule of thumb, it's only worth using `.parallel()` |
| 303 | +// if either you have a lot of values (like 100_000 or more) or the calculation is slooow. |
| 304 | + |
| 305 | +// For example, if you want to calculate the square root (not a slow operation) of the first 1_000_000 values |
| 306 | +// and print only the first 10 values |
| 307 | +import static java.util.stream.Collectors.toList; |
| 308 | +var squareRoots = IntStream.range(0, 1_000_000).parallel().mapToDouble(Math::sqrt).toArray(); |
| 309 | +System.out.println(Arrays.stream(squareRoots).limit(10).boxed().collect(toList())); |
| 310 | + |
| 311 | +// ### findAny()/findFirst(), forEach()/forEachOrdered() |
| 312 | +// Because a stream can be evaluated in parallel and maintaining a strict order in parallel costs a lot, |
| 313 | +// usual methods like `findAny()` or `forEach()` don't maintain the order on a parallel stream. |
| 314 | +// You have to use specialized methods (resp `findFirst()` and `forEachOrdered()`) to maintain the order. |
| 315 | + |
| 316 | + |
| 317 | +// ## Limitations |
| 318 | + |
| 319 | +// ### Reusing stream objects |
| 320 | +var stream = Stream.of(1, 2, 3); |
| 321 | +System.out.println(stream.count()); |
| 322 | +System.out.println(stream.count()); |
| 323 | + |
| 324 | +// > One query, one stream ! |
| 325 | + |
| 326 | +// ### Source mutation |
| 327 | +// A stream cannot modify the source from which it was created |
| 328 | +// The following example throws a ConcurrentModificationException |
| 329 | +var list = new ArrayList<>(List.of("foo", "bar")); |
| 330 | +list.stream().map(String::toUpperCase).forEach(list::add); |
| 331 | + |
| 332 | +// Use an iterator (in the example a `ListIterator`) for that |
| 333 | +var list = new ArrayList<>(List.of("foo", "bar")); |
| 334 | +var it = list.listIterator(); |
| 335 | +while(it.hasNext()) { |
| 336 | + var value = it.next(); |
| 337 | + it.add(value.toUpperCase()); |
| 338 | +} |
| 339 | +System.out.println(list); |
0 commit comments