Skip to content

Commit 9c95345

Browse files
committed
Updates ml/pythonml/ji-qi-xue-xi-ku2.md
Auto commit by GitBook Editor
1 parent 0e42743 commit 9c95345

File tree

1 file changed

+89
-0
lines changed

1 file changed

+89
-0
lines changed

ml/pythonml/ji-qi-xue-xi-ku2.md

+89
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,95 @@
88

99
---
1010

11+
Initializing Spark
12+
13+
14+
```python
15+
#SparkContext
16+
from pyspark import SparkContext
17+
sc = SparkContext(master = 'local[2]')
18+
```
19+
20+
21+
```python
22+
#Inspect SparkContext
23+
sc.version
24+
sc.pythonVer
25+
sc.master
26+
str(sc.sparkHome)
27+
str(sc.sparkUser())
28+
sc.appName
29+
sc.applicationId
30+
sc.defaultParallelism
31+
sc.defaultMinPartitions
32+
```
33+
34+
35+
```python
36+
#Configuration
37+
from pyspark import SparkConf, SparkContext
38+
conf = (SparkConf().setMaster("local").setAppName("My app").set("spark.executor.memory", "1g"))
39+
sc = SparkContext(conf = conf)
40+
```
41+
42+
43+
```python
44+
#Loading Data
45+
```
46+
47+
48+
```python
49+
#Parallelized Collections
50+
rdd = sc.parallelize([('a',7),('a',2),('b',2)])
51+
rdd2 = sc.parallelize([('a',2),('d',1),('b',1)])
52+
rdd3 = sc.parallelize(range(100))
53+
rdd4 = sc.parallelize([("a",["x","y","z"]),("b",["p", "r"])])
54+
```
55+
56+
57+
```python
58+
#External Data
59+
textFile = sc.textFile("/my/directory/*.txt")
60+
textFile2 = sc.wholeTextFiles("/my/directory/")
61+
```
62+
63+
64+
```python
65+
#Selecting Data
66+
```
67+
68+
69+
```python
70+
#Getting
71+
rdd.collect() #[('a', 7), ('a', 2), ('b', 2)]
72+
rdd.take(2) #[('a', 7), ('a', 2)]
73+
rdd.first() #('a', 7)
74+
rdd.top(2) #[('b', 2), ('a', 7)]
75+
76+
#Sampling
77+
rdd3.sample(False, 0.15, 81).collect() #[3,4,27,31,40,41,42,43,60,76,79,80,86,97]
78+
79+
#Filtering
80+
rdd.filter(lambda x: "a" in x).collect() #[('a',7),('a',2)]
81+
rdd5 = rdd.flatMap(lambda x: x) #flatten the (key, value) pairs: ['a',7,'a',2,'b',2]
rdd5.distinct().collect() #['a',2,'b',7]
82+
rdd.keys().collect() #['a', 'a', 'b']
83+
84+
#Iterating
85+
def g(x):
    """Print a single element to standard output and return None."""
    print(x)
86+
rdd.foreach(g)
87+
```
88+
89+
```
90+
('a', 7)
91+
('b', 2)
92+
('a', 2)
93+
```
94+
95+
96+
```python
97+
98+
```
99+
11100
#
12101

13102
#

0 commit comments

Comments
 (0)