JobReadyProgrammer
diff --git a/‎project5/spark-warehouse/grades_view_perm/_temporary/0/_temporary/attempt_20180912224126_0002_m_000000_0/.part-00000-71756340-146d-4fb3-8e31-a959a9a52fcc-c000.snappy.parquet.crc b/‎project5/spark-warehouse/grades_view_perm/_temporary/0/_temporary/attempt_20180912224126_0002_m_000000_0/.part-00000-71756340-146d-4fb3-8e31-a959a9a52fcc-c000.snappy.parquet.crc
diff --git a/‎project5/spark-warehouse/grades_view_perm/_temporary/0/_temporary/attempt_20180912224126_0002_m_000000_0/part-00000-71756340-146d-4fb3-8e31-a959a9a52fcc-c000.snappy.parquet b/‎project5/spark-warehouse/grades_view_perm/_temporary/0/_temporary/attempt_20180912224126_0002_m_000000_0/part-00000-71756340-146d-4fb3-8e31-a959a9a52fcc-c000.snappy.parquet
diff --git a/‎project5/src/main/java/com/jobreadyprogrammer/spark/Application.java
+32-5 b/‎project5/src/main/java/com/jobreadyprogrammer/spark/Application.java
+32-5
diff --git a/‎project5/src/main/resources/students.csv
+1-2 b/‎project5/src/main/resources/students.csv
+1-2
@@ -4,6 +4,9 @@
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.SparkSession;
 
+import static org.apache.spark.sql.functions.*;
+
+
 public class Application {
 
 	public static void main(String[] args) {
@@ -14,16 +17,40 @@ public static void main(String[] args) {
 		        .getOrCreate();
 
 
-		 String filename = "src/main/resources/grade_chart.csv";
+		 String studentsFile = "src/main/resources/students.csv";
 
-		    Dataset<Row> df = spark.read().format("csv")
+		    Dataset<Row> studentDf = spark.read().format("csv")
 		        .option("inferSchema", "true") // Make sure to use string version of true
 		        .option("header", true)
-		        .load(filename);
+		        .load(studentsFile); 
+		    
+		 String gradeChartFile = "src/main/resources/grade_chart.csv";
+		 
+		    Dataset<Row> gradesDf = spark.read().format("csv")
+		        .option("inferSchema", "true") // Make sure to use string version of true
+		        .option("header", true)
+		        .load(gradeChartFile);
+		    
+
+		     // How to join tables
+		    // Talk about how you can get rid of the df.col() and just use col()
+		    // Talk about using just column names in strings in the select
+		    // Talk about how you can also just use the col() function instead of df.col()
+		    // Start by removing the "df." prefix, then go on to remove the col() as well to show the stripped-down version
+		    // Talk about how putting a filter after the select limits which columns you can filter on, unlike SQL.
+		    // Always put your select after all of your filtering
+			    studentDf.join(gradesDf,  studentDf.col("GPA").equalTo((gradesDf.col("gpa"))))
+//			    	.drop("gpa").drop("GPA")
+			    	.select(studentDf.col("student_name"), 
+			    			gradesDf.col("letter_grade"),
+			    			studentDf.col("favorite_book_title"),
+			    			studentDf.col("GPA")) // must have this for below filter to work
+			    	.filter(col("GPA").between(2, 3.5)).show();
+//			    	.filter(upper(col("letter_grade")).like("B")).show();
+			    	
 
-		    df.show(10);
-		
 	}
 
 
+	
 }
@@ -10,5 +10,4 @@ student_id,student_name,State,GPA,favorite_book_title,working
 1190,Dan Iacovelli,CA,3.5,The Hunger Games,FALSE
 1200,Ned Alvin,CA,1.0,,TRUE
 1210,Sidney Ducote,FL,1.5,The Secret Garden,FALSE
-1220,Bobbie Shrader,FL,2.0,The Color Purple,FALSE
-,,,,,
+1220,Bobbie Shrader,FL,2.0,The Color Purple,FALSE