# -*- coding: utf-8 -*-
"""
Created on Wed Feb 22 15:07:44 2017
Spark SQL practice
@author: wanghuan
"""
from pyspark.sql import SparkSession
# Build (or reuse) the Spark session. The chained builder calls are wrapped
# in parentheses so the expression may span several lines; a physical line
# that begins with ".getOrCreate()" on its own is a syntax error.
# NOTE(review): the master host is spelled "cent0s7Master" (with a digit
# zero) -- confirm this matches the real hostname.
spark = (
    SparkSession.builder
    .master("spark://cent0s7Master:7077")
    .appName("Python Spark SQL basic example")
    .config("spark.some.config.option", "some-value")
    .getOrCreate()
)

try:
    # Load the two JSON inputs as DataFrames. The paths are relative.
    # NOTE(review): presumably they must be readable from wherever the job
    # runs -- confirm for the standalone cluster configured above.
    peopleDF = spark.read.json("examples/src/main/resources/people.json")
    salaryDF = spark.read.json("examples/src/main/resources/salary.json")
    # Debugging aid: call peopleDF.printSchema() to inspect the inferred schema.

    # Register each DataFrame as a temporary view so that it can be
    # referenced by name from SQL.
    peopleDF.createOrReplaceTempView("people")
    salaryDF.createOrReplaceTempView("salary")

    # Join people with salary on name and keep rows with age < 30 and
    # salary > 5000. The query text is unchanged; it is only split across
    # adjacent string literals for readability.
    # The variable name is historical: the filter is "age < 30", not teenagers.
    teenagerNamesDF = spark.sql(
        "SELECT a.name,a.age,b.salary "
        "FROM people a,salary b "
        "where a.name=b.name and a.age <30 and b.salary>5000"
    )
    teenagerNamesDF.show()
finally:
    # Stop the session even if loading or querying fails.
    spark.stop()