Skip to content

Commit a5ea120

Browse files
committed
added python api; changed test to be more meaningful
1 parent b680db6 commit a5ea120

File tree

3 files changed

+143
-53
lines changed

3 files changed

+143
-53
lines changed

python/pyspark/sql/functions.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,96 @@ def ntile(n):
486486
sc = SparkContext._active_spark_context
487487
return Column(sc._jvm.functions.ntile(int(n)))
488488

489+
@since(1.5)
def dateFormat(dateCol, formatCol):
    """
    Convert the given date into a string laid out according to a format pattern.

    The result is always a string.

    :param dateCol: the date to format; the example below passes a column name.
    :param formatCol: the desired output format; the example below passes a
        pattern string such as ``'MM/dd/yyyy'``.
    :return: a :class:`Column` wrapping the result of the JVM-side ``dateFormat``.

    >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
    >>> df.select(dateFormat('a', 'MM/dd/yyyy').alias('date')).collect()
    [Row(date=u'04/08/2015')]
    """
    # Both arguments are forwarded to the JVM function exactly as received.
    sc = SparkContext._active_spark_context
    return Column(sc._jvm.functions.dateFormat(dateCol, formatCol))
498+
499+
@since(1.5)
def year(col):
    """
    Return the year component of the given date, as an integer.

    >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
    >>> df.select(year('a').alias('year')).collect()
    [Row(year=2015)]
    """
    # Delegate to the JVM-side implementation through the active context.
    jvm_functions = SparkContext._active_spark_context._jvm.functions
    return Column(jvm_functions.year(col))
508+
509+
@since(1.5)
def quarter(col):
    """
    Return the quarter of the year for the given date, as an integer.

    >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
    >>> df.select(quarter('a').alias('quarter')).collect()
    [Row(quarter=2)]
    """
    # Delegate to the JVM-side implementation through the active context.
    jvm_functions = SparkContext._active_spark_context._jvm.functions
    return Column(jvm_functions.quarter(col))
518+
519+
@since(1.5)
def month(col):
    """
    Return the month component of the given date, as an integer.

    >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
    >>> df.select(month('a').alias('month')).collect()
    [Row(month=4)]
    """
    # Delegate to the JVM-side implementation through the active context.
    jvm_functions = SparkContext._active_spark_context._jvm.functions
    return Column(jvm_functions.month(col))
528+
529+
@since(1.5)
def day(col):
    """
    Return the day component of the given date, as an integer.

    >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
    >>> df.select(day('a').alias('day')).collect()
    [Row(day=8)]
    """
    # Delegate to the JVM-side implementation through the active context.
    jvm_functions = SparkContext._active_spark_context._jvm.functions
    return Column(jvm_functions.day(col))
538+
539+
@since(1.5)
def hour(col):
    """
    Return the hour component of the given date, as an integer.

    >>> df = sqlContext.createDataFrame([('2015-04-08 13:08:15',)], ['a'])
    >>> df.select(hour('a').alias('hour')).collect()
    [Row(hour=13)]
    """
    # Delegate to the JVM-side implementation through the active context.
    jvm_functions = SparkContext._active_spark_context._jvm.functions
    return Column(jvm_functions.hour(col))
548+
549+
@since(1.5)
def minute(col):
    """
    Return the minute component of the given date, as an integer.

    >>> df = sqlContext.createDataFrame([('2015-04-08 13:08:15',)], ['a'])
    >>> df.select(minute('a').alias('minute')).collect()
    [Row(minute=8)]
    """
    # Delegate to the JVM-side implementation through the active context.
    jvm_functions = SparkContext._active_spark_context._jvm.functions
    return Column(jvm_functions.minute(col))
558+
559+
@since(1.5)
def second(col):
    """
    Return the seconds component of the given date, as an integer.

    >>> df = sqlContext.createDataFrame([('2015-04-08 13:08:15',)], ['a'])
    >>> df.select(second('a').alias('second')).collect()
    [Row(second=15)]
    """
    # Delegate to the JVM-side implementation through the active context.
    jvm_functions = SparkContext._active_spark_context._jvm.functions
    return Column(jvm_functions.second(col))
568+
569+
@since(1.5)
def weekOfYear(col):
    """
    Return the week number of the given date, as an integer.

    >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
    >>> df.select(weekOfYear('a').alias('week')).collect()
    [Row(week=15)]
    """
    # Delegate to the JVM-side implementation through the active context.
    jvm_functions = SparkContext._active_spark_context._jvm.functions
    return Column(jvm_functions.weekOfYear(col))
578+
489579

490580
class UserDefinedFunction(object):
491581
"""

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateTimeFunctionsSuite.scala

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,61 +24,61 @@ import org.apache.spark.SparkFunSuite
2424

2525
class DateTimeFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
2626

27-
val sdf = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
28-
val d = new Date(sdf.parse("2015/04/08 13:10:15").getTime)
29-
val ts = new Timestamp(sdf.parse("2013/11/08 13:10:15").getTime)
27+
val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
28+
val d = new Date(sdf.parse("2015-04-08 13:10:15").getTime)
29+
val ts = new Timestamp(sdf.parse("2013-11-08 13:10:15").getTime)
3030

3131
test("DateFormat") {
3232
checkEvaluation(DateFormatClass(Literal(d), Literal("y")), "2015")
33-
checkEvaluation(DateFormatClass(Literal(d.toString), Literal("y")), "2015")
33+
checkEvaluation(DateFormatClass(Literal(sdf.format(d)), Literal("y")), "2015")
3434
checkEvaluation(DateFormatClass(Literal(ts), Literal("y")), "2013")
3535
}
3636

3737
test("Year") {
3838
checkEvaluation(Year(Literal(d)), 2015)
39-
checkEvaluation(Year(Literal(d.toString)), 2015)
39+
checkEvaluation(Year(Literal(sdf.format(d))), 2015)
4040
checkEvaluation(Year(Literal(ts)), 2013)
4141
}
4242

4343
test("Quarter") {
4444
checkEvaluation(Quarter(Literal(d)), 2)
45-
checkEvaluation(Quarter(Literal(d.toString)), 2)
45+
checkEvaluation(Quarter(Literal(sdf.format(d))), 2)
4646
checkEvaluation(Quarter(Literal(ts)), 4)
4747
}
4848

4949
test("Month") {
5050
checkEvaluation(Month(Literal(d)), 4)
51-
checkEvaluation(Month(Literal(d.toString)), 4)
51+
checkEvaluation(Month(Literal(sdf.format(d))), 4)
5252
checkEvaluation(Month(Literal(ts)), 11)
5353
}
5454

5555
test("Day") {
5656
checkEvaluation(Day(Literal(d)), 8)
57-
checkEvaluation(Day(Literal(d.toString)), 8)
57+
checkEvaluation(Day(Literal(sdf.format(d))), 8)
5858
checkEvaluation(Day(Literal(ts)), 8)
5959
}
6060

6161
test("Hour") {
6262
checkEvaluation(Hour(Literal(d)), 0)
63-
checkEvaluation(Hour(Literal(d.toString)), 0)
63+
checkEvaluation(Hour(Literal(sdf.format(d))), 13)
6464
checkEvaluation(Hour(Literal(ts)), 13)
6565
}
6666

6767
test("Minute") {
6868
checkEvaluation(Minute(Literal(d)), 0)
69-
checkEvaluation(Minute(Literal(d.toString)), 0)
69+
checkEvaluation(Minute(Literal(sdf.format(d))), 10)
7070
checkEvaluation(Minute(Literal(ts)), 10)
7171
}
7272

7373
test("Seconds") {
7474
checkEvaluation(Second(Literal(d)), 0)
75-
checkEvaluation(Second(Literal(d.toString)), 0)
75+
checkEvaluation(Second(Literal(sdf.format(d))), 15)
7676
checkEvaluation(Second(Literal(ts)), 15)
7777
}
7878

7979
test("WeekOfYear") {
8080
checkEvaluation(WeekOfYear(Literal(d)), 15)
81-
checkEvaluation(WeekOfYear(Literal(d.toString)), 15)
81+
checkEvaluation(WeekOfYear(Literal(sdf.format(d))), 15)
8282
checkEvaluation(WeekOfYear(Literal(ts)), 45)
8383
}
8484

sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala

Lines changed: 41 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -168,9 +168,9 @@ class DataFrameFunctionsSuite extends QueryTest {
168168

169169

170170
test("date format") {
171-
val sdf = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
172-
val d = new Date(sdf.parse("2015/04/08 13:10:15").getTime)
173-
val ts = new Timestamp(sdf.parse("2013/04/08 13:10:15").getTime)
171+
val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
172+
val d = new Date(sdf.parse("2015-04-08 13:10:15").getTime)
173+
val ts = new Timestamp(sdf.parse("2013-04-08 13:10:15").getTime)
174174

175175
val df = Seq((d, d.toString, ts)).toDF("a", "b", "c")
176176

@@ -184,11 +184,11 @@ class DataFrameFunctionsSuite extends QueryTest {
184184
}
185185

186186
test("year") {
187-
val sdf = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
188-
val d = new Date(sdf.parse("2015/04/08 13:10:15").getTime)
189-
val ts = new Timestamp(sdf.parse("2013/04/08 13:10:15").getTime)
187+
val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
188+
val d = new Date(sdf.parse("2015-04-08 13:10:15").getTime)
189+
val ts = new Timestamp(sdf.parse("2013-04-08 13:10:15").getTime)
190190

191-
val df = Seq((d, d.toString, ts)).toDF("a", "b", "c")
191+
val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
192192

193193
checkAnswer(
194194
df.select(year("a"), year("b"), year("c")),
@@ -200,11 +200,11 @@ class DataFrameFunctionsSuite extends QueryTest {
200200
}
201201

202202
test("quarter") {
203-
val sdf = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
204-
val d = new Date(sdf.parse("2015/04/08 13:10:15").getTime)
205-
val ts = new Timestamp(sdf.parse("2013/11/08 13:10:15").getTime)
203+
val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
204+
val d = new Date(sdf.parse("2015-04-08 13:10:15").getTime)
205+
val ts = new Timestamp(sdf.parse("2013-11-08 13:10:15").getTime)
206206

207-
val df = Seq((d, d.toString, ts)).toDF("a", "b", "c")
207+
val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
208208

209209
checkAnswer(
210210
df.select(quarter("a"), quarter("b"), quarter("c")),
@@ -216,11 +216,11 @@ class DataFrameFunctionsSuite extends QueryTest {
216216
}
217217

218218
test("month") {
219-
val sdf = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
220-
val d = new Date(sdf.parse("2015/04/08 13:10:15").getTime)
221-
val ts = new Timestamp(sdf.parse("2013/04/08 13:10:15").getTime)
219+
val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
220+
val d = new Date(sdf.parse("2015-04-08 13:10:15").getTime)
221+
val ts = new Timestamp(sdf.parse("2013-04-08 13:10:15").getTime)
222222

223-
val df = Seq((d, d.toString, ts)).toDF("a", "b", "c")
223+
val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
224224

225225
checkAnswer(
226226
df.select(month("a"), month("b"), month("c")),
@@ -232,11 +232,11 @@ class DataFrameFunctionsSuite extends QueryTest {
232232
}
233233

234234
test("day") {
235-
val sdf = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
236-
val d = new Date(sdf.parse("2015/04/08 13:10:15").getTime)
237-
val ts = new Timestamp(sdf.parse("2013/04/08 13:10:15").getTime)
235+
val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
236+
val d = new Date(sdf.parse("2015-04-08 13:10:15").getTime)
237+
val ts = new Timestamp(sdf.parse("2013-04-08 13:10:15").getTime)
238238

239-
val df = Seq((d, d.toString, ts)).toDF("a", "b", "c")
239+
val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
240240

241241
checkAnswer(
242242
df.select(day("a"), day("b"), day("c")),
@@ -248,59 +248,59 @@ class DataFrameFunctionsSuite extends QueryTest {
248248
}
249249

250250
test("hour") {
251-
val sdf = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
252-
val d = new Date(sdf.parse("2015/04/08 13:10:15").getTime)
253-
val ts = new Timestamp(sdf.parse("2013/04/08 13:10:15").getTime)
251+
val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
252+
val d = new Date(sdf.parse("2015-04-08 13:10:15").getTime)
253+
val ts = new Timestamp(sdf.parse("2013-04-08 13:10:15").getTime)
254254

255-
val df = Seq((d, d.toString, ts)).toDF("a", "b", "c")
255+
val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
256256

257257
checkAnswer(
258258
df.select(hour("a"), hour("b"), hour("c")),
259-
Row(0, 0, 13))
259+
Row(0, 13, 13))
260260

261261
checkAnswer(
262262
df.selectExpr("hour(a)", "hour(b)", "hour(c)"),
263-
Row(0, 0, 13))
263+
Row(0, 13, 13))
264264
}
265265

266266
test("minute") {
267-
val sdf = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
268-
val d = new Date(sdf.parse("2015/04/08 13:10:15").getTime)
269-
val ts = new Timestamp(sdf.parse("2013/04/08 13:10:15").getTime)
267+
val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
268+
val d = new Date(sdf.parse("2015-04-08 13:10:15").getTime)
269+
val ts = new Timestamp(sdf.parse("2013-04-08 13:10:15").getTime)
270270

271-
val df = Seq((d, d.toString, ts)).toDF("a", "b", "c")
271+
val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
272272

273273
checkAnswer(
274274
df.select(minute("a"), minute("b"), minute("c")),
275-
Row(0, 0, 10))
275+
Row(0, 10, 10))
276276

277277
checkAnswer(
278278
df.selectExpr("minute(a)", "minute(b)", "minute(c)"),
279-
Row(0, 0, 10))
279+
Row(0, 10, 10))
280280
}
281281

282282
test("second") {
283-
val sdf = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
284-
val d = new Date(sdf.parse("2015/04/08 13:10:15").getTime)
285-
val ts = new Timestamp(sdf.parse("2013/04/08 13:10:15").getTime)
283+
val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
284+
val d = new Date(sdf.parse("2015-04-08 13:10:15").getTime)
285+
val ts = new Timestamp(sdf.parse("2013-04-08 13:10:15").getTime)
286286

287-
val df = Seq((d, d.toString, ts)).toDF("a", "b", "c")
287+
val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
288288

289289
checkAnswer(
290290
df.select(second("a"), second("b"), second("c")),
291-
Row(0, 0, 15))
291+
Row(0, 15, 15))
292292

293293
checkAnswer(
294294
df.selectExpr("second(a)", "second(b)", "second(c)"),
295-
Row(0, 0, 15))
295+
Row(0, 15, 15))
296296
}
297297

298298
test("weekOfYear") {
299-
val sdf = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
300-
val d = new Date(sdf.parse("2015/04/08 13:10:15").getTime)
301-
val ts = new Timestamp(sdf.parse("2013/04/08 13:10:15").getTime)
299+
val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
300+
val d = new Date(sdf.parse("2015-04-08 13:10:15").getTime)
301+
val ts = new Timestamp(sdf.parse("2013-04-08 13:10:15").getTime)
302302

303-
val df = Seq((d, d.toString, ts)).toDF("a", "b", "c")
303+
val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
304304

305305
checkAnswer(
306306
df.select(weekOfYear("a"), weekOfYear("b"), weekOfYear("c")),

0 commit comments

Comments
 (0)