Skip to content

Commit 57026a1

Browse files
viirya authored and hvanhovell committed
[SPARK-23599][SQL] Add a UUID generator from Pseudo-Random Numbers
## What changes were proposed in this pull request? This patch adds a UUID generator based on pseudo-random numbers. We can use it later to provide a deterministic `UUID()` expression. ## How was this patch tested? Added unit tests. Author: Liang-Chi Hsieh <[email protected]> Closes #20817 from viirya/SPARK-23599. (cherry picked from commit 4de638c) Signed-off-by: Herman van Hovell <[email protected]>
1 parent 523fcaf commit 57026a1

File tree

2 files changed

+100
-0
lines changed

2 files changed

+100
-0
lines changed
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.catalyst.util
19+
20+
import java.util.UUID
21+
22+
import org.apache.commons.math3.random.MersenneTwister
23+
24+
import org.apache.spark.unsafe.types.UTF8String
25+
26+
/**
 * Produces UUIDs out of pseudo-random numbers drawn from a Mersenne Twister that is seeded
 * with `randomSeed`.
 *
 * The bit layout follows RFC 4122: A Universally Unique IDentifier (UUID) URN Namespace,
 * section 4.4 "Algorithms for Creating a UUID from Truly Random or Pseudo-Random Numbers":
 * the version field is set to 4 and the two highest bits of the low half are set to `10`.
 *
 * @param randomSeed seed handed to the underlying pseudo-random number generator
 */
case class RandomUUIDGenerator(randomSeed: Long) {
  private val rng = new MersenneTwister(randomSeed)

  /**
   * Draws two longs from the generator and stamps the version and variant bits onto them.
   *
   * @return the next UUID of this generator's sequence
   */
  def getNextUUID(): UUID = {
    // Draw order is significant: the first long becomes the high half, the second the low half.
    val highDraw = rng.nextLong()
    val lowDraw = rng.nextLong()

    // Wipe bits 12-15 (the version field) and write 0100, i.e. version 4.
    val versioned = (highDraw & ~0xF000L) | 0x4000L
    // Wipe the two highest bits, then set the topmost one so that they read `10`.
    val varianted = (lowDraw & 0x3FFFFFFFFFFFFFFFL) | 0x8000000000000000L

    new UUID(versioned, varianted)
  }

  /**
   * Same as [[getNextUUID]], with the UUID rendered through `toString` and wrapped in a
   * `UTF8String`.
   *
   * @return the string form of the next UUID as a `UTF8String`
   */
  def getNextUUIDUTF8String(): UTF8String = {
    val rendered = getNextUUID().toString
    UTF8String.fromString(rendered)
  }
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.catalyst.util
19+
20+
import scala.util.Random
21+
22+
import org.apache.spark.SparkFunSuite
23+
24+
/**
 * Unit tests for [[RandomUUIDGenerator]]: the version/variant bits of generated UUIDs, the
 * determinism of the sequence for a given seed, and the `UTF8String` rendering.
 */
class RandomUUIDGeneratorSuite extends SparkFunSuite {
  test("RandomUUIDGenerator should generate version 4, variant 2 UUIDs") {
    // The seed differs on every run; attach it to each assertion so a failure can be replayed.
    val seed = new Random().nextLong()
    val seedClue = s"(generator seed = $seed)"
    val generator = RandomUUIDGenerator(seed)
    for (_ <- 0 to 100) {
      val uuid = generator.getNextUUID()
      assert(uuid.version() == 4, seedClue)
      assert(uuid.variant() == 2, seedClue)
    }
  }

  test("UUID from RandomUUIDGenerator should be deterministic") {
    // generator1 and generator2 receive the same seed; generator3 receives a different one.
    val r1 = new Random(100)
    val generator1 = RandomUUIDGenerator(r1.nextLong())
    val r2 = new Random(100)
    val generator2 = RandomUUIDGenerator(r2.nextLong())
    val r3 = new Random(101)
    val generator3 = RandomUUIDGenerator(r3.nextLong())

    for (_ <- 0 to 100) {
      val uuid1 = generator1.getNextUUID()
      val uuid2 = generator2.getNextUUID()
      val uuid3 = generator3.getNextUUID()
      assert(uuid1 == uuid2)
      assert(uuid1 != uuid3)
    }
  }

  test("Get UTF8String UUID") {
    // The seed differs on every run; attach it to each assertion so a failure can be replayed.
    val seed = new Random().nextLong()
    val seedClue = s"(generator seed = $seed)"
    val generator = RandomUUIDGenerator(seed)
    val utf8StringUUID = generator.getNextUUIDUTF8String()
    val uuid = java.util.UUID.fromString(utf8StringUUID.toString)
    // One assertion per property, so that a failure names the property that broke.
    assert(uuid.version() == 4, seedClue)
    assert(uuid.variant() == 2, seedClue)
    assert(utf8StringUUID.toString == uuid.toString, seedClue)
  }
}

0 commit comments

Comments
 (0)