Skip to content

Commit eb43699

Browse files
authored
feat: support map_contains_key expression (#3369)
1 parent 025e2a6 commit eb43699

File tree

4 files changed

+95
-2
lines changed

4 files changed

+95
-2
lines changed

docs/spark_expressions_support.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -272,11 +272,11 @@
272272
- [ ] element_at
273273
- [ ] map
274274
- [ ] map_concat
275-
- [ ] map_contains_key
275+
- [x] map_contains_key
276276
- [ ] map_entries
277277
- [ ] map_from_arrays
278278
- [ ] map_from_entries
279-
- [ ] map_keys
279+
- [x] map_keys
280280
- [ ] map_values
281281
- [ ] str_to_map
282282
- [ ] try_element_at

spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ object QueryPlanSerde extends Logging with CometExprShim {
127127
classOf[MapEntries] -> CometMapEntries,
128128
classOf[MapValues] -> CometMapValues,
129129
classOf[MapFromArrays] -> CometMapFromArrays,
130+
classOf[MapContainsKey] -> CometMapContainsKey,
130131
classOf[MapFromEntries] -> CometMapFromEntries)
131132

132133
private val structExpressions: Map[Class[_ <: Expression], CometExpressionSerde[_]] = Map(

spark/src/main/scala/org/apache/comet/serde/maps.scala

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,23 @@ object CometMapFromArrays extends CometExpressionSerde[MapFromArrays] {
9090
}
9191
}
9292

93+
/**
 * Serde for Spark's `MapContainsKey` expression.
 *
 * No dedicated native function is emitted. The expression is serialized as the composition
 * `array_has(map_keys(map), key)`, built from two scalar-function protobuf nodes.
 */
object CometMapContainsKey extends CometExpressionSerde[MapContainsKey] {
94+
95+
/**
 * Converts a `MapContainsKey` expression into its protobuf representation.
 *
 * @param expr the expression to convert; `expr.left` is used as the map operand and
 *             `expr.right` as the key operand
 * @param inputs attributes forwarded unchanged to `exprToProtoInternal` for both operands
 * @param binding flag forwarded unchanged to `exprToProtoInternal` for both operands
 * @return the result of `optExprWithInfo` applied to the composed `array_has` expression
 */
override def convert(
96+
expr: MapContainsKey,
97+
inputs: Seq[Attribute],
98+
binding: Boolean): Option[ExprOuterClass.Expr] = {
99+
// Rewrite map_contains_key(map, key) as array_has(map_keys(map), key):
// serialize both operands first, then nest the two scalar function calls.
100+
val mapExpr = exprToProtoInternal(expr.left, inputs, binding)
101+
val keyExpr = exprToProtoInternal(expr.right, inputs, binding)
102+
103+
// Inner call: map_keys(map).
val mapKeysExpr = scalarFunctionExprToProto("map_keys", mapExpr)
104+
105+
// Outer call: array_has(<map keys>, key).
val mapContainsKeyExpr = scalarFunctionExprToProto("array_has", mapKeysExpr, keyExpr)
106+
// NOTE(review): optExprWithInfo presumably records fallback info on `expr` when the
// composed expression could not be built - confirm against its definition.
optExprWithInfo(mapContainsKeyExpr, expr, expr.children: _*)
107+
}
108+
}
109+
93110
object CometMapFromEntries extends CometScalarFunction[MapFromEntries]("map_from_entries") {
94111
val keyUnsupportedReason = "Using BinaryType as Map keys is not allowed in map_from_entries"
95112
val valueUnsupportedReason = "Using BinaryType as Map values is not allowed in map_from_entries"
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
-- Licensed to the Apache Software Foundation (ASF) under one
2+
-- or more contributor license agreements. See the NOTICE file
3+
-- distributed with this work for additional information
4+
-- regarding copyright ownership. The ASF licenses this file
5+
-- to you under the Apache License, Version 2.0 (the
6+
-- "License"); you may not use this file except in compliance
7+
-- with the License. You may obtain a copy of the License at
8+
--
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
--
11+
-- Unless required by applicable law or agreed to in writing,
12+
-- software distributed under the License is distributed on an
13+
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
-- KIND, either express or implied. See the License for the
15+
-- specific language governing permissions and limitations
16+
-- under the License.
17+
18+
-- ConfigMatrix: parquet.enable.dictionary=false,true
19+
20+
-- TODO: replace map_from_arrays with map whenever map is supported in Comet
21+
22+
-- Basic integer key tests with constant maps built via map_from_arrays
23+
query
24+
select map_contains_key(map_from_arrays(array(1, 2), array('a', 'b')), 5)
25+
26+
query
27+
select map_contains_key(map_from_arrays(array(1, 2), array('a', 'b')), 1)
28+
29+
-- Decimal type coercion tests
30+
-- TODO: requires map cast to be supported in Comet
31+
query spark_answer_only
32+
select map_contains_key(map_from_arrays(array(1, 2), array('a', 'b')), 5.0)
33+
34+
query spark_answer_only
35+
select map_contains_key(map_from_arrays(array(1, 2), array('a', 'b')), 1.0)
36+
37+
query spark_answer_only
38+
select map_contains_key(map_from_arrays(array(1.0, 2), array('a', 'b')), 5)
39+
40+
query spark_answer_only
41+
select map_contains_key(map_from_arrays(array(1.0, 2), array('a', 'b')), 1)
42+
43+
-- Empty map tests
44+
-- TODO: requires casting from NullType to be supported in Comet
45+
query spark_answer_only
46+
select map_contains_key(map_from_arrays(array(), array()), 0)
47+
48+
-- Test with table data
49+
statement
50+
CREATE TABLE test_map_contains_key(m map<string, int>) USING parquet
51+
52+
statement
53+
INSERT INTO test_map_contains_key VALUES (map_from_arrays(array('a', 'b', 'c'), array(1, 2, 3))), (map_from_arrays(array('x'), array(10))), (map_from_arrays(array(), array())), (NULL)
54+
55+
query
56+
SELECT map_contains_key(m, 'a') FROM test_map_contains_key
57+
58+
query
59+
SELECT map_contains_key(m, 'x') FROM test_map_contains_key
60+
61+
query
62+
SELECT map_contains_key(m, 'missing') FROM test_map_contains_key
63+
64+
-- Test with integer key map
65+
statement
66+
CREATE TABLE test_map_int_key(m map<int, string>) USING parquet
67+
68+
statement
69+
INSERT INTO test_map_int_key VALUES (map_from_arrays(array(1, 2), array('a', 'b'))), (map_from_arrays(array(), array())), (NULL)
70+
71+
query
72+
SELECT map_contains_key(m, 1) FROM test_map_int_key
73+
74+
query
75+
SELECT map_contains_key(m, 5) FROM test_map_int_key

0 commit comments

Comments
 (0)