Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,7 @@ kafka-handler/src/test/gen
dependency-reduced-pom.xml
**/.mvn/.develocity*
packaging/cache
standalone-metastore/packaging/cache
standalone-metastore/packaging/cache

**/fb303.thrift
apache*.tar.gz
332 changes: 332 additions & 0 deletions RELEASE_NOTES.txt

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
Original file line number Diff line number Diff line change
Expand Up @@ -5528,6 +5528,8 @@ public static enum ConfVars {
"hive.security.authenticator.manager,hive.security.authorization.manager," +
"hive.security.metastore.authorization.manager,hive.security.metastore.authenticator.manager," +
"hive.users.in.admin.role,hive.server2.xsrf.filter.enabled,hive.server2.csrf.filter.enabled,hive.security.authorization.enabled," +
"anon.policy.enforce.privileges," +
"anon.policy.grant.admin.users," +
"hive.distcp.privileged.doAs," +
"hive.server2.authentication.ldap.baseDN," +
"hive.server2.authentication.ldap.url," +
Expand Down
71 changes: 71 additions & 0 deletions dae/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Inherits versions and plugin management from the Hive root POM. -->
  <parent>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive</artifactId>
    <version>4.3.0-SNAPSHOT</version>
  </parent>

  <artifactId>hive-dae</artifactId>
  <name>Hive Data Anonymization Extensions</name>

  <properties>
    <!-- No module-local maven.compiler.source/target override is active
         (an earlier Java 21 override was commented out), so the compiler
         level comes from the parent POM. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <hive.path.to.root>..</hive.path.to.root>
  </properties>

  <dependencies>
    <!-- Runtime footprint is deliberately limited to the ANTLR4 runtime.
         Not depending on hive-exec lets this module sit above ql in the
         module graph, so ql's AnonStatementAnalyzer can call
         org.apache.hive.hep.ErasurePolicyValidator without introducing a
         circular module dependency. -->
    <dependency>
      <groupId>org.antlr</groupId>
      <artifactId>antlr4-runtime</artifactId>
      <version>${antlr4.version}</version>
    </dependency>
    <!-- Test-only. -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>${junit.version}</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- Generates the lexer/parser (and visitor classes, see <visitor>)
           from the .g4 grammars of this module. -->
      <plugin>
        <groupId>org.antlr</groupId>
        <artifactId>antlr4-maven-plugin</artifactId>
        <version>${antlr4.version}</version>
        <configuration>
          <visitor>true</visitor>
        </configuration>
        <executions>
          <execution>
            <goals>
              <goal>antlr4</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>
178 changes: 178 additions & 0 deletions dae/src/main/antlr4/org/apache/hive/hep/ErasurePolicy.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
/**
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Erasure Policy Domain-Specific Language Grammar
// Used by VALIDATE / ACTIVATE / DEACTIVATE commands to parse .erp files.
grammar ErasurePolicy;

// ---------------------------------------------------------------------------
// Parser Rules
// ---------------------------------------------------------------------------

// Entry rule for a policy file. In order: zero or more metadata headers,
// exactly one IDENTITY declaration, exactly one SCHEMA TYPE declaration, one
// or more FOR SCHEMA statements, then end of input. EOF is part of the rule,
// so trailing tokens after the last statement are a syntax error.
// Headers are matched by headerDecl*, so the grammar itself does not restrict
// how often a given header kind appears or the order among headers.
policy
: headerDecl* identityDecl schemaTypeDecl statement+ EOF
;

// Optional LPL-style metadata headers carried at the top of the file. They
// record governance context (version, purpose, legal basis) that the narrow
// erasure DSL otherwise delegates to the policy tier above. The validator
// records them on the ParsedPolicy but does not interpret them as executable
// logic, so the linear-time validation property is preserved. VERSION, when
// present, becomes the policy version label at LOAD time.
// A single header line: exactly one of the three supported header kinds.
headerDecl
: versionDecl
| purposeDecl
| legalBasisDecl
;

// VERSION <headerValue>
versionDecl : VERSION headerValue ;
// PURPOSE '<text>'. Unlike the other two headers, only a quoted string is
// accepted here (no bare identifier or number).
purposeDecl : PURPOSE STRING_LITERAL ;
// LEGAL BASIS <headerValue>. LEGAL and BASIS are two separate keyword tokens.
legalBasisDecl : LEGAL BASIS headerValue ;

// A header value is an unquoted identifier (e.g. v3, consent), a quoted string
// (e.g. '1.2.0', 'legal-obligation'), or a bare unsuffixed integer (e.g. 3).
// Dotted or hyphenated values must be quoted because the lexer defines no
// '.' or '-' token. An L-suffixed number lexes as LONG_LITERAL and is not
// accepted here.
headerValue
: ID | STRING_LITERAL | INT_LITERAL
;

// IDENTITY <fieldName> TYPE <fieldType>
// The identity field is a single plain identifier (fieldName is ID), not a
// colon-delimited fieldPath, and its type is one of the fieldType keywords.
identityDecl
: IDENTITY fieldName TYPE fieldType
;

// SCHEMA TYPE <fieldType>
// NOTE(review): presumably declares the type of the schemaId literals used in
// FOR SCHEMA statements; the grammar does not tie the two together, so any
// such check has to live in the validator. Confirm.
schemaTypeDecl
: SCHEMA TYPE fieldType
;

// One per-schema rule set: FOR SCHEMA <schemaId> followed by up to four
// clauses. Clauses must appear in the fixed order ERASE, REPLACE, INSPECT,
// FLAG, and each may appear at most once per statement.
// NOTE(review): every clause is optional, so a bare `FOR SCHEMA 1` with no
// clause at all parses successfully. Confirm whether the validator rejects
// that or whether an action-free statement is intended.
statement
: FOR SCHEMA schemaId eraseClause? replaceClause? inspectClause? flagClause?
;

// ERASE followed by one or more comma-separated field paths.
eraseClause
: ERASE fieldPath ( COMMA fieldPath )*
;

// REPLACE followed by one or more comma-separated replacement rules.
replaceClause
: REPLACE replaceRule ( COMMA replaceRule )*
;

// <fieldPath> = <literal>. The right-hand side is restricted to the literal
// rule (integer, long or quoted string); it cannot reference another field.
replaceRule
: fieldPath ASSIGN literal
;

// INSPECT records a regex/literal match for review without modifying the value.
// FLAG marks the row for deferred review and emits a match row only; neither
// alters the underlying field. Both participate in the C1/C2/C3 conflict
// detection machinery introduced in §5.
// INSPECT followed by one or more comma-separated field paths.
inspectClause
: INSPECT fieldPath ( COMMA fieldPath )*
;

// FLAG followed by one or more comma-separated field paths.
flagClause
: FLAG fieldPath ( COMMA fieldPath )*
;

// A colon-delimited sequence of path steps. Each step is an identifier
// optionally followed by one or more bracket-delimited predicates.
// Syntactic example: user:addresses[country='DE']:zip
// At least one step is required; ':' is the only step separator.
fieldPath
: pathStep ( COLON pathStep )*
;

// A path step is either an identifier (optionally with bracket predicates)
// or a standalone STAR terminal. The STAR form is permitted by the grammar
// at any position, but semantic validation in ErasurePolicyValidator rejects
// it everywhere except as the final step of a fieldPath; this keeps the
// grammar simple while preserving the "wildcard only as terminal" property
// that the §5 prose relies on.
pathStep
// Named step; zero or more predicates may be chained, e.g. a[0][*].
: ID predicate*
// Bare wildcard step; this alternative takes no predicates.
| STAR
;

// Predicates select into a repeated (list-typed) field:
// - INT_LITERAL : indexed access, e.g. ctxs[3]
// - STAR : wildcard over every element, e.g. ctxs[*]
// - ID op literal: filtered selection, e.g. addresses[country='DE']
// Notes on the three alternatives below:
// - the index is an unsuffixed, unsigned INT_LITERAL;
// - the filter key is a single ID, not a nested fieldPath;
// - the filter value is a literal, and '=' (ASSIGN) serves as the equality
//   operator via compareOp.
predicate
: LBRACK INT_LITERAL RBRACK
| LBRACK STAR RBRACK
| LBRACK ID compareOp literal RBRACK
;

// Operators allowed in a filter predicate. ASSIGN ('=') doubles as equality
// here; it is the same token replaceRule uses for assignment.
compareOp
: ASSIGN | NEQ | GT | LT | GTE | LTE
;

// A constant value: unsuffixed integer, L-suffixed long, or single-quoted string.
literal
: INT_LITERAL | LONG_LITERAL | STRING_LITERAL
;

// Schema identifier in FOR SCHEMA: any literal form is accepted syntactically.
schemaId : literal ;
// A plain identifier (no path steps, no predicates).
fieldName : ID ;
// Primitive type keywords usable in IDENTITY ... TYPE and SCHEMA TYPE.
fieldType : INT | LONG | STRING ;

// ---------------------------------------------------------------------------
// Lexer Rules
// ---------------------------------------------------------------------------

// Keyword tokens must precede ID so the lexer prefers them on equal-length matches.
// Consequences of this arrangement:
// - Keywords are case-sensitive: only the exact upper-case spelling is a
//   keyword (e.g. `erase` lexes as ID).
// - The upper-case spellings are reserved: they never lex as ID, so they
//   cannot be used as a fieldName, a path step, a predicate key, or an
//   unquoted header value.
IDENTITY : 'IDENTITY' ;
TYPE : 'TYPE' ;
SCHEMA : 'SCHEMA' ;
FOR : 'FOR' ;
ERASE : 'ERASE' ;
REPLACE : 'REPLACE' ;
INSPECT : 'INSPECT' ;
FLAG : 'FLAG' ;
INT : 'INT' ;
LONG : 'LONG' ;
STRING : 'STRING' ;

// Optional-header keywords. Same reservation rule as above applies.
VERSION : 'VERSION' ;
PURPOSE : 'PURPOSE' ;
LEGAL : 'LEGAL' ;
BASIS : 'BASIS' ;

// Structural punctuation
// COLON separates path steps; COMMA separates list items in clauses.
// ASSIGN is used both for REPLACE assignment and as the equality operator
// inside filter predicates.
COLON : ':' ;
COMMA : ',' ;
ASSIGN : '=' ;
LBRACK : '[' ;
RBRACK : ']' ;
STAR : '*' ;

// Comparison operators for filter predicates
// The lexer takes the longest match, so '>=' and '<=' are always single
// tokens rather than '>' or '<' followed by '='.
NEQ : '!=' ;
GTE : '>=' ;
LTE : '<=' ;
GT : '>' ;
LT : '<' ;

// Literals
// INT_LITERAL: an unsigned run of decimal digits. There is no '-' token in
// this lexer, so negative numbers cannot be written.
INT_LITERAL : [0-9]+ ;
// LONG_LITERAL: the same digits with a trailing l or L. Longest-match makes
// `42L` one LONG_LITERAL rather than INT_LITERAL followed by ID.
LONG_LITERAL : [0-9]+ [lL] ;
// STRING_LITERAL: single-quoted. A backslash escapes the next character
// (any character); an unescaped quote, backslash, CR or LF is not allowed
// inside. The token text keeps the surrounding quotes and the escape
// sequences as written; no unescaping happens in the lexer.
STRING_LITERAL : '\'' ( ~['\\\r\n] | '\\' . )* '\'' ;

// Identifiers (must follow keyword tokens above)
// ASCII letter or underscore first, then ASCII letters, digits or underscores.
ID : [a-zA-Z_] [a-zA-Z_0-9]* ;

// Whitespace and comments (skipped by the parser).
// Only '#' line comments are supported; the SQL-style '--' marker is reserved
// for the surrounding HiveQL command grammar to avoid confusion when policy
// bodies appear inline inside SQL statements.
WS : [ \t\r\n]+ -> skip ;
LINE_COMMENT : '#' ~[\r\n]* -> skip ;

// Catch-all for any character no rule above recognises (e.g. ';', '.', '-',
// '"', or the opening quote of an unterminated string). Without it the lexer
// reports such a character only through its own error listener and then drops
// it, so input like `ERASE a;` would still parse cleanly unless the caller
// listens on the lexer as well. Emitting a token that no parser rule accepts
// turns every stray character into an ordinary parser syntax error.
// This rule MUST remain the last lexer rule: it matches one character, and on
// equal-length matches the earlier rule wins, so it never shadows a real token.
ERROR_CHAR : . ;
Loading
Loading