sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass

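# Illustrative sketch (not part of the original source): parse_like builds the AST
# for LIKE(pattern, this) and wraps it in exp.Escape when a third argument is given:
#
#     node = parse_like([exp.Literal.string("a!%"), exp.column("x"), exp.Literal.string("!")])
#     assert isinstance(node, exp.Escape) and isinstance(node.this, exp.Like)
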
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.STRUCT,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        TokenType.ENUM,
        *NESTED_TYPE_TOKENS,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

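    # Illustrative note (not in the original source): FUNCTIONS maps an upper-case SQL
    # function name to a callable that receives the parsed argument list. For example,
    # FUNCTIONS["LIKE"] is the module-level parse_like, while most entries default to
    # the corresponding expression class's from_arg_list.
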
    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

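    # Illustrative note (not in the original source): CONJUNCTION, EQUALITY,
    # COMPARISON, BITWISE, TERM and FACTOR define the binary-operator precedence
    # ladder, so "a + b * c" parses as exp.Add(a, exp.Mul(b, c)) because FACTOR
    # binds tighter than TERM.
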
    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

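    # Illustrative sketch (not in the original source): EXPRESSION_PARSERS is the
    # lookup table behind parse_into, e.g.:
    #
    #     tokens = Tokenizer().tokenize("x > 1 AND y IS NULL")
    #     condition = Parser().parse_into(exp.Condition, tokens)[0]
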
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

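    # Illustrative note (not in the original source): entries built with
    # binary_range_parser also consume a trailing ESCAPE clause via _parse_escape, so
    # "x LIKE 'a!%' ESCAPE '!'" becomes exp.Escape(this=exp.Like(...), expression='!').
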
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER": lambda self: self._parse_cluster(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

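    # Illustrative note (not in the original source): QUERY_MODIFIER_PARSERS is
    # consulted in declaration order when the clauses after a SELECT are parsed,
    # which is why joins and laterals are gathered before WHERE/GROUP BY/HAVING,
    # and LIMIT/OFFSET after ORDER BY.
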
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    STRICT_CAST = True

    CONCAT_NULL_OUTPUTS_STRING = False  # A NULL arg in CONCAT yields NULL by default

    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

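    # Illustrative usage sketch (not part of the original source): tokenize a query
    # with the base Tokenizer and feed the tokens to parse(), which returns one
    # syntax tree per statement.
    #
    #     sql = "SELECT a FROM b; SELECT c FROM d"
    #     trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    #     assert len(trees) == 2 and trees[0].sql() == "SELECT a FROM b"
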
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

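    # Illustrative sketch (not in the original source): the error level decides
    # whether raise_error throws immediately, accumulates, or only logs.
    #
    #     tokens = Tokenizer().tokenize("SELECT 1 +")  # malformed on purpose
    #     Parser(error_level=ErrorLevel.WARN).parse(tokens)   # logs, returns trees
    #     Parser(error_level=ErrorLevel.RAISE).parse(tokens)  # raises ParseError
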
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

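    # Illustrative sketch (not in the original source): parsing methods speculate by
    # saving the cursor and backtracking with _retreat (defined above) when a grammar
    # branch does not pan out, mirroring helpers such as _parse_cluster:
    #
    #     index = self._index
    #     if not self._match_text_seq("BY"):
    #         self._retreat(index)
    #         return None
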
    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> t.Optional[exp.Cluster]:
        if not self._match_text_seq("BY"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

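    # Illustrative note (not in the original source): _parse_row_format handles
    # Hive-style clauses such as
    # "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'",
    # collecting each TERMINATED BY string into the RowFormatDelimitedProperty kwargs.
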
self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1741 1742 self._match_text_seq("DELIMITED") 1743 1744 kwargs = {} 1745 1746 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1747 kwargs["fields"] = self._parse_string() 1748 if self._match_text_seq("ESCAPED", "BY"): 1749 kwargs["escaped"] = self._parse_string() 1750 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1751 kwargs["collection_items"] = self._parse_string() 1752 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1753 kwargs["map_keys"] = self._parse_string() 1754 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1755 kwargs["lines"] = self._parse_string() 1756 if self._match_text_seq("NULL", "DEFINED", "AS"): 1757 kwargs["null"] = self._parse_string() 1758 1759 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1760 1761 def _parse_load(self) -> exp.LoadData | exp.Command: 1762 if self._match_text_seq("DATA"): 1763 local = self._match_text_seq("LOCAL") 1764 self._match_text_seq("INPATH") 1765 inpath = self._parse_string() 1766 overwrite = self._match(TokenType.OVERWRITE) 1767 self._match_pair(TokenType.INTO, TokenType.TABLE) 1768 1769 return self.expression( 1770 exp.LoadData, 1771 this=self._parse_table(schema=True), 1772 local=local, 1773 overwrite=overwrite, 1774 inpath=inpath, 1775 partition=self._parse_partition(), 1776 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1777 serde=self._match_text_seq("SERDE") and self._parse_string(), 1778 ) 1779 return self._parse_as_command(self._prev) 1780 1781 def _parse_delete(self) -> exp.Delete: 1782 self._match(TokenType.FROM) 1783 1784 return self.expression( 1785 exp.Delete, 1786 this=self._parse_table(), 1787 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1788 where=self._parse_where(), 1789 returning=self._parse_returning(), 1790 ) 1791 1792 def _parse_update(self) -> exp.Update: 1793 return self.expression( 1794 exp.Update, 1795 **{ # type: ignore 1796 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1797 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1798 "from": self._parse_from(modifiers=True), 1799 "where": self._parse_where(), 1800 "returning": self._parse_returning(), 1801 }, 1802 ) 1803 1804 def _parse_uncache(self) -> exp.Uncache: 1805 if not self._match(TokenType.TABLE): 1806 self.raise_error("Expecting TABLE after UNCACHE") 1807 1808 return self.expression( 1809 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1810 ) 1811 1812 def _parse_cache(self) -> exp.Cache: 1813 lazy = self._match_text_seq("LAZY") 1814 self._match(TokenType.TABLE) 1815 table = self._parse_table(schema=True) 1816 1817 options = [] 1818 if self._match_text_seq("OPTIONS"): 1819 self._match_l_paren() 1820 k = self._parse_string() 1821 self._match(TokenType.EQ) 1822 v = self._parse_string() 1823 options = [k, v] 1824 self._match_r_paren() 1825 1826 self._match(TokenType.ALIAS) 1827 return self.expression( 1828 exp.Cache, 1829 this=table, 1830 lazy=lazy, 1831 options=options, 1832 expression=self._parse_select(nested=True), 1833 ) 1834 1835 def _parse_partition(self) -> t.Optional[exp.Partition]: 1836 if not self._match(TokenType.PARTITION): 1837 return None 1838 1839 return self.expression( 1840 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1841 ) 1842 1843 def _parse_value(self) -> exp.Tuple: 1844 if self._match(TokenType.L_PAREN): 1845 
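            # Each parenthesized row, e.g. (1, 2) in VALUES (1, 2), (3, 4), is
            # collected into a single exp.Tuple.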
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    if key == "limit":
                        offset = expression.args.pop("offset", None)
                        if offset:
                            this.set("offset", exp.Offset(expression=offset))
                    this.set(key, expression)
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, modifiers: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        comments = self._prev_comments
        this = self._parse_table()

        return self.expression(
            exp.From,
            comments=comments,
            this=self._parse_query_modifiers(this) if modifiers else this,
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = (
            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")
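
        # At most three dot-separated parts map onto table, db and catalog, e.g.
        # catalog.db.table; any deeper nesting was folded into exp.Dot nodes above.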
        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))
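
        # The parenthesized sample argument can take several shapes, e.g. (10 PERCENT),
        # (5 ROWS), a bare size such as (100), or a Hive-style bucket sample like
        # (BUCKET 1 OUT OF 2 ON col); each branch below fills the matching arg.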
        self._match(TokenType.L_PAREN)

        num = self._parse_number()

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        else:
            size = num

        self._match(TokenType.R_PAREN)

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
        return list(iter(self._parse_pivot, None))

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name
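
                    # The generated column name is dialect-dependent: "<name>_<field>"
                    # when PREFIXED_PIVOT_COLUMNS is set, otherwise "<field>_<name>".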
                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], *texts: str) -> t.Optional[E]:
        if not self._match_text_seq(*texts):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            expression = self._parse_number() if top else self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset)

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)
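
        # Fold a chain of column operators onto `this`, one operator per iteration,
        # e.g. db.tbl.col::INT consumes two dots and then a :: cast.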
3054 3055 while self._match_set(self.COLUMN_OPERATORS): 3056 op_token = self._prev.token_type 3057 op = self.COLUMN_OPERATORS.get(op_token) 3058 3059 if op_token == TokenType.DCOLON: 3060 field = self._parse_types() 3061 if not field: 3062 self.raise_error("Expected type") 3063 elif op and self._curr: 3064 self._advance() 3065 value = self._prev.text 3066 field = ( 3067 exp.Literal.number(value) 3068 if self._prev.token_type == TokenType.NUMBER 3069 else exp.Literal.string(value) 3070 ) 3071 else: 3072 field = self._parse_field(anonymous_func=True, any_token=True) 3073 3074 if isinstance(field, exp.Func): 3075 # bigquery allows function calls like x.y.count(...) 3076 # SAFE.SUBSTR(...) 3077 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3078 this = self._replace_columns_with_dots(this) 3079 3080 if op: 3081 this = op(self, this, field) 3082 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3083 this = self.expression( 3084 exp.Column, 3085 this=field, 3086 table=this.this, 3087 db=this.args.get("table"), 3088 catalog=this.args.get("db"), 3089 ) 3090 else: 3091 this = self.expression(exp.Dot, this=this, expression=field) 3092 this = self._parse_bracket(this) 3093 return this 3094 3095 def _parse_primary(self) -> t.Optional[exp.Expression]: 3096 if self._match_set(self.PRIMARY_PARSERS): 3097 token_type = self._prev.token_type 3098 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3099 3100 if token_type == TokenType.STRING: 3101 expressions = [primary] 3102 while self._match(TokenType.STRING): 3103 expressions.append(exp.Literal.string(self._prev.text)) 3104 3105 if len(expressions) > 1: 3106 return self.expression(exp.Concat, expressions=expressions) 3107 3108 return primary 3109 3110 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3111 return exp.Literal.number(f"0.{self._prev.text}") 3112 3113 if self._match(TokenType.L_PAREN): 3114 comments = self._prev_comments 3115 query = self._parse_select() 3116 3117 if query: 3118 expressions = [query] 3119 else: 3120 expressions = self._parse_csv(self._parse_expression) 3121 3122 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3123 3124 if isinstance(this, exp.Subqueryable): 3125 this = self._parse_set_operations( 3126 self._parse_subquery(this=this, parse_alias=False) 3127 ) 3128 elif len(expressions) > 1: 3129 this = self.expression(exp.Tuple, expressions=expressions) 3130 else: 3131 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3132 3133 if this: 3134 this.add_comments(comments) 3135 3136 self._match_r_paren(expression=this) 3137 return this 3138 3139 return None 3140 3141 def _parse_field( 3142 self, 3143 any_token: bool = False, 3144 tokens: t.Optional[t.Collection[TokenType]] = None, 3145 anonymous_func: bool = False, 3146 ) -> t.Optional[exp.Expression]: 3147 return ( 3148 self._parse_primary() 3149 or self._parse_function(anonymous=anonymous_func) 3150 or self._parse_id_var(any_token=any_token, tokens=tokens) 3151 ) 3152 3153 def _parse_function( 3154 self, 3155 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3156 anonymous: bool = False, 3157 optional_parens: bool = True, 3158 ) -> t.Optional[exp.Expression]: 3159 if not self._curr: 3160 return None 3161 3162 token_type = self._curr.token_type 3163 3164 if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3165 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3166 3167 if not self._next or self._next.token_type != 
TokenType.L_PAREN: 3168 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3169 self._advance() 3170 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3171 3172 return None 3173 3174 if token_type not in self.FUNC_TOKENS: 3175 return None 3176 3177 this = self._curr.text 3178 upper = this.upper() 3179 self._advance(2) 3180 3181 parser = self.FUNCTION_PARSERS.get(upper) 3182 3183 if parser and not anonymous: 3184 this = parser(self) 3185 else: 3186 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3187 3188 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3189 this = self.expression(subquery_predicate, this=self._parse_select()) 3190 self._match_r_paren() 3191 return this 3192 3193 if functions is None: 3194 functions = self.FUNCTIONS 3195 3196 function = functions.get(upper) 3197 3198 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3199 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3200 3201 if function and not anonymous: 3202 this = self.validate_expression(function(args), args) 3203 else: 3204 this = self.expression(exp.Anonymous, this=this, expressions=args) 3205 3206 self._match_r_paren(this) 3207 return self._parse_window(this) 3208 3209 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3210 return self._parse_column_def(self._parse_id_var()) 3211 3212 def _parse_user_defined_function( 3213 self, kind: t.Optional[TokenType] = None 3214 ) -> t.Optional[exp.Expression]: 3215 this = self._parse_id_var() 3216 3217 while self._match(TokenType.DOT): 3218 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3219 3220 if not self._match(TokenType.L_PAREN): 3221 return this 3222 3223 expressions = self._parse_csv(self._parse_function_parameter) 3224 self._match_r_paren() 3225 return self.expression( 3226 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3227 ) 3228 3229 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3230 literal = self._parse_primary() 3231 if literal: 3232 return self.expression(exp.Introducer, this=token.text, expression=literal) 3233 3234 return self.expression(exp.Identifier, this=token.text) 3235 3236 def _parse_session_parameter(self) -> exp.SessionParameter: 3237 kind = None 3238 this = self._parse_id_var() or self._parse_primary() 3239 3240 if this and self._match(TokenType.DOT): 3241 kind = this.name 3242 this = self._parse_var() or self._parse_primary() 3243 3244 return self.expression(exp.SessionParameter, this=this, kind=kind) 3245 3246 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3247 index = self._index 3248 3249 if self._match(TokenType.L_PAREN): 3250 expressions = self._parse_csv(self._parse_id_var) 3251 3252 if not self._match(TokenType.R_PAREN): 3253 self._retreat(index) 3254 else: 3255 expressions = [self._parse_id_var()] 3256 3257 if self._match_set(self.LAMBDAS): 3258 return self.LAMBDAS[self._prev.token_type](self, expressions) 3259 3260 self._retreat(index) 3261 3262 this: t.Optional[exp.Expression] 3263 3264 if self._match(TokenType.DISTINCT): 3265 this = self.expression( 3266 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3267 ) 3268 else: 3269 this = self._parse_select_or_expression(alias=alias) 3270 3271 if isinstance(this, exp.EQ): 3272 left = this.this 3273 if isinstance(left, exp.Column): 3274 left.replace(exp.var(left.text("this"))) 3275 3276 return 
self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3277 3278 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3279 index = self._index 3280 3281 if not self.errors: 3282 try: 3283 if self._parse_select(nested=True): 3284 return this 3285 except ParseError: 3286 pass 3287 finally: 3288 self.errors.clear() 3289 self._retreat(index) 3290 3291 if not self._match(TokenType.L_PAREN): 3292 return this 3293 3294 args = self._parse_csv( 3295 lambda: self._parse_constraint() 3296 or self._parse_column_def(self._parse_field(any_token=True)) 3297 ) 3298 3299 self._match_r_paren() 3300 return self.expression(exp.Schema, this=this, expressions=args) 3301 3302 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3303 # column defs are not really columns, they're identifiers 3304 if isinstance(this, exp.Column): 3305 this = this.this 3306 3307 kind = self._parse_types(schema=True) 3308 3309 if self._match_text_seq("FOR", "ORDINALITY"): 3310 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3311 3312 constraints = [] 3313 while True: 3314 constraint = self._parse_column_constraint() 3315 if not constraint: 3316 break 3317 constraints.append(constraint) 3318 3319 if not kind and not constraints: 3320 return this 3321 3322 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3323 3324 def _parse_auto_increment( 3325 self, 3326 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3327 start = None 3328 increment = None 3329 3330 if self._match(TokenType.L_PAREN, advance=False): 3331 args = self._parse_wrapped_csv(self._parse_bitwise) 3332 start = seq_get(args, 0) 3333 increment = seq_get(args, 1) 3334 elif self._match_text_seq("START"): 3335 start = self._parse_bitwise() 3336 self._match_text_seq("INCREMENT") 3337 increment = self._parse_bitwise() 3338 3339 if start and increment: 3340 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3341 3342 return exp.AutoIncrementColumnConstraint() 3343 3344 def _parse_compress(self) -> exp.CompressColumnConstraint: 3345 if self._match(TokenType.L_PAREN, advance=False): 3346 return self.expression( 3347 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3348 ) 3349 3350 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3351 3352 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3353 if self._match_text_seq("BY", "DEFAULT"): 3354 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3355 this = self.expression( 3356 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3357 ) 3358 else: 3359 self._match_text_seq("ALWAYS") 3360 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3361 3362 self._match(TokenType.ALIAS) 3363 identity = self._match_text_seq("IDENTITY") 3364 3365 if self._match(TokenType.L_PAREN): 3366 if self._match_text_seq("START", "WITH"): 3367 this.set("start", self._parse_bitwise()) 3368 if self._match_text_seq("INCREMENT", "BY"): 3369 this.set("increment", self._parse_bitwise()) 3370 if self._match_text_seq("MINVALUE"): 3371 this.set("minvalue", self._parse_bitwise()) 3372 if self._match_text_seq("MAXVALUE"): 3373 this.set("maxvalue", self._parse_bitwise()) 3374 3375 if self._match_text_seq("CYCLE"): 3376 this.set("cycle", True) 3377 elif self._match_text_seq("NO", "CYCLE"): 3378 this.set("cycle", 
False) 3379 3380 if not identity: 3381 this.set("expression", self._parse_bitwise()) 3382 3383 self._match_r_paren() 3384 3385 return this 3386 3387 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3388 self._match_text_seq("LENGTH") 3389 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3390 3391 def _parse_not_constraint( 3392 self, 3393 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3394 if self._match_text_seq("NULL"): 3395 return self.expression(exp.NotNullColumnConstraint) 3396 if self._match_text_seq("CASESPECIFIC"): 3397 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3398 return None 3399 3400 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3401 if self._match(TokenType.CONSTRAINT): 3402 this = self._parse_id_var() 3403 else: 3404 this = None 3405 3406 if self._match_texts(self.CONSTRAINT_PARSERS): 3407 return self.expression( 3408 exp.ColumnConstraint, 3409 this=this, 3410 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3411 ) 3412 3413 return this 3414 3415 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3416 if not self._match(TokenType.CONSTRAINT): 3417 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3418 3419 this = self._parse_id_var() 3420 expressions = [] 3421 3422 while True: 3423 constraint = self._parse_unnamed_constraint() or self._parse_function() 3424 if not constraint: 3425 break 3426 expressions.append(constraint) 3427 3428 return self.expression(exp.Constraint, this=this, expressions=expressions) 3429 3430 def _parse_unnamed_constraint( 3431 self, constraints: t.Optional[t.Collection[str]] = None 3432 ) -> t.Optional[exp.Expression]: 3433 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3434 return None 3435 3436 constraint = self._prev.text.upper() 3437 if constraint not in self.CONSTRAINT_PARSERS: 3438 self.raise_error(f"No parser found for schema constraint {constraint}.") 3439 3440 return self.CONSTRAINT_PARSERS[constraint](self) 3441 3442 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3443 self._match_text_seq("KEY") 3444 return self.expression( 3445 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3446 ) 3447 3448 def _parse_key_constraint_options(self) -> t.List[str]: 3449 options = [] 3450 while True: 3451 if not self._curr: 3452 break 3453 3454 if self._match(TokenType.ON): 3455 action = None 3456 on = self._advance_any() and self._prev.text 3457 3458 if self._match_text_seq("NO", "ACTION"): 3459 action = "NO ACTION" 3460 elif self._match_text_seq("CASCADE"): 3461 action = "CASCADE" 3462 elif self._match_pair(TokenType.SET, TokenType.NULL): 3463 action = "SET NULL" 3464 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3465 action = "SET DEFAULT" 3466 else: 3467 self.raise_error("Invalid key constraint") 3468 3469 options.append(f"ON {on} {action}") 3470 elif self._match_text_seq("NOT", "ENFORCED"): 3471 options.append("NOT ENFORCED") 3472 elif self._match_text_seq("DEFERRABLE"): 3473 options.append("DEFERRABLE") 3474 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3475 options.append("INITIALLY DEFERRED") 3476 elif self._match_text_seq("NORELY"): 3477 options.append("NORELY") 3478 elif self._match_text_seq("MATCH", "FULL"): 3479 options.append("MATCH FULL") 3480 else: 3481 break 3482 3483 return options 3484 3485 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3486 if match 
and not self._match(TokenType.REFERENCES): 3487 return None 3488 3489 expressions = None 3490 this = self._parse_id_var() 3491 3492 if self._match(TokenType.L_PAREN, advance=False): 3493 expressions = self._parse_wrapped_id_vars() 3494 3495 options = self._parse_key_constraint_options() 3496 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3497 3498 def _parse_foreign_key(self) -> exp.ForeignKey: 3499 expressions = self._parse_wrapped_id_vars() 3500 reference = self._parse_references() 3501 options = {} 3502 3503 while self._match(TokenType.ON): 3504 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3505 self.raise_error("Expected DELETE or UPDATE") 3506 3507 kind = self._prev.text.lower() 3508 3509 if self._match_text_seq("NO", "ACTION"): 3510 action = "NO ACTION" 3511 elif self._match(TokenType.SET): 3512 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3513 action = "SET " + self._prev.text.upper() 3514 else: 3515 self._advance() 3516 action = self._prev.text.upper() 3517 3518 options[kind] = action 3519 3520 return self.expression( 3521 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3522 ) 3523 3524 def _parse_primary_key( 3525 self, wrapped_optional: bool = False, in_props: bool = False 3526 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3527 desc = ( 3528 self._match_set((TokenType.ASC, TokenType.DESC)) 3529 and self._prev.token_type == TokenType.DESC 3530 ) 3531 3532 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3533 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3534 3535 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3536 options = self._parse_key_constraint_options() 3537 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3538 3539 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3540 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3541 return this 3542 3543 bracket_kind = self._prev.token_type 3544 3545 if self._match(TokenType.COLON): 3546 expressions: t.List[t.Optional[exp.Expression]] = [ 3547 self.expression(exp.Slice, expression=self._parse_conjunction()) 3548 ] 3549 else: 3550 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3551 3552 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3553 if bracket_kind == TokenType.L_BRACE: 3554 this = self.expression(exp.Struct, expressions=expressions) 3555 elif not this or this.name.upper() == "ARRAY": 3556 this = self.expression(exp.Array, expressions=expressions) 3557 else: 3558 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3559 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3560 3561 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3562 self.raise_error("Expected ]") 3563 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3564 self.raise_error("Expected }") 3565 3566 self._add_comments(this) 3567 return self._parse_bracket(this) 3568 3569 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3570 if self._match(TokenType.COLON): 3571 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3572 return this 3573 3574 def _parse_case(self) -> t.Optional[exp.Expression]: 3575 ifs = [] 3576 default = None 3577 3578 expression = 
self._parse_conjunction() 3579 3580 while self._match(TokenType.WHEN): 3581 this = self._parse_conjunction() 3582 self._match(TokenType.THEN) 3583 then = self._parse_conjunction() 3584 ifs.append(self.expression(exp.If, this=this, true=then)) 3585 3586 if self._match(TokenType.ELSE): 3587 default = self._parse_conjunction() 3588 3589 if not self._match(TokenType.END): 3590 self.raise_error("Expected END after CASE", self._prev) 3591 3592 return self._parse_window( 3593 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3594 ) 3595 3596 def _parse_if(self) -> t.Optional[exp.Expression]: 3597 if self._match(TokenType.L_PAREN): 3598 args = self._parse_csv(self._parse_conjunction) 3599 this = self.validate_expression(exp.If.from_arg_list(args), args) 3600 self._match_r_paren() 3601 else: 3602 index = self._index - 1 3603 condition = self._parse_conjunction() 3604 3605 if not condition: 3606 self._retreat(index) 3607 return None 3608 3609 self._match(TokenType.THEN) 3610 true = self._parse_conjunction() 3611 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3612 self._match(TokenType.END) 3613 this = self.expression(exp.If, this=condition, true=true, false=false) 3614 3615 return self._parse_window(this) 3616 3617 def _parse_extract(self) -> exp.Extract: 3618 this = self._parse_function() or self._parse_var() or self._parse_type() 3619 3620 if self._match(TokenType.FROM): 3621 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3622 3623 if not self._match(TokenType.COMMA): 3624 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3625 3626 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3627 3628 def _parse_cast(self, strict: bool) -> exp.Expression: 3629 this = self._parse_conjunction() 3630 3631 if not self._match(TokenType.ALIAS): 3632 if self._match(TokenType.COMMA): 3633 return self.expression( 3634 exp.CastToStrType, this=this, expression=self._parse_string() 3635 ) 3636 else: 3637 self.raise_error("Expected AS after CAST") 3638 3639 to = self._parse_types() 3640 3641 if not to: 3642 self.raise_error("Expected TYPE after CAST") 3643 elif to.this == exp.DataType.Type.CHAR: 3644 if self._match(TokenType.CHARACTER_SET): 3645 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3646 elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT): 3647 fmt = self._parse_string() 3648 3649 return self.expression( 3650 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3651 this=this, 3652 format=exp.Literal.string( 3653 format_time( 3654 fmt.this if fmt else "", 3655 self.FORMAT_MAPPING or self.TIME_MAPPING, 3656 self.FORMAT_TRIE or self.TIME_TRIE, 3657 ) 3658 ), 3659 ) 3660 3661 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3662 3663 def _parse_concat(self) -> t.Optional[exp.Expression]: 3664 args = self._parse_csv(self._parse_conjunction) 3665 if self.CONCAT_NULL_OUTPUTS_STRING: 3666 args = [ 3667 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3668 for arg in args 3669 if arg 3670 ] 3671 3672 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3673 # we find such a call we replace it with its argument. 
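# For example (an illustrative sketch, not the output of any one dialect; the exact
# tree also depends on CONCAT_NULL_OUTPUTS_STRING and STRICT_STRING_CONCAT):
#   CONCAT(x)    -> x                                  (the call is flattened away)
#   CONCAT(x, y) -> exp.Concat(expressions=[x, y])     when STRICT_STRING_CONCAT is set,
#                   exp.SafeConcat(expressions=[x, y]) otherwise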
3674 if len(args) == 1: 3675 return args[0] 3676 3677 return self.expression( 3678 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3679 ) 3680 3681 def _parse_string_agg(self) -> exp.Expression: 3682 expression: t.Optional[exp.Expression] 3683 3684 if self._match(TokenType.DISTINCT): 3685 args = self._parse_csv(self._parse_conjunction) 3686 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3687 else: 3688 args = self._parse_csv(self._parse_conjunction) 3689 expression = seq_get(args, 0) 3690 3691 index = self._index 3692 if not self._match(TokenType.R_PAREN): 3693 # Postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3694 order = self._parse_order(this=expression) 3695 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3696 3697 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3698 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3699 # the STRING_AGG call is parsed the way it is in MySQL / SQLite, and can thus be transpiled to them more easily. 3700 if not self._match_text_seq("WITHIN", "GROUP"): 3701 self._retreat(index) 3702 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3703 3704 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3705 order = self._parse_order(this=expression) 3706 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3707 3708 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3709 to: t.Optional[exp.Expression] 3710 this = self._parse_bitwise() 3711 3712 if self._match(TokenType.USING): 3713 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3714 elif self._match(TokenType.COMMA): 3715 to = self._parse_bitwise() 3716 else: 3717 to = None 3718 3719 # Swap the argument order if needed to produce the correct AST 3720 if self.CONVERT_TYPE_FIRST: 3721 this, to = to, this 3722 3723 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3724 3725 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3726 """ 3727 There are generally two variants of the DECODE function: 3728 3729 - DECODE(bin, charset) 3730 - DECODE(expression, search, result [, search, result] ... [, default]) 3731 3732 The second variant will always be parsed into a CASE expression. Note that NULL 3733 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3734 instead of relying on pattern matching.
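For example (an illustrative mapping that follows the rules implemented below),

    DECODE(x, 1, 'one', NULL, 'missing', 'other')

is parsed into

    CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'missing' ELSE 'other' END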
3735 """ 3736 args = self._parse_csv(self._parse_conjunction) 3737 3738 if len(args) < 3: 3739 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3740 3741 expression, *expressions = args 3742 if not expression: 3743 return None 3744 3745 ifs = [] 3746 for search, result in zip(expressions[::2], expressions[1::2]): 3747 if not search or not result: 3748 return None 3749 3750 if isinstance(search, exp.Literal): 3751 ifs.append( 3752 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3753 ) 3754 elif isinstance(search, exp.Null): 3755 ifs.append( 3756 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3757 ) 3758 else: 3759 cond = exp.or_( 3760 exp.EQ(this=expression.copy(), expression=search), 3761 exp.and_( 3762 exp.Is(this=expression.copy(), expression=exp.Null()), 3763 exp.Is(this=search.copy(), expression=exp.Null()), 3764 copy=False, 3765 ), 3766 copy=False, 3767 ) 3768 ifs.append(exp.If(this=cond, true=result)) 3769 3770 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3771 3772 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3773 self._match_text_seq("KEY") 3774 key = self._parse_field() 3775 self._match(TokenType.COLON) 3776 self._match_text_seq("VALUE") 3777 value = self._parse_field() 3778 3779 if not key and not value: 3780 return None 3781 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3782 3783 def _parse_json_object(self) -> exp.JSONObject: 3784 star = self._parse_star() 3785 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3786 3787 null_handling = None 3788 if self._match_text_seq("NULL", "ON", "NULL"): 3789 null_handling = "NULL ON NULL" 3790 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3791 null_handling = "ABSENT ON NULL" 3792 3793 unique_keys = None 3794 if self._match_text_seq("WITH", "UNIQUE"): 3795 unique_keys = True 3796 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3797 unique_keys = False 3798 3799 self._match_text_seq("KEYS") 3800 3801 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3802 format_json = self._match_text_seq("FORMAT", "JSON") 3803 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3804 3805 return self.expression( 3806 exp.JSONObject, 3807 expressions=expressions, 3808 null_handling=null_handling, 3809 unique_keys=unique_keys, 3810 return_type=return_type, 3811 format_json=format_json, 3812 encoding=encoding, 3813 ) 3814 3815 def _parse_logarithm(self) -> exp.Func: 3816 # Default argument order is base, expression 3817 args = self._parse_csv(self._parse_range) 3818 3819 if len(args) > 1: 3820 if not self.LOG_BASE_FIRST: 3821 args.reverse() 3822 return exp.Log.from_arg_list(args) 3823 3824 return self.expression( 3825 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3826 ) 3827 3828 def _parse_match_against(self) -> exp.MatchAgainst: 3829 expressions = self._parse_csv(self._parse_column) 3830 3831 self._match_text_seq(")", "AGAINST", "(") 3832 3833 this = self._parse_string() 3834 3835 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3836 modifier = "IN NATURAL LANGUAGE MODE" 3837 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3838 modifier = f"{modifier} WITH QUERY EXPANSION" 3839 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3840 modifier = "IN BOOLEAN MODE" 3841 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3842 modifier = "WITH QUERY EXPANSION" 3843 
else: 3844 modifier = None 3845 3846 return self.expression( 3847 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3848 ) 3849 3850 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3851 def _parse_open_json(self) -> exp.OpenJSON: 3852 this = self._parse_bitwise() 3853 path = self._match(TokenType.COMMA) and self._parse_string() 3854 3855 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3856 this = self._parse_field(any_token=True) 3857 kind = self._parse_types() 3858 path = self._parse_string() 3859 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3860 3861 return self.expression( 3862 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3863 ) 3864 3865 expressions = None 3866 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3867 self._match_l_paren() 3868 expressions = self._parse_csv(_parse_open_json_column_def) 3869 3870 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3871 3872 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 3873 args = self._parse_csv(self._parse_bitwise) 3874 3875 if self._match(TokenType.IN): 3876 return self.expression( 3877 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3878 ) 3879 3880 if haystack_first: 3881 haystack = seq_get(args, 0) 3882 needle = seq_get(args, 1) 3883 else: 3884 needle = seq_get(args, 0) 3885 haystack = seq_get(args, 1) 3886 3887 return self.expression( 3888 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 3889 ) 3890 3891 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 3892 args = self._parse_csv(self._parse_table) 3893 return exp.JoinHint(this=func_name.upper(), expressions=args) 3894 3895 def _parse_substring(self) -> exp.Substring: 3896 # Postgres supports the form: substring(string [from int] [for int]) 3897 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3898 3899 args = self._parse_csv(self._parse_bitwise) 3900 3901 if self._match(TokenType.FROM): 3902 args.append(self._parse_bitwise()) 3903 if self._match(TokenType.FOR): 3904 args.append(self._parse_bitwise()) 3905 3906 return self.validate_expression(exp.Substring.from_arg_list(args), args) 3907 3908 def _parse_trim(self) -> exp.Trim: 3909 # https://www.w3resource.com/sql/character-functions/trim.php 3910 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3911 3912 position = None 3913 collation = None 3914 3915 if self._match_texts(self.TRIM_TYPES): 3916 position = self._prev.text.upper() 3917 3918 expression = self._parse_bitwise() 3919 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3920 this = self._parse_bitwise() 3921 else: 3922 this = expression 3923 expression = None 3924 3925 if self._match(TokenType.COLLATE): 3926 collation = self._parse_bitwise() 3927 3928 return self.expression( 3929 exp.Trim, this=this, position=position, expression=expression, collation=collation 3930 ) 3931 3932 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3933 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3934 3935 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3936 return self._parse_window(self._parse_id_var(), alias=True) 3937 3938 def _parse_respect_or_ignore_nulls( 3939 self, this: t.Optional[exp.Expression] 3940 ) -> t.Optional[exp.Expression]: 3941 if self._match_text_seq("IGNORE", "NULLS"): 3942 return 
self.expression(exp.IgnoreNulls, this=this) 3943 if self._match_text_seq("RESPECT", "NULLS"): 3944 return self.expression(exp.RespectNulls, this=this) 3945 return this 3946 3947 def _parse_window( 3948 self, this: t.Optional[exp.Expression], alias: bool = False 3949 ) -> t.Optional[exp.Expression]: 3950 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 3951 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 3952 self._match_r_paren() 3953 3954 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 3955 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3956 if self._match_text_seq("WITHIN", "GROUP"): 3957 order = self._parse_wrapped(self._parse_order) 3958 this = self.expression(exp.WithinGroup, this=this, expression=order) 3959 3960 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] before OVER. 3961 # Some dialects choose to implement it and some do not. 3962 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3963 3964 # There is some code above in _parse_lambda that handles 3965 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3966 3967 # The code below handles 3968 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3969 3970 # Oracle allows both formats 3971 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3972 # and Snowflake chose to do the same for familiarity 3973 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3974 this = self._parse_respect_or_ignore_nulls(this) 3975 3976 # BigQuery: SELECT ... WINDOW x AS (PARTITION BY ...) 3977 if alias: 3978 over = None 3979 self._match(TokenType.ALIAS) 3980 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 3981 return this 3982 else: 3983 over = self._prev.text.upper() 3984 3985 if not self._match(TokenType.L_PAREN): 3986 return self.expression( 3987 exp.Window, this=this, alias=self._parse_id_var(False), over=over 3988 ) 3989 3990 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 3991 3992 first = self._match(TokenType.FIRST) 3993 if self._match_text_seq("LAST"): 3994 first = False 3995 3996 partition = self._parse_partition_by() 3997 order = self._parse_order() 3998 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 3999 4000 if kind: 4001 self._match(TokenType.BETWEEN) 4002 start = self._parse_window_spec() 4003 self._match(TokenType.AND) 4004 end = self._parse_window_spec() 4005 4006 spec = self.expression( 4007 exp.WindowSpec, 4008 kind=kind, 4009 start=start["value"], 4010 start_side=start["side"], 4011 end=end["value"], 4012 end_side=end["side"], 4013 ) 4014 else: 4015 spec = None 4016 4017 self._match_r_paren() 4018 4019 return self.expression( 4020 exp.Window, 4021 this=this, 4022 partition_by=partition, 4023 order=order, 4024 spec=spec, 4025 alias=window_alias, 4026 over=over, 4027 first=first, 4028 ) 4029 4030 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4031 self._match(TokenType.BETWEEN) 4032 4033 return { 4034 "value": ( 4035 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4036 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4037 or self._parse_bitwise() 4038 ), 4039 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4040 } 4041 4042 def _parse_alias( 4043 self, this: t.Optional[exp.Expression], explicit: bool = False 4044 ) -> t.Optional[exp.Expression]: 4045
any_token = self._match(TokenType.ALIAS) 4046 4047 if explicit and not any_token: 4048 return this 4049 4050 if self._match(TokenType.L_PAREN): 4051 aliases = self.expression( 4052 exp.Aliases, 4053 this=this, 4054 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4055 ) 4056 self._match_r_paren(aliases) 4057 return aliases 4058 4059 alias = self._parse_id_var(any_token) 4060 4061 if alias: 4062 return self.expression(exp.Alias, this=this, alias=alias) 4063 4064 return this 4065 4066 def _parse_id_var( 4067 self, 4068 any_token: bool = True, 4069 tokens: t.Optional[t.Collection[TokenType]] = None, 4070 ) -> t.Optional[exp.Expression]: 4071 identifier = self._parse_identifier() 4072 4073 if identifier: 4074 return identifier 4075 4076 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4077 quoted = self._prev.token_type == TokenType.STRING 4078 return exp.Identifier(this=self._prev.text, quoted=quoted) 4079 4080 return None 4081 4082 def _parse_string(self) -> t.Optional[exp.Expression]: 4083 if self._match(TokenType.STRING): 4084 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4085 return self._parse_placeholder() 4086 4087 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4088 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4089 4090 def _parse_number(self) -> t.Optional[exp.Expression]: 4091 if self._match(TokenType.NUMBER): 4092 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4093 return self._parse_placeholder() 4094 4095 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4096 if self._match(TokenType.IDENTIFIER): 4097 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4098 return self._parse_placeholder() 4099 4100 def _parse_var( 4101 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4102 ) -> t.Optional[exp.Expression]: 4103 if ( 4104 (any_token and self._advance_any()) 4105 or self._match(TokenType.VAR) 4106 or (self._match_set(tokens) if tokens else False) 4107 ): 4108 return self.expression(exp.Var, this=self._prev.text) 4109 return self._parse_placeholder() 4110 4111 def _advance_any(self) -> t.Optional[Token]: 4112 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4113 self._advance() 4114 return self._prev 4115 return None 4116 4117 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4118 return self._parse_var() or self._parse_string() 4119 4120 def _parse_null(self) -> t.Optional[exp.Expression]: 4121 if self._match(TokenType.NULL): 4122 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4123 return None 4124 4125 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4126 if self._match(TokenType.TRUE): 4127 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4128 if self._match(TokenType.FALSE): 4129 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4130 return None 4131 4132 def _parse_star(self) -> t.Optional[exp.Expression]: 4133 if self._match(TokenType.STAR): 4134 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4135 return None 4136 4137 def _parse_parameter(self) -> exp.Parameter: 4138 wrapped = self._match(TokenType.L_BRACE) 4139 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4140 self._match(TokenType.R_BRACE) 4141 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4142 4143 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 
4144 if self._match_set(self.PLACEHOLDER_PARSERS): 4145 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4146 if placeholder: 4147 return placeholder 4148 self._advance(-1) 4149 return None 4150 4151 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4152 if not self._match(TokenType.EXCEPT): 4153 return None 4154 if self._match(TokenType.L_PAREN, advance=False): 4155 return self._parse_wrapped_csv(self._parse_column) 4156 return self._parse_csv(self._parse_column) 4157 4158 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4159 if not self._match(TokenType.REPLACE): 4160 return None 4161 if self._match(TokenType.L_PAREN, advance=False): 4162 return self._parse_wrapped_csv(self._parse_expression) 4163 return self._parse_csv(self._parse_expression) 4164 4165 def _parse_csv( 4166 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4167 ) -> t.List[t.Optional[exp.Expression]]: 4168 parse_result = parse_method() 4169 items = [parse_result] if parse_result is not None else [] 4170 4171 while self._match(sep): 4172 self._add_comments(parse_result) 4173 parse_result = parse_method() 4174 if parse_result is not None: 4175 items.append(parse_result) 4176 4177 return items 4178 4179 def _parse_tokens( 4180 self, parse_method: t.Callable, expressions: t.Dict 4181 ) -> t.Optional[exp.Expression]: 4182 this = parse_method() 4183 4184 while self._match_set(expressions): 4185 this = self.expression( 4186 expressions[self._prev.token_type], 4187 this=this, 4188 comments=self._prev_comments, 4189 expression=parse_method(), 4190 ) 4191 4192 return this 4193 4194 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4195 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4196 4197 def _parse_wrapped_csv( 4198 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4199 ) -> t.List[t.Optional[exp.Expression]]: 4200 return self._parse_wrapped( 4201 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4202 ) 4203 4204 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4205 wrapped = self._match(TokenType.L_PAREN) 4206 if not wrapped and not optional: 4207 self.raise_error("Expecting (") 4208 parse_result = parse_method() 4209 if wrapped: 4210 self._match_r_paren() 4211 return parse_result 4212 4213 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4214 return self._parse_select() or self._parse_set_operations( 4215 self._parse_expression() if alias else self._parse_conjunction() 4216 ) 4217 4218 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4219 return self._parse_query_modifiers( 4220 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4221 ) 4222 4223 def _parse_transaction(self) -> exp.Transaction: 4224 this = None 4225 if self._match_texts(self.TRANSACTION_KIND): 4226 this = self._prev.text 4227 4228 self._match_texts({"TRANSACTION", "WORK"}) 4229 4230 modes = [] 4231 while True: 4232 mode = [] 4233 while self._match(TokenType.VAR): 4234 mode.append(self._prev.text) 4235 4236 if mode: 4237 modes.append(" ".join(mode)) 4238 if not self._match(TokenType.COMMA): 4239 break 4240 4241 return self.expression(exp.Transaction, this=this, modes=modes) 4242 4243 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4244 chain = None 4245 savepoint = None 4246 is_rollback = self._prev.token_type == 
TokenType.ROLLBACK 4247 4248 self._match_texts({"TRANSACTION", "WORK"}) 4249 4250 if self._match_text_seq("TO"): 4251 self._match_text_seq("SAVEPOINT") 4252 savepoint = self._parse_id_var() 4253 4254 if self._match(TokenType.AND): 4255 chain = not self._match_text_seq("NO") 4256 self._match_text_seq("CHAIN") 4257 4258 if is_rollback: 4259 return self.expression(exp.Rollback, savepoint=savepoint) 4260 4261 return self.expression(exp.Commit, chain=chain) 4262 4263 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4264 if not self._match_text_seq("ADD"): 4265 return None 4266 4267 self._match(TokenType.COLUMN) 4268 exists_column = self._parse_exists(not_=True) 4269 expression = self._parse_column_def(self._parse_field(any_token=True)) 4270 4271 if expression: 4272 expression.set("exists", exists_column) 4273 4274 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4275 if self._match_texts(("FIRST", "AFTER")): 4276 position = self._prev.text 4277 column_position = self.expression( 4278 exp.ColumnPosition, this=self._parse_column(), position=position 4279 ) 4280 expression.set("position", column_position) 4281 4282 return expression 4283 4284 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4285 drop = self._match(TokenType.DROP) and self._parse_drop() 4286 if drop and not isinstance(drop, exp.Command): 4287 drop.set("kind", drop.args.get("kind", "COLUMN")) 4288 return drop 4289 4290 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4291 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4292 return self.expression( 4293 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4294 ) 4295 4296 def _parse_add_constraint(self) -> exp.AddConstraint: 4297 this = None 4298 kind = self._prev.token_type 4299 4300 if kind == TokenType.CONSTRAINT: 4301 this = self._parse_id_var() 4302 4303 if self._match_text_seq("CHECK"): 4304 expression = self._parse_wrapped(self._parse_conjunction) 4305 enforced = self._match_text_seq("ENFORCED") 4306 4307 return self.expression( 4308 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4309 ) 4310 4311 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4312 expression = self._parse_foreign_key() 4313 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4314 expression = self._parse_primary_key() 4315 else: 4316 expression = None 4317 4318 return self.expression(exp.AddConstraint, this=this, expression=expression) 4319 4320 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4321 index = self._index - 1 4322 4323 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4324 return self._parse_csv(self._parse_add_constraint) 4325 4326 self._retreat(index) 4327 return self._parse_csv(self._parse_add_column) 4328 4329 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4330 self._match(TokenType.COLUMN) 4331 column = self._parse_field(any_token=True) 4332 4333 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4334 return self.expression(exp.AlterColumn, this=column, drop=True) 4335 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4336 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4337 4338 self._match_text_seq("SET", "DATA") 4339 return self.expression( 4340 exp.AlterColumn, 4341 this=column, 4342 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4343 
collate=self._match(TokenType.COLLATE) and self._parse_term(), 4344 using=self._match(TokenType.USING) and self._parse_conjunction(), 4345 ) 4346 4347 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4348 index = self._index - 1 4349 4350 partition_exists = self._parse_exists() 4351 if self._match(TokenType.PARTITION, advance=False): 4352 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4353 4354 self._retreat(index) 4355 return self._parse_csv(self._parse_drop_column) 4356 4357 def _parse_alter_table_rename(self) -> exp.RenameTable: 4358 self._match_text_seq("TO") 4359 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4360 4361 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4362 start = self._prev 4363 4364 if not self._match(TokenType.TABLE): 4365 return self._parse_as_command(start) 4366 4367 exists = self._parse_exists() 4368 this = self._parse_table(schema=True) 4369 4370 if self._next: 4371 self._advance() 4372 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4373 4374 if parser: 4375 actions = ensure_list(parser(self)) 4376 4377 if not self._curr: 4378 return self.expression( 4379 exp.AlterTable, 4380 this=this, 4381 exists=exists, 4382 actions=actions, 4383 ) 4384 return self._parse_as_command(start) 4385 4386 def _parse_merge(self) -> exp.Merge: 4387 self._match(TokenType.INTO) 4388 target = self._parse_table() 4389 4390 self._match(TokenType.USING) 4391 using = self._parse_table() 4392 4393 self._match(TokenType.ON) 4394 on = self._parse_conjunction() 4395 4396 whens = [] 4397 while self._match(TokenType.WHEN): 4398 matched = not self._match(TokenType.NOT) 4399 self._match_text_seq("MATCHED") 4400 source = ( 4401 False 4402 if self._match_text_seq("BY", "TARGET") 4403 else self._match_text_seq("BY", "SOURCE") 4404 ) 4405 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4406 4407 self._match(TokenType.THEN) 4408 4409 if self._match(TokenType.INSERT): 4410 _this = self._parse_star() 4411 if _this: 4412 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4413 else: 4414 then = self.expression( 4415 exp.Insert, 4416 this=self._parse_value(), 4417 expression=self._match(TokenType.VALUES) and self._parse_value(), 4418 ) 4419 elif self._match(TokenType.UPDATE): 4420 expressions = self._parse_star() 4421 if expressions: 4422 then = self.expression(exp.Update, expressions=expressions) 4423 else: 4424 then = self.expression( 4425 exp.Update, 4426 expressions=self._match(TokenType.SET) 4427 and self._parse_csv(self._parse_equality), 4428 ) 4429 elif self._match(TokenType.DELETE): 4430 then = self.expression(exp.Var, this=self._prev.text) 4431 else: 4432 then = None 4433 4434 whens.append( 4435 self.expression( 4436 exp.When, 4437 matched=matched, 4438 source=source, 4439 condition=condition, 4440 then=then, 4441 ) 4442 ) 4443 4444 return self.expression( 4445 exp.Merge, 4446 this=target, 4447 using=using, 4448 on=on, 4449 expressions=whens, 4450 ) 4451 4452 def _parse_show(self) -> t.Optional[exp.Expression]: 4453 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4454 if parser: 4455 return parser(self) 4456 self._advance() 4457 return self.expression(exp.Show, this=self._prev.text.upper()) 4458 4459 def _parse_set_item_assignment( 4460 self, kind: t.Optional[str] = None 4461 ) -> t.Optional[exp.Expression]: 4462 index = self._index 4463 4464 if kind in {"GLOBAL", "SESSION"} and 
self._match_text_seq("TRANSACTION"): 4465 return self._parse_set_transaction(global_=kind == "GLOBAL") 4466 4467 left = self._parse_primary() or self._parse_id_var() 4468 4469 if not self._match_texts(("=", "TO")): 4470 self._retreat(index) 4471 return None 4472 4473 right = self._parse_statement() or self._parse_id_var() 4474 this = self.expression(exp.EQ, this=left, expression=right) 4475 4476 return self.expression(exp.SetItem, this=this, kind=kind) 4477 4478 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4479 self._match_text_seq("TRANSACTION") 4480 characteristics = self._parse_csv( 4481 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4482 ) 4483 return self.expression( 4484 exp.SetItem, 4485 expressions=characteristics, 4486 kind="TRANSACTION", 4487 **{"global": global_}, # type: ignore 4488 ) 4489 4490 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4491 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4492 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4493 4494 def _parse_set(self) -> exp.Set | exp.Command: 4495 index = self._index 4496 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4497 4498 if self._curr: 4499 self._retreat(index) 4500 return self._parse_as_command(self._prev) 4501 4502 return set_ 4503 4504 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4505 for option in options: 4506 if self._match_text_seq(*option.split(" ")): 4507 return exp.var(option) 4508 return None 4509 4510 def _parse_as_command(self, start: Token) -> exp.Command: 4511 while self._curr: 4512 self._advance() 4513 text = self._find_sql(start, self._prev) 4514 size = len(start.text) 4515 return exp.Command(this=text[:size], expression=text[size:]) 4516 4517 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4518 settings = [] 4519 4520 self._match_l_paren() 4521 kind = self._parse_id_var() 4522 4523 if self._match(TokenType.L_PAREN): 4524 while True: 4525 key = self._parse_id_var() 4526 value = self._parse_primary() 4527 4528 if not key and value is None: 4529 break 4530 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4531 self._match(TokenType.R_PAREN) 4532 4533 self._match_r_paren() 4534 4535 return self.expression( 4536 exp.DictProperty, 4537 this=this, 4538 kind=kind.this if kind else None, 4539 settings=settings, 4540 ) 4541 4542 def _parse_dict_range(self, this: str) -> exp.DictRange: 4543 self._match_l_paren() 4544 has_min = self._match_text_seq("MIN") 4545 if has_min: 4546 min = self._parse_var() or self._parse_primary() 4547 self._match_text_seq("MAX") 4548 max = self._parse_var() or self._parse_primary() 4549 else: 4550 max = self._parse_var() or self._parse_primary() 4551 min = exp.Literal.number(0) 4552 self._match_r_paren() 4553 return self.expression(exp.DictRange, this=this, min=min, max=max) 4554 4555 def _find_parser( 4556 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4557 ) -> t.Optional[t.Callable]: 4558 if not self._curr: 4559 return None 4560 4561 index = self._index 4562 this = [] 4563 while True: 4564 # The current token might be multiple words 4565 curr = self._curr.text.upper() 4566 key = curr.split(" ") 4567 this.append(curr) 4568 self._advance() 4569 result, trie = in_trie(trie, key) 4570 if result == 0: 4571 break 4572 if result == 2: 4573 subparser = parsers[" ".join(this)] 4574 return subparser 4575 self._retreat(index) 4576 return None 4577 4578 def 
_match(self, token_type, advance=True, expression=None): 4579 if not self._curr: 4580 return None 4581 4582 if self._curr.token_type == token_type: 4583 if advance: 4584 self._advance() 4585 self._add_comments(expression) 4586 return True 4587 4588 return None 4589 4590 def _match_set(self, types, advance=True): 4591 if not self._curr: 4592 return None 4593 4594 if self._curr.token_type in types: 4595 if advance: 4596 self._advance() 4597 return True 4598 4599 return None 4600 4601 def _match_pair(self, token_type_a, token_type_b, advance=True): 4602 if not self._curr or not self._next: 4603 return None 4604 4605 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4606 if advance: 4607 self._advance(2) 4608 return True 4609 4610 return None 4611 4612 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4613 if not self._match(TokenType.L_PAREN, expression=expression): 4614 self.raise_error("Expecting (") 4615 4616 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4617 if not self._match(TokenType.R_PAREN, expression=expression): 4618 self.raise_error("Expecting )") 4619 4620 def _match_texts(self, texts, advance=True): 4621 if self._curr and self._curr.text.upper() in texts: 4622 if advance: 4623 self._advance() 4624 return True 4625 return False 4626 4627 def _match_text_seq(self, *texts, advance=True): 4628 index = self._index 4629 for text in texts: 4630 if self._curr and self._curr.text.upper() == text: 4631 self._advance() 4632 else: 4633 self._retreat(index) 4634 return False 4635 4636 if not advance: 4637 self._retreat(index) 4638 4639 return True 4640 4641 @t.overload 4642 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4643 ... 4644 4645 @t.overload 4646 def _replace_columns_with_dots( 4647 self, this: t.Optional[exp.Expression] 4648 ) -> t.Optional[exp.Expression]: 4649 ... 4650 4651 def _replace_columns_with_dots(self, this): 4652 if isinstance(this, exp.Dot): 4653 exp.replace_children(this, self._replace_columns_with_dots) 4654 elif isinstance(this, exp.Column): 4655 exp.replace_children(this, self._replace_columns_with_dots) 4656 table = this.args.get("table") 4657 this = ( 4658 self.expression(exp.Dot, this=table, expression=this.this) 4659 if table 4660 else self.expression(exp.Var, this=this.name) 4661 ) 4662 elif isinstance(this, exp.Identifier): 4663 this = self.expression(exp.Var, this=this.name) 4664 4665 return this 4666 4667 def _replace_lambda( 4668 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4669 ) -> t.Optional[exp.Expression]: 4670 if not node: 4671 return node 4672 4673 for column in node.find_all(exp.Column): 4674 if column.parts[0].name in lambda_variables: 4675 dot_or_id = column.to_dot() if column.table else column.this 4676 parent = column.parent 4677 4678 while isinstance(parent, exp.Dot): 4679 if not isinstance(parent.parent, exp.Dot): 4680 parent.replace(dot_or_id) 4681 break 4682 parent = parent.parent 4683 else: 4684 if column is node: 4685 node = dot_or_id 4686 else: 4687 column.replace(dot_or_id) 4688 return node
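As a closing illustration, here is a minimal end-to-end sketch of how this class is typically driven. It assumes only the Tokenizer and the public Parser.parse entry point defined in this module; the SQL string is an arbitrary example.

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a, b FROM t WHERE a > 1"
tokens = Tokenizer().tokenize(sql)        # produce the raw token list
statements = Parser().parse(tokens, sql)  # one syntax tree per SQL statement
print(statements[0].sql())                # a parsed tree can be rendered back to SQL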
exp.Any, 194 TokenType.ALL: exp.All, 195 TokenType.EXISTS: exp.Exists, 196 TokenType.SOME: exp.Any, 197 } 198 199 RESERVED_KEYWORDS = { 200 *Tokenizer.SINGLE_TOKENS.values(), 201 TokenType.SELECT, 202 } 203 204 DB_CREATABLES = { 205 TokenType.DATABASE, 206 TokenType.SCHEMA, 207 TokenType.TABLE, 208 TokenType.VIEW, 209 TokenType.DICTIONARY, 210 } 211 212 CREATABLES = { 213 TokenType.COLUMN, 214 TokenType.FUNCTION, 215 TokenType.INDEX, 216 TokenType.PROCEDURE, 217 *DB_CREATABLES, 218 } 219 220 # Tokens that can represent identifiers 221 ID_VAR_TOKENS = { 222 TokenType.VAR, 223 TokenType.ANTI, 224 TokenType.APPLY, 225 TokenType.ASC, 226 TokenType.AUTO_INCREMENT, 227 TokenType.BEGIN, 228 TokenType.CACHE, 229 TokenType.CASE, 230 TokenType.COLLATE, 231 TokenType.COMMAND, 232 TokenType.COMMENT, 233 TokenType.COMMIT, 234 TokenType.CONSTRAINT, 235 TokenType.DEFAULT, 236 TokenType.DELETE, 237 TokenType.DESC, 238 TokenType.DESCRIBE, 239 TokenType.DICTIONARY, 240 TokenType.DIV, 241 TokenType.END, 242 TokenType.EXECUTE, 243 TokenType.ESCAPE, 244 TokenType.FALSE, 245 TokenType.FIRST, 246 TokenType.FILTER, 247 TokenType.FORMAT, 248 TokenType.FULL, 249 TokenType.IF, 250 TokenType.IS, 251 TokenType.ISNULL, 252 TokenType.INTERVAL, 253 TokenType.KEEP, 254 TokenType.LEFT, 255 TokenType.LOAD, 256 TokenType.MERGE, 257 TokenType.NATURAL, 258 TokenType.NEXT, 259 TokenType.OFFSET, 260 TokenType.ORDINALITY, 261 TokenType.OVERWRITE, 262 TokenType.PARTITION, 263 TokenType.PERCENT, 264 TokenType.PIVOT, 265 TokenType.PRAGMA, 266 TokenType.RANGE, 267 TokenType.REFERENCES, 268 TokenType.RIGHT, 269 TokenType.ROW, 270 TokenType.ROWS, 271 TokenType.SEMI, 272 TokenType.SET, 273 TokenType.SETTINGS, 274 TokenType.SHOW, 275 TokenType.TEMPORARY, 276 TokenType.TOP, 277 TokenType.TRUE, 278 TokenType.UNIQUE, 279 TokenType.UNPIVOT, 280 TokenType.UPDATE, 281 TokenType.VOLATILE, 282 TokenType.WINDOW, 283 *CREATABLES, 284 *SUBQUERY_PREDICATES, 285 *TYPE_TOKENS, 286 *NO_PAREN_FUNCTIONS, 287 } 288 289 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 290 291 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 292 TokenType.APPLY, 293 TokenType.ASOF, 294 TokenType.FULL, 295 TokenType.LEFT, 296 TokenType.LOCK, 297 TokenType.NATURAL, 298 TokenType.OFFSET, 299 TokenType.RIGHT, 300 TokenType.WINDOW, 301 } 302 303 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 304 305 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 306 307 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 308 309 FUNC_TOKENS = { 310 TokenType.COMMAND, 311 TokenType.CURRENT_DATE, 312 TokenType.CURRENT_DATETIME, 313 TokenType.CURRENT_TIMESTAMP, 314 TokenType.CURRENT_TIME, 315 TokenType.CURRENT_USER, 316 TokenType.FILTER, 317 TokenType.FIRST, 318 TokenType.FORMAT, 319 TokenType.GLOB, 320 TokenType.IDENTIFIER, 321 TokenType.INDEX, 322 TokenType.ISNULL, 323 TokenType.ILIKE, 324 TokenType.LIKE, 325 TokenType.MERGE, 326 TokenType.OFFSET, 327 TokenType.PRIMARY_KEY, 328 TokenType.RANGE, 329 TokenType.REPLACE, 330 TokenType.ROW, 331 TokenType.UNNEST, 332 TokenType.VAR, 333 TokenType.LEFT, 334 TokenType.RIGHT, 335 TokenType.DATE, 336 TokenType.DATETIME, 337 TokenType.TABLE, 338 TokenType.TIMESTAMP, 339 TokenType.TIMESTAMPTZ, 340 TokenType.WINDOW, 341 *TYPE_TOKENS, 342 *SUBQUERY_PREDICATES, 343 } 344 345 CONJUNCTION = { 346 TokenType.AND: exp.And, 347 TokenType.OR: exp.Or, 348 } 349 350 EQUALITY = { 351 TokenType.EQ: exp.EQ, 352 TokenType.NEQ: exp.NEQ, 353 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 354 } 355 356 COMPARISON = { 357 TokenType.GT: exp.GT, 358 TokenType.GTE: exp.GTE, 359 
TokenType.LT: exp.LT, 360 TokenType.LTE: exp.LTE, 361 } 362 363 BITWISE = { 364 TokenType.AMP: exp.BitwiseAnd, 365 TokenType.CARET: exp.BitwiseXor, 366 TokenType.PIPE: exp.BitwiseOr, 367 TokenType.DPIPE: exp.DPipe, 368 } 369 370 TERM = { 371 TokenType.DASH: exp.Sub, 372 TokenType.PLUS: exp.Add, 373 TokenType.MOD: exp.Mod, 374 TokenType.COLLATE: exp.Collate, 375 } 376 377 FACTOR = { 378 TokenType.DIV: exp.IntDiv, 379 TokenType.LR_ARROW: exp.Distance, 380 TokenType.SLASH: exp.Div, 381 TokenType.STAR: exp.Mul, 382 } 383 384 TIMESTAMPS = { 385 TokenType.TIME, 386 TokenType.TIMESTAMP, 387 TokenType.TIMESTAMPTZ, 388 TokenType.TIMESTAMPLTZ, 389 } 390 391 SET_OPERATIONS = { 392 TokenType.UNION, 393 TokenType.INTERSECT, 394 TokenType.EXCEPT, 395 } 396 397 JOIN_METHODS = { 398 TokenType.NATURAL, 399 TokenType.ASOF, 400 } 401 402 JOIN_SIDES = { 403 TokenType.LEFT, 404 TokenType.RIGHT, 405 TokenType.FULL, 406 } 407 408 JOIN_KINDS = { 409 TokenType.INNER, 410 TokenType.OUTER, 411 TokenType.CROSS, 412 TokenType.SEMI, 413 TokenType.ANTI, 414 } 415 416 JOIN_HINTS: t.Set[str] = set() 417 418 LAMBDAS = { 419 TokenType.ARROW: lambda self, expressions: self.expression( 420 exp.Lambda, 421 this=self._replace_lambda( 422 self._parse_conjunction(), 423 {node.name for node in expressions}, 424 ), 425 expressions=expressions, 426 ), 427 TokenType.FARROW: lambda self, expressions: self.expression( 428 exp.Kwarg, 429 this=exp.var(expressions[0].name), 430 expression=self._parse_conjunction(), 431 ), 432 } 433 434 COLUMN_OPERATORS = { 435 TokenType.DOT: None, 436 TokenType.DCOLON: lambda self, this, to: self.expression( 437 exp.Cast if self.STRICT_CAST else exp.TryCast, 438 this=this, 439 to=to, 440 ), 441 TokenType.ARROW: lambda self, this, path: self.expression( 442 exp.JSONExtract, 443 this=this, 444 expression=path, 445 ), 446 TokenType.DARROW: lambda self, this, path: self.expression( 447 exp.JSONExtractScalar, 448 this=this, 449 expression=path, 450 ), 451 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 452 exp.JSONBExtract, 453 this=this, 454 expression=path, 455 ), 456 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 457 exp.JSONBExtractScalar, 458 this=this, 459 expression=path, 460 ), 461 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 462 exp.JSONBContains, 463 this=this, 464 expression=key, 465 ), 466 } 467 468 EXPRESSION_PARSERS = { 469 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"), 470 exp.Column: lambda self: self._parse_column(), 471 exp.Condition: lambda self: self._parse_conjunction(), 472 exp.DataType: lambda self: self._parse_types(), 473 exp.Expression: lambda self: self._parse_statement(), 474 exp.From: lambda self: self._parse_from(), 475 exp.Group: lambda self: self._parse_group(), 476 exp.Having: lambda self: self._parse_having(), 477 exp.Identifier: lambda self: self._parse_id_var(), 478 exp.Join: lambda self: self._parse_join(), 479 exp.Lambda: lambda self: self._parse_lambda(), 480 exp.Lateral: lambda self: self._parse_lateral(), 481 exp.Limit: lambda self: self._parse_limit(), 482 exp.Offset: lambda self: self._parse_offset(), 483 exp.Order: lambda self: self._parse_order(), 484 exp.Ordered: lambda self: self._parse_ordered(), 485 exp.Properties: lambda self: self._parse_properties(), 486 exp.Qualify: lambda self: self._parse_qualify(), 487 exp.Returning: lambda self: self._parse_returning(), 488 exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"), 489 exp.Table: lambda self: self._parse_table_parts(), 
490 exp.TableAlias: lambda self: self._parse_table_alias(), 491 exp.Where: lambda self: self._parse_where(), 492 exp.Window: lambda self: self._parse_named_window(), 493 exp.With: lambda self: self._parse_with(), 494 "JOIN_TYPE": lambda self: self._parse_join_parts(), 495 } 496 497 STATEMENT_PARSERS = { 498 TokenType.ALTER: lambda self: self._parse_alter(), 499 TokenType.BEGIN: lambda self: self._parse_transaction(), 500 TokenType.CACHE: lambda self: self._parse_cache(), 501 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 502 TokenType.COMMENT: lambda self: self._parse_comment(), 503 TokenType.CREATE: lambda self: self._parse_create(), 504 TokenType.DELETE: lambda self: self._parse_delete(), 505 TokenType.DESC: lambda self: self._parse_describe(), 506 TokenType.DESCRIBE: lambda self: self._parse_describe(), 507 TokenType.DROP: lambda self: self._parse_drop(), 508 TokenType.END: lambda self: self._parse_commit_or_rollback(), 509 TokenType.FROM: lambda self: exp.select("*").from_( 510 t.cast(exp.From, self._parse_from(skip_from_token=True)) 511 ), 512 TokenType.INSERT: lambda self: self._parse_insert(), 513 TokenType.LOAD: lambda self: self._parse_load(), 514 TokenType.MERGE: lambda self: self._parse_merge(), 515 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 516 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 517 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 518 TokenType.SET: lambda self: self._parse_set(), 519 TokenType.UNCACHE: lambda self: self._parse_uncache(), 520 TokenType.UPDATE: lambda self: self._parse_update(), 521 TokenType.USE: lambda self: self.expression( 522 exp.Use, 523 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 524 and exp.var(self._prev.text), 525 this=self._parse_table(schema=False), 526 ), 527 } 528 529 UNARY_PARSERS = { 530 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 531 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 532 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 533 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 534 } 535 536 PRIMARY_PARSERS = { 537 TokenType.STRING: lambda self, token: self.expression( 538 exp.Literal, this=token.text, is_string=True 539 ), 540 TokenType.NUMBER: lambda self, token: self.expression( 541 exp.Literal, this=token.text, is_string=False 542 ), 543 TokenType.STAR: lambda self, _: self.expression( 544 exp.Star, 545 **{"except": self._parse_except(), "replace": self._parse_replace()}, 546 ), 547 TokenType.NULL: lambda self, _: self.expression(exp.Null), 548 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 549 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 550 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 551 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 552 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 553 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 554 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 555 exp.National, this=token.text 556 ), 557 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 558 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 559 } 560 561 
PLACEHOLDER_PARSERS = { 562 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 563 TokenType.PARAMETER: lambda self: self._parse_parameter(), 564 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 565 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 566 else None, 567 } 568 569 RANGE_PARSERS = { 570 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 571 TokenType.GLOB: binary_range_parser(exp.Glob), 572 TokenType.ILIKE: binary_range_parser(exp.ILike), 573 TokenType.IN: lambda self, this: self._parse_in(this), 574 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 575 TokenType.IS: lambda self, this: self._parse_is(this), 576 TokenType.LIKE: binary_range_parser(exp.Like), 577 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 578 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 579 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 580 } 581 582 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 583 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 584 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 585 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 586 "CHARACTER SET": lambda self: self._parse_character_set(), 587 "CHECKSUM": lambda self: self._parse_checksum(), 588 "CLUSTER": lambda self: self._parse_cluster(), 589 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 590 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 591 "COPY": lambda self: self._parse_copy_property(), 592 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 593 "DEFINER": lambda self: self._parse_definer(), 594 "DETERMINISTIC": lambda self: self.expression( 595 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 596 ), 597 "DISTKEY": lambda self: self._parse_distkey(), 598 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 599 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 600 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 601 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 602 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 603 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 604 "FREESPACE": lambda self: self._parse_freespace(), 605 "IMMUTABLE": lambda self: self.expression( 606 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 607 ), 608 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 609 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 610 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 611 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 612 "LIKE": lambda self: self._parse_create_like(), 613 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 614 "LOCK": lambda self: self._parse_locking(), 615 "LOCKING": lambda self: self._parse_locking(), 616 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 617 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 618 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 619 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 620 "NO": lambda self: self._parse_no_property(), 621 "ON": lambda self: self._parse_on_property(), 622 "ORDER BY": lambda self: 
self._parse_order(skip_order_token=True), 623 "PARTITION BY": lambda self: self._parse_partitioned_by(), 624 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 625 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 626 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 627 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 628 "RETURNS": lambda self: self._parse_returns(), 629 "ROW": lambda self: self._parse_row(), 630 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 631 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 632 "SETTINGS": lambda self: self.expression( 633 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 634 ), 635 "SORTKEY": lambda self: self._parse_sortkey(), 636 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 637 "STABLE": lambda self: self.expression( 638 exp.StabilityProperty, this=exp.Literal.string("STABLE") 639 ), 640 "STORED": lambda self: self._parse_stored(), 641 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 642 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 643 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 644 "TO": lambda self: self._parse_to_table(), 645 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 646 "TTL": lambda self: self._parse_ttl(), 647 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 648 "VOLATILE": lambda self: self._parse_volatile_property(), 649 "WITH": lambda self: self._parse_with_property(), 650 } 651 652 CONSTRAINT_PARSERS = { 653 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 654 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 655 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 656 "CHARACTER SET": lambda self: self.expression( 657 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 658 ), 659 "CHECK": lambda self: self.expression( 660 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 661 ), 662 "COLLATE": lambda self: self.expression( 663 exp.CollateColumnConstraint, this=self._parse_var() 664 ), 665 "COMMENT": lambda self: self.expression( 666 exp.CommentColumnConstraint, this=self._parse_string() 667 ), 668 "COMPRESS": lambda self: self._parse_compress(), 669 "DEFAULT": lambda self: self.expression( 670 exp.DefaultColumnConstraint, this=self._parse_bitwise() 671 ), 672 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 673 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 674 "FORMAT": lambda self: self.expression( 675 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 676 ), 677 "GENERATED": lambda self: self._parse_generated_as_identity(), 678 "IDENTITY": lambda self: self._parse_auto_increment(), 679 "INLINE": lambda self: self._parse_inline(), 680 "LIKE": lambda self: self._parse_create_like(), 681 "NOT": lambda self: self._parse_not_constraint(), 682 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 683 "ON": lambda self: self._match(TokenType.UPDATE) 684 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 685 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 686 "PRIMARY KEY": lambda self: self._parse_primary_key(), 687 "REFERENCES": lambda self: self._parse_references(match=False), 688 "TITLE": lambda self: 
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}
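    # QUERY_MODIFIER_PARSERS is consumed in insertion order by
    # _parse_query_modifiers, so the dict above effectively encodes the
    # grammar's clause order (joins before WHERE before GROUP BY, and so on).
    # Each entry either returns a clause node to attach or None. Sketch:
    #
    #     >>> import sqlglot
    #     >>> ast = sqlglot.parse_one("SELECT * FROM t WHERE x = 1 LIMIT 5")
    #     >>> bool(ast.args.get("where")), bool(ast.args.get("limit"))
    #     (True, True)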
    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    STRICT_CAST = True

    CONCAT_NULL_OUTPUTS_STRING = False  # A NULL arg in CONCAT yields NULL by default

    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
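    # End-to-end, parse() sits downstream of the tokenizer: one token list in,
    # one syntax tree per statement out. A minimal sketch with the base
    # Tokenizer/Parser pair (dialects normally wire these together for you):
    #
    #     >>> from sqlglot.tokens import Tokenizer
    #     >>> tokens = Tokenizer().tokenize("SELECT 1; SELECT 2")
    #     >>> [e.sql() for e in Parser().parse(tokens)]
    #     ['SELECT 1', 'SELECT 2']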
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
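    # The error plumbing above hinges on ErrorLevel: IMMEDIATE raises inside
    # raise_error itself, RAISE accumulates and throws from check_errors,
    # WARN only logs, and IGNORE skips validation entirely. A sketch of the
    # deferred mode (output elided):
    #
    #     >>> from sqlglot.tokens import Tokenizer
    #     >>> p = Parser(error_level=ErrorLevel.RAISE, max_errors=2)
    #     >>> p.parse(Tokenizer().tokenize("SELECT 1 +"))  # doctest: +SKIP
    #     Traceback (most recent call last):
    #     ParseError: ...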
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)
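    # expression() is the factory every _parse_* helper funnels through: it
    # instantiates the node, attaches any comments buffered from the token
    # stream, and validates mandatory args. Building a node by hand goes
    # through the same path:
    #
    #     >>> Parser().expression(exp.Not, this=exp.column("x")).sql()
    #     'NOT x'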
    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
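    # Like _parse_comment above, _parse_drop degrades gracefully: when the
    # DROP target isn't a known creatable it falls back to a generic
    # exp.Command so unknown syntax still round-trips. For a recognized kind:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("DROP TABLE IF EXISTS db.t").sql()
    #     'DROP TABLE IF EXISTS db.t'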
    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )
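    # The extend_props closure lets _parse_create accumulate properties from
    # every syntactic position (POST_CREATE, POST_NAME, POST_SCHEMA, ...)
    # into a single exp.Properties node. For example, TEMPORARY is picked up
    # in the POST_CREATE slot here:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("CREATE TEMPORARY TABLE t AS SELECT 1").sql()
    #     'CREATE TEMPORARY TABLE t AS SELECT 1'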
    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")
    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> t.Optional[exp.Cluster]:
        if not self._match_text_seq("BY"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )
    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )
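    # Note that "PARTITION BY", "PARTITIONED BY" and "PARTITIONED_BY" all map
    # to _parse_partitioned_by in PROPERTY_PARSERS, so the dialect-specific
    # spellings normalize to one exp.PartitionedByProperty. A Hive-flavored
    # round trip should look like this:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one(
    #     ...     "CREATE TABLE t (x INT) PARTITIONED BY (y STRING)", read="hive"
    #     ... ).sql("hive")
    #     'CREATE TABLE t (x INT) PARTITIONED BY (y STRING)'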
    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()
    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )

    def _parse_update(self) -> exp.Update:
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
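    # _parse_value accepts both parenthesized tuples and Presto's bare form,
    # so "VALUES 1, 2" (one column, two rows) and "VALUES (1, 2)" (one row,
    # two columns) yield different tuple shapes. The bare form is normalized
    # on output, roughly like so:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT * FROM (VALUES 1, 2) AS t(x)", read="presto").sql()
    #     'SELECT * FROM (VALUES (1), (2)) AS t(x)'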
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
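    # _parse_with feeds the first branch of _parse_select: the CTE list is
    # parsed eagerly, then attached to whatever statement follows via its
    # "with" arg. For instance:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("WITH c AS (SELECT 1 AS x) SELECT x FROM c").sql()
    #     'WITH c AS (SELECT 1 AS x) SELECT x FROM c'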
    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    if key == "limit":
                        offset = expression.args.pop("offset", None)
                        if offset:
                            this.set("offset", exp.Offset(expression=offset))
                    this.set(key, expression)
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, modifiers: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        comments = self._prev_comments
        this = self._parse_table()

        return self.expression(
            exp.From,
            comments=comments,
            this=self._parse_query_modifiers(this) if modifiers else this,
        )
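    # _parse_query_modifiers also pops a LIMIT's embedded offset (the MySQL
    # "LIMIT offset, count" form) back out into a standalone OFFSET arg, so
    # both spellings produce the same tree shape:
    #
    #     >>> import sqlglot
    #     >>> ast = sqlglot.parse_one("SELECT x FROM t LIMIT 5, 10", read="mysql")
    #     >>> bool(ast.args.get("limit")), bool(ast.args.get("offset"))
    #     (True, True)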
self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2074 text = "ALL ROWS PER MATCH" 2075 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2076 text += f" SHOW EMPTY MATCHES" 2077 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2078 text += f" OMIT EMPTY MATCHES" 2079 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2080 text += f" WITH UNMATCHED ROWS" 2081 rows = exp.var(text) 2082 else: 2083 rows = None 2084 2085 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2086 text = "AFTER MATCH SKIP" 2087 if self._match_text_seq("PAST", "LAST", "ROW"): 2088 text += f" PAST LAST ROW" 2089 elif self._match_text_seq("TO", "NEXT", "ROW"): 2090 text += f" TO NEXT ROW" 2091 elif self._match_text_seq("TO", "FIRST"): 2092 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2093 elif self._match_text_seq("TO", "LAST"): 2094 text += f" TO LAST {self._advance_any().text}" # type: ignore 2095 after = exp.var(text) 2096 else: 2097 after = None 2098 2099 if self._match_text_seq("PATTERN"): 2100 self._match_l_paren() 2101 2102 if not self._curr: 2103 self.raise_error("Expecting )", self._curr) 2104 2105 paren = 1 2106 start = self._curr 2107 2108 while self._curr and paren > 0: 2109 if self._curr.token_type == TokenType.L_PAREN: 2110 paren += 1 2111 if self._curr.token_type == TokenType.R_PAREN: 2112 paren -= 1 2113 2114 end = self._prev 2115 self._advance() 2116 2117 if paren > 0: 2118 self.raise_error("Expecting )", self._curr) 2119 2120 pattern = exp.var(self._find_sql(start, end)) 2121 else: 2122 pattern = None 2123 2124 define = ( 2125 self._parse_csv( 2126 lambda: self.expression( 2127 exp.Alias, 2128 alias=self._parse_id_var(any_token=True), 2129 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2130 ) 2131 ) 2132 if self._match_text_seq("DEFINE") 2133 else None 2134 ) 2135 2136 self._match_r_paren() 2137 2138 return self.expression( 2139 exp.MatchRecognize, 2140 partition_by=partition, 2141 order=order, 2142 measures=measures, 2143 rows=rows, 2144 after=after, 2145 pattern=pattern, 2146 define=define, 2147 alias=self._parse_table_alias(), 2148 ) 2149 2150 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2151 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2152 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2153 2154 if outer_apply or cross_apply: 2155 this = self._parse_select(table=True) 2156 view = None 2157 outer = not cross_apply 2158 elif self._match(TokenType.LATERAL): 2159 this = self._parse_select(table=True) 2160 view = self._match(TokenType.VIEW) 2161 outer = self._match(TokenType.OUTER) 2162 else: 2163 return None 2164 2165 if not this: 2166 this = self._parse_function() or self._parse_id_var(any_token=False) 2167 while self._match(TokenType.DOT): 2168 this = exp.Dot( 2169 this=this, 2170 expression=self._parse_function() or self._parse_id_var(any_token=False), 2171 ) 2172 2173 if view: 2174 table = self._parse_id_var(any_token=False) 2175 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2176 table_alias: t.Optional[exp.TableAlias] = self.expression( 2177 exp.TableAlias, this=table, columns=columns 2178 ) 2179 elif isinstance(this, exp.Subquery) and this.alias: 2180 # Ensures parity between the Subquery's and the Lateral's "alias" args 2181 table_alias = this.args["alias"].copy() 2182 else: 2183 table_alias = self._parse_table_alias() 2184 2185 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2186 2187 def _parse_join_parts( 2188 
    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
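    # _parse_table_parts assigns dot-separated parts right to left as table,
    # db, catalog, nesting anything deeper inside exp.Dot. So a three-part
    # name decomposes like this:
    #
    #     >>> import sqlglot
    #     >>> tbl = sqlglot.parse_one("SELECT * FROM c.d.t").find(exp.Table)
    #     >>> tbl.catalog, tbl.db, tbl.name
    #     ('c', 'd', 't')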
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        self._match(TokenType.L_PAREN)

        num = self._parse_number()

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        else:
            size = num

        self._match(TokenType.R_PAREN)

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
        return list(iter(self._parse_pivot, None))

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
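    # PREFIXED_PIVOT_COLUMNS and IDENTIFY_PIVOT_STRINGS (class flags above)
    # control how the implicit output columns of a PIVOT are named: each IN
    # value is combined with each aggregation alias from _pivot_column_names,
    # either as "<alias>_<value>" (prefixed) or "<value>_<alias>". Plain
    # string math:
    #
    #     >>> name, field_name = "sum_sales", "2023"
    #     >>> f"{name}_{field_name}", f"{field_name}_{name}"
    #     ('sum_sales_2023', '2023_sum_sales')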
columns.append(exp.to_identifier(name)) 2535 2536 pivot.set("columns", columns) 2537 2538 return pivot 2539 2540 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2541 return [agg.alias for agg in aggregations] 2542 2543 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2544 if not skip_where_token and not self._match(TokenType.WHERE): 2545 return None 2546 2547 return self.expression( 2548 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2549 ) 2550 2551 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2552 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2553 return None 2554 2555 elements = defaultdict(list) 2556 2557 while True: 2558 expressions = self._parse_csv(self._parse_conjunction) 2559 if expressions: 2560 elements["expressions"].extend(expressions) 2561 2562 grouping_sets = self._parse_grouping_sets() 2563 if grouping_sets: 2564 elements["grouping_sets"].extend(grouping_sets) 2565 2566 rollup = None 2567 cube = None 2568 totals = None 2569 2570 with_ = self._match(TokenType.WITH) 2571 if self._match(TokenType.ROLLUP): 2572 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2573 elements["rollup"].extend(ensure_list(rollup)) 2574 2575 if self._match(TokenType.CUBE): 2576 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2577 elements["cube"].extend(ensure_list(cube)) 2578 2579 if self._match_text_seq("TOTALS"): 2580 totals = True 2581 elements["totals"] = True # type: ignore 2582 2583 if not (grouping_sets or rollup or cube or totals): 2584 break 2585 2586 return self.expression(exp.Group, **elements) # type: ignore 2587 2588 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2589 if not self._match(TokenType.GROUPING_SETS): 2590 return None 2591 2592 return self._parse_wrapped_csv(self._parse_grouping_set) 2593 2594 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2595 if self._match(TokenType.L_PAREN): 2596 grouping_set = self._parse_csv(self._parse_column) 2597 self._match_r_paren() 2598 return self.expression(exp.Tuple, expressions=grouping_set) 2599 2600 return self._parse_column() 2601 2602 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2603 if not skip_having_token and not self._match(TokenType.HAVING): 2604 return None 2605 return self.expression(exp.Having, this=self._parse_conjunction()) 2606 2607 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2608 if not self._match(TokenType.QUALIFY): 2609 return None 2610 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2611 2612 def _parse_order( 2613 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2614 ) -> t.Optional[exp.Expression]: 2615 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2616 return this 2617 2618 return self.expression( 2619 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2620 ) 2621 2622 def _parse_sort(self, exp_class: t.Type[E], *texts: str) -> t.Optional[E]: 2623 if not self._match_text_seq(*texts): 2624 return None 2625 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2626 2627 def _parse_ordered(self) -> exp.Ordered: 2628 this = self._parse_conjunction() 2629 self._match(TokenType.ASC) 2630 2631 is_desc = self._match(TokenType.DESC) 2632 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2633 is_nulls_last = self._match_text_seq("NULLS", 
"LAST") 2634 desc = is_desc or False 2635 asc = not desc 2636 nulls_first = is_nulls_first or False 2637 explicitly_null_ordered = is_nulls_first or is_nulls_last 2638 2639 if ( 2640 not explicitly_null_ordered 2641 and ( 2642 (asc and self.NULL_ORDERING == "nulls_are_small") 2643 or (desc and self.NULL_ORDERING != "nulls_are_small") 2644 ) 2645 and self.NULL_ORDERING != "nulls_are_last" 2646 ): 2647 nulls_first = True 2648 2649 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2650 2651 def _parse_limit( 2652 self, this: t.Optional[exp.Expression] = None, top: bool = False 2653 ) -> t.Optional[exp.Expression]: 2654 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2655 limit_paren = self._match(TokenType.L_PAREN) 2656 expression = self._parse_number() if top else self._parse_term() 2657 2658 if self._match(TokenType.COMMA): 2659 offset = expression 2660 expression = self._parse_term() 2661 else: 2662 offset = None 2663 2664 limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset) 2665 2666 if limit_paren: 2667 self._match_r_paren() 2668 2669 return limit_exp 2670 2671 if self._match(TokenType.FETCH): 2672 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2673 direction = self._prev.text if direction else "FIRST" 2674 2675 count = self._parse_number() 2676 percent = self._match(TokenType.PERCENT) 2677 2678 self._match_set((TokenType.ROW, TokenType.ROWS)) 2679 2680 only = self._match_text_seq("ONLY") 2681 with_ties = self._match_text_seq("WITH", "TIES") 2682 2683 if only and with_ties: 2684 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2685 2686 return self.expression( 2687 exp.Fetch, 2688 direction=direction, 2689 count=count, 2690 percent=percent, 2691 with_ties=with_ties, 2692 ) 2693 2694 return this 2695 2696 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2697 if not self._match(TokenType.OFFSET): 2698 return this 2699 2700 count = self._parse_number() 2701 self._match_set((TokenType.ROW, TokenType.ROWS)) 2702 return self.expression(exp.Offset, this=this, expression=count) 2703 2704 def _parse_locks(self) -> t.List[exp.Lock]: 2705 locks = [] 2706 while True: 2707 if self._match_text_seq("FOR", "UPDATE"): 2708 update = True 2709 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2710 "LOCK", "IN", "SHARE", "MODE" 2711 ): 2712 update = False 2713 else: 2714 break 2715 2716 expressions = None 2717 if self._match_text_seq("OF"): 2718 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2719 2720 wait: t.Optional[bool | exp.Expression] = None 2721 if self._match_text_seq("NOWAIT"): 2722 wait = True 2723 elif self._match_text_seq("WAIT"): 2724 wait = self._parse_primary() 2725 elif self._match_text_seq("SKIP", "LOCKED"): 2726 wait = False 2727 2728 locks.append( 2729 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2730 ) 2731 2732 return locks 2733 2734 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2735 if not self._match_set(self.SET_OPERATIONS): 2736 return this 2737 2738 token_type = self._prev.token_type 2739 2740 if token_type == TokenType.UNION: 2741 expression = exp.Union 2742 elif token_type == TokenType.EXCEPT: 2743 expression = exp.Except 2744 else: 2745 expression = exp.Intersect 2746 2747 return self.expression( 2748 expression, 2749 this=this, 2750 distinct=self._match(TokenType.DISTINCT) or not 
self._match(TokenType.ALL), 2751 expression=self._parse_set_operations(self._parse_select(nested=True)), 2752 ) 2753 2754 def _parse_expression(self) -> t.Optional[exp.Expression]: 2755 return self._parse_alias(self._parse_conjunction()) 2756 2757 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2758 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2759 2760 def _parse_equality(self) -> t.Optional[exp.Expression]: 2761 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2762 2763 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2764 return self._parse_tokens(self._parse_range, self.COMPARISON) 2765 2766 def _parse_range(self) -> t.Optional[exp.Expression]: 2767 this = self._parse_bitwise() 2768 negate = self._match(TokenType.NOT) 2769 2770 if self._match_set(self.RANGE_PARSERS): 2771 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2772 if not expression: 2773 return this 2774 2775 this = expression 2776 elif self._match(TokenType.ISNULL): 2777 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2778 2779 # Postgres supports ISNULL and NOTNULL for conditions. 2780 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2781 if self._match(TokenType.NOTNULL): 2782 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2783 this = self.expression(exp.Not, this=this) 2784 2785 if negate: 2786 this = self.expression(exp.Not, this=this) 2787 2788 if self._match(TokenType.IS): 2789 this = self._parse_is(this) 2790 2791 return this 2792 2793 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2794 index = self._index - 1 2795 negate = self._match(TokenType.NOT) 2796 2797 if self._match_text_seq("DISTINCT", "FROM"): 2798 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2799 return self.expression(klass, this=this, expression=self._parse_expression()) 2800 2801 expression = self._parse_null() or self._parse_boolean() 2802 if not expression: 2803 self._retreat(index) 2804 return None 2805 2806 this = self.expression(exp.Is, this=this, expression=expression) 2807 return self.expression(exp.Not, this=this) if negate else this 2808 2809 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2810 unnest = self._parse_unnest(with_alias=False) 2811 if unnest: 2812 this = self.expression(exp.In, this=this, unnest=unnest) 2813 elif self._match(TokenType.L_PAREN): 2814 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2815 2816 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2817 this = self.expression(exp.In, this=this, query=expressions[0]) 2818 else: 2819 this = self.expression(exp.In, this=this, expressions=expressions) 2820 2821 self._match_r_paren(this) 2822 else: 2823 this = self.expression(exp.In, this=this, field=self._parse_field()) 2824 2825 return this 2826 2827 def _parse_between(self, this: exp.Expression) -> exp.Between: 2828 low = self._parse_bitwise() 2829 self._match(TokenType.AND) 2830 high = self._parse_bitwise() 2831 return self.expression(exp.Between, this=this, low=low, high=high) 2832 2833 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2834 if not self._match(TokenType.ESCAPE): 2835 return this 2836 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2837 2838 def _parse_interval(self) -> t.Optional[exp.Interval]: 2839 if not self._match(TokenType.INTERVAL): 2840 return None 2841 2842 this = 
self._parse_primary() or self._parse_term() 2843 unit = self._parse_function() or self._parse_var() 2844 2845 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2846 # each INTERVAL expression into this canonical form so it's easy to transpile 2847 if this and this.is_number: 2848 this = exp.Literal.string(this.name) 2849 elif this and this.is_string: 2850 parts = this.name.split() 2851 2852 if len(parts) == 2: 2853 if unit: 2854 # this is not actually a unit, it's something else 2855 unit = None 2856 self._retreat(self._index - 1) 2857 else: 2858 this = exp.Literal.string(parts[0]) 2859 unit = self.expression(exp.Var, this=parts[1]) 2860 2861 return self.expression(exp.Interval, this=this, unit=unit) 2862 2863 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2864 this = self._parse_term() 2865 2866 while True: 2867 if self._match_set(self.BITWISE): 2868 this = self.expression( 2869 self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term() 2870 ) 2871 elif self._match_pair(TokenType.LT, TokenType.LT): 2872 this = self.expression( 2873 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2874 ) 2875 elif self._match_pair(TokenType.GT, TokenType.GT): 2876 this = self.expression( 2877 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2878 ) 2879 else: 2880 break 2881 2882 return this 2883 2884 def _parse_term(self) -> t.Optional[exp.Expression]: 2885 return self._parse_tokens(self._parse_factor, self.TERM) 2886 2887 def _parse_factor(self) -> t.Optional[exp.Expression]: 2888 return self._parse_tokens(self._parse_unary, self.FACTOR) 2889 2890 def _parse_unary(self) -> t.Optional[exp.Expression]: 2891 if self._match_set(self.UNARY_PARSERS): 2892 return self.UNARY_PARSERS[self._prev.token_type](self) 2893 return self._parse_at_time_zone(self._parse_type()) 2894 2895 def _parse_type(self) -> t.Optional[exp.Expression]: 2896 interval = self._parse_interval() 2897 if interval: 2898 return interval 2899 2900 index = self._index 2901 data_type = self._parse_types(check_func=True) 2902 this = self._parse_column() 2903 2904 if data_type: 2905 if isinstance(this, exp.Literal): 2906 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 2907 if parser: 2908 return parser(self, this, data_type) 2909 return self.expression(exp.Cast, this=this, to=data_type) 2910 if not data_type.expressions: 2911 self._retreat(index) 2912 return self._parse_column() 2913 return self._parse_column_ops(data_type) 2914 2915 return this 2916 2917 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 2918 this = self._parse_type() 2919 if not this: 2920 return None 2921 2922 return self.expression( 2923 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 2924 ) 2925 2926 def _parse_types( 2927 self, check_func: bool = False, schema: bool = False 2928 ) -> t.Optional[exp.Expression]: 2929 index = self._index 2930 2931 prefix = self._match_text_seq("SYSUDTLIB", ".") 2932 2933 if not self._match_set(self.TYPE_TOKENS): 2934 return None 2935 2936 type_token = self._prev.token_type 2937 2938 if type_token == TokenType.PSEUDO_TYPE: 2939 return self.expression(exp.PseudoType, this=self._prev.text) 2940 2941 nested = type_token in self.NESTED_TYPE_TOKENS 2942 is_struct = type_token == TokenType.STRUCT 2943 expressions = None 2944 maybe_func = False 2945 2946 if self._match(TokenType.L_PAREN): 2947 if is_struct: 2948 expressions = self._parse_csv(self._parse_struct_types) 2949 elif nested: 2950 expressions = self._parse_csv( 2951 
lambda: self._parse_types(check_func=check_func, schema=schema) 2952 ) 2953 elif type_token in self.ENUM_TYPE_TOKENS: 2954 expressions = self._parse_csv(self._parse_primary) 2955 else: 2956 expressions = self._parse_csv(self._parse_type_size) 2957 2958 if not expressions or not self._match(TokenType.R_PAREN): 2959 self._retreat(index) 2960 return None 2961 2962 maybe_func = True 2963 2964 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2965 this = exp.DataType( 2966 this=exp.DataType.Type.ARRAY, 2967 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2968 nested=True, 2969 ) 2970 2971 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2972 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 2973 2974 return this 2975 2976 if self._match(TokenType.L_BRACKET): 2977 self._retreat(index) 2978 return None 2979 2980 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 2981 if nested and self._match(TokenType.LT): 2982 if is_struct: 2983 expressions = self._parse_csv(self._parse_struct_types) 2984 else: 2985 expressions = self._parse_csv( 2986 lambda: self._parse_types(check_func=check_func, schema=schema) 2987 ) 2988 2989 if not self._match(TokenType.GT): 2990 self.raise_error("Expecting >") 2991 2992 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 2993 values = self._parse_csv(self._parse_conjunction) 2994 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 2995 2996 value: t.Optional[exp.Expression] = None 2997 if type_token in self.TIMESTAMPS: 2998 if self._match_text_seq("WITH", "TIME", "ZONE"): 2999 maybe_func = False 3000 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 3001 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3002 maybe_func = False 3003 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3004 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3005 maybe_func = False 3006 elif type_token == TokenType.INTERVAL: 3007 unit = self._parse_var() 3008 3009 if not unit: 3010 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3011 else: 3012 value = self.expression(exp.Interval, unit=unit) 3013 3014 if maybe_func and check_func: 3015 index2 = self._index 3016 peek = self._parse_string() 3017 3018 if not peek: 3019 self._retreat(index) 3020 return None 3021 3022 self._retreat(index2) 3023 3024 if value: 3025 return value 3026 3027 return exp.DataType( 3028 this=exp.DataType.Type[type_token.value.upper()], 3029 expressions=expressions, 3030 nested=nested, 3031 values=values, 3032 prefix=prefix, 3033 ) 3034 3035 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3036 this = self._parse_type() or self._parse_id_var() 3037 self._match(TokenType.COLON) 3038 return self._parse_column_def(this) 3039 3040 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3041 if not self._match_text_seq("AT", "TIME", "ZONE"): 3042 return this 3043 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3044 3045 def _parse_column(self) -> t.Optional[exp.Expression]: 3046 this = self._parse_field() 3047 if isinstance(this, exp.Identifier): 3048 this = self.expression(exp.Column, this=this) 3049 elif not this: 3050 return self._parse_bracket(this) 3051 return self._parse_column_ops(this) 3052 3053 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3054 this = self._parse_bracket(this) 
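# Illustrative note (not in the original source): the loop below consumes chained column operators, e.g. the dots in a.b.c, ::TYPE casts via DCOLON, and any dialect-specific operators registered in COLUMN_OPERATORS.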
3055 3056 while self._match_set(self.COLUMN_OPERATORS): 3057 op_token = self._prev.token_type 3058 op = self.COLUMN_OPERATORS.get(op_token) 3059 3060 if op_token == TokenType.DCOLON: 3061 field = self._parse_types() 3062 if not field: 3063 self.raise_error("Expected type") 3064 elif op and self._curr: 3065 self._advance() 3066 value = self._prev.text 3067 field = ( 3068 exp.Literal.number(value) 3069 if self._prev.token_type == TokenType.NUMBER 3070 else exp.Literal.string(value) 3071 ) 3072 else: 3073 field = self._parse_field(anonymous_func=True, any_token=True) 3074 3075 if isinstance(field, exp.Func): 3076 # bigquery allows function calls like x.y.count(...) 3077 # SAFE.SUBSTR(...) 3078 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3079 this = self._replace_columns_with_dots(this) 3080 3081 if op: 3082 this = op(self, this, field) 3083 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3084 this = self.expression( 3085 exp.Column, 3086 this=field, 3087 table=this.this, 3088 db=this.args.get("table"), 3089 catalog=this.args.get("db"), 3090 ) 3091 else: 3092 this = self.expression(exp.Dot, this=this, expression=field) 3093 this = self._parse_bracket(this) 3094 return this 3095 3096 def _parse_primary(self) -> t.Optional[exp.Expression]: 3097 if self._match_set(self.PRIMARY_PARSERS): 3098 token_type = self._prev.token_type 3099 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3100 3101 if token_type == TokenType.STRING: 3102 expressions = [primary] 3103 while self._match(TokenType.STRING): 3104 expressions.append(exp.Literal.string(self._prev.text)) 3105 3106 if len(expressions) > 1: 3107 return self.expression(exp.Concat, expressions=expressions) 3108 3109 return primary 3110 3111 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3112 return exp.Literal.number(f"0.{self._prev.text}") 3113 3114 if self._match(TokenType.L_PAREN): 3115 comments = self._prev_comments 3116 query = self._parse_select() 3117 3118 if query: 3119 expressions = [query] 3120 else: 3121 expressions = self._parse_csv(self._parse_expression) 3122 3123 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3124 3125 if isinstance(this, exp.Subqueryable): 3126 this = self._parse_set_operations( 3127 self._parse_subquery(this=this, parse_alias=False) 3128 ) 3129 elif len(expressions) > 1: 3130 this = self.expression(exp.Tuple, expressions=expressions) 3131 else: 3132 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3133 3134 if this: 3135 this.add_comments(comments) 3136 3137 self._match_r_paren(expression=this) 3138 return this 3139 3140 return None 3141 3142 def _parse_field( 3143 self, 3144 any_token: bool = False, 3145 tokens: t.Optional[t.Collection[TokenType]] = None, 3146 anonymous_func: bool = False, 3147 ) -> t.Optional[exp.Expression]: 3148 return ( 3149 self._parse_primary() 3150 or self._parse_function(anonymous=anonymous_func) 3151 or self._parse_id_var(any_token=any_token, tokens=tokens) 3152 ) 3153 3154 def _parse_function( 3155 self, 3156 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3157 anonymous: bool = False, 3158 optional_parens: bool = True, 3159 ) -> t.Optional[exp.Expression]: 3160 if not self._curr: 3161 return None 3162 3163 token_type = self._curr.token_type 3164 3165 if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3166 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3167 3168 if not self._next or self._next.token_type != 
TokenType.L_PAREN: 3169 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3170 self._advance() 3171 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3172 3173 return None 3174 3175 if token_type not in self.FUNC_TOKENS: 3176 return None 3177 3178 this = self._curr.text 3179 upper = this.upper() 3180 self._advance(2) 3181 3182 parser = self.FUNCTION_PARSERS.get(upper) 3183 3184 if parser and not anonymous: 3185 this = parser(self) 3186 else: 3187 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3188 3189 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3190 this = self.expression(subquery_predicate, this=self._parse_select()) 3191 self._match_r_paren() 3192 return this 3193 3194 if functions is None: 3195 functions = self.FUNCTIONS 3196 3197 function = functions.get(upper) 3198 3199 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3200 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3201 3202 if function and not anonymous: 3203 this = self.validate_expression(function(args), args) 3204 else: 3205 this = self.expression(exp.Anonymous, this=this, expressions=args) 3206 3207 self._match_r_paren(this) 3208 return self._parse_window(this) 3209 3210 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3211 return self._parse_column_def(self._parse_id_var()) 3212 3213 def _parse_user_defined_function( 3214 self, kind: t.Optional[TokenType] = None 3215 ) -> t.Optional[exp.Expression]: 3216 this = self._parse_id_var() 3217 3218 while self._match(TokenType.DOT): 3219 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3220 3221 if not self._match(TokenType.L_PAREN): 3222 return this 3223 3224 expressions = self._parse_csv(self._parse_function_parameter) 3225 self._match_r_paren() 3226 return self.expression( 3227 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3228 ) 3229 3230 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3231 literal = self._parse_primary() 3232 if literal: 3233 return self.expression(exp.Introducer, this=token.text, expression=literal) 3234 3235 return self.expression(exp.Identifier, this=token.text) 3236 3237 def _parse_session_parameter(self) -> exp.SessionParameter: 3238 kind = None 3239 this = self._parse_id_var() or self._parse_primary() 3240 3241 if this and self._match(TokenType.DOT): 3242 kind = this.name 3243 this = self._parse_var() or self._parse_primary() 3244 3245 return self.expression(exp.SessionParameter, this=this, kind=kind) 3246 3247 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3248 index = self._index 3249 3250 if self._match(TokenType.L_PAREN): 3251 expressions = self._parse_csv(self._parse_id_var) 3252 3253 if not self._match(TokenType.R_PAREN): 3254 self._retreat(index) 3255 else: 3256 expressions = [self._parse_id_var()] 3257 3258 if self._match_set(self.LAMBDAS): 3259 return self.LAMBDAS[self._prev.token_type](self, expressions) 3260 3261 self._retreat(index) 3262 3263 this: t.Optional[exp.Expression] 3264 3265 if self._match(TokenType.DISTINCT): 3266 this = self.expression( 3267 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3268 ) 3269 else: 3270 this = self._parse_select_or_expression(alias=alias) 3271 3272 if isinstance(this, exp.EQ): 3273 left = this.this 3274 if isinstance(left, exp.Column): 3275 left.replace(exp.var(left.text("this"))) 3276 3277 return 
self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3278 3279 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3280 index = self._index 3281 3282 if not self.errors: 3283 try: 3284 if self._parse_select(nested=True): 3285 return this 3286 except ParseError: 3287 pass 3288 finally: 3289 self.errors.clear() 3290 self._retreat(index) 3291 3292 if not self._match(TokenType.L_PAREN): 3293 return this 3294 3295 args = self._parse_csv( 3296 lambda: self._parse_constraint() 3297 or self._parse_column_def(self._parse_field(any_token=True)) 3298 ) 3299 3300 self._match_r_paren() 3301 return self.expression(exp.Schema, this=this, expressions=args) 3302 3303 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3304 # column defs are not really columns, they're identifiers 3305 if isinstance(this, exp.Column): 3306 this = this.this 3307 3308 kind = self._parse_types(schema=True) 3309 3310 if self._match_text_seq("FOR", "ORDINALITY"): 3311 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3312 3313 constraints = [] 3314 while True: 3315 constraint = self._parse_column_constraint() 3316 if not constraint: 3317 break 3318 constraints.append(constraint) 3319 3320 if not kind and not constraints: 3321 return this 3322 3323 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3324 3325 def _parse_auto_increment( 3326 self, 3327 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3328 start = None 3329 increment = None 3330 3331 if self._match(TokenType.L_PAREN, advance=False): 3332 args = self._parse_wrapped_csv(self._parse_bitwise) 3333 start = seq_get(args, 0) 3334 increment = seq_get(args, 1) 3335 elif self._match_text_seq("START"): 3336 start = self._parse_bitwise() 3337 self._match_text_seq("INCREMENT") 3338 increment = self._parse_bitwise() 3339 3340 if start and increment: 3341 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3342 3343 return exp.AutoIncrementColumnConstraint() 3344 3345 def _parse_compress(self) -> exp.CompressColumnConstraint: 3346 if self._match(TokenType.L_PAREN, advance=False): 3347 return self.expression( 3348 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3349 ) 3350 3351 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3352 3353 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3354 if self._match_text_seq("BY", "DEFAULT"): 3355 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3356 this = self.expression( 3357 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3358 ) 3359 else: 3360 self._match_text_seq("ALWAYS") 3361 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3362 3363 self._match(TokenType.ALIAS) 3364 identity = self._match_text_seq("IDENTITY") 3365 3366 if self._match(TokenType.L_PAREN): 3367 if self._match_text_seq("START", "WITH"): 3368 this.set("start", self._parse_bitwise()) 3369 if self._match_text_seq("INCREMENT", "BY"): 3370 this.set("increment", self._parse_bitwise()) 3371 if self._match_text_seq("MINVALUE"): 3372 this.set("minvalue", self._parse_bitwise()) 3373 if self._match_text_seq("MAXVALUE"): 3374 this.set("maxvalue", self._parse_bitwise()) 3375 3376 if self._match_text_seq("CYCLE"): 3377 this.set("cycle", True) 3378 elif self._match_text_seq("NO", "CYCLE"): 3379 this.set("cycle", 
False) 3380 3381 if not identity: 3382 this.set("expression", self._parse_bitwise()) 3383 3384 self._match_r_paren() 3385 3386 return this 3387 3388 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3389 self._match_text_seq("LENGTH") 3390 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3391 3392 def _parse_not_constraint( 3393 self, 3394 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3395 if self._match_text_seq("NULL"): 3396 return self.expression(exp.NotNullColumnConstraint) 3397 if self._match_text_seq("CASESPECIFIC"): 3398 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3399 return None 3400 3401 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3402 if self._match(TokenType.CONSTRAINT): 3403 this = self._parse_id_var() 3404 else: 3405 this = None 3406 3407 if self._match_texts(self.CONSTRAINT_PARSERS): 3408 return self.expression( 3409 exp.ColumnConstraint, 3410 this=this, 3411 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3412 ) 3413 3414 return this 3415 3416 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3417 if not self._match(TokenType.CONSTRAINT): 3418 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3419 3420 this = self._parse_id_var() 3421 expressions = [] 3422 3423 while True: 3424 constraint = self._parse_unnamed_constraint() or self._parse_function() 3425 if not constraint: 3426 break 3427 expressions.append(constraint) 3428 3429 return self.expression(exp.Constraint, this=this, expressions=expressions) 3430 3431 def _parse_unnamed_constraint( 3432 self, constraints: t.Optional[t.Collection[str]] = None 3433 ) -> t.Optional[exp.Expression]: 3434 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3435 return None 3436 3437 constraint = self._prev.text.upper() 3438 if constraint not in self.CONSTRAINT_PARSERS: 3439 self.raise_error(f"No parser found for schema constraint {constraint}.") 3440 3441 return self.CONSTRAINT_PARSERS[constraint](self) 3442 3443 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3444 self._match_text_seq("KEY") 3445 return self.expression( 3446 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3447 ) 3448 3449 def _parse_key_constraint_options(self) -> t.List[str]: 3450 options = [] 3451 while True: 3452 if not self._curr: 3453 break 3454 3455 if self._match(TokenType.ON): 3456 action = None 3457 on = self._advance_any() and self._prev.text 3458 3459 if self._match_text_seq("NO", "ACTION"): 3460 action = "NO ACTION" 3461 elif self._match_text_seq("CASCADE"): 3462 action = "CASCADE" 3463 elif self._match_pair(TokenType.SET, TokenType.NULL): 3464 action = "SET NULL" 3465 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3466 action = "SET DEFAULT" 3467 else: 3468 self.raise_error("Invalid key constraint") 3469 3470 options.append(f"ON {on} {action}") 3471 elif self._match_text_seq("NOT", "ENFORCED"): 3472 options.append("NOT ENFORCED") 3473 elif self._match_text_seq("DEFERRABLE"): 3474 options.append("DEFERRABLE") 3475 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3476 options.append("INITIALLY DEFERRED") 3477 elif self._match_text_seq("NORELY"): 3478 options.append("NORELY") 3479 elif self._match_text_seq("MATCH", "FULL"): 3480 options.append("MATCH FULL") 3481 else: 3482 break 3483 3484 return options 3485 3486 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3487 if match 
and not self._match(TokenType.REFERENCES): 3488 return None 3489 3490 expressions = None 3491 this = self._parse_id_var() 3492 3493 if self._match(TokenType.L_PAREN, advance=False): 3494 expressions = self._parse_wrapped_id_vars() 3495 3496 options = self._parse_key_constraint_options() 3497 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3498 3499 def _parse_foreign_key(self) -> exp.ForeignKey: 3500 expressions = self._parse_wrapped_id_vars() 3501 reference = self._parse_references() 3502 options = {} 3503 3504 while self._match(TokenType.ON): 3505 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3506 self.raise_error("Expected DELETE or UPDATE") 3507 3508 kind = self._prev.text.lower() 3509 3510 if self._match_text_seq("NO", "ACTION"): 3511 action = "NO ACTION" 3512 elif self._match(TokenType.SET): 3513 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3514 action = "SET " + self._prev.text.upper() 3515 else: 3516 self._advance() 3517 action = self._prev.text.upper() 3518 3519 options[kind] = action 3520 3521 return self.expression( 3522 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3523 ) 3524 3525 def _parse_primary_key( 3526 self, wrapped_optional: bool = False, in_props: bool = False 3527 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3528 desc = ( 3529 self._match_set((TokenType.ASC, TokenType.DESC)) 3530 and self._prev.token_type == TokenType.DESC 3531 ) 3532 3533 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3534 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3535 3536 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3537 options = self._parse_key_constraint_options() 3538 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3539 3540 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3541 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3542 return this 3543 3544 bracket_kind = self._prev.token_type 3545 3546 if self._match(TokenType.COLON): 3547 expressions: t.List[t.Optional[exp.Expression]] = [ 3548 self.expression(exp.Slice, expression=self._parse_conjunction()) 3549 ] 3550 else: 3551 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3552 3553 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3554 if bracket_kind == TokenType.L_BRACE: 3555 this = self.expression(exp.Struct, expressions=expressions) 3556 elif not this or this.name.upper() == "ARRAY": 3557 this = self.expression(exp.Array, expressions=expressions) 3558 else: 3559 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3560 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3561 3562 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3563 self.raise_error("Expected ]") 3564 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3565 self.raise_error("Expected }") 3566 3567 self._add_comments(this) 3568 return self._parse_bracket(this) 3569 3570 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3571 if self._match(TokenType.COLON): 3572 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3573 return this 3574 3575 def _parse_case(self) -> t.Optional[exp.Expression]: 3576 ifs = [] 3577 default = None 3578 3579 expression = 
self._parse_conjunction() 3580 3581 while self._match(TokenType.WHEN): 3582 this = self._parse_conjunction() 3583 self._match(TokenType.THEN) 3584 then = self._parse_conjunction() 3585 ifs.append(self.expression(exp.If, this=this, true=then)) 3586 3587 if self._match(TokenType.ELSE): 3588 default = self._parse_conjunction() 3589 3590 if not self._match(TokenType.END): 3591 self.raise_error("Expected END after CASE", self._prev) 3592 3593 return self._parse_window( 3594 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3595 ) 3596 3597 def _parse_if(self) -> t.Optional[exp.Expression]: 3598 if self._match(TokenType.L_PAREN): 3599 args = self._parse_csv(self._parse_conjunction) 3600 this = self.validate_expression(exp.If.from_arg_list(args), args) 3601 self._match_r_paren() 3602 else: 3603 index = self._index - 1 3604 condition = self._parse_conjunction() 3605 3606 if not condition: 3607 self._retreat(index) 3608 return None 3609 3610 self._match(TokenType.THEN) 3611 true = self._parse_conjunction() 3612 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3613 self._match(TokenType.END) 3614 this = self.expression(exp.If, this=condition, true=true, false=false) 3615 3616 return self._parse_window(this) 3617 3618 def _parse_extract(self) -> exp.Extract: 3619 this = self._parse_function() or self._parse_var() or self._parse_type() 3620 3621 if self._match(TokenType.FROM): 3622 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3623 3624 if not self._match(TokenType.COMMA): 3625 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3626 3627 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3628 3629 def _parse_cast(self, strict: bool) -> exp.Expression: 3630 this = self._parse_conjunction() 3631 3632 if not self._match(TokenType.ALIAS): 3633 if self._match(TokenType.COMMA): 3634 return self.expression( 3635 exp.CastToStrType, this=this, expression=self._parse_string() 3636 ) 3637 else: 3638 self.raise_error("Expected AS after CAST") 3639 3640 to = self._parse_types() 3641 3642 if not to: 3643 self.raise_error("Expected TYPE after CAST") 3644 elif to.this == exp.DataType.Type.CHAR: 3645 if self._match(TokenType.CHARACTER_SET): 3646 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3647 elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT): 3648 fmt = self._parse_string() 3649 3650 return self.expression( 3651 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3652 this=this, 3653 format=exp.Literal.string( 3654 format_time( 3655 fmt.this if fmt else "", 3656 self.FORMAT_MAPPING or self.TIME_MAPPING, 3657 self.FORMAT_TRIE or self.TIME_TRIE, 3658 ) 3659 ), 3660 ) 3661 3662 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3663 3664 def _parse_concat(self) -> t.Optional[exp.Expression]: 3665 args = self._parse_csv(self._parse_conjunction) 3666 if self.CONCAT_NULL_OUTPUTS_STRING: 3667 args = [ 3668 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3669 for arg in args 3670 if arg 3671 ] 3672 3673 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3674 # we find such a call we replace it with its argument. 
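# Illustrative note (not in the original source): CONCAT(x) is therefore reduced to just x, or to COALESCE(CAST(x AS TEXT), '') when CONCAT_NULL_OUTPUTS_STRING rewrote the argument above.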
3675 if len(args) == 1: 3676 return args[0] 3677 3678 return self.expression( 3679 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3680 ) 3681 3682 def _parse_string_agg(self) -> exp.Expression: 3683 expression: t.Optional[exp.Expression] 3684 3685 if self._match(TokenType.DISTINCT): 3686 args = self._parse_csv(self._parse_conjunction) 3687 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3688 else: 3689 args = self._parse_csv(self._parse_conjunction) 3690 expression = seq_get(args, 0) 3691 3692 index = self._index 3693 if not self._match(TokenType.R_PAREN): 3694 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3695 order = self._parse_order(this=expression) 3696 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3697 3698 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3699 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3700 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3701 if not self._match_text_seq("WITHIN", "GROUP"): 3702 self._retreat(index) 3703 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3704 3705 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3706 order = self._parse_order(this=expression) 3707 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3708 3709 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3710 to: t.Optional[exp.Expression] 3711 this = self._parse_bitwise() 3712 3713 if self._match(TokenType.USING): 3714 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3715 elif self._match(TokenType.COMMA): 3716 to = self._parse_bitwise() 3717 else: 3718 to = None 3719 3720 # Swap the argument order if needed to produce the correct AST 3721 if self.CONVERT_TYPE_FIRST: 3722 this, to = to, this 3723 3724 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3725 3726 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3727 """ 3728 There are generally two variants of the DECODE function: 3729 3730 - DECODE(bin, charset) 3731 - DECODE(expression, search, result [, search, result] ... [, default]) 3732 3733 The second variant will always be parsed into a CASE expression. Note that NULL 3734 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3735 instead of relying on pattern matching. 
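Illustrative example (not in the original docstring): DECODE(x, 1, 'one', 'other') is parsed roughly as CASE WHEN x = 1 THEN 'one' ELSE 'other' END.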
3736 """ 3737 args = self._parse_csv(self._parse_conjunction) 3738 3739 if len(args) < 3: 3740 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3741 3742 expression, *expressions = args 3743 if not expression: 3744 return None 3745 3746 ifs = [] 3747 for search, result in zip(expressions[::2], expressions[1::2]): 3748 if not search or not result: 3749 return None 3750 3751 if isinstance(search, exp.Literal): 3752 ifs.append( 3753 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3754 ) 3755 elif isinstance(search, exp.Null): 3756 ifs.append( 3757 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3758 ) 3759 else: 3760 cond = exp.or_( 3761 exp.EQ(this=expression.copy(), expression=search), 3762 exp.and_( 3763 exp.Is(this=expression.copy(), expression=exp.Null()), 3764 exp.Is(this=search.copy(), expression=exp.Null()), 3765 copy=False, 3766 ), 3767 copy=False, 3768 ) 3769 ifs.append(exp.If(this=cond, true=result)) 3770 3771 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3772 3773 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3774 self._match_text_seq("KEY") 3775 key = self._parse_field() 3776 self._match(TokenType.COLON) 3777 self._match_text_seq("VALUE") 3778 value = self._parse_field() 3779 3780 if not key and not value: 3781 return None 3782 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3783 3784 def _parse_json_object(self) -> exp.JSONObject: 3785 star = self._parse_star() 3786 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3787 3788 null_handling = None 3789 if self._match_text_seq("NULL", "ON", "NULL"): 3790 null_handling = "NULL ON NULL" 3791 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3792 null_handling = "ABSENT ON NULL" 3793 3794 unique_keys = None 3795 if self._match_text_seq("WITH", "UNIQUE"): 3796 unique_keys = True 3797 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3798 unique_keys = False 3799 3800 self._match_text_seq("KEYS") 3801 3802 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3803 format_json = self._match_text_seq("FORMAT", "JSON") 3804 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3805 3806 return self.expression( 3807 exp.JSONObject, 3808 expressions=expressions, 3809 null_handling=null_handling, 3810 unique_keys=unique_keys, 3811 return_type=return_type, 3812 format_json=format_json, 3813 encoding=encoding, 3814 ) 3815 3816 def _parse_logarithm(self) -> exp.Func: 3817 # Default argument order is base, expression 3818 args = self._parse_csv(self._parse_range) 3819 3820 if len(args) > 1: 3821 if not self.LOG_BASE_FIRST: 3822 args.reverse() 3823 return exp.Log.from_arg_list(args) 3824 3825 return self.expression( 3826 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3827 ) 3828 3829 def _parse_match_against(self) -> exp.MatchAgainst: 3830 expressions = self._parse_csv(self._parse_column) 3831 3832 self._match_text_seq(")", "AGAINST", "(") 3833 3834 this = self._parse_string() 3835 3836 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3837 modifier = "IN NATURAL LANGUAGE MODE" 3838 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3839 modifier = f"{modifier} WITH QUERY EXPANSION" 3840 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3841 modifier = "IN BOOLEAN MODE" 3842 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3843 modifier = "WITH QUERY EXPANSION" 3844 
else: 3845 modifier = None 3846 3847 return self.expression( 3848 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3849 ) 3850 3851 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3852 def _parse_open_json(self) -> exp.OpenJSON: 3853 this = self._parse_bitwise() 3854 path = self._match(TokenType.COMMA) and self._parse_string() 3855 3856 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3857 this = self._parse_field(any_token=True) 3858 kind = self._parse_types() 3859 path = self._parse_string() 3860 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3861 3862 return self.expression( 3863 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3864 ) 3865 3866 expressions = None 3867 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3868 self._match_l_paren() 3869 expressions = self._parse_csv(_parse_open_json_column_def) 3870 3871 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3872 3873 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 3874 args = self._parse_csv(self._parse_bitwise) 3875 3876 if self._match(TokenType.IN): 3877 return self.expression( 3878 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3879 ) 3880 3881 if haystack_first: 3882 haystack = seq_get(args, 0) 3883 needle = seq_get(args, 1) 3884 else: 3885 needle = seq_get(args, 0) 3886 haystack = seq_get(args, 1) 3887 3888 return self.expression( 3889 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 3890 ) 3891 3892 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 3893 args = self._parse_csv(self._parse_table) 3894 return exp.JoinHint(this=func_name.upper(), expressions=args) 3895 3896 def _parse_substring(self) -> exp.Substring: 3897 # Postgres supports the form: substring(string [from int] [for int]) 3898 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3899 3900 args = self._parse_csv(self._parse_bitwise) 3901 3902 if self._match(TokenType.FROM): 3903 args.append(self._parse_bitwise()) 3904 if self._match(TokenType.FOR): 3905 args.append(self._parse_bitwise()) 3906 3907 return self.validate_expression(exp.Substring.from_arg_list(args), args) 3908 3909 def _parse_trim(self) -> exp.Trim: 3910 # https://www.w3resource.com/sql/character-functions/trim.php 3911 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3912 3913 position = None 3914 collation = None 3915 3916 if self._match_texts(self.TRIM_TYPES): 3917 position = self._prev.text.upper() 3918 3919 expression = self._parse_bitwise() 3920 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3921 this = self._parse_bitwise() 3922 else: 3923 this = expression 3924 expression = None 3925 3926 if self._match(TokenType.COLLATE): 3927 collation = self._parse_bitwise() 3928 3929 return self.expression( 3930 exp.Trim, this=this, position=position, expression=expression, collation=collation 3931 ) 3932 3933 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3934 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3935 3936 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3937 return self._parse_window(self._parse_id_var(), alias=True) 3938 3939 def _parse_respect_or_ignore_nulls( 3940 self, this: t.Optional[exp.Expression] 3941 ) -> t.Optional[exp.Expression]: 3942 if self._match_text_seq("IGNORE", "NULLS"): 3943 return 
self.expression(exp.IgnoreNulls, this=this) 3944 if self._match_text_seq("RESPECT", "NULLS"): 3945 return self.expression(exp.RespectNulls, this=this) 3946 return this 3947 3948 def _parse_window( 3949 self, this: t.Optional[exp.Expression], alias: bool = False 3950 ) -> t.Optional[exp.Expression]: 3951 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 3952 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 3953 self._match_r_paren() 3954 3955 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 3956 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3957 if self._match_text_seq("WITHIN", "GROUP"): 3958 order = self._parse_wrapped(self._parse_order) 3959 this = self.expression(exp.WithinGroup, this=this, expression=order) 3960 3961 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3962 # Some dialects choose to implement and some do not. 3963 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3964 3965 # There is some code above in _parse_lambda that handles 3966 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3967 3968 # The below changes handle 3969 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3970 3971 # Oracle allows both formats 3972 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3973 # and Snowflake chose to do the same for familiarity 3974 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3975 this = self._parse_respect_or_ignore_nulls(this) 3976 3977 # bigquery select from window x AS (partition by ...) 3978 if alias: 3979 over = None 3980 self._match(TokenType.ALIAS) 3981 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 3982 return this 3983 else: 3984 over = self._prev.text.upper() 3985 3986 if not self._match(TokenType.L_PAREN): 3987 return self.expression( 3988 exp.Window, this=this, alias=self._parse_id_var(False), over=over 3989 ) 3990 3991 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 3992 3993 first = self._match(TokenType.FIRST) 3994 if self._match_text_seq("LAST"): 3995 first = False 3996 3997 partition = self._parse_partition_by() 3998 order = self._parse_order() 3999 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4000 4001 if kind: 4002 self._match(TokenType.BETWEEN) 4003 start = self._parse_window_spec() 4004 self._match(TokenType.AND) 4005 end = self._parse_window_spec() 4006 4007 spec = self.expression( 4008 exp.WindowSpec, 4009 kind=kind, 4010 start=start["value"], 4011 start_side=start["side"], 4012 end=end["value"], 4013 end_side=end["side"], 4014 ) 4015 else: 4016 spec = None 4017 4018 self._match_r_paren() 4019 4020 return self.expression( 4021 exp.Window, 4022 this=this, 4023 partition_by=partition, 4024 order=order, 4025 spec=spec, 4026 alias=window_alias, 4027 over=over, 4028 first=first, 4029 ) 4030 4031 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4032 self._match(TokenType.BETWEEN) 4033 4034 return { 4035 "value": ( 4036 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4037 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4038 or self._parse_bitwise() 4039 ), 4040 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4041 } 4042 4043 def _parse_alias( 4044 self, this: t.Optional[exp.Expression], explicit: bool = False 4045 ) -> t.Optional[exp.Expression]: 4046 
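# Illustrative note (not in the original source): this parses an optional alias, e.g. expr AS name, a bare trailing name, or AS (a, b, ...); when explicit=True the AS keyword is required.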
any_token = self._match(TokenType.ALIAS) 4047 4048 if explicit and not any_token: 4049 return this 4050 4051 if self._match(TokenType.L_PAREN): 4052 aliases = self.expression( 4053 exp.Aliases, 4054 this=this, 4055 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4056 ) 4057 self._match_r_paren(aliases) 4058 return aliases 4059 4060 alias = self._parse_id_var(any_token) 4061 4062 if alias: 4063 return self.expression(exp.Alias, this=this, alias=alias) 4064 4065 return this 4066 4067 def _parse_id_var( 4068 self, 4069 any_token: bool = True, 4070 tokens: t.Optional[t.Collection[TokenType]] = None, 4071 ) -> t.Optional[exp.Expression]: 4072 identifier = self._parse_identifier() 4073 4074 if identifier: 4075 return identifier 4076 4077 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4078 quoted = self._prev.token_type == TokenType.STRING 4079 return exp.Identifier(this=self._prev.text, quoted=quoted) 4080 4081 return None 4082 4083 def _parse_string(self) -> t.Optional[exp.Expression]: 4084 if self._match(TokenType.STRING): 4085 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4086 return self._parse_placeholder() 4087 4088 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4089 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4090 4091 def _parse_number(self) -> t.Optional[exp.Expression]: 4092 if self._match(TokenType.NUMBER): 4093 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4094 return self._parse_placeholder() 4095 4096 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4097 if self._match(TokenType.IDENTIFIER): 4098 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4099 return self._parse_placeholder() 4100 4101 def _parse_var( 4102 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4103 ) -> t.Optional[exp.Expression]: 4104 if ( 4105 (any_token and self._advance_any()) 4106 or self._match(TokenType.VAR) 4107 or (self._match_set(tokens) if tokens else False) 4108 ): 4109 return self.expression(exp.Var, this=self._prev.text) 4110 return self._parse_placeholder() 4111 4112 def _advance_any(self) -> t.Optional[Token]: 4113 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4114 self._advance() 4115 return self._prev 4116 return None 4117 4118 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4119 return self._parse_var() or self._parse_string() 4120 4121 def _parse_null(self) -> t.Optional[exp.Expression]: 4122 if self._match(TokenType.NULL): 4123 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4124 return None 4125 4126 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4127 if self._match(TokenType.TRUE): 4128 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4129 if self._match(TokenType.FALSE): 4130 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4131 return None 4132 4133 def _parse_star(self) -> t.Optional[exp.Expression]: 4134 if self._match(TokenType.STAR): 4135 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4136 return None 4137 4138 def _parse_parameter(self) -> exp.Parameter: 4139 wrapped = self._match(TokenType.L_BRACE) 4140 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4141 self._match(TokenType.R_BRACE) 4142 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4143 4144 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 
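# Illustrative note (not in the original source): placeholders include forms such as ? and named parameters like :name, depending on the dialect's PLACEHOLDER_PARSERS.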
4145 if self._match_set(self.PLACEHOLDER_PARSERS): 4146 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4147 if placeholder: 4148 return placeholder 4149 self._advance(-1) 4150 return None 4151 4152 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4153 if not self._match(TokenType.EXCEPT): 4154 return None 4155 if self._match(TokenType.L_PAREN, advance=False): 4156 return self._parse_wrapped_csv(self._parse_column) 4157 return self._parse_csv(self._parse_column) 4158 4159 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4160 if not self._match(TokenType.REPLACE): 4161 return None 4162 if self._match(TokenType.L_PAREN, advance=False): 4163 return self._parse_wrapped_csv(self._parse_expression) 4164 return self._parse_csv(self._parse_expression) 4165 4166 def _parse_csv( 4167 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4168 ) -> t.List[t.Optional[exp.Expression]]: 4169 parse_result = parse_method() 4170 items = [parse_result] if parse_result is not None else [] 4171 4172 while self._match(sep): 4173 self._add_comments(parse_result) 4174 parse_result = parse_method() 4175 if parse_result is not None: 4176 items.append(parse_result) 4177 4178 return items 4179 4180 def _parse_tokens( 4181 self, parse_method: t.Callable, expressions: t.Dict 4182 ) -> t.Optional[exp.Expression]: 4183 this = parse_method() 4184 4185 while self._match_set(expressions): 4186 this = self.expression( 4187 expressions[self._prev.token_type], 4188 this=this, 4189 comments=self._prev_comments, 4190 expression=parse_method(), 4191 ) 4192 4193 return this 4194 4195 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4196 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4197 4198 def _parse_wrapped_csv( 4199 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4200 ) -> t.List[t.Optional[exp.Expression]]: 4201 return self._parse_wrapped( 4202 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4203 ) 4204 4205 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4206 wrapped = self._match(TokenType.L_PAREN) 4207 if not wrapped and not optional: 4208 self.raise_error("Expecting (") 4209 parse_result = parse_method() 4210 if wrapped: 4211 self._match_r_paren() 4212 return parse_result 4213 4214 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4215 return self._parse_select() or self._parse_set_operations( 4216 self._parse_expression() if alias else self._parse_conjunction() 4217 ) 4218 4219 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4220 return self._parse_query_modifiers( 4221 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4222 ) 4223 4224 def _parse_transaction(self) -> exp.Transaction: 4225 this = None 4226 if self._match_texts(self.TRANSACTION_KIND): 4227 this = self._prev.text 4228 4229 self._match_texts({"TRANSACTION", "WORK"}) 4230 4231 modes = [] 4232 while True: 4233 mode = [] 4234 while self._match(TokenType.VAR): 4235 mode.append(self._prev.text) 4236 4237 if mode: 4238 modes.append(" ".join(mode)) 4239 if not self._match(TokenType.COMMA): 4240 break 4241 4242 return self.expression(exp.Transaction, this=this, modes=modes) 4243 4244 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4245 chain = None 4246 savepoint = None 4247 is_rollback = self._prev.token_type == 
TokenType.ROLLBACK 4248 4249 self._match_texts({"TRANSACTION", "WORK"}) 4250 4251 if self._match_text_seq("TO"): 4252 self._match_text_seq("SAVEPOINT") 4253 savepoint = self._parse_id_var() 4254 4255 if self._match(TokenType.AND): 4256 chain = not self._match_text_seq("NO") 4257 self._match_text_seq("CHAIN") 4258 4259 if is_rollback: 4260 return self.expression(exp.Rollback, savepoint=savepoint) 4261 4262 return self.expression(exp.Commit, chain=chain) 4263 4264 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4265 if not self._match_text_seq("ADD"): 4266 return None 4267 4268 self._match(TokenType.COLUMN) 4269 exists_column = self._parse_exists(not_=True) 4270 expression = self._parse_column_def(self._parse_field(any_token=True)) 4271 4272 if expression: 4273 expression.set("exists", exists_column) 4274 4275 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4276 if self._match_texts(("FIRST", "AFTER")): 4277 position = self._prev.text 4278 column_position = self.expression( 4279 exp.ColumnPosition, this=self._parse_column(), position=position 4280 ) 4281 expression.set("position", column_position) 4282 4283 return expression 4284 4285 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4286 drop = self._match(TokenType.DROP) and self._parse_drop() 4287 if drop and not isinstance(drop, exp.Command): 4288 drop.set("kind", drop.args.get("kind", "COLUMN")) 4289 return drop 4290 4291 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4292 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4293 return self.expression( 4294 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4295 ) 4296 4297 def _parse_add_constraint(self) -> exp.AddConstraint: 4298 this = None 4299 kind = self._prev.token_type 4300 4301 if kind == TokenType.CONSTRAINT: 4302 this = self._parse_id_var() 4303 4304 if self._match_text_seq("CHECK"): 4305 expression = self._parse_wrapped(self._parse_conjunction) 4306 enforced = self._match_text_seq("ENFORCED") 4307 4308 return self.expression( 4309 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4310 ) 4311 4312 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4313 expression = self._parse_foreign_key() 4314 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4315 expression = self._parse_primary_key() 4316 else: 4317 expression = None 4318 4319 return self.expression(exp.AddConstraint, this=this, expression=expression) 4320 4321 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4322 index = self._index - 1 4323 4324 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4325 return self._parse_csv(self._parse_add_constraint) 4326 4327 self._retreat(index) 4328 return self._parse_csv(self._parse_add_column) 4329 4330 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4331 self._match(TokenType.COLUMN) 4332 column = self._parse_field(any_token=True) 4333 4334 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4335 return self.expression(exp.AlterColumn, this=column, drop=True) 4336 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4337 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4338 4339 self._match_text_seq("SET", "DATA") 4340 return self.expression( 4341 exp.AlterColumn, 4342 this=column, 4343 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4344 
collate=self._match(TokenType.COLLATE) and self._parse_term(), 4345 using=self._match(TokenType.USING) and self._parse_conjunction(), 4346 ) 4347 4348 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4349 index = self._index - 1 4350 4351 partition_exists = self._parse_exists() 4352 if self._match(TokenType.PARTITION, advance=False): 4353 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4354 4355 self._retreat(index) 4356 return self._parse_csv(self._parse_drop_column) 4357 4358 def _parse_alter_table_rename(self) -> exp.RenameTable: 4359 self._match_text_seq("TO") 4360 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4361 4362 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4363 start = self._prev 4364 4365 if not self._match(TokenType.TABLE): 4366 return self._parse_as_command(start) 4367 4368 exists = self._parse_exists() 4369 this = self._parse_table(schema=True) 4370 4371 if self._next: 4372 self._advance() 4373 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4374 4375 if parser: 4376 actions = ensure_list(parser(self)) 4377 4378 if not self._curr: 4379 return self.expression( 4380 exp.AlterTable, 4381 this=this, 4382 exists=exists, 4383 actions=actions, 4384 ) 4385 return self._parse_as_command(start) 4386 4387 def _parse_merge(self) -> exp.Merge: 4388 self._match(TokenType.INTO) 4389 target = self._parse_table() 4390 4391 self._match(TokenType.USING) 4392 using = self._parse_table() 4393 4394 self._match(TokenType.ON) 4395 on = self._parse_conjunction() 4396 4397 whens = [] 4398 while self._match(TokenType.WHEN): 4399 matched = not self._match(TokenType.NOT) 4400 self._match_text_seq("MATCHED") 4401 source = ( 4402 False 4403 if self._match_text_seq("BY", "TARGET") 4404 else self._match_text_seq("BY", "SOURCE") 4405 ) 4406 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4407 4408 self._match(TokenType.THEN) 4409 4410 if self._match(TokenType.INSERT): 4411 _this = self._parse_star() 4412 if _this: 4413 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4414 else: 4415 then = self.expression( 4416 exp.Insert, 4417 this=self._parse_value(), 4418 expression=self._match(TokenType.VALUES) and self._parse_value(), 4419 ) 4420 elif self._match(TokenType.UPDATE): 4421 expressions = self._parse_star() 4422 if expressions: 4423 then = self.expression(exp.Update, expressions=expressions) 4424 else: 4425 then = self.expression( 4426 exp.Update, 4427 expressions=self._match(TokenType.SET) 4428 and self._parse_csv(self._parse_equality), 4429 ) 4430 elif self._match(TokenType.DELETE): 4431 then = self.expression(exp.Var, this=self._prev.text) 4432 else: 4433 then = None 4434 4435 whens.append( 4436 self.expression( 4437 exp.When, 4438 matched=matched, 4439 source=source, 4440 condition=condition, 4441 then=then, 4442 ) 4443 ) 4444 4445 return self.expression( 4446 exp.Merge, 4447 this=target, 4448 using=using, 4449 on=on, 4450 expressions=whens, 4451 ) 4452 4453 def _parse_show(self) -> t.Optional[exp.Expression]: 4454 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4455 if parser: 4456 return parser(self) 4457 self._advance() 4458 return self.expression(exp.Show, this=self._prev.text.upper()) 4459 4460 def _parse_set_item_assignment( 4461 self, kind: t.Optional[str] = None 4462 ) -> t.Optional[exp.Expression]: 4463 index = self._index 4464 4465 if kind in {"GLOBAL", "SESSION"} and 
self._match_text_seq("TRANSACTION"): 4466 return self._parse_set_transaction(global_=kind == "GLOBAL") 4467 4468 left = self._parse_primary() or self._parse_id_var() 4469 4470 if not self._match_texts(("=", "TO")): 4471 self._retreat(index) 4472 return None 4473 4474 right = self._parse_statement() or self._parse_id_var() 4475 this = self.expression(exp.EQ, this=left, expression=right) 4476 4477 return self.expression(exp.SetItem, this=this, kind=kind) 4478 4479 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4480 self._match_text_seq("TRANSACTION") 4481 characteristics = self._parse_csv( 4482 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4483 ) 4484 return self.expression( 4485 exp.SetItem, 4486 expressions=characteristics, 4487 kind="TRANSACTION", 4488 **{"global": global_}, # type: ignore 4489 ) 4490 4491 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4492 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4493 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4494 4495 def _parse_set(self) -> exp.Set | exp.Command: 4496 index = self._index 4497 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4498 4499 if self._curr: 4500 self._retreat(index) 4501 return self._parse_as_command(self._prev) 4502 4503 return set_ 4504 4505 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4506 for option in options: 4507 if self._match_text_seq(*option.split(" ")): 4508 return exp.var(option) 4509 return None 4510 4511 def _parse_as_command(self, start: Token) -> exp.Command: 4512 while self._curr: 4513 self._advance() 4514 text = self._find_sql(start, self._prev) 4515 size = len(start.text) 4516 return exp.Command(this=text[:size], expression=text[size:]) 4517 4518 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4519 settings = [] 4520 4521 self._match_l_paren() 4522 kind = self._parse_id_var() 4523 4524 if self._match(TokenType.L_PAREN): 4525 while True: 4526 key = self._parse_id_var() 4527 value = self._parse_primary() 4528 4529 if not key and value is None: 4530 break 4531 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4532 self._match(TokenType.R_PAREN) 4533 4534 self._match_r_paren() 4535 4536 return self.expression( 4537 exp.DictProperty, 4538 this=this, 4539 kind=kind.this if kind else None, 4540 settings=settings, 4541 ) 4542 4543 def _parse_dict_range(self, this: str) -> exp.DictRange: 4544 self._match_l_paren() 4545 has_min = self._match_text_seq("MIN") 4546 if has_min: 4547 min = self._parse_var() or self._parse_primary() 4548 self._match_text_seq("MAX") 4549 max = self._parse_var() or self._parse_primary() 4550 else: 4551 max = self._parse_var() or self._parse_primary() 4552 min = exp.Literal.number(0) 4553 self._match_r_paren() 4554 return self.expression(exp.DictRange, this=this, min=min, max=max) 4555 4556 def _find_parser( 4557 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4558 ) -> t.Optional[t.Callable]: 4559 if not self._curr: 4560 return None 4561 4562 index = self._index 4563 this = [] 4564 while True: 4565 # The current token might be multiple words 4566 curr = self._curr.text.upper() 4567 key = curr.split(" ") 4568 this.append(curr) 4569 self._advance() 4570 result, trie = in_trie(trie, key) 4571 if result == 0: 4572 break 4573 if result == 2: 4574 subparser = parsers[" ".join(this)] 4575 return subparser 4576 self._retreat(index) 4577 return None 4578 4579 def 
_match(self, token_type, advance=True, expression=None): 4580 if not self._curr: 4581 return None 4582 4583 if self._curr.token_type == token_type: 4584 if advance: 4585 self._advance() 4586 self._add_comments(expression) 4587 return True 4588 4589 return None 4590 4591 def _match_set(self, types, advance=True): 4592 if not self._curr: 4593 return None 4594 4595 if self._curr.token_type in types: 4596 if advance: 4597 self._advance() 4598 return True 4599 4600 return None 4601 4602 def _match_pair(self, token_type_a, token_type_b, advance=True): 4603 if not self._curr or not self._next: 4604 return None 4605 4606 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4607 if advance: 4608 self._advance(2) 4609 return True 4610 4611 return None 4612 4613 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4614 if not self._match(TokenType.L_PAREN, expression=expression): 4615 self.raise_error("Expecting (") 4616 4617 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4618 if not self._match(TokenType.R_PAREN, expression=expression): 4619 self.raise_error("Expecting )") 4620 4621 def _match_texts(self, texts, advance=True): 4622 if self._curr and self._curr.text.upper() in texts: 4623 if advance: 4624 self._advance() 4625 return True 4626 return False 4627 4628 def _match_text_seq(self, *texts, advance=True): 4629 index = self._index 4630 for text in texts: 4631 if self._curr and self._curr.text.upper() == text: 4632 self._advance() 4633 else: 4634 self._retreat(index) 4635 return False 4636 4637 if not advance: 4638 self._retreat(index) 4639 4640 return True 4641 4642 @t.overload 4643 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4644 ... 4645 4646 @t.overload 4647 def _replace_columns_with_dots( 4648 self, this: t.Optional[exp.Expression] 4649 ) -> t.Optional[exp.Expression]: 4650 ... 4651 4652 def _replace_columns_with_dots(self, this): 4653 if isinstance(this, exp.Dot): 4654 exp.replace_children(this, self._replace_columns_with_dots) 4655 elif isinstance(this, exp.Column): 4656 exp.replace_children(this, self._replace_columns_with_dots) 4657 table = this.args.get("table") 4658 this = ( 4659 self.expression(exp.Dot, this=table, expression=this.this) 4660 if table 4661 else self.expression(exp.Var, this=this.name) 4662 ) 4663 elif isinstance(this, exp.Identifier): 4664 this = self.expression(exp.Var, this=this.name) 4665 4666 return this 4667 4668 def _replace_lambda( 4669 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4670 ) -> t.Optional[exp.Expression]: 4671 if not node: 4672 return node 4673 4674 for column in node.find_all(exp.Column): 4675 if column.parts[0].name in lambda_variables: 4676 dot_or_id = column.to_dot() if column.table else column.this 4677 parent = column.parent 4678 4679 while isinstance(parent, exp.Dot): 4680 if not isinstance(parent.parent, exp.Dot): 4681 parent.replace(dot_or_id) 4682 break 4683 parent = parent.parent 4684 else: 4685 if column is node: 4686 node = dot_or_id 4687 else: 4688 column.replace(dot_or_id) 4689 return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
):
    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.reset()
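As a usage sketch (not part of the module source), a parser with a stricter error budget can be constructed and fed tokens from the default tokenizer; the SQL string and settings below are illustrative:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

# Accumulate up to 5 errors and raise them together, instead of raising on the first one.
parser = Parser(error_level=ErrorLevel.RAISE, error_message_context=50, max_errors=5)
sql = "SELECT a FROM t"  # illustrative input
tokens = Tokenizer().tokenize(sql)
trees = parser.parse(tokens, sql=sql)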
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
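A minimal sketch of the tokenize-then-parse flow, using the default (dialect-less) tokenizer; one tree is produced per statement:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
tokens = Tokenizer().tokenize(sql)
trees = Parser().parse(tokens, sql=sql)
assert len(trees) == 2  # one exp.Select per parsed statement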
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
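For instance, a token list can be parsed specifically as a SELECT statement (a sketch; exp.Select is one of the types registered in EXPRESSION_PARSERS for the base parser):

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t"
# Raises ParseError if the tokens cannot be parsed as a SELECT,
# and TypeError if no parser is registered for the requested type.
select = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql=sql)[0]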
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
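With ErrorLevel.WARN, for example, a malformed statement is logged rather than raised (a sketch; the typo is deliberate, and the exact message depends on the parse path taken):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

parser = Parser(error_level=ErrorLevel.WARN)
sql = "SELECT * FORM t"  # "FORM" is a typo for "FROM"
parser.parse(Tokenizer().tokenize(sql), sql=sql)  # logs the error instead of raising it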
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error to the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
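The recorded fields can be inspected on a raised ParseError; a sketch with the default ErrorLevel.IMMEDIATE:

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT * FORM t"  # deliberately malformed
try:
    Parser().parse(Tokenizer().tokenize(sql), sql=sql)
except ParseError as e:
    first = e.errors[0]  # each recorded error is a dict built by ParseError.new
    print(first["line"], first["col"], first["highlight"])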
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
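For example, the helper can be called directly to build a validated node (a sketch; dialect code normally calls it from inside parsing methods):

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
# Equivalent to exp.Alias(this=..., alias=...), plus comment attachment and validation.
node = parser.expression(exp.Alias, this=exp.column("a"), alias=exp.to_identifier("a_alias"))
print(node.sql())  # a AS a_alias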
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
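For example, validating a node that is missing a mandatory argument records an error (a sketch; exp.Alias requires its "this" argument):

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser

parser = Parser(error_level=ErrorLevel.RAISE)
parser.validate_expression(exp.Alias(alias="x"))  # "this" is missing, so an error is recorded
try:
    parser.check_errors()  # raises ParseError describing the missing argument
except ParseError as e:
    print(e)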