# sqlglot/dialects/presto.py
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, parser, tokens, transforms 6from sqlglot.dialects.dialect import ( 7 Dialect, 8 NormalizationStrategy, 9 binary_from_function, 10 bool_xor_sql, 11 date_trunc_to_time, 12 datestrtodate_sql, 13 encode_decode_sql, 14 build_formatted_time, 15 if_sql, 16 left_to_substring_sql, 17 no_ilike_sql, 18 no_pivot_sql, 19 no_safe_divide_sql, 20 no_timestamp_sql, 21 regexp_extract_sql, 22 rename_func, 23 right_to_substring_sql, 24 struct_extract_sql, 25 str_position_sql, 26 timestamptrunc_sql, 27 timestrtotime_sql, 28 ts_or_ds_add_cast, 29 unit_to_str, 30) 31from sqlglot.dialects.hive import Hive 32from sqlglot.dialects.mysql import MySQL 33from sqlglot.helper import apply_index_offset, seq_get 34from sqlglot.tokens import TokenType 35 36 37def _explode_to_unnest_sql(self: Presto.Generator, expression: exp.Lateral) -> str: 38 if isinstance(expression.this, exp.Explode): 39 return self.sql( 40 exp.Join( 41 this=exp.Unnest( 42 expressions=[expression.this.this], 43 alias=expression.args.get("alias"), 44 offset=isinstance(expression.this, exp.Posexplode), 45 ), 46 kind="cross", 47 ) 48 ) 49 return self.lateral_sql(expression) 50 51 52def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str: 53 regex = r"(\w)(\w*)" 54 return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))" 55 56 57def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str: 58 if expression.args.get("asc") == exp.false(): 59 comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END" 60 else: 61 comparator = None 62 return self.func("ARRAY_SORT", expression.this, comparator) 63 64 65def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str: 66 if isinstance(expression.parent, exp.Property): 67 columns = ", ".join(f"'{c.name}'" for c in expression.expressions) 68 return f"ARRAY[{columns}]" 69 70 if 
expression.parent: 71 for schema in expression.parent.find_all(exp.Schema): 72 column_defs = schema.find_all(exp.ColumnDef) 73 if column_defs and isinstance(schema.parent, exp.Property): 74 expression.expressions.extend(column_defs) 75 76 return self.schema_sql(expression) 77 78 79def _quantile_sql(self: Presto.Generator, expression: exp.Quantile) -> str: 80 self.unsupported("Presto does not support exact quantiles") 81 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 82 83 84def _str_to_time_sql( 85 self: Presto.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate 86) -> str: 87 return self.func("DATE_PARSE", expression.this, self.format_time(expression)) 88 89 90def _ts_or_ds_to_date_sql(self: Presto.Generator, expression: exp.TsOrDsToDate) -> str: 91 time_format = self.format_time(expression) 92 if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT): 93 return self.sql(exp.cast(_str_to_time_sql(self, expression), exp.DataType.Type.DATE)) 94 return self.sql( 95 exp.cast(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), exp.DataType.Type.DATE) 96 ) 97 98 99def _ts_or_ds_add_sql(self: Presto.Generator, expression: exp.TsOrDsAdd) -> str: 100 expression = ts_or_ds_add_cast(expression) 101 unit = unit_to_str(expression) 102 return self.func("DATE_ADD", unit, expression.expression, expression.this) 103 104 105def _ts_or_ds_diff_sql(self: Presto.Generator, expression: exp.TsOrDsDiff) -> str: 106 this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP) 107 expr = exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP) 108 unit = unit_to_str(expression) 109 return self.func("DATE_DIFF", unit, expr, this) 110 111 112def _build_approx_percentile(args: t.List) -> exp.Expression: 113 if len(args) == 4: 114 return exp.ApproxQuantile( 115 this=seq_get(args, 0), 116 weight=seq_get(args, 1), 117 quantile=seq_get(args, 2), 118 accuracy=seq_get(args, 3), 119 ) 120 if len(args) == 3: 
121 return exp.ApproxQuantile( 122 this=seq_get(args, 0), quantile=seq_get(args, 1), accuracy=seq_get(args, 2) 123 ) 124 return exp.ApproxQuantile.from_arg_list(args) 125 126 127def _build_from_unixtime(args: t.List) -> exp.Expression: 128 if len(args) == 3: 129 return exp.UnixToTime( 130 this=seq_get(args, 0), 131 hours=seq_get(args, 1), 132 minutes=seq_get(args, 2), 133 ) 134 if len(args) == 2: 135 return exp.UnixToTime(this=seq_get(args, 0), zone=seq_get(args, 1)) 136 137 return exp.UnixToTime.from_arg_list(args) 138 139 140def _unnest_sequence(expression: exp.Expression) -> exp.Expression: 141 if isinstance(expression, exp.Table): 142 if isinstance(expression.this, exp.GenerateSeries): 143 unnest = exp.Unnest(expressions=[expression.this]) 144 145 if expression.alias: 146 return exp.alias_(unnest, alias="_u", table=[expression.alias], copy=False) 147 return unnest 148 return expression 149 150 151def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str: 152 """ 153 Trino doesn't support FIRST / LAST as functions, but they're valid in the context 154 of MATCH_RECOGNIZE, so we need to preserve them in that case. In all other cases 155 they're converted into an ARBITRARY call. 
156 157 Reference: https://trino.io/docs/current/sql/match-recognize.html#logical-navigation-functions 158 """ 159 if isinstance(expression.find_ancestor(exp.MatchRecognize, exp.Select), exp.MatchRecognize): 160 return self.function_fallback_sql(expression) 161 162 return rename_func("ARBITRARY")(self, expression) 163 164 165def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str: 166 scale = expression.args.get("scale") 167 timestamp = self.sql(expression, "this") 168 if scale in (None, exp.UnixToTime.SECONDS): 169 return rename_func("FROM_UNIXTIME")(self, expression) 170 171 return f"FROM_UNIXTIME(CAST({timestamp} AS DOUBLE) / POW(10, {scale}))" 172 173 174def _to_int(expression: exp.Expression) -> exp.Expression: 175 if not expression.type: 176 from sqlglot.optimizer.annotate_types import annotate_types 177 178 annotate_types(expression) 179 if expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES: 180 return exp.cast(expression, to=exp.DataType.Type.BIGINT) 181 return expression 182 183 184def _build_to_char(args: t.List) -> exp.TimeToStr: 185 fmt = seq_get(args, 1) 186 if isinstance(fmt, exp.Literal): 187 # We uppercase this to match Teradata's format mapping keys 188 fmt.set("this", fmt.this.upper()) 189 190 # We use "teradata" on purpose here, because the time formats are different in Presto. 
191 # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char 192 return build_formatted_time(exp.TimeToStr, "teradata")(args) 193 194 195class Presto(Dialect): 196 INDEX_OFFSET = 1 197 NULL_ORDERING = "nulls_are_last" 198 TIME_FORMAT = MySQL.TIME_FORMAT 199 TIME_MAPPING = MySQL.TIME_MAPPING 200 STRICT_STRING_CONCAT = True 201 SUPPORTS_SEMI_ANTI_JOIN = False 202 TYPED_DIVISION = True 203 TABLESAMPLE_SIZE_IS_PERCENT = True 204 LOG_BASE_FIRST: t.Optional[bool] = None 205 206 # https://github.com/trinodb/trino/issues/17 207 # https://github.com/trinodb/trino/issues/12289 208 # https://github.com/prestodb/presto/issues/2863 209 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 210 211 class Tokenizer(tokens.Tokenizer): 212 UNICODE_STRINGS = [ 213 (prefix + q, q) 214 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 215 for prefix in ("U&", "u&") 216 ] 217 218 KEYWORDS = { 219 **tokens.Tokenizer.KEYWORDS, 220 "START": TokenType.BEGIN, 221 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 222 "ROW": TokenType.STRUCT, 223 "IPADDRESS": TokenType.IPADDRESS, 224 "IPPREFIX": TokenType.IPPREFIX, 225 } 226 227 KEYWORDS.pop("QUALIFY") 228 229 class Parser(parser.Parser): 230 VALUES_FOLLOWED_BY_PAREN = False 231 232 FUNCTIONS = { 233 **parser.Parser.FUNCTIONS, 234 "ARBITRARY": exp.AnyValue.from_arg_list, 235 "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, 236 "APPROX_PERCENTILE": _build_approx_percentile, 237 "BITWISE_AND": binary_from_function(exp.BitwiseAnd), 238 "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 239 "BITWISE_OR": binary_from_function(exp.BitwiseOr), 240 "BITWISE_XOR": binary_from_function(exp.BitwiseXor), 241 "CARDINALITY": exp.ArraySize.from_arg_list, 242 "CONTAINS": exp.ArrayContains.from_arg_list, 243 "DATE_ADD": lambda args: exp.DateAdd( 244 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 245 ), 246 "DATE_DIFF": lambda args: exp.DateDiff( 247 this=seq_get(args, 2), 
expression=seq_get(args, 1), unit=seq_get(args, 0) 248 ), 249 "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"), 250 "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"), 251 "DATE_TRUNC": date_trunc_to_time, 252 "ELEMENT_AT": lambda args: exp.Bracket( 253 this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True 254 ), 255 "FROM_HEX": exp.Unhex.from_arg_list, 256 "FROM_UNIXTIME": _build_from_unixtime, 257 "FROM_UTF8": lambda args: exp.Decode( 258 this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8") 259 ), 260 "NOW": exp.CurrentTimestamp.from_arg_list, 261 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 262 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 263 ), 264 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 265 this=seq_get(args, 0), 266 expression=seq_get(args, 1), 267 replacement=seq_get(args, 2) or exp.Literal.string(""), 268 ), 269 "ROW": exp.Struct.from_arg_list, 270 "SEQUENCE": exp.GenerateSeries.from_arg_list, 271 "SET_AGG": exp.ArrayUniqueAgg.from_arg_list, 272 "SPLIT_TO_MAP": exp.StrToMap.from_arg_list, 273 "STRPOS": lambda args: exp.StrPosition( 274 this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2) 275 ), 276 "TO_CHAR": _build_to_char, 277 "TO_HEX": exp.Hex.from_arg_list, 278 "TO_UNIXTIME": exp.TimeToUnix.from_arg_list, 279 "TO_UTF8": lambda args: exp.Encode( 280 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 281 ), 282 } 283 284 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 285 FUNCTION_PARSERS.pop("TRIM") 286 287 class Generator(generator.Generator): 288 INTERVAL_ALLOWS_PLURAL_FORM = False 289 JOIN_HINTS = False 290 TABLE_HINTS = False 291 QUERY_HINTS = False 292 IS_BOOL_ALLOWED = False 293 TZ_TO_WITH_TIME_ZONE = True 294 NVL2_SUPPORTED = False 295 STRUCT_DELIMITER = ("(", ")") 296 LIMIT_ONLY_LITERALS = True 297 SUPPORTS_SINGLE_ARG_CONCAT = False 298 LIKE_PROPERTY_INSIDE_SCHEMA = True 299 
MULTI_ARG_DISTINCT = False 300 SUPPORTS_TO_NUMBER = False 301 302 PROPERTIES_LOCATION = { 303 **generator.Generator.PROPERTIES_LOCATION, 304 exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, 305 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 306 } 307 308 TYPE_MAPPING = { 309 **generator.Generator.TYPE_MAPPING, 310 exp.DataType.Type.INT: "INTEGER", 311 exp.DataType.Type.FLOAT: "REAL", 312 exp.DataType.Type.BINARY: "VARBINARY", 313 exp.DataType.Type.TEXT: "VARCHAR", 314 exp.DataType.Type.TIMETZ: "TIME", 315 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 316 exp.DataType.Type.STRUCT: "ROW", 317 exp.DataType.Type.DATETIME: "TIMESTAMP", 318 exp.DataType.Type.DATETIME64: "TIMESTAMP", 319 } 320 321 TRANSFORMS = { 322 **generator.Generator.TRANSFORMS, 323 exp.AnyValue: rename_func("ARBITRARY"), 324 exp.ApproxDistinct: lambda self, e: self.func( 325 "APPROX_DISTINCT", e.this, e.args.get("accuracy") 326 ), 327 exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"), 328 exp.ArgMax: rename_func("MAX_BY"), 329 exp.ArgMin: rename_func("MIN_BY"), 330 exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", 331 exp.ArrayAny: rename_func("ANY_MATCH"), 332 exp.ArrayConcat: rename_func("CONCAT"), 333 exp.ArrayContains: rename_func("CONTAINS"), 334 exp.ArraySize: rename_func("CARDINALITY"), 335 exp.ArrayToString: rename_func("ARRAY_JOIN"), 336 exp.ArrayUniqueAgg: rename_func("SET_AGG"), 337 exp.AtTimeZone: rename_func("AT_TIMEZONE"), 338 exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), 339 exp.BitwiseLeftShift: lambda self, e: self.func( 340 "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression 341 ), 342 exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this), 343 exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression), 344 exp.BitwiseRightShift: lambda self, e: self.func( 345 "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression 346 ), 347 exp.BitwiseXor: lambda self, e: 
self.func("BITWISE_XOR", e.this, e.expression), 348 exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), 349 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 350 exp.DateAdd: lambda self, e: self.func( 351 "DATE_ADD", 352 unit_to_str(e), 353 _to_int(e.expression), 354 e.this, 355 ), 356 exp.DateDiff: lambda self, e: self.func( 357 "DATE_DIFF", unit_to_str(e), e.expression, e.this 358 ), 359 exp.DateStrToDate: datestrtodate_sql, 360 exp.DateToDi: lambda self, 361 e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", 362 exp.DateSub: lambda self, e: self.func( 363 "DATE_ADD", 364 unit_to_str(e), 365 _to_int(e.expression * -1), 366 e.this, 367 ), 368 exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), 369 exp.DiToDate: lambda self, 370 e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", 371 exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), 372 exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'", 373 exp.First: _first_last_sql, 374 exp.FirstValue: _first_last_sql, 375 exp.FromTimeZone: lambda self, 376 e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", 377 exp.Group: transforms.preprocess([transforms.unalias_group]), 378 exp.GroupConcat: lambda self, e: self.func( 379 "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator") 380 ), 381 exp.Hex: rename_func("TO_HEX"), 382 exp.If: if_sql(), 383 exp.ILike: no_ilike_sql, 384 exp.Initcap: _initcap_sql, 385 exp.ParseJSON: rename_func("JSON_PARSE"), 386 exp.Last: _first_last_sql, 387 exp.LastValue: _first_last_sql, 388 exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this), 389 exp.Lateral: _explode_to_unnest_sql, 390 exp.Left: left_to_substring_sql, 391 exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"), 392 exp.LogicalAnd: rename_func("BOOL_AND"), 393 exp.LogicalOr: rename_func("BOOL_OR"), 394 exp.Pivot: no_pivot_sql, 395 
exp.Quantile: _quantile_sql, 396 exp.RegexpExtract: regexp_extract_sql, 397 exp.Right: right_to_substring_sql, 398 exp.SafeDivide: no_safe_divide_sql, 399 exp.Schema: _schema_sql, 400 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 401 exp.Select: transforms.preprocess( 402 [ 403 transforms.eliminate_qualify, 404 transforms.eliminate_distinct_on, 405 transforms.explode_to_unnest(1), 406 transforms.eliminate_semi_and_anti_joins, 407 ] 408 ), 409 exp.SortArray: _no_sort_array, 410 exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True), 411 exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", 412 exp.StrToMap: rename_func("SPLIT_TO_MAP"), 413 exp.StrToTime: _str_to_time_sql, 414 exp.StructExtract: struct_extract_sql, 415 exp.Table: transforms.preprocess([_unnest_sequence]), 416 exp.Timestamp: no_timestamp_sql, 417 exp.TimestampTrunc: timestamptrunc_sql, 418 exp.TimeStrToDate: timestrtotime_sql, 419 exp.TimeStrToTime: timestrtotime_sql, 420 exp.TimeStrToUnix: lambda self, e: self.func( 421 "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT) 422 ), 423 exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 424 exp.TimeToUnix: rename_func("TO_UNIXTIME"), 425 exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 426 exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), 427 exp.TsOrDiToDi: lambda self, 428 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", 429 exp.TsOrDsAdd: _ts_or_ds_add_sql, 430 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 431 exp.TsOrDsToDate: _ts_or_ds_to_date_sql, 432 exp.Unhex: rename_func("FROM_HEX"), 433 exp.UnixToStr: lambda self, 434 e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})", 435 exp.UnixToTime: _unix_to_time_sql, 436 exp.UnixToTimeStr: lambda self, 437 e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", 438 
exp.VariancePop: rename_func("VAR_POP"), 439 exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), 440 exp.WithinGroup: transforms.preprocess( 441 [transforms.remove_within_group_for_percentiles] 442 ), 443 exp.Xor: bool_xor_sql, 444 } 445 446 RESERVED_KEYWORDS = { 447 "alter", 448 "and", 449 "as", 450 "between", 451 "by", 452 "case", 453 "cast", 454 "constraint", 455 "create", 456 "cross", 457 "current_time", 458 "current_timestamp", 459 "deallocate", 460 "delete", 461 "describe", 462 "distinct", 463 "drop", 464 "else", 465 "end", 466 "escape", 467 "except", 468 "execute", 469 "exists", 470 "extract", 471 "false", 472 "for", 473 "from", 474 "full", 475 "group", 476 "having", 477 "in", 478 "inner", 479 "insert", 480 "intersect", 481 "into", 482 "is", 483 "join", 484 "left", 485 "like", 486 "natural", 487 "not", 488 "null", 489 "on", 490 "or", 491 "order", 492 "outer", 493 "prepare", 494 "right", 495 "select", 496 "table", 497 "then", 498 "true", 499 "union", 500 "using", 501 "values", 502 "when", 503 "where", 504 "with", 505 } 506 507 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 508 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 
509 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 510 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 511 # which seems to be using the same time mapping as Hive, as per: 512 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 513 value_as_text = exp.cast(expression.this, exp.DataType.Type.TEXT) 514 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 515 parse_with_tz = self.func( 516 "PARSE_DATETIME", 517 value_as_text, 518 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 519 ) 520 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 521 return self.func("TO_UNIXTIME", coalesced) 522 523 def bracket_sql(self, expression: exp.Bracket) -> str: 524 if expression.args.get("safe"): 525 return self.func( 526 "ELEMENT_AT", 527 expression.this, 528 seq_get( 529 apply_index_offset( 530 expression.this, 531 expression.expressions, 532 1 - expression.args.get("offset", 0), 533 ), 534 0, 535 ), 536 ) 537 return super().bracket_sql(expression) 538 539 def struct_sql(self, expression: exp.Struct) -> str: 540 from sqlglot.optimizer.annotate_types import annotate_types 541 542 expression = annotate_types(expression) 543 values: t.List[str] = [] 544 schema: t.List[str] = [] 545 unknown_type = False 546 547 for e in expression.expressions: 548 if isinstance(e, exp.PropertyEQ): 549 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 550 unknown_type = True 551 else: 552 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 553 values.append(self.sql(e, "expression")) 554 else: 555 values.append(self.sql(e)) 556 557 size = len(expression.expressions) 558 559 if not size or len(schema) != size: 560 if unknown_type: 561 self.unsupported( 562 "Cannot convert untyped key-value definitions (try annotate_types)." 
563 ) 564 return self.func("ROW", *values) 565 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))" 566 567 def interval_sql(self, expression: exp.Interval) -> str: 568 if expression.this and expression.text("unit").upper().startswith("WEEK"): 569 return f"({expression.this.name} * INTERVAL '7' DAY)" 570 return super().interval_sql(expression) 571 572 def transaction_sql(self, expression: exp.Transaction) -> str: 573 modes = expression.args.get("modes") 574 modes = f" {', '.join(modes)}" if modes else "" 575 return f"START TRANSACTION{modes}" 576 577 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 578 start = expression.args["start"] 579 end = expression.args["end"] 580 step = expression.args.get("step") 581 582 if isinstance(start, exp.Cast): 583 target_type = start.to 584 elif isinstance(end, exp.Cast): 585 target_type = end.to 586 else: 587 target_type = None 588 589 if target_type and target_type.is_type("timestamp"): 590 if target_type is start.to: 591 end = exp.cast(end, target_type) 592 else: 593 start = exp.cast(start, target_type) 594 595 return self.func("SEQUENCE", start, end, step) 596 597 def offset_limit_modifiers( 598 self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit] 599 ) -> t.List[str]: 600 return [ 601 self.sql(expression, "offset"), 602 self.sql(limit), 603 ] 604 605 def create_sql(self, expression: exp.Create) -> str: 606 """ 607 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 608 so we need to remove them 609 """ 610 kind = expression.args["kind"] 611 schema = expression.this 612 if kind == "VIEW" and schema.expressions: 613 expression.this.set("expressions", None) 614 return super().create_sql(expression)
196class Presto(Dialect): 197 INDEX_OFFSET = 1 198 NULL_ORDERING = "nulls_are_last" 199 TIME_FORMAT = MySQL.TIME_FORMAT 200 TIME_MAPPING = MySQL.TIME_MAPPING 201 STRICT_STRING_CONCAT = True 202 SUPPORTS_SEMI_ANTI_JOIN = False 203 TYPED_DIVISION = True 204 TABLESAMPLE_SIZE_IS_PERCENT = True 205 LOG_BASE_FIRST: t.Optional[bool] = None 206 207 # https://github.com/trinodb/trino/issues/17 208 # https://github.com/trinodb/trino/issues/12289 209 # https://github.com/prestodb/presto/issues/2863 210 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 211 212 class Tokenizer(tokens.Tokenizer): 213 UNICODE_STRINGS = [ 214 (prefix + q, q) 215 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 216 for prefix in ("U&", "u&") 217 ] 218 219 KEYWORDS = { 220 **tokens.Tokenizer.KEYWORDS, 221 "START": TokenType.BEGIN, 222 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 223 "ROW": TokenType.STRUCT, 224 "IPADDRESS": TokenType.IPADDRESS, 225 "IPPREFIX": TokenType.IPPREFIX, 226 } 227 228 KEYWORDS.pop("QUALIFY") 229 230 class Parser(parser.Parser): 231 VALUES_FOLLOWED_BY_PAREN = False 232 233 FUNCTIONS = { 234 **parser.Parser.FUNCTIONS, 235 "ARBITRARY": exp.AnyValue.from_arg_list, 236 "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, 237 "APPROX_PERCENTILE": _build_approx_percentile, 238 "BITWISE_AND": binary_from_function(exp.BitwiseAnd), 239 "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 240 "BITWISE_OR": binary_from_function(exp.BitwiseOr), 241 "BITWISE_XOR": binary_from_function(exp.BitwiseXor), 242 "CARDINALITY": exp.ArraySize.from_arg_list, 243 "CONTAINS": exp.ArrayContains.from_arg_list, 244 "DATE_ADD": lambda args: exp.DateAdd( 245 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 246 ), 247 "DATE_DIFF": lambda args: exp.DateDiff( 248 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 249 ), 250 "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"), 251 "DATE_PARSE": 
build_formatted_time(exp.StrToTime, "presto"), 252 "DATE_TRUNC": date_trunc_to_time, 253 "ELEMENT_AT": lambda args: exp.Bracket( 254 this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True 255 ), 256 "FROM_HEX": exp.Unhex.from_arg_list, 257 "FROM_UNIXTIME": _build_from_unixtime, 258 "FROM_UTF8": lambda args: exp.Decode( 259 this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8") 260 ), 261 "NOW": exp.CurrentTimestamp.from_arg_list, 262 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 263 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 264 ), 265 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 266 this=seq_get(args, 0), 267 expression=seq_get(args, 1), 268 replacement=seq_get(args, 2) or exp.Literal.string(""), 269 ), 270 "ROW": exp.Struct.from_arg_list, 271 "SEQUENCE": exp.GenerateSeries.from_arg_list, 272 "SET_AGG": exp.ArrayUniqueAgg.from_arg_list, 273 "SPLIT_TO_MAP": exp.StrToMap.from_arg_list, 274 "STRPOS": lambda args: exp.StrPosition( 275 this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2) 276 ), 277 "TO_CHAR": _build_to_char, 278 "TO_HEX": exp.Hex.from_arg_list, 279 "TO_UNIXTIME": exp.TimeToUnix.from_arg_list, 280 "TO_UTF8": lambda args: exp.Encode( 281 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 282 ), 283 } 284 285 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 286 FUNCTION_PARSERS.pop("TRIM") 287 288 class Generator(generator.Generator): 289 INTERVAL_ALLOWS_PLURAL_FORM = False 290 JOIN_HINTS = False 291 TABLE_HINTS = False 292 QUERY_HINTS = False 293 IS_BOOL_ALLOWED = False 294 TZ_TO_WITH_TIME_ZONE = True 295 NVL2_SUPPORTED = False 296 STRUCT_DELIMITER = ("(", ")") 297 LIMIT_ONLY_LITERALS = True 298 SUPPORTS_SINGLE_ARG_CONCAT = False 299 LIKE_PROPERTY_INSIDE_SCHEMA = True 300 MULTI_ARG_DISTINCT = False 301 SUPPORTS_TO_NUMBER = False 302 303 PROPERTIES_LOCATION = { 304 **generator.Generator.PROPERTIES_LOCATION, 305 
exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, 306 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 307 } 308 309 TYPE_MAPPING = { 310 **generator.Generator.TYPE_MAPPING, 311 exp.DataType.Type.INT: "INTEGER", 312 exp.DataType.Type.FLOAT: "REAL", 313 exp.DataType.Type.BINARY: "VARBINARY", 314 exp.DataType.Type.TEXT: "VARCHAR", 315 exp.DataType.Type.TIMETZ: "TIME", 316 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 317 exp.DataType.Type.STRUCT: "ROW", 318 exp.DataType.Type.DATETIME: "TIMESTAMP", 319 exp.DataType.Type.DATETIME64: "TIMESTAMP", 320 } 321 322 TRANSFORMS = { 323 **generator.Generator.TRANSFORMS, 324 exp.AnyValue: rename_func("ARBITRARY"), 325 exp.ApproxDistinct: lambda self, e: self.func( 326 "APPROX_DISTINCT", e.this, e.args.get("accuracy") 327 ), 328 exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"), 329 exp.ArgMax: rename_func("MAX_BY"), 330 exp.ArgMin: rename_func("MIN_BY"), 331 exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", 332 exp.ArrayAny: rename_func("ANY_MATCH"), 333 exp.ArrayConcat: rename_func("CONCAT"), 334 exp.ArrayContains: rename_func("CONTAINS"), 335 exp.ArraySize: rename_func("CARDINALITY"), 336 exp.ArrayToString: rename_func("ARRAY_JOIN"), 337 exp.ArrayUniqueAgg: rename_func("SET_AGG"), 338 exp.AtTimeZone: rename_func("AT_TIMEZONE"), 339 exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), 340 exp.BitwiseLeftShift: lambda self, e: self.func( 341 "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression 342 ), 343 exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this), 344 exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression), 345 exp.BitwiseRightShift: lambda self, e: self.func( 346 "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression 347 ), 348 exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression), 349 exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), 350 exp.CurrentTimestamp: lambda *_: 
"CURRENT_TIMESTAMP", 351 exp.DateAdd: lambda self, e: self.func( 352 "DATE_ADD", 353 unit_to_str(e), 354 _to_int(e.expression), 355 e.this, 356 ), 357 exp.DateDiff: lambda self, e: self.func( 358 "DATE_DIFF", unit_to_str(e), e.expression, e.this 359 ), 360 exp.DateStrToDate: datestrtodate_sql, 361 exp.DateToDi: lambda self, 362 e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", 363 exp.DateSub: lambda self, e: self.func( 364 "DATE_ADD", 365 unit_to_str(e), 366 _to_int(e.expression * -1), 367 e.this, 368 ), 369 exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), 370 exp.DiToDate: lambda self, 371 e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", 372 exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), 373 exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'", 374 exp.First: _first_last_sql, 375 exp.FirstValue: _first_last_sql, 376 exp.FromTimeZone: lambda self, 377 e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", 378 exp.Group: transforms.preprocess([transforms.unalias_group]), 379 exp.GroupConcat: lambda self, e: self.func( 380 "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator") 381 ), 382 exp.Hex: rename_func("TO_HEX"), 383 exp.If: if_sql(), 384 exp.ILike: no_ilike_sql, 385 exp.Initcap: _initcap_sql, 386 exp.ParseJSON: rename_func("JSON_PARSE"), 387 exp.Last: _first_last_sql, 388 exp.LastValue: _first_last_sql, 389 exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this), 390 exp.Lateral: _explode_to_unnest_sql, 391 exp.Left: left_to_substring_sql, 392 exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"), 393 exp.LogicalAnd: rename_func("BOOL_AND"), 394 exp.LogicalOr: rename_func("BOOL_OR"), 395 exp.Pivot: no_pivot_sql, 396 exp.Quantile: _quantile_sql, 397 exp.RegexpExtract: regexp_extract_sql, 398 exp.Right: right_to_substring_sql, 399 exp.SafeDivide: no_safe_divide_sql, 400 
exp.Schema: _schema_sql, 401 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 402 exp.Select: transforms.preprocess( 403 [ 404 transforms.eliminate_qualify, 405 transforms.eliminate_distinct_on, 406 transforms.explode_to_unnest(1), 407 transforms.eliminate_semi_and_anti_joins, 408 ] 409 ), 410 exp.SortArray: _no_sort_array, 411 exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True), 412 exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", 413 exp.StrToMap: rename_func("SPLIT_TO_MAP"), 414 exp.StrToTime: _str_to_time_sql, 415 exp.StructExtract: struct_extract_sql, 416 exp.Table: transforms.preprocess([_unnest_sequence]), 417 exp.Timestamp: no_timestamp_sql, 418 exp.TimestampTrunc: timestamptrunc_sql, 419 exp.TimeStrToDate: timestrtotime_sql, 420 exp.TimeStrToTime: timestrtotime_sql, 421 exp.TimeStrToUnix: lambda self, e: self.func( 422 "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT) 423 ), 424 exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 425 exp.TimeToUnix: rename_func("TO_UNIXTIME"), 426 exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 427 exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), 428 exp.TsOrDiToDi: lambda self, 429 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", 430 exp.TsOrDsAdd: _ts_or_ds_add_sql, 431 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 432 exp.TsOrDsToDate: _ts_or_ds_to_date_sql, 433 exp.Unhex: rename_func("FROM_HEX"), 434 exp.UnixToStr: lambda self, 435 e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})", 436 exp.UnixToTime: _unix_to_time_sql, 437 exp.UnixToTimeStr: lambda self, 438 e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", 439 exp.VariancePop: rename_func("VAR_POP"), 440 exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), 441 exp.WithinGroup: 
transforms.preprocess( 442 [transforms.remove_within_group_for_percentiles] 443 ), 444 exp.Xor: bool_xor_sql, 445 } 446 447 RESERVED_KEYWORDS = { 448 "alter", 449 "and", 450 "as", 451 "between", 452 "by", 453 "case", 454 "cast", 455 "constraint", 456 "create", 457 "cross", 458 "current_time", 459 "current_timestamp", 460 "deallocate", 461 "delete", 462 "describe", 463 "distinct", 464 "drop", 465 "else", 466 "end", 467 "escape", 468 "except", 469 "execute", 470 "exists", 471 "extract", 472 "false", 473 "for", 474 "from", 475 "full", 476 "group", 477 "having", 478 "in", 479 "inner", 480 "insert", 481 "intersect", 482 "into", 483 "is", 484 "join", 485 "left", 486 "like", 487 "natural", 488 "not", 489 "null", 490 "on", 491 "or", 492 "order", 493 "outer", 494 "prepare", 495 "right", 496 "select", 497 "table", 498 "then", 499 "true", 500 "union", 501 "using", 502 "values", 503 "when", 504 "where", 505 "with", 506 } 507 508 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 509 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 
510 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 511 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 512 # which seems to be using the same time mapping as Hive, as per: 513 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 514 value_as_text = exp.cast(expression.this, exp.DataType.Type.TEXT) 515 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 516 parse_with_tz = self.func( 517 "PARSE_DATETIME", 518 value_as_text, 519 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 520 ) 521 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 522 return self.func("TO_UNIXTIME", coalesced) 523 524 def bracket_sql(self, expression: exp.Bracket) -> str: 525 if expression.args.get("safe"): 526 return self.func( 527 "ELEMENT_AT", 528 expression.this, 529 seq_get( 530 apply_index_offset( 531 expression.this, 532 expression.expressions, 533 1 - expression.args.get("offset", 0), 534 ), 535 0, 536 ), 537 ) 538 return super().bracket_sql(expression) 539 540 def struct_sql(self, expression: exp.Struct) -> str: 541 from sqlglot.optimizer.annotate_types import annotate_types 542 543 expression = annotate_types(expression) 544 values: t.List[str] = [] 545 schema: t.List[str] = [] 546 unknown_type = False 547 548 for e in expression.expressions: 549 if isinstance(e, exp.PropertyEQ): 550 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 551 unknown_type = True 552 else: 553 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 554 values.append(self.sql(e, "expression")) 555 else: 556 values.append(self.sql(e)) 557 558 size = len(expression.expressions) 559 560 if not size or len(schema) != size: 561 if unknown_type: 562 self.unsupported( 563 "Cannot convert untyped key-value definitions (try annotate_types)." 
564 ) 565 return self.func("ROW", *values) 566 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))" 567 568 def interval_sql(self, expression: exp.Interval) -> str: 569 if expression.this and expression.text("unit").upper().startswith("WEEK"): 570 return f"({expression.this.name} * INTERVAL '7' DAY)" 571 return super().interval_sql(expression) 572 573 def transaction_sql(self, expression: exp.Transaction) -> str: 574 modes = expression.args.get("modes") 575 modes = f" {', '.join(modes)}" if modes else "" 576 return f"START TRANSACTION{modes}" 577 578 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 579 start = expression.args["start"] 580 end = expression.args["end"] 581 step = expression.args.get("step") 582 583 if isinstance(start, exp.Cast): 584 target_type = start.to 585 elif isinstance(end, exp.Cast): 586 target_type = end.to 587 else: 588 target_type = None 589 590 if target_type and target_type.is_type("timestamp"): 591 if target_type is start.to: 592 end = exp.cast(end, target_type) 593 else: 594 start = exp.cast(start, target_type) 595 596 return self.func("SEQUENCE", start, end, step) 597 598 def offset_limit_modifiers( 599 self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit] 600 ) -> t.List[str]: 601 return [ 602 self.sql(expression, "offset"), 603 self.sql(limit), 604 ] 605 606 def create_sql(self, expression: exp.Create) -> str: 607 """ 608 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 609 so we need to remove them 610 """ 611 kind = expression.args["kind"] 612 schema = expression.this 613 if kind == "VIEW" and schema.expressions: 614 expression.this.set("expressions", None) 615 return super().create_sql(expression)
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Associates this dialect's time formats with their equivalent Python strftime
formats.
Whether the behavior of a / b depends on the types of a and b.
False means a / b is always float division.
True means a / b is integer division if both a and b are integers.
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG).
Specifies the strategy according to which identifiers should be normalized.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- SUPPORTS_USER_DEFINED_TYPES
- NORMALIZE_FUNCTIONS
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- COPY_PARAMS_ARE_CSV
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
212 class Tokenizer(tokens.Tokenizer): 213 UNICODE_STRINGS = [ 214 (prefix + q, q) 215 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 216 for prefix in ("U&", "u&") 217 ] 218 219 KEYWORDS = { 220 **tokens.Tokenizer.KEYWORDS, 221 "START": TokenType.BEGIN, 222 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 223 "ROW": TokenType.STRUCT, 224 "IPADDRESS": TokenType.IPADDRESS, 225 "IPPREFIX": TokenType.IPPREFIX, 226 } 227 228 KEYWORDS.pop("QUALIFY")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    """Parser for Presto SQL: maps Presto function names onto sqlglot expressions."""

    # Presto allows VALUES to be followed by expressions without parentheses.
    VALUES_FOLLOWED_BY_PAREN = False

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ARBITRARY": exp.AnyValue.from_arg_list,
        "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
        "APPROX_PERCENTILE": _build_approx_percentile,
        "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
        "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
        "BITWISE_OR": binary_from_function(exp.BitwiseOr),
        "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
        "CARDINALITY": exp.ArraySize.from_arg_list,
        "CONTAINS": exp.ArrayContains.from_arg_list,
        # Presto's DATE_ADD/DATE_DIFF take (unit, value, date) — note the
        # reversed argument order relative to the sqlglot expression fields.
        "DATE_ADD": lambda args: exp.DateAdd(
            this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
        ),
        "DATE_DIFF": lambda args: exp.DateDiff(
            this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
        ),
        "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
        "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
        "DATE_TRUNC": date_trunc_to_time,
        # ELEMENT_AT is a 1-based, out-of-bounds-safe subscript.
        "ELEMENT_AT": lambda args: exp.Bracket(
            this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
        ),
        "FROM_HEX": exp.Unhex.from_arg_list,
        "FROM_UNIXTIME": _build_from_unixtime,
        "FROM_UTF8": lambda args: exp.Decode(
            this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
        ),
        "NOW": exp.CurrentTimestamp.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            replacement=seq_get(args, 2) or exp.Literal.string(""),
        ),
        "ROW": exp.Struct.from_arg_list,
        "SEQUENCE": exp.GenerateSeries.from_arg_list,
        "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
        "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
        "STRPOS": lambda args: exp.StrPosition(
            this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2)
        ),
        "TO_CHAR": _build_to_char,
        "TO_HEX": exp.Hex.from_arg_list,
        "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
        "TO_UTF8": lambda args: exp.Encode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
    }

    # Presto's TRIM uses plain function-call syntax, so the special-cased
    # TRIM(... FROM ...) parser is removed.
    FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
    FUNCTION_PARSERS.pop("TRIM")
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    """Generator for Presto SQL.

    Mostly declarative: capability flags, type/transform mappings and reserved
    keywords, plus a handful of method overrides for constructs that Presto
    spells differently from the generic generator.
    """

    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    IS_BOOL_ALLOWED = False
    TZ_TO_WITH_TIME_ZONE = True
    NVL2_SUPPORTED = False
    STRUCT_DELIMITER = ("(", ")")
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_SINGLE_ARG_CONCAT = False
    LIKE_PROPERTY_INSIDE_SCHEMA = True
    MULTI_ARG_DISTINCT = False
    SUPPORTS_TO_NUMBER = False

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.INT: "INTEGER",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.BINARY: "VARBINARY",
        exp.DataType.Type.TEXT: "VARCHAR",
        exp.DataType.Type.TIMETZ: "TIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.STRUCT: "ROW",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.DATETIME64: "TIMESTAMP",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("ARBITRARY"),
        exp.ApproxDistinct: lambda self, e: self.func(
            "APPROX_DISTINCT", e.this, e.args.get("accuracy")
        ),
        exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
        exp.ArrayAny: rename_func("ANY_MATCH"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayContains: rename_func("CONTAINS"),
        exp.ArraySize: rename_func("CARDINALITY"),
        exp.ArrayToString: rename_func("ARRAY_JOIN"),
        exp.ArrayUniqueAgg: rename_func("SET_AGG"),
        exp.AtTimeZone: rename_func("AT_TIMEZONE"),
        exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
        exp.BitwiseLeftShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
        ),
        exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
        exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
        exp.BitwiseRightShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
        ),
        exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
        exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        # DATE_ADD/DATE_DIFF take (unit, value, date) in Presto.
        exp.DateAdd: lambda self, e: self.func(
            "DATE_ADD",
            unit_to_str(e),
            _to_int(e.expression),
            e.this,
        ),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", unit_to_str(e), e.expression, e.this
        ),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
        # Presto has no DATE_SUB; negate the value and reuse DATE_ADD.
        exp.DateSub: lambda self, e: self.func(
            "DATE_ADD",
            unit_to_str(e),
            _to_int(e.expression * -1),
            e.this,
        ),
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
        exp.DiToDate: lambda self,
        e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
        exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
        exp.First: _first_last_sql,
        exp.FirstValue: _first_last_sql,
        exp.FromTimeZone: lambda self,
        e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.GroupConcat: lambda self, e: self.func(
            "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator")
        ),
        exp.Hex: rename_func("TO_HEX"),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.Initcap: _initcap_sql,
        exp.ParseJSON: rename_func("JSON_PARSE"),
        exp.Last: _first_last_sql,
        exp.LastValue: _first_last_sql,
        exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
        exp.Lateral: _explode_to_unnest_sql,
        exp.Left: left_to_substring_sql,
        exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpExtract: regexp_extract_sql,
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.Schema: _schema_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.explode_to_unnest(1),
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SortArray: _no_sort_array,
        exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True),
        exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
        exp.StrToMap: rename_func("SPLIT_TO_MAP"),
        exp.StrToTime: _str_to_time_sql,
        exp.StructExtract: struct_extract_sql,
        exp.Table: transforms.preprocess([_unnest_sequence]),
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampTrunc: timestamptrunc_sql,
        exp.TimeStrToDate: timestrtotime_sql,
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
        ),
        exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("TO_UNIXTIME"),
        exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixToStr: lambda self,
        e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self,
        e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
        exp.WithinGroup: transforms.preprocess(
            [transforms.remove_within_group_for_percentiles]
        ),
        exp.Xor: bool_xor_sql,
    }

    # Identifiers that must be quoted when used as names in generated SQL.
    RESERVED_KEYWORDS = {
        "alter",
        "and",
        "as",
        "between",
        "by",
        "case",
        "cast",
        "constraint",
        "create",
        "cross",
        "current_time",
        "current_timestamp",
        "deallocate",
        "delete",
        "describe",
        "distinct",
        "drop",
        "else",
        "end",
        "escape",
        "except",
        "execute",
        "exists",
        "extract",
        "false",
        "for",
        "from",
        "full",
        "group",
        "having",
        "in",
        "inner",
        "insert",
        "intersect",
        "into",
        "is",
        "join",
        "left",
        "like",
        "natural",
        "not",
        "null",
        "on",
        "or",
        "order",
        "outer",
        "prepare",
        "right",
        "select",
        "table",
        "then",
        "true",
        "union",
        "using",
        "values",
        "when",
        "where",
        "with",
    }

    def strtounix_sql(self, expression: exp.StrToUnix) -> str:
        """Render StrToUnix as TO_UNIXTIME over a parsed timestamp."""
        # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
        # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
        # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
        # which seems to be using the same time mapping as Hive, as per:
        # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
        value_as_text = exp.cast(expression.this, exp.DataType.Type.TEXT)
        parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))
        parse_with_tz = self.func(
            "PARSE_DATETIME",
            value_as_text,
            self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
        )
        coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
        return self.func("TO_UNIXTIME", coalesced)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        """Render "safe" subscripts with ELEMENT_AT (1-based, no out-of-bounds error)."""
        if expression.args.get("safe"):
            return self.func(
                "ELEMENT_AT",
                expression.this,
                # Re-offset the index so the generated index is 1-based.
                seq_get(
                    apply_index_offset(
                        expression.this,
                        expression.expressions,
                        1 - expression.args.get("offset", 0),
                    ),
                    0,
                ),
            )
        return super().bracket_sql(expression)

    def struct_sql(self, expression: exp.Struct) -> str:
        """Render a struct as CAST(ROW(...) AS ROW(name type, ...)) when every
        field has a known type, otherwise fall back to a plain ROW(...) call."""
        from sqlglot.optimizer.annotate_types import annotate_types

        expression = annotate_types(expression)
        values: t.List[str] = []  # rendered field values
        schema: t.List[str] = []  # rendered "name type" pairs for the ROW schema
        unknown_type = False

        for e in expression.expressions:
            if isinstance(e, exp.PropertyEQ):
                if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                    unknown_type = True
                else:
                    schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                values.append(self.sql(e, "expression"))
            else:
                values.append(self.sql(e))

        size = len(expression.expressions)

        # Without a complete schema the typed CAST form cannot be produced.
        if not size or len(schema) != size:
            if unknown_type:
                self.unsupported(
                    "Cannot convert untyped key-value definitions (try annotate_types)."
                )
            return self.func("ROW", *values)
        return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

    def interval_sql(self, expression: exp.Interval) -> str:
        """Presto has no WEEK interval unit; express it as (n * INTERVAL '7' DAY)."""
        if expression.this and expression.text("unit").upper().startswith("WEEK"):
            return f"({expression.this.name} * INTERVAL '7' DAY)"
        return super().interval_sql(expression)

    def transaction_sql(self, expression: exp.Transaction) -> str:
        """Presto spells BEGIN as START TRANSACTION (optionally with modes)."""
        modes = expression.args.get("modes")
        modes = f" {', '.join(modes)}" if modes else ""
        return f"START TRANSACTION{modes}"

    def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
        """Render GenerateSeries as SEQUENCE(start, end[, step]), casting the
        uncast endpoint when the other endpoint is an explicit timestamp cast."""
        start = expression.args["start"]
        end = expression.args["end"]
        step = expression.args.get("step")

        if isinstance(start, exp.Cast):
            target_type = start.to
        elif isinstance(end, exp.Cast):
            target_type = end.to
        else:
            target_type = None

        if target_type and target_type.is_type("timestamp"):
            # NOTE(review): `.to` is only defined on Cast nodes; when `end` is
            # the Cast and `start` is not, `start.to` looks crash-prone — confirm.
            if target_type is start.to:
                end = exp.cast(end, target_type)
            else:
                start = exp.cast(start, target_type)

        return self.func("SEQUENCE", start, end, step)

    def offset_limit_modifiers(
        self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
    ) -> t.List[str]:
        # Presto requires OFFSET to be emitted before LIMIT.
        return [
            self.sql(expression, "offset"),
            self.sql(limit),
        ]

    def create_sql(self, expression: exp.Create) -> str:
        """
        Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
        so we need to remove them
        """
        kind = expression.args["kind"]
        schema = expression.this
        if kind == "VIEW" and schema.expressions:
            expression.this.set("expressions", None)
        return super().create_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
508 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 509 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 510 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 511 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 512 # which seems to be using the same time mapping as Hive, as per: 513 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 514 value_as_text = exp.cast(expression.this, exp.DataType.Type.TEXT) 515 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 516 parse_with_tz = self.func( 517 "PARSE_DATETIME", 518 value_as_text, 519 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 520 ) 521 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 522 return self.func("TO_UNIXTIME", coalesced)
524 def bracket_sql(self, expression: exp.Bracket) -> str: 525 if expression.args.get("safe"): 526 return self.func( 527 "ELEMENT_AT", 528 expression.this, 529 seq_get( 530 apply_index_offset( 531 expression.this, 532 expression.expressions, 533 1 - expression.args.get("offset", 0), 534 ), 535 0, 536 ), 537 ) 538 return super().bracket_sql(expression)
540 def struct_sql(self, expression: exp.Struct) -> str: 541 from sqlglot.optimizer.annotate_types import annotate_types 542 543 expression = annotate_types(expression) 544 values: t.List[str] = [] 545 schema: t.List[str] = [] 546 unknown_type = False 547 548 for e in expression.expressions: 549 if isinstance(e, exp.PropertyEQ): 550 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 551 unknown_type = True 552 else: 553 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 554 values.append(self.sql(e, "expression")) 555 else: 556 values.append(self.sql(e)) 557 558 size = len(expression.expressions) 559 560 if not size or len(schema) != size: 561 if unknown_type: 562 self.unsupported( 563 "Cannot convert untyped key-value definitions (try annotate_types)." 564 ) 565 return self.func("ROW", *values) 566 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"
578 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 579 start = expression.args["start"] 580 end = expression.args["end"] 581 step = expression.args.get("step") 582 583 if isinstance(start, exp.Cast): 584 target_type = start.to 585 elif isinstance(end, exp.Cast): 586 target_type = end.to 587 else: 588 target_type = None 589 590 if target_type and target_type.is_type("timestamp"): 591 if target_type is start.to: 592 end = exp.cast(end, target_type) 593 else: 594 start = exp.cast(start, target_type) 595 596 return self.func("SEQUENCE", start, end, step)
606 def create_sql(self, expression: exp.Create) -> str: 607 """ 608 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 609 so we need to remove them 610 """ 611 kind = expression.args["kind"] 612 schema = expression.this 613 if kind == "VIEW" and schema.expressions: 614 expression.this.set("expressions", None) 615 return super().create_sql(expression)
Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)`, where `(cola)` is the expression), so we need to remove them.
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTED_JSON_PATH_PARTS
- CAN_IMPLEMENT_ARRAY_ANY
- OUTER_UNION_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql