-
Notifications
You must be signed in to change notification settings - Fork 13
Fix LookML filter-expression to SQL conversion #241
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,6 @@ | ||
| """LookML adapter for importing Looker semantic models.""" | ||
|
|
||
| import logging | ||
| import re | ||
| from pathlib import Path | ||
|
|
||
|
|
@@ -10,6 +11,8 @@ | |
| from sidemantic.core.relationship import Relationship | ||
| from sidemantic.core.semantic_graph import SemanticGraph | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| def _import_lkml(): | ||
| """Lazily import lkml, raising a clear error if not installed.""" | ||
|
|
@@ -193,107 +196,199 @@ def replace_ref(match: re.Match) -> str: | |
|
|
||
| return resolved | ||
|
|
||
| def _convert_lookml_filter_to_sql(self, field: str, value: str) -> str: | ||
| """Convert a LookML filter value to SQL condition. | ||
|
|
||
| Handles LookML filter syntax: | ||
| - "value" -> field = 'value' | ||
| - "val1,val2,val3" -> field IN ('val1', 'val2', 'val3') | ||
| - "-value" -> field != 'value' (negation) | ||
| - "-val1,-val2" -> field NOT IN ('val1', 'val2') | ||
| - "yes"/"no" -> field = true/false (for yesno dimensions) | ||
| - ">100", ">=50", "<10", "<=5", "!=0" -> numeric comparisons | ||
| - "%pattern%" -> field LIKE '%pattern%' (wildcards) | ||
| - "NULL" -> field IS NULL | ||
| - "-NULL" -> field IS NOT NULL | ||
| - "EMPTY" -> field = '' | ||
| - "-EMPTY" -> field != '' | ||
|
|
||
| Args: | ||
| field: The field name | ||
| value: The LookML filter value | ||
| @staticmethod | ||
| def _filter_is_number(s: str) -> bool: | ||
| """True if ``s`` is a plain numeric literal (incl. signed / decimal).""" | ||
| try: | ||
| float(s) | ||
| return True | ||
| except ValueError: | ||
| return False | ||
|
|
||
| # Tokens that mark a value as a LookML date/interval filter expression | ||
| # (e.g. "last 7 days", "3 months ago", "this year"). These are not yet | ||
| # translated to SQL; we warn rather than silently string-comparing them. | ||
| _DATE_FILTER_RE = re.compile( | ||
| r"(?i)\b(ago|day|days|week|weeks|month|months|year|years|quarter|quarters|" | ||
| r"hour|hours|minute|minutes|second|seconds|today|yesterday|tomorrow|now|fiscal|" | ||
| r"week|month|year)\b" | ||
| ) | ||
|
|
||
| Returns: | ||
| SQL condition string | ||
| def _convert_lookml_filter_to_sql(self, field: str, value: str) -> str: | ||
| """Convert a LookML filter value to a SQL condition. | ||
|
|
||
| Implements the representable parts of Looker's filter expression | ||
| language (https://cloud.google.com/looker/docs/filter-expressions): | ||
|
|
||
| - ``value`` -> ``field = 'value'`` (single quotes escaped) | ||
| - ``a,b,c`` -> ``field IN ('a','b','c')`` (numeric: unquoted) | ||
| - ``-value`` / ``not value`` -> ``field <> 'value'`` | ||
| - ``-a,-b`` -> ``field NOT IN ('a','b')`` | ||
| - ``-%pat%`` / ``not %pat%`` -> ``field NOT LIKE '%pat%'`` | ||
| - ``yes`` / ``no`` -> ``field = true`` / ``field = false`` | ||
| - ``>100``, ``>=5``, ``<=5``, ``<10``, ``!=0``, ``<>0`` -> comparisons | ||
| - ``5 to 10`` -> ``field >= 5 AND field <= 10`` (open: ``5 to`` / ``to 10``) | ||
| - ``[1,10]`` ``(1,10)`` ``[1,10)`` ``(1,10]`` -> inclusive/exclusive ranges | ||
| - ``%pat%`` / ``_at`` -> ``field LIKE '%pat%'`` | ||
| - ``NULL`` / ``-NULL`` -> ``IS NULL`` / ``IS NOT NULL`` | ||
| - ``EMPTY`` -> ``(field IS NULL OR field = '')`` (and the negation) | ||
| - ``before X`` / ``after X`` -> ``field < X`` / ``field > X`` | ||
| - mixed comma lists (operators/wildcards) -> includes OR'd, excludes AND'd | ||
|
|
||
| Date/interval expressions (``last 7 days``, ``3 months ago`` ...) are not | ||
| yet translated: they emit a literal equality but log a warning so the | ||
| silent zero-row match is at least surfaced. | ||
| """ | ||
| # Handle NULL special values | ||
| if value.upper() == "NULL": | ||
| return f"{{model}}.{field} IS NULL" | ||
| if value.upper() == "-NULL": | ||
| return f"{{model}}.{field} IS NOT NULL" | ||
|
|
||
| # Handle EMPTY special values | ||
| if value.upper() == "EMPTY": | ||
| return f"{{model}}.{field} = ''" | ||
| if value.upper() == "-EMPTY": | ||
| return f"{{model}}.{field} != ''" | ||
|
|
||
| # Handle yes/no boolean values | ||
| if value.lower() == "yes": | ||
| return f"{{model}}.{field} = true" | ||
| if value.lower() == "no": | ||
| return f"{{model}}.{field} = false" | ||
|
|
||
| # Check if this is a comma-separated list of values (OR condition) | ||
| # But be careful: ">100,<200" is two comparison operators, not a list | ||
| if "," in value: | ||
| parts = [p.strip() for p in value.split(",")] | ||
|
|
||
| # Check if all parts are negations (NOT IN) | ||
| if all(p.startswith("-") for p in parts): | ||
| # Remove the - prefix from each | ||
| clean_parts = [p[1:] for p in parts] | ||
| # Check if they're all simple strings (not operators) | ||
| if all(not re.match(r"^(>=|<=|!=|<>|>|<)", p) for p in clean_parts): | ||
| quoted = ", ".join(f"'{p}'" for p in clean_parts) | ||
| return f"{{model}}.{field} NOT IN ({quoted})" | ||
|
|
||
| # Check if all parts are simple values (no operators) -> IN clause | ||
| if all(not p.startswith("-") and not re.match(r"^(>=|<=|!=|<>|>|<)", p) for p in parts): | ||
| # Check if all parts are numeric | ||
| if all(p.replace(".", "").replace("-", "").isdigit() for p in parts): | ||
| # Numeric IN clause (no quotes) | ||
| return f"{{model}}.{field} IN ({', '.join(parts)})" | ||
| else: | ||
| # String IN clause (with quotes) | ||
| quoted = ", ".join(f"'{p}'" for p in parts) | ||
| return f"{{model}}.{field} IN ({quoted})" | ||
|
|
||
| # Mixed operators - this is actually multiple filter conditions | ||
| # LookML doesn't really support this in a single filter value | ||
| # Fall through to single value handling (will be slightly wrong but safer) | ||
|
|
||
| # Handle negation prefix for single values | ||
| if value.startswith("-") and not re.match(r"^-(>=|<=|!=|<>|>|<|\d)", value): | ||
| negated_value = value[1:] | ||
| if negated_value.replace(".", "").replace("-", "").isdigit(): | ||
| return f"{{model}}.{field} != {negated_value}" | ||
| else: | ||
| return f"{{model}}.{field} != '{negated_value}'" | ||
|
|
||
| # Handle comparison operators: ">1000", "<=100", ">=5", "<10", "!=0" | ||
| if match := re.match(r"^(>=|<=|!=|<>|>|<)(.+)$", value): | ||
| operator, operand = match.groups() | ||
| operand = operand.strip() | ||
| # Normalize <> to != | ||
| if operator == "<>": | ||
| operator = "!=" | ||
| # Check if operand is numeric | ||
| if operand.replace(".", "").replace("-", "").isdigit(): | ||
| return f"{{model}}.{field} {operator} {operand}" | ||
| else: | ||
| return f"{{model}}.{field} {operator} '{operand}'" | ||
|
|
||
| # Handle wildcard patterns (LIKE) | ||
| if "%" in value or "_" in value: | ||
| return f"{{model}}.{field} LIKE '{value}'" | ||
| col = f"{{model}}.{field}" | ||
|
|
||
| def q(s: str) -> str: | ||
| return "'" + s.replace("'", "''") + "'" | ||
|
|
||
| is_number = self._filter_is_number | ||
|
|
||
| def single(v: str) -> str: | ||
| """Convert one (non-list) LookML filter token to a SQL condition.""" | ||
| v = v.strip() | ||
| up = v.upper() | ||
| if up == "NULL": | ||
| return f"{col} IS NULL" | ||
| if up == "-NULL": | ||
| return f"{col} IS NOT NULL" | ||
| if up == "EMPTY": | ||
| return f"({col} IS NULL OR {col} = '')" | ||
| if up == "-EMPTY": | ||
| return f"({col} IS NOT NULL AND {col} <> '')" | ||
| if v.lower() == "yes": | ||
| return f"{col} = true" | ||
| if v.lower() == "no": | ||
| return f"{col} = false" | ||
|
|
||
| # "not X" / "NOT X" negation | ||
| nm = re.match(r"(?i)^not\s+(.+)$", v) | ||
| neg = nm.group(1).strip() if nm else None | ||
| # single-value "-X" negation (but not a negative number / operator) | ||
| if neg is None and v.startswith("-") and len(v) > 1 and not re.match(r"^-(>=|<=|!=|<>|>|<|\d|\.)", v): | ||
| neg = v[1:] | ||
| if neg is not None: | ||
| if "%" in neg or "_" in neg: | ||
| return f"{col} NOT LIKE {q(neg)}" | ||
| if is_number(neg): | ||
| return f"{col} != {neg}" | ||
| return f"{col} != {q(neg)}" | ||
|
|
||
| # before / after <value> (date or numeric bound). Per Looker, "before" | ||
| # is exclusive and "after" is inclusive (on or after the boundary). | ||
| bm = re.match(r"(?i)^(before|after)\s+(.+)$", v) | ||
| if bm: | ||
| op = "<" if bm.group(1).lower() == "before" else ">=" | ||
| operand = bm.group(2).strip() | ||
| rhs = operand if is_number(operand) else q(operand) | ||
| return f"{col} {op} {rhs}" | ||
|
|
||
| # comparison operators ">=", "<=", "!=", "<>", ">", "<" | ||
| cm = re.match(r"^(>=|<=|!=|<>|>|<)\s*(.+)$", v) | ||
| if cm: | ||
| operator, operand = cm.group(1), cm.group(2).strip() | ||
|
Comment on lines
+289
to
+291
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
When a numeric filter uses Looker's documented Useful? React with 👍 / 👎. |
||
| if operator == "<>": | ||
| operator = "!=" | ||
| rhs = operand if is_number(operand) else q(operand) | ||
| return f"{col} {operator} {rhs}" | ||
|
|
||
| # wildcard LIKE | ||
| if "%" in v or "_" in v: | ||
| return f"{col} LIKE {q(v)}" | ||
|
|
||
| # numeric equality (incl. negative numbers) | ||
| if is_number(v): | ||
| return f"{col} = {v}" | ||
|
|
||
| # date/interval expression we cannot translate yet -> warn instead of | ||
| # silently emitting a string equality that matches zero rows. | ||
| if self._DATE_FILTER_RE.search(v): | ||
| logger.warning( | ||
| "LookML date/interval filter %r on field %r is not translated to SQL; " | ||
| "emitting a literal equality (will not match as Looker intends).", | ||
| v, | ||
| field, | ||
| ) | ||
|
|
||
| # Handle numeric values | ||
| if value.replace(".", "").replace("-", "").isdigit(): | ||
| return f"{{model}}.{field} = {value}" | ||
| return f"{col} = {q(v)}" | ||
|
|
||
| value = (value or "").strip() | ||
|
|
||
| # Numeric interval: [a,b] (a,b) [a,b) (a,b] | ||
| im = re.match(r"^([\[\(])\s*(-?\d*\.?\d*)\s*,\s*(-?\d*\.?\d*)\s*([\]\)])$", value) | ||
| if im: | ||
| lb, lo, hi, rb = im.groups() | ||
| conds = [] | ||
| if lo != "": | ||
| conds.append(f"{col} >{'=' if lb == '[' else ''} {lo}") | ||
| if hi != "": | ||
| conds.append(f"{col} <{'=' if rb == ']' else ''} {hi}") | ||
| if conds: | ||
| return conds[0] if len(conds) == 1 else "(" + " AND ".join(conds) + ")" | ||
|
|
||
| # Numeric range: "a to b", "a to", "to b" | ||
| rm = re.match(r"(?i)^(-?\d*\.?\d*)\s*to\s*(-?\d*\.?\d*)$", value) | ||
| if rm: | ||
| lo, hi = rm.group(1), rm.group(2) | ||
| conds = [] | ||
| if lo != "": | ||
| conds.append(f"{col} >= {lo}") | ||
| if hi != "": | ||
| conds.append(f"{col} <= {hi}") | ||
| if conds: | ||
| return conds[0] if len(conds) == 1 else "(" + " AND ".join(conds) + ")" | ||
|
|
||
| # Comma-separated list | ||
| if "," in value: | ||
| parts = [p.strip() for p in value.split(",") if p.strip() != ""] | ||
|
|
||
| def is_plain(p: str) -> bool: | ||
| return ( | ||
| not p.startswith("-") | ||
| and not re.match(r"^(>=|<=|!=|<>|>|<)", p) | ||
| and not re.match(r"(?i)^not\s", p) | ||
| and "%" not in p | ||
| and "_" not in p | ||
| ) | ||
|
|
||
| # Default: string equality | ||
| return f"{{model}}.{field} = '{value}'" | ||
| def is_neg_plain(p: str) -> bool: | ||
| return p.startswith("-") and "%" not in p[1:] and "_" not in p[1:] and not re.match(r"^-(\d|\.)", p) | ||
|
|
||
| # A single leading "NOT" negates the whole list: "NOT 1, 2, 3" -> NOT IN (1, 2, 3). | ||
| lead_not = re.match(r"(?i)^not\s+(.+)$", value) | ||
| if lead_not: | ||
| neg_parts = [p.strip() for p in lead_not.group(1).split(",") if p.strip() != ""] | ||
| if neg_parts and all(is_plain(p) for p in neg_parts): | ||
| if all(is_number(p) for p in neg_parts): | ||
| return f"{col} NOT IN ({', '.join(neg_parts)})" | ||
| return f"{col} NOT IN ({', '.join(q(p) for p in neg_parts)})" | ||
|
|
||
| if parts and all(is_plain(p) for p in parts): | ||
| if all(is_number(p) for p in parts): | ||
| return f"{col} IN ({', '.join(parts)})" | ||
| return f"{col} IN ({', '.join(q(p) for p in parts)})" | ||
|
|
||
| if parts and all(is_neg_plain(p) for p in parts): | ||
| clean = [p[1:] for p in parts] | ||
| if all(is_number(p) for p in clean): | ||
| return f"{col} NOT IN ({', '.join(clean)})" | ||
| return f"{col} NOT IN ({', '.join(q(p) for p in clean)})" | ||
|
|
||
| # Mixed list: OR the includes together, AND the exclusions. | ||
| includes, excludes = [], [] | ||
| for p in parts: | ||
| cond = single(p) | ||
| is_exclude = (p.startswith("-") and not re.match(r"^-(\d|\.)", p)) or bool(re.match(r"(?i)^not\s", p)) | ||
| (excludes if is_exclude else includes).append(cond) | ||
|
Comment on lines
+381
to
+384
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Looker's numeric filter syntax treats Useful? React with 👍 / 👎. |
||
| clauses = [] | ||
| if includes: | ||
| clauses.append("(" + " OR ".join(includes) + ")" if len(includes) > 1 else includes[0]) | ||
| clauses.extend(excludes) | ||
| return "(" + " AND ".join(clauses) + ")" if len(clauses) > 1 else clauses[0] | ||
|
|
||
| return single(value) | ||
|
|
||
| def _parse_view(self, view_def: dict) -> Model | None: | ||
| """Parse LookML view into Sidemantic model. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When a LookML date filter uses documented relative bounds such as
before 3 days agoorafter Monday, this branch treats the operand as a literal SQL value before the date-expression warning can run, producing conditions like{model}.created_date < '3 days ago'. On dialects that don't parse those English phrases as dates, imported measures either fail or count the wrong rows; detect relative operands and leave them on the warning/fallback path instead of translating them as absolute bounds.Useful? React with 👍 / 👎.