diff --git a/sidemantic/adapters/lookml.py b/sidemantic/adapters/lookml.py index c99912c3..23b510d1 100644 --- a/sidemantic/adapters/lookml.py +++ b/sidemantic/adapters/lookml.py @@ -952,7 +952,9 @@ def _parse_dimension_group( # Timeframes that truncate a timestamp to a coarser time grain. These keep # type="time" with a Sidemantic granularity so they behave as time dimensions. _TIME_GRANULARITY_TIMEFRAMES = { - "time": "hour", + # Looker's "time" timeframe keeps full timestamp precision (to the second); + # truncating to the hour silently collapses sub-hour rows. + "time": "second", "time_of_day": "hour", "hour": "hour", "minute": "minute", @@ -1012,7 +1014,10 @@ def _build_timeframe_dimension( label = dim_group_def.get("label") description = dim_group_def.get("description") - # Time-truncation timeframes -> time dimension with granularity. + # Time-truncation timeframes -> time dimension with granularity. Remember the + # original LookML timeframe so export can round-trip it exactly: several + # timeframes (e.g. "time" and "second") map to the same sidemantic granularity + # and would otherwise collapse to one on export. granularity = self._TIME_GRANULARITY_TIMEFRAMES.get(timeframe) if granularity is not None: return Dimension( @@ -1022,6 +1027,7 @@ def _build_timeframe_dimension( granularity=granularity, label=label, description=description, + meta={"lookml_timeframe": timeframe}, ) # Fiscal quarter/year truncations honoring fiscal_month_offset. The base @@ -2082,25 +2088,66 @@ def _export_view(self, model: Model, graph: SemanticGraph) -> dict: # Group time dimensions by base name time_dims = [d for d in model.dimensions if d.type == "time" and d.granularity] if time_dims: - # Group by base name and collect all timeframes - from collections import defaultdict + # Group by (base name, source SQL): same-prefix time dimensions backed by + # DIFFERENT columns are not one dimension_group, and merging them would + # rewire a field to the wrong source on round-trip. + from collections import Counter - base_name_groups = defaultdict(list) + base_name_groups: dict[tuple[str, str | None], list] = {} for dim in time_dims: - # Extract base name (remove _date, _week, etc suffix) + # Extract the base name by stripping the timeframe suffix. For imported + # dims, use the EXACT stored LookML timeframe (covers every mapped + # timeframe, e.g. millisecond/microsecond, not just the common few); + # for native dims fall back to the common suffix list. base_name = dim.name - for suffix in ["_date", "_week", "_month", "_quarter", "_year", "_time", "_hour"]: - if dim.name.endswith(suffix): - base_name = dim.name[: -len(suffix)] - break - base_name_groups[base_name].append(dim) + stored_tf = (dim.meta or {}).get("lookml_timeframe") + if stored_tf and dim.name.endswith("_" + stored_tf): + base_name = dim.name[: -(len(stored_tf) + 1)] + else: + for suffix in [ + "_date", + "_week", + "_month", + "_quarter", + "_year", + "_time", + "_hour", + "_minute", + "_second", + ]: + if dim.name.endswith(suffix): + base_name = dim.name[: -len(suffix)] + break + base_name_groups.setdefault((base_name, dim.sql), []).append(dim) + + # When one base name spans multiple source SQLs, give the extra groups a + # unique, suffix-free name so they neither collide nor double-suffix on + # re-import (the first keeps the base name; later ones get base_2, base_3...). + # The synthetic name must avoid every other group's base AND names already + # assigned, so e.g. an unrelated "started_2" group doesn't collide. + base_name_counts = Counter(bn for (bn, _) in base_name_groups) + all_bases = {bn for (bn, _) in base_name_groups} + assigned_names: set[str] = set() dimension_groups = [] - for base_name, dims in base_name_groups.items(): - # Map granularity to timeframe + for (base_name, group_sql), dims in base_name_groups.items(): + if base_name_counts[base_name] == 1 or base_name not in assigned_names: + group_name = base_name + else: + n = 2 + while f"{base_name}_{n}" in all_bases or f"{base_name}_{n}" in assigned_names: + n += 1 + group_name = f"{base_name}_{n}" + assigned_names.add(group_name) + # Map granularity to timeframe. Used as a fallback for dimensions not + # imported from LookML (which carry no meta['lookml_timeframe']); a + # second-grain dimension maps to the LookML `second` timeframe so native + # `*_second` fields round-trip instead of collapsing to `time`. granularity_mapping = { - "hour": "time", + "second": "second", + "minute": "minute", + "hour": "hour", "day": "date", "week": "week", "month": "month", @@ -2108,24 +2155,37 @@ def _export_view(self, model: Model, graph: SemanticGraph) -> dict: "year": "year", } - # Collect all timeframes for this base name + # Collect all timeframes for this base name, de-duplicated. LookML's + # "time" and "second" timeframes both import to second granularity + # (sidemantic has no separate "time" grain), so without dedup a group + # containing both would emit duplicate timeframes (e.g. [time, time]) + # and drop a field on re-import. timeframes = [] - sql = None + seen_timeframes = set() for dim in dims: - timeframe = granularity_mapping.get(dim.granularity, "date") - timeframes.append(timeframe) - if dim.sql and not sql: - sql = dim.sql + # Prefer the original LookML timeframe captured at import (so + # "time"/"second"/etc. round-trip distinctly). + timeframe = (dim.meta or {}).get("lookml_timeframe") + if timeframe is None: + # For native (non-import) dims, "time" and "second" share the + # second granularity; use the name suffix to pick the one that + # round-trips the field name, else fall back to the grain map. + if dim.granularity == "second" and dim.name.endswith("_time"): + timeframe = "time" + else: + timeframe = granularity_mapping.get(dim.granularity, "date") + if timeframe not in seen_timeframes: + seen_timeframes.add(timeframe) + timeframes.append(timeframe) dim_group_def = { - "name": base_name, + "name": group_name, "type": "time", "timeframes": timeframes, } - if sql: - sql = sql.replace("{model}", "${TABLE}") - dim_group_def["sql"] = sql + if group_sql: + dim_group_def["sql"] = group_sql.replace("{model}", "${TABLE}") dimension_groups.append(dim_group_def) diff --git a/tests/adapters/lookml/test_advanced_measure_types.py b/tests/adapters/lookml/test_advanced_measure_types.py index ef04d832..d61070fd 100644 --- a/tests/adapters/lookml/test_advanced_measure_types.py +++ b/tests/adapters/lookml/test_advanced_measure_types.py @@ -129,7 +129,7 @@ class TestDimensionGroupTimeframes: def test_time_truncation_timeframes_are_time(self, graph): model = graph.get_model("events_calendar") for tf, gran in [ - ("occurred_time", "hour"), + ("occurred_time", "second"), ("occurred_date", "day"), ("occurred_week", "week"), ("occurred_month", "month"), diff --git a/tests/adapters/lookml/test_edge_cases.py b/tests/adapters/lookml/test_edge_cases.py index d1fcefec..09c8c8ee 100644 --- a/tests/adapters/lookml/test_edge_cases.py +++ b/tests/adapters/lookml/test_edge_cases.py @@ -2290,6 +2290,220 @@ def test_lookml_view_with_unsupported_derived_table_not_defaulted(): assert model.table is None # not fabricated as a physical table named after the view +def test_lookml_time_timeframe_keeps_second_precision(): + """Looker's "time" timeframe keeps to-the-second precision, not hour.""" + graph = _parse_lkml( + """ +view: v { + sql_table_name: t ;; + dimension: id { primary_key: yes type: number sql: ${TABLE}.id ;; } + dimension_group: created { + type: time + timeframes: [time, date, minute] + sql: ${TABLE}.created_at ;; + } +} +""" + ) + model = graph.get_model("v") + assert model.get_dimension("created_time").granularity == "second" + assert model.get_dimension("created_minute").granularity == "minute" + assert model.get_dimension("created_date").granularity == "day" + + +def test_lookml_uncommon_timeframe_suffix_roundtrips(): + """Imported timeframes like millisecond/microsecond round-trip (base derived from stored timeframe).""" + import tempfile + + graph = _parse_lkml( + """ +view: v { + sql_table_name: t ;; + dimension: id { primary_key: yes type: number sql: ${TABLE}.id ;; } + dimension_group: created { + type: time + timeframes: [millisecond, microsecond, date] + sql: ${TABLE}.created_at ;; + } +} +""" + ) + out = tempfile.mktemp(suffix=".lkml") + LookMLAdapter().export(graph, out) + names = {d.name for d in LookMLAdapter().parse(Path(out)).get_model("v").dimensions if d.type == "time"} + assert {"created_millisecond", "created_microsecond"} <= names + assert not any(n.endswith("_millisecond_millisecond") or n.endswith("_microsecond_microsecond") for n in names) + + +def test_lookml_time_grain_roundtrip(): + """time/hour/minute/date grains round-trip through export without grain or suffix corruption.""" + import tempfile + + graph = _parse_lkml( + """ +view: v { + sql_table_name: t ;; + dimension: id { primary_key: yes type: number sql: ${TABLE}.id ;; } + dimension_group: created { + type: time + timeframes: [time, hour, minute, date] + sql: ${TABLE}.created_at ;; + } +} +""" + ) + out = tempfile.mktemp(suffix=".lkml") + LookMLAdapter().export(graph, out) + grains = { + d.name: d.granularity for d in LookMLAdapter().parse(Path(out)).get_model("v").dimensions if d.type == "time" + } + assert grains == { + "created_time": "second", + "created_hour": "hour", + "created_minute": "minute", + "created_date": "day", + } + + +def test_lookml_native_second_grain_roundtrips(): + """A second-grain dimension not imported from LookML (no meta) exports as `second`, not `time`.""" + import tempfile + + from sidemantic import Dimension, Model + from sidemantic.core.semantic_graph import SemanticGraph + + graph = SemanticGraph() + graph.add_model( + Model( + name="v", + table="t", + primary_key="id", + dimensions=[ + Dimension(name="id", type="numeric", sql="id"), + Dimension(name="created_second", type="time", granularity="second", sql="{model}.created_at"), + ], + ) + ) + out = tempfile.mktemp(suffix=".lkml") + LookMLAdapter().export(graph, out) + names = {d.name for d in LookMLAdapter().parse(Path(out)).get_model("v").dimensions if d.type == "time"} + assert "created_second" in names + + +def test_lookml_same_prefix_time_dims_different_sources_not_merged(): + """Same-prefix time dims backed by different columns must not merge to one group (wrong source).""" + import tempfile + + from sidemantic import Dimension, Model + from sidemantic.core.semantic_graph import SemanticGraph + + graph = SemanticGraph() + graph.add_model( + Model( + name="v", + table="t", + primary_key="id", + dimensions=[ + Dimension(name="id", type="numeric", sql="id"), + Dimension(name="started_date", type="time", granularity="day", sql="{model}.started_at"), + Dimension(name="started_second", type="time", granularity="second", sql="{model}.other_at"), + ], + ) + ) + out = tempfile.mktemp(suffix=".lkml") + LookMLAdapter().export(graph, out) + time_dims = [d for d in LookMLAdapter().parse(Path(out)).get_model("v").dimensions if d.type == "time"] + sources = {d.sql for d in time_dims} + # Each field keeps its own source column (no rewiring to the other group's SQL). + assert any("started_at" in (s or "") for s in sources) + assert any("other_at" in (s or "") for s in sources) + # Split groups get suffix-free, collision-free names (no started_date_date etc.). + assert not any(d.name.endswith("_date_date") or d.name.endswith("_hour_hour") for d in time_dims) + + +def test_lookml_split_time_group_names_avoid_existing_bases(): + """A synthetic split-group name must not collide with an existing time-group base.""" + import re + import tempfile + + from sidemantic import Dimension, Model + from sidemantic.core.semantic_graph import SemanticGraph + + graph = SemanticGraph() + graph.add_model( + Model( + name="v", + table="t", + primary_key="id", + dimensions=[ + Dimension(name="id", type="numeric", sql="id"), + Dimension(name="started_date", type="time", granularity="day", sql="{model}.a"), + Dimension(name="started_hour", type="time", granularity="hour", sql="{model}.b"), + Dimension(name="started_2_hour", type="time", granularity="hour", sql="{model}.c"), + ], + ) + ) + out = tempfile.mktemp(suffix=".lkml") + LookMLAdapter().export(graph, out) + names = re.findall(r"dimension_group: (\w+)", open(out).read()) + assert len(names) == len(set(names)), f"colliding dimension_group names: {names}" + + +def test_lookml_native_time_named_second_grain_roundtrips(): + """A native second-grain dim named *_time must export as `time` so the name round-trips.""" + import tempfile + + from sidemantic import Dimension, Model + from sidemantic.core.semantic_graph import SemanticGraph + + graph = SemanticGraph() + graph.add_model( + Model( + name="v", + table="t", + primary_key="id", + dimensions=[ + Dimension(name="id", type="numeric", sql="id"), + Dimension(name="created_time", type="time", granularity="second", sql="{model}.created_at"), + ], + ) + ) + out = tempfile.mktemp(suffix=".lkml") + LookMLAdapter().export(graph, out) + names = {d.name for d in LookMLAdapter().parse(Path(out)).get_model("v").dimensions if d.type == "time"} + assert "created_time" in names # not renamed to created_second + + +def test_lookml_time_and_second_timeframes_no_duplicate_export(): + """A group with both `time` and `second` (both -> second grain) must not emit duplicate timeframes.""" + import re + import tempfile + + graph = _parse_lkml( + """ +view: v { + sql_table_name: t ;; + dimension: id { primary_key: yes type: number sql: ${TABLE}.id ;; } + dimension_group: created { + type: time + timeframes: [time, second, hour] + sql: ${TABLE}.created_at ;; + } +} +""" + ) + out = tempfile.mktemp(suffix=".lkml") + LookMLAdapter().export(graph, out) + text = open(out).read() + timeframes = re.search(r"timeframes:\s*\[([^\]]*)\]", text).group(1) + parts = [p.strip() for p in timeframes.split(",")] + assert len(parts) == len(set(parts)), f"duplicate timeframes exported: {parts}" + # Both `time` and `second` are preserved (no collapse/drop) via the stored + # original timeframe, so re-import keeps every member. + names = {d.name for d in LookMLAdapter().parse(Path(out)).get_model("v").dimensions if d.type == "time"} + assert {"created_time", "created_second", "created_hour"} <= names + + def test_lookml_view_without_table_defaults_to_view_name(): """A view with no sql_table_name/derived_table should default its table to the view name.""" from sidemantic import SemanticLayer diff --git a/tests/adapters/lookml/test_fixtures.py b/tests/adapters/lookml/test_fixtures.py index 766da461..01caf966 100644 --- a/tests/adapters/lookml/test_fixtures.py +++ b/tests/adapters/lookml/test_fixtures.py @@ -69,7 +69,7 @@ def test_redshift_etl_errors_time_dimensions(self): model = self.graph.get_model("redshift_etl_errors") assert model.get_dimension("error_time") is not None assert model.get_dimension("error_time").type == "time" - assert model.get_dimension("error_time").granularity == "hour" + assert model.get_dimension("error_time").granularity == "second" assert model.get_dimension("error_date") is not None assert model.get_dimension("error_date").type == "time" assert model.get_dimension("error_date").granularity == "day" @@ -332,7 +332,7 @@ def test_created_time_group(self): assert model.get_dimension("created_month") is not None assert model.get_dimension("created_year") is not None assert model.get_dimension("created_time") is not None - assert model.get_dimension("created_time").granularity == "hour" + assert model.get_dimension("created_time").granularity == "second" def test_shipped_time_group(self): model = self.graph.get_model("order_items") diff --git a/tests/adapters/lookml/test_parsing.py b/tests/adapters/lookml/test_parsing.py index e8beb23b..30c3b62e 100644 --- a/tests/adapters/lookml/test_parsing.py +++ b/tests/adapters/lookml/test_parsing.py @@ -119,7 +119,8 @@ def test_lookml_adapter_ecommerce(): # Test dimension group with multiple timeframes created_time = orders.get_dimension("created_time") assert created_time is not None - assert created_time.granularity == "hour" + # Looker's "time" timeframe keeps to-the-second precision. + assert created_time.granularity == "second" created_date = orders.get_dimension("created_date") assert created_date is not None