From c5af3dce010d2e0feaab8eae1da2302ca18d9d47 Mon Sep 17 00:00:00 2001 From: Arkadiusz Komarzewski Date: Mon, 25 May 2026 14:08:58 +0200 Subject: [PATCH 1/4] DENG-9533 - Go server template: Pub/Sub message builder Add go_server_pubsub outputter generating GleanEventsBuilder with BuildMessage methods returning *pubsub.Message. Wire format is attributes-shape (routing fields on Pub/Sub attributes, gzipped inner Glean ping JSON as the body). Decoder stamps submission_timestamp from publishTime. The template is intentionally stateless beyond app-identity fields - no publishing, batching, retries, shutdown, or metrics. A reference implementation of publisher lifecycle lives in mozilla/glean-server-examples. See mozilla/gcp-ingestion docs/architecture/decoder_service_specification.md for the wire-format contract. --- CHANGELOG.md | 11 + glean_parser/go_server.py | 45 +- glean_parser/templates/go_server.jinja2 | 158 +++++ glean_parser/translate.py | 3 +- tests/test-go/test_publisher.go.tmpl | 88 +++ tests/test_go_server.py | 846 +++++++++++++++--------- 6 files changed, 842 insertions(+), 309 deletions(-) create mode 100644 tests/test-go/test_publisher.go.tmpl diff --git a/CHANGELOG.md b/CHANGELOG.md index 230e2629b..85b015369 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,17 @@ ## Unreleased +- Go server template: add `go_server_pubsub` outputter that generates + Pub/Sub-shaped message builders for Glean pings. `GleanEventsBuilder` + exposes `BuildMessage` methods returning `*pubsub.Message` with + the inner ping JSON (gzip-compressed) as the body and document metadata + fields as message attributes. The template does NOT manage publishing, + batching, retries, shutdown, or metrics - callers own those. A reference + implementation of publisher lifecycle (batching, graceful shutdown, + Prometheus integration) is provided in mozilla/glean-server-examples. + See mozilla/gcp-ingestion `decoder_service_specification.md` for the + wire-format contract. 
+ ## 19.1.0 - Go server: Add support for `labeled_boolean` metrics with static labels ([AE-1250](https://mozilla-hub.atlassian.net/browse/AE-1250)) diff --git a/glean_parser/go_server.py b/glean_parser/go_server.py index b60165351..27436a9f3 100644 --- a/glean_parser/go_server.py +++ b/glean_parser/go_server.py @@ -11,15 +11,18 @@ generates does not use the Glean SDK. It is meant to be used to collect events in server-side environments. In these environments SDK assumptions to measurement window and connectivity don't hold. + Generated code takes care of assembling pings with metrics, and serializing to messages -conforming to Glean schema. +conforming to Glean schema. Two transport modes are supported: +- Cloud Logging (go_server): Logs to stdout in MozLog format for ingestion via GCP log routing +- Pub/Sub (go_server_pubsub): Builds Pub/Sub messages that the caller publishes Warning: this outputter supports limited set of metrics, see `SUPPORTED_METRIC_TYPES` below. Generated code creates two methods for each ping (`RecordPingX` and `RecordPingXWithoutUserInfo`) -that are used for submitting (logging) them. -If pings have `event` metrics assigned, they can be passed to these methods. +that are used for submitting events. If pings have `event` metrics assigned, they can be +passed to these methods. """ from collections import defaultdict @@ -111,7 +114,10 @@ def validate_labeled_boolean(metric: metrics.Metric) -> bool: def output_go( - objs: metrics.ObjectTree, output_dir: Path, options: Optional[Dict[str, Any]] + objs: metrics.ObjectTree, + output_dir: Path, + options: Optional[Dict[str, Any]], + transport: str = "logging", ) -> None: """ Given a tree of objects, output Go code to `output_dir`. @@ -122,6 +128,8 @@ def output_go( :param objects: A tree of objects (metrics and pings) as returned from `parser.parse_objects`. :param output_dir: Path to an output directory to write to. 
+ :param transport: Transport mode - either "logging" (Cloud Logging) or + "pubsub" (Pub/Sub message building). Default is "logging". """ template = util.get_jinja2_template( @@ -198,5 +206,34 @@ def output_go( pings=ping_to_metrics, events=event_metrics, labeled_booleans=labeled_boolean_metrics, + transport=transport, ) ) + + +def output_go_logger( + objs: metrics.ObjectTree, output_dir: Path, options: Optional[Dict[str, Any]] = None +) -> None: + """ + Given a tree of objects, output Go code using Cloud Logging transport. + + :param objects: A tree of objects (metrics and pings) as returned from + `parser.parse_objects`. + :param output_dir: Path to an output directory to write to. + :param options: options dictionary (currently unused for Go). 
+ """ + output_go(objs, output_dir, options, transport="pubsub") diff --git a/glean_parser/templates/go_server.jinja2 b/glean_parser/templates/go_server.jinja2 index 54bb94460..bfa6a1255 100644 --- a/glean_parser/templates/go_server.jinja2 +++ b/glean_parser/templates/go_server.jinja2 @@ -9,6 +9,18 @@ package glean // required imports import ( +{% if transport == "pubsub" %} + "bytes" + "compress/gzip" + "encoding/json" + "fmt" + "io" + "sync" + "time" + + "cloud.google.com/go/pubsub" + "github.com/google/uuid" +{% else %} "encoding/json" "errors" "fmt" @@ -17,8 +29,10 @@ import ( "time" "github.com/google/uuid" +{% endif %} ) +{% if transport == "logging" %} // log type string used to identify logs to process in the Moz Data Pipeline var gleanEventMozlogType string = "glean-server-event" @@ -32,6 +46,50 @@ type GleanEventsLogger struct { AppChannel string // Channel to differentiate logs from prod/beta/staging/devel Writer io.Writer // Writer to output to. Normal operation expects os.Stdout } +{% else %} +// GleanEventsBuilder constructs Pub/Sub messages carrying Glean pings. The +// builder is stateless beyond its app-identity fields; callers own the +// pubsub.Client, the pubsub.Topic, batching settings, retries, shutdown +// sequencing, and any publish-result metrics. A reference implementation of +// publisher lifecycle lives in mozilla/glean-server-examples. +type GleanEventsBuilder struct { + AppID string // Application ID to identify application per Glean standards + AppDisplayVersion string // Version of application emitting the event + AppChannel string // Channel to differentiate logs from prod/beta/staging/devel +} + +// gzipPool reuses gzip.Writer instances across calls. gzip.NewWriter() +// allocates ~800 KB internally; pooling + Reset() drops per-message +// allocations to near zero at high publish volumes. 
+var gzipPool = sync.Pool{ + New: func() interface{} { + return gzip.NewWriter(io.Discard) + }, +} + +// compressPayload gzips data using the package-level writer pool. The +// returned slice is caller-owned; only the *gzip.Writer is returned to the +// pool. +func compressPayload(data []byte) ([]byte, error) { + var buf bytes.Buffer + // Optimistic preallocation: gzip on small JSON typically achieves ~2x ratio. + buf.Grow(len(data) / 2) + + gz := gzipPool.Get().(*gzip.Writer) + gz.Reset(&buf) + + if _, err := gz.Write(data); err != nil { + gzipPool.Put(gz) + return nil, fmt.Errorf("gzip write failed: %w", err) + } + if err := gz.Close(); err != nil { + gzipPool.Put(gz) + return nil, fmt.Errorf("gzip close failed: %w", err) + } + gzipPool.Put(gz) + return buf.Bytes(), nil +} +{% endif %} // exported type for public method parameters type RequestInfo struct { @@ -63,6 +121,7 @@ type pingInfo struct { EndTime string `json:"end_time"` } +{% if transport == "logging" %} type ping struct { DocumentNamespace string `json:"document_namespace"` DocumentType string `json:"document_type"` @@ -72,6 +131,7 @@ type ping struct { IpAddress string `json:"ip_address,omitempty"` Payload string `json:"payload"` } +{% endif %} type metrics map[string]map[string]any @@ -89,14 +149,20 @@ type gleanEvent struct { Extra map[string]string `json:"extra"` } +{% if transport == "logging" %} type logEnvelope struct { Timestamp string Logger string Type string Fields ping } +{% endif %} +{% if transport == "pubsub" %} +func (g GleanEventsBuilder) createClientInfo() clientInfo { +{% else %} func (g GleanEventsLogger) createClientInfo() clientInfo { +{% endif %} // Fields with default values are required in the Glean schema, but not used in server context return clientInfo{ TelemetrySDKBuild: "glean_parser v{{ parser_version }}", @@ -120,6 +186,7 @@ func createPingInfo() pingInfo { } } +{% if transport == "logging" %} func (g GleanEventsLogger) createPing(documentType string, config RequestInfo, 
payload pingPayload) (ping, error) { payloadJson, err := json.Marshal(payload) if err != nil { @@ -141,7 +208,9 @@ func (g GleanEventsLogger) createPing(documentType string, config RequestInfo, p Payload: string(payloadJson), }, nil } +{% endif %} +{% if transport == "logging" %} // method called by each ping-specific record method. // construct the ping, wrap it in the envelope, and print to stdout func (g GleanEventsLogger) record( @@ -180,6 +249,7 @@ func (g GleanEventsLogger) record( fmt.Fprintln(g.Writer, string(envelopeJson)) return nil } +{% endif %} {# if any ping has an event metric, create methods and types for them #} {% if events %} @@ -265,6 +335,93 @@ type {{ ping|ping_type_name }} struct { {% endif %} } +{% if transport == "pubsub" %} +// Build{{ ping|ping_type_name }}Message constructs a Pub/Sub message carrying +// the given `{{ ping }}` ping. The caller publishes the returned message +// (e.g., topic.Publish(ctx, msg)) and owns batching, retries, shutdown +// sequencing, and any publish-result metrics. +// +// Wire-format contract: see +// docs/architecture/decoder_service_specification.md in mozilla/gcp-ingestion. 
+func (g GleanEventsBuilder) Build{{ ping|ping_type_name }}Message( + requestInfo RequestInfo, + params {{ ping|ping_type_name }}, +) (*pubsub.Message, error) { + metrics := metrics{ + {% for metric_type, metrics in metrics_by_type.items() %} + {% if metric_type != 'event' %} + "{{ metric_type }}": { + {% for metric in metrics %} + {% if metric_type == 'datetime' %} + "{{ metric|metric_name }}": params.{{ metric|metric_argument_name }}.Format("2006-01-02T15:04:05.000Z"), + {% else %} + "{{ metric|metric_name }}": params.{{ metric|metric_argument_name }}, + {% endif %} + {% endfor %} + }, + {% endif %} + {% endfor %} + } + + events := []gleanEvent{} + {% if metrics_by_type['event'] %} + if params.Event != nil { + events = append(events, params.Event.gleanEvent()) + } + {% endif %} + + payload := pingPayload{ + ClientInfo: g.createClientInfo(), + PingInfo: createPingInfo(), + Metrics: metrics, + Events: events, + } + + payloadJSON, err := json.Marshal(payload) + if err != nil { + return nil, fmt.Errorf("marshal ping payload: %w", err) + } + compressed, err := compressPayload(payloadJSON) + if err != nil { + return nil, fmt.Errorf("compress payload: %w", err) + } + + documentID, err := uuid.NewRandom() + if err != nil { + return nil, fmt.Errorf("generate document_id: %w", err) + } + + attributes := map[string]string{ + "document_namespace": g.AppID, + "document_type": "{{ ping }}", + "document_version": "1", + "document_id": documentID.String(), + } + // Skip empty optional attributes; the decoder treats missing and empty + // the same, and Pub/Sub charges for attribute bytes. The publisher does + // not set submission_timestamp; the decoder stamps it from publishTime. 
+ if requestInfo.UserAgent != "" { + attributes["user_agent"] = requestInfo.UserAgent + } + if requestInfo.IpAddress != "" { + attributes["x_forwarded_for"] = requestInfo.IpAddress + } + + return &pubsub.Message{ + Data: compressed, + Attributes: attributes, + }, nil +} + +// Build{{ ping|ping_type_name }}MessageWithoutUserInfo constructs a Pub/Sub +// message carrying the given `{{ ping }}` ping with no request-derived +// attributes. +func (g GleanEventsBuilder) Build{{ ping|ping_type_name }}MessageWithoutUserInfo( + params {{ ping|ping_type_name }}, +) (*pubsub.Message, error) { + return g.Build{{ ping|ping_type_name }}Message(defaultRequestInfo, params) +} +{% else %} // Record and submit `{{ ping }}` ping func (g GleanEventsLogger) Record{{ ping|ping_type_name }}( requestInfo RequestInfo, @@ -309,4 +466,5 @@ func (g GleanEventsLogger) Record{{ ping|ping_type_name}}WithoutUserInfo( ) error { return g.Record{{ ping|ping_type_name }}(defaultRequestInfo, params) } +{% endif %} {% endfor %} diff --git a/glean_parser/translate.py b/glean_parser/translate.py index 61fdc72b4..59612c259 100644 --- a/glean_parser/translate.py +++ b/glean_parser/translate.py @@ -57,7 +57,8 @@ def __init__( OUTPUTTERS = { - "go_server": Outputter(go_server.output_go, []), + "go_server": Outputter(go_server.output_go_logger, []), + "go_server_pubsub": Outputter(go_server.output_go_pubsub, []), "javascript": Outputter(javascript.output_javascript, []), "typescript": Outputter(javascript.output_typescript, []), "javascript_server": Outputter(javascript_server.output_javascript, []), diff --git a/tests/test-go/test_publisher.go.tmpl b/tests/test-go/test_publisher.go.tmpl new file mode 100644 index 000000000..af2e1eba2 --- /dev/null +++ b/tests/test-go/test_publisher.go.tmpl @@ -0,0 +1,88 @@ +package main + +import ( + "context" + "encoding/base64" + "encoding/json" + "fmt" + "glean/glean" + "os" + "time" + /* IMPORTS */ + + "cloud.google.com/go/pubsub" + "cloud.google.com/go/pubsub/pstest" + 
"google.golang.org/api/option" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" +) + +func main() { + ctx := context.Background() + + // In-process Pub/Sub fake. The Go pubsub client picks up PUBSUB_EMULATOR_HOST + // automatically; pstest exposes its bound address via srv.Addr. + srv := pstest.NewServer() + defer srv.Close() + os.Setenv("PUBSUB_EMULATOR_HOST", srv.Addr) + + // Pre-create the topic on the fake server. Use a transient client; the + // test code below opens its own client to mirror normal app usage. + setupConn, err := grpc.Dial(srv.Addr, + grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + fmt.Fprintln(os.Stderr, "dial:", err) + os.Exit(1) + } + setupClient, err := pubsub.NewClient(ctx, "test-project", option.WithGRPCConn(setupConn)) + if err != nil { + fmt.Fprintln(os.Stderr, "setup-client:", err) + os.Exit(1) + } + if _, err := setupClient.CreateTopic(ctx, "test-topic"); err != nil { + fmt.Fprintln(os.Stderr, "create-topic:", err) + os.Exit(1) + } + setupClient.Close() + setupConn.Close() + + // Client + topic used by the test snippet to publish. + client, err := pubsub.NewClient(ctx, "test-project") + if err != nil { + fmt.Fprintln(os.Stderr, "client:", err) + os.Exit(1) + } + topic := client.Topic("test-topic") + + builder := glean.GleanEventsBuilder{ + AppID: "glean.test", + AppDisplayVersion: "0.0.1", + AppChannel: "nightly", + } + _ = builder // suppress unused warning if the snippet does not reference it + + /* CODE */ + + // Drain whatever was queued, then release the client. topic.Stop + // blocks until the batcher flushes; client.Close releases the gRPC conn. + topic.Stop() + if err := client.Close(); err != nil { + fmt.Fprintln(os.Stderr, "client close:", err) + os.Exit(1) + } + // Brief settle so pstest finalizes any pending acks before we read. 
+ time.Sleep(50 * time.Millisecond) + + msgs := srv.Messages() + out := make([]map[string]interface{}, 0, len(msgs)) + for _, m := range msgs { + out = append(out, map[string]interface{}{ + "data": base64.StdEncoding.EncodeToString(m.Data), + "attributes": m.Attributes, + }) + } + if err := json.NewEncoder(os.Stdout).Encode(out); err != nil { + fmt.Fprintln(os.Stderr, "encode:", err) + os.Exit(1) + } +} diff --git a/tests/test_go_server.py b/tests/test_go_server.py index 10866112e..51ed0245c 100644 --- a/tests/test_go_server.py +++ b/tests/test_go_server.py @@ -4,10 +4,13 @@ # http://creativecommons.org/publicdomain/zero/1.0/ from pathlib import Path +import base64 +import gzip import io import json import pytest import subprocess +import uuid import glean_parser from glean_parser import translate @@ -16,24 +19,106 @@ ROOT = Path(__file__).parent -def test_parser_go_server_ping_no_metrics(tmp_path, capsys): - """Test that no files are generated if only ping definitions - are provided without any metrics.""" +# ============================================================================= +# Cross-transport setup +# ============================================================================= + +TRANSPORTS = ("logging", "pubsub") + +FORMAT_BY_TRANSPORT = { + "logging": "go_server", + "pubsub": "go_server_pubsub", +} + +SCHEMA_URL = ( + "https://raw.githubusercontent.com/mozilla-services/" + "mozilla-pipeline-schemas/main/" + "schemas/glean/glean/glean.1.schema.json" +) + +# YAML fixture lists shared across transports. 
+YAML_EVENTS_PING = ["go_server_events_only_metrics.yaml"] +YAML_CUSTOM_PING = [ + "go_server_custom_ping_only_metrics.yaml", + "go_server_custom_ping_only_pings.yaml", +] + + +# --- Helpers ---- + + +def _translate_for_transport(transport, glean_module_path, yaml_filenames): + """Translate the given YAML fixtures to the given transport's Go format.""" + yaml_files = [ROOT / "data" / name for name in yaml_filenames] + translate.translate(yaml_files, FORMAT_BY_TRANSPORT[transport], glean_module_path) + + +def _run_go_program(code_dir, template_path, code, imports=""): + """Compile and run the test Go program. Returns raw stdout bytes.""" + with open(template_path, "r") as fp: + tmpl_code = fp.read() + tmpl_code = tmpl_code.replace("/* CODE */", code).replace("/* IMPORTS */", imports) + + with open(code_dir / "test.go", "w") as fp: + fp.write(tmpl_code) + + subprocess.check_call(["go", "mod", "init", "glean"], cwd=code_dir) + subprocess.check_call(["go", "mod", "tidy"], cwd=code_dir) + + return subprocess.check_output(["go", "run", "test.go"], cwd=code_dir) + + +def run_logger(code_dir, code, imports=""): + """Run a logging-transport test Go program and return decoded stdout.""" + out = _run_go_program(code_dir, ROOT / "test-go" / "test.go.tmpl", code, imports) + return out.decode("utf-8") + + +def run_publisher(code_dir, code, imports=""): + """Run a pubsub-transport test Go program against an in-process Pub/Sub + fake (pstest) and return the captured messages as a list of + {"data": base64, "attributes": dict}.""" + out = _run_go_program( + code_dir, ROOT / "test-go" / "test_publisher.go.tmpl", code, imports + ) + return json.loads(out.decode("utf-8")) + + +def validate_payload_against_schema(payload_bytes): + """Validate the inner Glean ping JSON against the pipeline schema.""" + input = io.StringIO(payload_bytes.decode("utf-8")) + output = io.StringIO() + assert validate_ping.validate_ping(input, output, schema_url=SCHEMA_URL) == 0, ( + output.getvalue() + ) + + 
+# ============================================================================= +# Generation tests +# ============================================================================= + +# --- Cross-transport ---- + + +@pytest.mark.parametrize("transport", TRANSPORTS) +def test_parser_ping_no_metrics(tmp_path, capsys, transport): + """No files are generated if only ping definitions are provided + without any metrics.""" translate.translate( ROOT / "data" / "server_pings.yaml", - "go_server", + FORMAT_BY_TRANSPORT[transport], tmp_path, ) assert all(False for _ in tmp_path.iterdir()) -def test_parser_go_server_metrics_unsupported_type(tmp_path, capsys): - """Test that no files are generated with unsupported metric types.""" +@pytest.mark.parametrize("transport", TRANSPORTS) +def test_parser_metrics_unsupported_type(tmp_path, capsys, transport): + """No files are generated with unsupported metric types; warnings are + emitted for each unsupported type.""" translate.translate( - [ - ROOT / "data" / "go_server_metrics_unsupported.yaml", - ], - "go_server", + [ROOT / "data" / "go_server_metrics_unsupported.yaml"], + FORMAT_BY_TRANSPORT[transport], tmp_path, ) captured = capsys.readouterr() @@ -48,44 +133,46 @@ def test_parser_go_server_metrics_unsupported_type(tmp_path, capsys): assert t in captured.out -def test_parser_go_server_labeled_boolean_without_labels(tmp_path, capsys): - """Test that labeled_boolean without static labels is rejected.""" +@pytest.mark.parametrize("transport", TRANSPORTS) +def test_parser_labeled_boolean_without_labels(tmp_path, capsys, transport): + """labeled_boolean without static labels is rejected (both transports).""" translate.translate( - [ - ROOT / "data" / "go_server_metrics_unsupported.yaml", - ], - "go_server", + [ROOT / "data" / "go_server_metrics_unsupported.yaml"], + FORMAT_BY_TRANSPORT[transport], tmp_path, ) captured = capsys.readouterr() assert "Ignoring labeled_boolean metric without static labels" in captured.out -def 
test_parser_go_server_labeled_boolean(tmp_path): - """Test that labeled_boolean metrics generate proper struct types.""" +@pytest.mark.parametrize("transport", TRANSPORTS) +def test_parser_labeled_boolean(tmp_path, transport): + """labeled_boolean metrics generate proper struct types (both transports).""" translate.translate( ROOT / "data" / "go_server_labeled_boolean_metrics.yaml", - "go_server", + FORMAT_BY_TRANSPORT[transport], tmp_path, ) assert set(x.name for x in tmp_path.iterdir()) == set(["server_events.go"]) - # Read generated file and verify struct is created with (tmp_path / "server_events.go").open("r", encoding="utf-8") as fd: content = fd.read() - # Check that the labeled_boolean struct type was generated - assert "type TelemetryFeatureFlags struct {" in content - assert "FeatureOne *bool" in content - assert "FeatureTwo *bool" in content - assert "FeatureThree *bool" in content + # The labeled_boolean struct type is generated. + assert "type TelemetryFeatureFlags struct {" in content + assert "FeatureOne *bool" in content + assert "FeatureTwo *bool" in content + assert "FeatureThree *bool" in content - # Check that it's used in the ping struct - assert "TelemetryFeatureFlags TelemetryFeatureFlags" in content + # ...and used in the ping struct. 
+ assert "TelemetryFeatureFlags TelemetryFeatureFlags" in content -def test_parser_go_server_events_only(tmp_path): +# --- Logging-only (golden-file comparisons) ---- + + +def test_parser_logging_events_only(tmp_path): """Test that parser works for definitions that only use events ping""" translate.translate( ROOT / "data" / "go_server_events_only_metrics.yaml", @@ -109,7 +196,7 @@ def test_parser_go_server_events_only(tmp_path): assert content == compare -def test_parser_go_server_events_and_custom_ping(tmp_path): +def test_parser_logging_events_and_custom_ping(tmp_path): """Test that parser works for definitions that use events ping and custom pings""" translate.translate( [ @@ -136,7 +223,7 @@ def test_parser_go_server_events_and_custom_ping(tmp_path): assert content == compare -def test_parser_go_server_custom_ping_only(tmp_path): +def test_parser_logging_custom_ping_only(tmp_path): """Test that parser works for definitions that only use custom pings""" translate.translate( [ @@ -163,150 +250,410 @@ def test_parser_go_server_custom_ping_only(tmp_path): assert content == compare -def run_logger(code_dir, code, imports=""): - """ - Run the Go logger and capture the output sent to STDOUT. 
- """ +def test_parser_logging_backward_compat(tmp_path): + """Default `go_server` outputter generates the logging transport, not + the pubsub one.""" + translate.translate( + ROOT / "data" / "go_server_events_only_metrics.yaml", + "go_server", + tmp_path, + ) - tmpl_code = "" - with open(ROOT / "test-go" / "test.go.tmpl", "r") as fp: - tmpl_code = fp.read() + with (tmp_path / "server_events.go").open("r", encoding="utf-8") as fd: + content = fd.read() - tmpl_code = tmpl_code.replace("/* CODE */", code).replace("/* IMPORTS */", imports) + assert "type GleanEventsLogger struct" in content + assert "type GleanEventsPublisher struct" not in content + assert "io.Writer" in content + assert "type logEnvelope struct" in content + assert "gleanEventMozlogType" in content - with open(code_dir / "test.go", "w") as fp: - fp.write(tmpl_code) + assert "cloud.google.com/go/pubsub" not in content + assert "prometheus" not in content - subprocess.call(["go", "mod", "init", "glean"], cwd=code_dir) - subprocess.call(["go", "mod", "tidy"], cwd=code_dir) + # Logging keeps the envelope-ping wrapper and createPing helper. + assert "type ping struct" in content + assert "func (g GleanEventsLogger) createPing(" in content - return subprocess.check_output(["go", "run", "test.go"], cwd=code_dir).decode( - "utf-8" - ) + # gzip is only used by the pubsub transport. + assert "compress/gzip" not in content + # Record methods use value receiver on the logger. + assert "func (g GleanEventsLogger) RecordEventsPing(" in content + + # SDK build string does not include (pubsub). 
+ assert f"glean_parser v{glean_parser.__version__} (pubsub)" not in content -@pytest.mark.go_dependency -def test_run_logging_events_ping(tmp_path): - glean_module_path = tmp_path / "glean" +# --- Pub/Sub-only (substring-grep structural contract) ---- + + +def test_parser_pubsub_generation(tmp_path): + """Generated Go for the pubsub outputter has the expected structure: + stateless GleanEventsBuilder with Build*Message methods returning a + *pubsub.Message in attributes-shape wire format. No transport + lifecycle code (publisher struct, Flush/Close, Prometheus, finalizer + goroutines) is generated - callers own that.""" translate.translate( - [ - ROOT / "data" / "go_server_events_only_metrics.yaml", - ], - "go_server", - glean_module_path, + ROOT / "data" / "go_server_events_only_metrics.yaml", + "go_server_pubsub", + tmp_path, ) - code = """ - logger.RecordEventsPing( - glean.RequestInfo{ - UserAgent: "glean-test/1.0", - IpAddress: "127.0.0.1", - }, - glean.EventsPing{ - MetricName: "string value", - MetricRequestBool: true, - MetricRequestCount: 10, - MetricRequestDatetime: time.Now(), - MetricRequestStringList: []string{"list", "of", "strings"}, - Event: glean.BackendTestEventEvent{ - EventFieldString: "event extra string value", - EventFieldQuantity: 100, - EventFieldBool: false, + assert set(x.name for x in tmp_path.iterdir()) == set(["server_events.go"]) + + with (tmp_path / "server_events.go").open("r", encoding="utf-8") as fd: + content = fd.read() + + # Pubsub-specific imports. + assert "cloud.google.com/go/pubsub" in content + assert '"bytes"' in content + assert '"compress/gzip"' in content + assert '"io"' in content + assert '"sync"' in content + assert "github.com/google/uuid" in content + + # Builder, not publisher; no lifecycle, no Prometheus, no context plumbing. 
+ assert "type GleanEventsBuilder struct" in content + assert "type GleanEventsPublisher struct" not in content + assert "type GleanEventsLogger struct" not in content + assert "NewGleanEventsPublisher" not in content + assert "ErrPublisherClosed" not in content + assert "gleanPublishTotal" not in content + assert "prometheus" not in content + assert "promauto" not in content + assert "func (g GleanEventsBuilder) Flush" not in content + assert "func (g GleanEventsBuilder) Close" not in content + # No finalizer-goroutine / fence machinery. + assert "sync.Mutex" not in content + assert "sync.WaitGroup" not in content + assert "publishCtx" not in content + assert "cancelPublish" not in content + assert "context.WithCancel" not in content + assert "context.Canceled" not in content + assert "result.Get(" not in content + # No context.Context parameter on any generated method. + assert "ctx context.Context" not in content + assert '"context"' not in content + + # Package-level gzip pool (stateless; not a struct field anymore). + assert "var gzipPool = sync.Pool{" in content + assert "gzip.NewWriter(io.Discard)" in content + assert "gzipPool.Get().(*gzip.Writer)" in content + # compressPayload is a free function, not a method on the builder. + assert "func compressPayload(data []byte) ([]byte, error)" in content + + # Per-ping builder method: value receiver, returns *pubsub.Message. + assert "func (g GleanEventsBuilder) BuildEventsPingMessage(" in content + assert ( + "func (g GleanEventsBuilder) BuildEventsPingMessageWithoutUserInfo(" + in content + ) + assert "(*pubsub.Message, error)" in content + + # Old Record* shape is gone. + assert "func (g *GleanEventsPublisher) RecordEventsPing(" not in content + assert "RecordEventsPing(" not in content + + # No MozLog envelope (logging path) and no envelope-ping wrapper + # (old direct-Pub/Sub PoC shape). 
+ assert "type logEnvelope struct" not in content + assert "gleanEventMozlogType" not in content + assert "type ping struct" not in content + assert "var envelope = ping" not in content + assert "createPing(" not in content + + # Routing fields on Pub/Sub message attributes, not in body JSON. The + # document_type is hardcoded per-ping rather than passed as a parameter + # now, so we check for the literal ping name. + assert "Attributes: attributes," in content + assert '"document_namespace": g.AppID,' in content + assert '"document_type": "events",' in content + assert '"document_version": "1",' in content + assert '"document_id": documentID.String(),' in content + # Optional attributes only set when non-empty. + assert 'attributes["user_agent"] = requestInfo.UserAgent' in content + assert 'attributes["x_forwarded_for"] = requestInfo.IpAddress' in content + # The publisher does not stamp submission_timestamp; the decoder stamps + # it from Pub/Sub publishTime. Comments may mention it as plain words, + # but it must never appear quoted (as an attribute key). + assert '"submission_timestamp"' not in content + # Body is the gzipped inner ping, not an envelope JSON. + assert "Data: compressed," in content + + +# ============================================================================= +# Runtime tests - logging transport +# ============================================================================= +# +# Each scenario records a ping via the generated `Record` method, which +# writes a single MozLog envelope JSON line to the configured Writer. The +# inner Glean ping is in `Fields.payload` (a JSON string); routing fields +# (document_namespace/type/version/id, user_agent, ip_address) sit at +# `Fields.*`. 
+ +LOGGING_SCENARIOS = { + "events_ping": { + "yaml": YAML_EVENTS_PING, + "code": """ + logger.RecordEventsPing( + glean.RequestInfo{ + UserAgent: "glean-test/1.0", + IpAddress: "127.0.0.1", }, - }, - ) - """ + glean.EventsPing{ + MetricName: "string value", + MetricRequestBool: true, + MetricRequestCount: 10, + MetricRequestDatetime: time.Now(), + MetricRequestStringList: []string{"list", "of", "strings"}, + Event: glean.BackendTestEventEvent{ + EventFieldString: "event extra string value", + EventFieldQuantity: 100, + EventFieldBool: false, + }, + }, + ) + """, + "expected_doc_type": "events", + "expected_event_count": 1, + }, + "custom_ping_without_event": { + "yaml": YAML_CUSTOM_PING, + "code": """ + logger.RecordServerTelemetryScenarioOnePing( + glean.RequestInfo{ + UserAgent: "glean-test/1.0", + IpAddress: "127.0.0.1", + }, + glean.ServerTelemetryScenarioOnePing{ + MetricName: "string value", + MetricRequestBool: true, + MetricRequestCount: 20, + MetricRequestDatetime: time.Now(), + MetricRequestStringList: []string{"list", "of", "strings"}, + }, + ) + """, + "expected_doc_type": "server-telemetry-scenario-one", + "expected_event_count": 0, + }, + "custom_ping_with_event": { + "yaml": YAML_CUSTOM_PING, + "code": """ + logger.RecordServerTelemetryScenarioOnePing( + glean.RequestInfo{ + UserAgent: "glean-test/1.0", + IpAddress: "127.0.0.1", + }, + glean.ServerTelemetryScenarioOnePing{ + MetricName: "string value", + MetricRequestBool: true, + MetricRequestCount: 20, + MetricRequestDatetime: time.Now(), + MetricRequestStringList: []string{"list", "of", "strings"}, + Event: glean.BackendSpecialEventEvent{ + EventFieldString: "extra value string", + EventFieldQuantity: 30, + EventFieldBool: true, + }, + }, + ) + """, + "expected_doc_type": "server-telemetry-scenario-one", + "expected_event_count": 1, + }, +} - logged_output = run_logger(tmp_path, code) - logged_output = json.loads(logged_output) - fields = logged_output["Fields"] - payload = fields["payload"] - - 
assert "glean-server-event" == logged_output["Type"] - assert "glean.test" == fields["document_namespace"] - assert "events" == fields["document_type"] - assert "1" == fields["document_version"] - assert "glean-test/1.0" == fields["user_agent"] - - schema_url = ( - "https://raw.githubusercontent.com/mozilla-services/" - "mozilla-pipeline-schemas/main/" - "schemas/glean/glean/glean.1.schema.json" - ) - input = io.StringIO(payload) - output = io.StringIO() - assert validate_ping.validate_ping(input, output, schema_url=schema_url) == 0, ( - output.getvalue() - ) +@pytest.mark.go_dependency +@pytest.mark.parametrize("scenario_name", list(LOGGING_SCENARIOS)) +def test_run_record_logging(tmp_path, scenario_name): + """Record a ping via the logging transport and assert the MozLog + envelope's wire format + the inner ping content.""" + scenario = LOGGING_SCENARIOS[scenario_name] + glean_module_path = tmp_path / "glean" + _translate_for_transport("logging", glean_module_path, scenario["yaml"]) + + envelope = json.loads(run_logger(tmp_path, scenario["code"])) + fields = envelope["Fields"] + payload = json.loads(fields["payload"]) + + # MozLog envelope: Type tags the log line for the Mozilla pipeline. + assert envelope["Type"] == "glean-server-event" + + # Routing fields on the envelope. + assert fields["document_namespace"] == "glean.test" + assert fields["document_type"] == scenario["expected_doc_type"] + assert fields["document_version"] == "1" + assert fields["user_agent"] == "glean-test/1.0" + assert fields["ip_address"] == "127.0.0.1" + + # Inner ping content + constructor args threaded through into client_info. 
+ assert len(payload["events"]) == scenario["expected_event_count"] + assert payload["client_info"]["app_display_version"] == "0.0.1" + assert payload["client_info"]["app_channel"] == "nightly" + + validate_payload_against_schema(fields["payload"].encode("utf-8")) + + +# ============================================================================= +# Runtime tests - pubsub transport +# ============================================================================= +# +# Each scenario builds a *pubsub.Message via the generated +# `BuildMessage` method, publishes it via topic.Publish, and asserts +# the message arrives at the pstest fake. Routing fields live on the +# message's Attributes; the body is the inner Glean ping gzipped. + +PUBSUB_SCENARIOS = { + "events_ping": { + "yaml": YAML_EVENTS_PING, + "code": """ + msg, err := builder.BuildEventsPingMessage( + glean.RequestInfo{ + UserAgent: "glean-test/1.0", + IpAddress: "127.0.0.1", + }, + glean.EventsPing{ + MetricName: "string value", + MetricRequestBool: true, + MetricRequestCount: 10, + MetricRequestDatetime: time.Now(), + MetricRequestStringList: []string{"list", "of", "strings"}, + Event: glean.BackendTestEventEvent{ + EventFieldString: "event extra string value", + EventFieldQuantity: 100, + EventFieldBool: false, + }, + }, + ) + if err != nil { + fmt.Fprintln(os.Stderr, "build:", err) + os.Exit(1) + } + result := topic.Publish(ctx, msg) + if _, err := result.Get(ctx); err != nil { + fmt.Fprintln(os.Stderr, "publish:", err) + os.Exit(1) + } + """, + "expected_doc_type": "events", + "expected_event_count": 1, + }, + "custom_ping_without_event": { + "yaml": YAML_CUSTOM_PING, + "code": """ + msg, err := builder.BuildServerTelemetryScenarioOnePingMessage( + glean.RequestInfo{ + UserAgent: "glean-test/1.0", + IpAddress: "127.0.0.1", + }, + glean.ServerTelemetryScenarioOnePing{ + MetricName: "string value", + MetricRequestBool: true, + MetricRequestCount: 20, + MetricRequestDatetime: time.Now(), + 
MetricRequestStringList: []string{"list", "of", "strings"}, + }, + ) + if err != nil { + fmt.Fprintln(os.Stderr, "build:", err) + os.Exit(1) + } + result := topic.Publish(ctx, msg) + if _, err := result.Get(ctx); err != nil { + fmt.Fprintln(os.Stderr, "publish:", err) + os.Exit(1) + } + """, + "expected_doc_type": "server-telemetry-scenario-one", + "expected_event_count": 0, + }, + "custom_ping_with_event": { + "yaml": YAML_CUSTOM_PING, + "code": """ + msg, err := builder.BuildServerTelemetryScenarioOnePingMessage( + glean.RequestInfo{ + UserAgent: "glean-test/1.0", + IpAddress: "127.0.0.1", + }, + glean.ServerTelemetryScenarioOnePing{ + MetricName: "string value", + MetricRequestBool: true, + MetricRequestCount: 20, + MetricRequestDatetime: time.Now(), + MetricRequestStringList: []string{"list", "of", "strings"}, + Event: glean.BackendSpecialEventEvent{ + EventFieldString: "extra value string", + EventFieldQuantity: 30, + EventFieldBool: true, + }, + }, + ) + if err != nil { + fmt.Fprintln(os.Stderr, "build:", err) + os.Exit(1) + } + result := topic.Publish(ctx, msg) + if _, err := result.Get(ctx); err != nil { + fmt.Fprintln(os.Stderr, "publish:", err) + os.Exit(1) + } + """, + "expected_doc_type": "server-telemetry-scenario-one", + "expected_event_count": 1, + }, +} @pytest.mark.go_dependency -def test_run_logging_custom_ping_without_event(tmp_path): +@pytest.mark.parametrize("scenario_name", list(PUBSUB_SCENARIOS)) +def test_run_record_pubsub(tmp_path, scenario_name): + """Build a Pub/Sub message and publish it via topic.Publish; assert + the wire format on the captured message (gzipped body + routing on + attributes).""" + scenario = PUBSUB_SCENARIOS[scenario_name] glean_module_path = tmp_path / "glean" + _translate_for_transport("pubsub", glean_module_path, scenario["yaml"]) - translate.translate( - [ - ROOT / "data" / "go_server_custom_ping_only_metrics.yaml", - ROOT / "data" / "go_server_custom_ping_only_pings.yaml", - ], - "go_server", - glean_module_path, 
- ) + msgs = run_publisher(tmp_path, scenario["code"]) + assert len(msgs) == 1, f"expected one published message, got {len(msgs)}" + attrs = msgs[0]["attributes"] + payload_bytes = gzip.decompress(base64.b64decode(msgs[0]["data"])) + payload = json.loads(payload_bytes) - code = """ - logger.RecordServerTelemetryScenarioOnePing( - glean.RequestInfo{ - UserAgent: "glean-test/1.0", - IpAddress: "127.0.0.1", - }, - glean.ServerTelemetryScenarioOnePing{ - MetricName: "string value", - MetricRequestBool: true, - MetricRequestCount: 20, - MetricRequestDatetime: time.Now(), - MetricRequestStringList: []string{"list", "of", "strings"}, - }, - ) - """ + # Pub/Sub attributes carry the routing fields. document_id is a + # lowercase v4 UUID; submission_timestamp is set by the decoder from + # publishTime, never by the publisher. + assert attrs["document_namespace"] == "glean.test" + assert attrs["document_type"] == scenario["expected_doc_type"] + assert attrs["document_version"] == "1" + assert attrs["user_agent"] == "glean-test/1.0" + assert attrs["x_forwarded_for"] == "127.0.0.1" + parsed = uuid.UUID(attrs["document_id"]) + assert parsed.version == 4 + assert attrs["document_id"] == attrs["document_id"].lower() + assert "submission_timestamp" not in attrs - logged_output = run_logger(tmp_path, code) - logged_output = json.loads(logged_output) - fields = logged_output["Fields"] - payload = fields["payload"] - - assert "glean-server-event" == logged_output["Type"] - assert "glean.test" == fields["document_namespace"] - assert "server-telemetry-scenario-one" == fields["document_type"] - assert "1" == fields["document_version"] - assert "glean-test/1.0" == fields["user_agent"] - - schema_url = ( - "https://raw.githubusercontent.com/mozilla-services/" - "mozilla-pipeline-schemas/main/" - "schemas/glean/glean/glean.1.schema.json" - ) + # Inner ping content + constructor args threaded through into client_info. 
+ assert len(payload["events"]) == scenario["expected_event_count"] + assert payload["client_info"]["app_display_version"] == "0.0.1" + assert payload["client_info"]["app_channel"] == "nightly" + + validate_payload_against_schema(payload_bytes) - input = io.StringIO(payload) - output = io.StringIO() - assert validate_ping.validate_ping(input, output, schema_url=schema_url) == 0, ( - output.getvalue() - ) + +# ============================================================================= +# Runtime tests - logging-only (Writer-injection paths) +# ============================================================================= @pytest.mark.go_dependency def test_run_logging_discard_writer(tmp_path): + """Setting Writer = io.Discard suppresses output without erroring.""" glean_module_path = tmp_path / "glean" - - translate.translate( - [ - ROOT / "data" / "go_server_custom_ping_only_metrics.yaml", - ROOT / "data" / "go_server_custom_ping_only_pings.yaml", - ], - "go_server", - glean_module_path, - ) + _translate_for_transport("logging", glean_module_path, YAML_CUSTOM_PING) imports = """ "io" @@ -321,7 +668,7 @@ def test_run_logging_discard_writer(tmp_path): IpAddress: "127.0.0.1", }, glean.ServerTelemetryScenarioOnePing{ - MetricName: "string value", + MetricName: "string value", MetricRequestBool: true, MetricRequestCount: 20, MetricRequestDatetime: time.Now(), @@ -333,23 +680,14 @@ def test_run_logging_discard_writer(tmp_path): } """ - # validate the code ran successfully and produced no output - logged_output = run_logger(tmp_path, code, imports=imports) - assert logged_output == "" + assert run_logger(tmp_path, code, imports=imports) == "" @pytest.mark.go_dependency def test_run_logging_nil_writer(tmp_path): + """Setting Writer = nil returns a 'writer not specified' error.""" glean_module_path = tmp_path / "glean" - - translate.translate( - [ - ROOT / "data" / "go_server_custom_ping_only_metrics.yaml", - ROOT / "data" / "go_server_custom_ping_only_pings.yaml", - ], - 
"go_server", - glean_module_path, - ) + _translate_for_transport("logging", glean_module_path, YAML_CUSTOM_PING) imports = """ "fmt" @@ -374,19 +712,26 @@ def test_run_logging_nil_writer(tmp_path): } """ - # validate only output produced is the printing of the returned error - logged_output = run_logger(tmp_path, code, imports=imports) - assert logged_output == "writer not specified\n" + assert run_logger(tmp_path, code, imports=imports) == "writer not specified\n" + + +# ============================================================================= +# Runtime tests - logging-only (labeled_boolean + nil string_list serialization) +# ============================================================================= +# +# These exercise serialization edge cases on the logging transport only. +# (The pubsub builder uses the same metrics-by-type rendering, so equivalent +# behavior is implicit in the cross-transport scenarios above.) @pytest.mark.go_dependency def test_run_logging_labeled_boolean(tmp_path): + """labeled_boolean metric serializes per-label values, including + explicitly-nil labels.""" glean_module_path = tmp_path / "glean" translate.translate( - [ - ROOT / "data" / "go_server_labeled_boolean_metrics.yaml", - ], + [ROOT / "data" / "go_server_labeled_boolean_metrics.yaml"], "go_server", glean_module_path, ) @@ -411,30 +756,16 @@ def test_run_logging_labeled_boolean(tmp_path): ) """ - logged_output = run_logger(tmp_path, code) - logged_output = json.loads(logged_output) - fields = logged_output["Fields"] - payload_str = fields["payload"] - payload = json.loads(payload_str) - - assert "glean-server-event" == logged_output["Type"] - assert "glean.test" == fields["document_namespace"] - assert "events" == fields["document_type"] - - # Validate payload against Glean schema - schema_url = ( - "https://raw.githubusercontent.com/mozilla-services/" - "mozilla-pipeline-schemas/main/" - "schemas/glean/glean/glean.1.schema.json" - ) + envelope = 
json.loads(run_logger(tmp_path, code)) + fields = envelope["Fields"] + payload = json.loads(fields["payload"]) - input = io.StringIO(payload_str) - output = io.StringIO() - assert validate_ping.validate_ping(input, output, schema_url=schema_url) == 0, ( - output.getvalue() - ) + assert envelope["Type"] == "glean-server-event" + assert fields["document_namespace"] == "glean.test" + assert fields["document_type"] == "events" + + validate_payload_against_schema(fields["payload"].encode("utf-8")) - # Check that labeled_boolean is properly serialized labeled_boolean_metrics = payload["metrics"]["labeled_boolean"] assert "telemetry.feature_flags" in labeled_boolean_metrics feature_flags = labeled_boolean_metrics["telemetry.feature_flags"] @@ -446,18 +777,16 @@ def test_run_logging_labeled_boolean(tmp_path): @pytest.mark.go_dependency def test_run_logging_labeled_boolean_omitted(tmp_path): - """Test that completely omitted labeled_boolean metrics serialize correctly.""" + """A completely-omitted labeled_boolean metric (zero value) serializes + as an empty object.""" glean_module_path = tmp_path / "glean" translate.translate( - [ - ROOT / "data" / "go_server_labeled_boolean_metrics.yaml", - ], + [ROOT / "data" / "go_server_labeled_boolean_metrics.yaml"], "go_server", glean_module_path, ) - # This code logs an events ping without setting the labeled_boolean metric at all, not even to nil. 
code = """ _ = time.Now() // satisfy Go's unused import check for "time" logger.RecordEventsPing( @@ -471,42 +800,23 @@ def test_run_logging_labeled_boolean_omitted(tmp_path): ) """ - logged_output = run_logger(tmp_path, code) - logged_output = json.loads(logged_output) - fields = logged_output["Fields"] - payload_str = fields["payload"] - payload = json.loads(payload_str) - - assert "glean-server-event" == logged_output["Type"] - assert "glean.test" == fields["document_namespace"] - assert "events" == fields["document_type"] - - # Validate payload against Glean schema - schema_url = ( - "https://raw.githubusercontent.com/mozilla-services/" - "mozilla-pipeline-schemas/main/" - "schemas/glean/glean/glean.1.schema.json" - ) + envelope = json.loads(run_logger(tmp_path, code)) + fields = envelope["Fields"] + payload = json.loads(fields["payload"]) - input = io.StringIO(payload_str) - output = io.StringIO() - validation_result = validate_ping.validate_ping( - input, output, schema_url=schema_url - ) + assert envelope["Type"] == "glean-server-event" + assert fields["document_namespace"] == "glean.test" + assert fields["document_type"] == "events" - assert validation_result == 0, f"Validation failed. 
Output: {output.getvalue()}" + validate_payload_against_schema(fields["payload"].encode("utf-8")) - # Check how labeled_boolean is serialized when omitted - labeled_boolean_metrics = payload["metrics"]["labeled_boolean"] - assert "telemetry.feature_flags" in labeled_boolean_metrics - feature_flags = labeled_boolean_metrics["telemetry.feature_flags"] - # Verify it's an empty dict {} + feature_flags = payload["metrics"]["labeled_boolean"]["telemetry.feature_flags"] assert feature_flags == {} @pytest.mark.go_dependency def test_run_logging_nil_string_list(tmp_path): - """Test that nil string_list metrics serialize as empty arrays, not null.""" + """nil string_list metrics serialize as empty arrays, not null.""" glean_module_path = tmp_path / "glean" translate.translate( @@ -525,99 +835,27 @@ def test_run_logging_nil_string_list(tmp_path): IpAddress: "127.0.0.1", }, glean.EventsPing{ - MetricName: "string value", - MetricRequestBool: true, - MetricRequestCount: 10, - MetricRequestDatetime: time.Now(), + MetricName: "string value", + MetricRequestBool: true, + MetricRequestCount: 10, + MetricRequestDatetime: time.Now(), // MetricRequestStringList intentionally omitted (nil) Event: glean.BackendTestEventEvent{ - EventFieldString: "event extra string value", - EventFieldQuantity: 100, - EventFieldBool: false, + EventFieldString: "event extra string value", + EventFieldQuantity: 100, + EventFieldBool: false, }, }, ) """ - logged_output = run_logger(tmp_path, code) - logged_output = json.loads(logged_output) - fields = logged_output["Fields"] - payload_str = fields["payload"] - payload = json.loads(payload_str) + envelope = json.loads(run_logger(tmp_path, code)) + fields = envelope["Fields"] + payload = json.loads(fields["payload"]) - # Verify string_list is an empty array, not null string_list_value = payload["metrics"]["string_list"]["metric.request_string_list"] assert string_list_value == [], ( f"Expected empty array for nil string_list, got: {string_list_value}" ) - # 
Validate payload against Glean schema - schema_url = ( - "https://raw.githubusercontent.com/mozilla-services/" - "mozilla-pipeline-schemas/main/" - "schemas/glean/glean/glean.1.schema.json" - ) - - input = io.StringIO(payload_str) - output = io.StringIO() - assert validate_ping.validate_ping(input, output, schema_url=schema_url) == 0, ( - output.getvalue() - ) - - -@pytest.mark.go_dependency -def test_run_logging_custom_ping_with_event(tmp_path): - glean_module_path = tmp_path / "glean" - - translate.translate( - [ - ROOT / "data" / "go_server_custom_ping_only_metrics.yaml", - ROOT / "data" / "go_server_custom_ping_only_pings.yaml", - ], - "go_server", - glean_module_path, - ) - - code = """ - logger.RecordServerTelemetryScenarioOnePing( - glean.RequestInfo{ - UserAgent: "glean-test/1.0", - IpAddress: "127.0.0.1", - }, - glean.ServerTelemetryScenarioOnePing{ - MetricName: "string value", - MetricRequestBool: true, - MetricRequestCount: 20, - MetricRequestDatetime: time.Now(), - MetricRequestStringList: []string{"list", "of", "strings"}, - Event: glean.BackendSpecialEventEvent{ - EventFieldString: "exta value string", - EventFieldQuantity: 30, - EventFieldBool: true, - }, - }, - ) - """ - - logged_output = run_logger(tmp_path, code) - logged_output = json.loads(logged_output) - fields = logged_output["Fields"] - payload = fields["payload"] - - assert "glean-server-event" == logged_output["Type"] - assert "glean.test" == fields["document_namespace"] - assert "server-telemetry-scenario-one" == fields["document_type"] - assert "1" == fields["document_version"] - assert "glean-test/1.0" == fields["user_agent"] - - schema_url = ( - "https://raw.githubusercontent.com/mozilla-services/" - "mozilla-pipeline-schemas/main/" - "schemas/glean/glean/glean.1.schema.json" - ) - - input = io.StringIO(payload) - output = io.StringIO() - assert validate_ping.validate_ping(input, output, schema_url=schema_url) == 0, ( - output.getvalue() - ) + 
validate_payload_against_schema(fields["payload"].encode("utf-8")) From a641d64876dc78cf3b8a0302f2639ceb6e739df8 Mon Sep 17 00:00:00 2001 From: Arkadiusz Komarzewski Date: Mon, 25 May 2026 14:15:31 +0200 Subject: [PATCH 2/4] lint --- tests/test_go_server.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_go_server.py b/tests/test_go_server.py index 51ed0245c..ba30ee749 100644 --- a/tests/test_go_server.py +++ b/tests/test_go_server.py @@ -346,8 +346,7 @@ def test_parser_pubsub_generation(tmp_path): # Per-ping builder method: value receiver, returns *pubsub.Message. assert "func (g GleanEventsBuilder) BuildEventsPingMessage(" in content assert ( - "func (g GleanEventsBuilder) BuildEventsPingMessageWithoutUserInfo(" - in content + "func (g GleanEventsBuilder) BuildEventsPingMessageWithoutUserInfo(" in content ) assert "(*pubsub.Message, error)" in content From 8f745fed547d891f3296d61250579e3eb92805ff Mon Sep 17 00:00:00 2001 From: Arkadiusz Komarzewski Date: Mon, 25 May 2026 14:27:22 +0200 Subject: [PATCH 3/4] ci: install Go 1.26 for go_server_pubsub tests --- .circleci/config.yml | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index bbc409b34..556de1d74 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -53,6 +53,27 @@ commands: command: | uv sync --resolution lowest-direct + setup-go: + steps: + - run: + name: Installing Go + # The Ubuntu-default Go available via apt in cimg/python is too + # old for the cloud.google.com/go/pubsub dependency chain (needs + # Go 1.24+). Install a newer release directly. + # To update: pick a release from https://go.dev/dl/ and bump + # GO_VERSION below. 
+ command: | + cd /tmp + GO_VERSION=1.26.3 + GO_TARBALL=go${GO_VERSION}.linux-amd64.tar.gz + curl -sfSL --retry 5 -o "${GO_TARBALL}" "https://go.dev/dl/${GO_TARBALL}" + sudo rm -rf /usr/local/go + sudo tar -C /usr/local -xf "${GO_TARBALL}" + - run: + name: Set Go path + command: | + echo 'export PATH=/usr/local/go/bin:$PATH' >> $BASH_ENV + test-python-version: steps: - run: @@ -62,9 +83,9 @@ commands: sudo apt install \ --yes --no-install-recommends \ openjdk-11-jdk-headless \ - ruby \ - golang-go + ruby make install-kotlin-linters + - setup-go - setup-rust-toolchain - run: name: Test From be8c49e4f17d0d362620f18a9b67e008cf390a36 Mon Sep 17 00:00:00 2001 From: Arkadiusz Komarzewski Date: Mon, 25 May 2026 18:31:11 +0200 Subject: [PATCH 4/4] switch to v2 api --- glean_parser/templates/go_server.jinja2 | 8 ++++---- tests/test-go/test_publisher.go.tmpl | 13 ++++++++----- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/glean_parser/templates/go_server.jinja2 b/glean_parser/templates/go_server.jinja2 index bfa6a1255..a3871146a 100644 --- a/glean_parser/templates/go_server.jinja2 +++ b/glean_parser/templates/go_server.jinja2 @@ -18,7 +18,7 @@ import ( "sync" "time" - "cloud.google.com/go/pubsub" + pubsub "cloud.google.com/go/pubsub/v2" "github.com/google/uuid" {% else %} "encoding/json" @@ -49,9 +49,9 @@ type GleanEventsLogger struct { {% else %} // GleanEventsBuilder constructs Pub/Sub messages carrying Glean pings. The // builder is stateless beyond its app-identity fields; callers own the -// pubsub.Client, the pubsub.Topic, batching settings, retries, shutdown -// sequencing, and any publish-result metrics. A reference implementation of -// publisher lifecycle lives in mozilla/glean-server-examples. +// pubsub.Client, the pubsub.Publisher, batching settings, retries, shutdown sequencing, and any +// publish-result metrics. A reference implementation of publisher lifecycle +// lives in mozilla/glean-server-examples. 
type GleanEventsBuilder struct { AppID string // Application ID to identify application per Glean standards AppDisplayVersion string // Version of application emitting the event diff --git a/tests/test-go/test_publisher.go.tmpl b/tests/test-go/test_publisher.go.tmpl index af2e1eba2..f9d90f86d 100644 --- a/tests/test-go/test_publisher.go.tmpl +++ b/tests/test-go/test_publisher.go.tmpl @@ -10,8 +10,9 @@ import ( "time" /* IMPORTS */ - "cloud.google.com/go/pubsub" - "cloud.google.com/go/pubsub/pstest" + pubsub "cloud.google.com/go/pubsub/v2" + "cloud.google.com/go/pubsub/v2/apiv1/pubsubpb" + "cloud.google.com/go/pubsub/v2/pstest" "google.golang.org/api/option" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" @@ -39,20 +40,22 @@ func main() { fmt.Fprintln(os.Stderr, "setup-client:", err) os.Exit(1) } - if _, err := setupClient.CreateTopic(ctx, "test-topic"); err != nil { + if _, err := setupClient.TopicAdminClient.CreateTopic(ctx, &pubsubpb.Topic{ + Name: "projects/test-project/topics/test-topic", + }); err != nil { fmt.Fprintln(os.Stderr, "create-topic:", err) os.Exit(1) } setupClient.Close() setupConn.Close() - // Client + topic used by the test snippet to publish. + // Client + publisher used by the test snippet to publish. client, err := pubsub.NewClient(ctx, "test-project") if err != nil { fmt.Fprintln(os.Stderr, "client:", err) os.Exit(1) } - topic := client.Topic("test-topic") + topic := client.Publisher("test-topic") builder := glean.GleanEventsBuilder{ AppID: "glean.test",