Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
- Set the name based on the transaction name when converting a transaction into a segment span. ([#5961](https://github.com/getsentry/relay/pull/5961))
- Set the segment name for child spans based on the transaction name. ([#5959](https://github.com/getsentry/relay/pull/5959))
- Emit missing outcomes in the Playstation and Minidump endpoints, and add/update outcome discard reasons. ([#5966](https://github.com/getsentry/relay/pull/5966))
- Correct the PII status for `SpanData` fields. ([#5995](https://github.com/getsentry/relay/pull/5995))
- Obtain PII values for `SpanData` fields from `sentry-conventions`. ([#5997](https://github.com/getsentry/relay/pull/5997))

**Internal**:

Expand Down
9 changes: 5 additions & 4 deletions relay-event-schema/src/processor/attrs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -594,10 +594,11 @@ impl<'a> ProcessingState<'a> {

/// Derives the attrs for recursion.
pub fn inner_attrs(&self) -> Option<Cow<'_, FieldAttrs>> {
match self.pii() {
Pii::True => Some(Cow::Borrowed(&PII_TRUE_FIELD_ATTRS)),
Pii::False => None,
Pii::Maybe => Some(Cow::Borrowed(&PII_MAYBE_FIELD_ATTRS)),
match self.attrs().pii {
PiiMode::Static(Pii::True) => Some(Cow::Borrowed(&PII_TRUE_FIELD_ATTRS)),
PiiMode::Static(Pii::False) => None,
PiiMode::Static(Pii::Maybe) => Some(Cow::Borrowed(&PII_MAYBE_FIELD_ATTRS)),
PiiMode::Dynamic(f) => Some(Cow::Owned(DEFAULT_FIELD_ATTRS.pii_dynamic(f))),
Comment on lines -597 to +601
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously, when deriving the attributes for recursion, we would eagerly resolve a dynamic PII value function into a static value. Now we instead pass the dynamic function through.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dynamic PII in inner_attrs over-scrubs nested field values

Medium Severity

The PiiMode::Dynamic(f) arm in inner_attrs passes span_data_pii_from_conventions through to children of complex-typed fields (e.g., gen_ai.input.messages of type Annotated<Value>). When such a field contains a nested object, each sub-key (like "role" or "content") is looked up in conventions via state.keys().next(). Sub-keys not found in conventions get Pii::True, whereas previously (with pii = "maybe" on SpanData) they inherited Pii::Maybe. This upgrades scrubbing from "only specific path selectors" to "generic selectors like $string match," causing unintended data removal in nested structures.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 7fa8c6d. Configure here.

}
}

Expand Down
70 changes: 43 additions & 27 deletions relay-event-schema/src/protocol/span.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use relay_protocol::{
Annotated, Array, Empty, Error, FromValue, Getter, IntoValue, Object, Val, Value,
};

use crate::processor::ProcessValue;
use crate::processor::{Pii, ProcessValue, ProcessingState};
use crate::protocol::{
EventId, IpAddr, JsonLenientString, LenientString, Measurements, OperationType, OriginType,
SpanId, SpanStatus, ThreadId, Timestamp, TraceId,
Expand Down Expand Up @@ -454,12 +454,32 @@ impl Getter for SentryTags {
}
}

/// Determines the `Pii` value for a field of [`SpanData`] by looking it up in `relay-conventions`.
///
/// If the field is not found in the conventions, this returns `Pii::True`
/// as a precaution.
fn span_data_pii_from_conventions(state: &ProcessingState) -> Pii {
fn inner(state: &ProcessingState) -> Option<Pii> {
// `state.keys().next()` is the _last_ segment in the state's
// path, i.e. the field name.
let key = state.keys().next()?;
Comment thread
loewenheim marked this conversation as resolved.

match relay_conventions::attribute_info(key)?.pii {
relay_conventions::Pii::True => Some(Pii::True),
relay_conventions::Pii::False => Some(Pii::False),
relay_conventions::Pii::Maybe => Some(Pii::Maybe),
}
}

inner(state).unwrap_or(Pii::True)
Comment thread
sentry[bot] marked this conversation as resolved.
Comment thread
cursor[bot] marked this conversation as resolved.
}

/// Arbitrary additional data on a span.
///
/// Besides arbitrary user data, this type also contains SDK-provided fields used by the
/// product (see <https://develop.sentry.dev/sdk/performance/span-data-conventions/>).
#[derive(Clone, Debug, Default, PartialEq, Empty, FromValue, IntoValue, ProcessValue)]
#[metastructure(trim = false, pii = "maybe")]
#[metastructure(trim = false, pii = "span_data_pii_from_conventions")]
pub struct SpanData {
/// Mobile app start variant.
///
Expand Down Expand Up @@ -594,11 +614,7 @@ pub struct SpanData {
pub gen_ai_response_object: Annotated<Value>,

/// Whether or not the AI model call's response was streamed back asynchronously
#[metastructure(
field = "gen_ai.response.streaming",
legacy_alias = "ai.streaming",
pii = "false"
)]
#[metastructure(field = "gen_ai.response.streaming", legacy_alias = "ai.streaming")]
pub gen_ai_response_streaming: Annotated<Value>,

/// Total output tokens per seconds throughput
Expand Down Expand Up @@ -751,15 +767,11 @@ pub struct SpanData {
pub db_collection_name: Annotated<Value>,

/// The sentry environment.
#[metastructure(
field = "sentry.environment",
legacy_alias = "environment",
pii = "false"
)]
#[metastructure(field = "sentry.environment", legacy_alias = "environment")]
pub environment: Annotated<String>,

/// The release version of the project.
#[metastructure(field = "sentry.release", legacy_alias = "release", pii = "false")]
#[metastructure(field = "sentry.release", legacy_alias = "release")]
pub release: Annotated<LenientString>,

/// The decoded body size of the response (in bytes).
Expand Down Expand Up @@ -811,7 +823,7 @@ pub struct SpanData {
pub thread_name: Annotated<String>,

/// ID of thread from where the span originated.
#[metastructure(field = "thread.id", pii = "false")]
#[metastructure(field = "thread.id")]
pub thread_id: Annotated<ThreadId>,

/// Name of the segment that this span belongs to (see `segment_id`).
Expand All @@ -831,19 +843,19 @@ pub struct SpanData {
pub url_scheme: Annotated<Value>,

/// User Display
#[metastructure(field = "user", pii = "true")]
#[metastructure(field = "user")]
pub user: Annotated<Value>,

/// User email address.
///
/// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/>
#[metastructure(field = "user.email", pii = "true")]
#[metastructure(field = "user.email")]
pub user_email: Annotated<String>,

/// User’s full name.
///
/// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/>
#[metastructure(field = "user.full_name", pii = "true")]
#[metastructure(field = "user.full_name")]
pub user_full_name: Annotated<String>,

/// Two-letter country code (ISO 3166-1 alpha-2).
Expand Down Expand Up @@ -873,45 +885,50 @@ pub struct SpanData {
/// Unique user hash to correlate information for a user in anonymized form.
///
/// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/>
#[metastructure(field = "user.hash", pii = "true")]
#[metastructure(field = "user.hash")]
pub user_hash: Annotated<String>,

/// Unique identifier of the user.
///
/// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/>
#[metastructure(field = "user.id", pii = "true")]
#[metastructure(field = "user.id")]
pub user_id: Annotated<String>,

/// Short name or login/username of the user.
///
/// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/>
#[metastructure(field = "user.name", pii = "true")]
#[metastructure(field = "user.name")]
pub user_name: Annotated<String>,

/// Array of user roles at the time of the event.
///
/// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/>
#[metastructure(field = "user.roles", pii = "true")]
#[metastructure(field = "user.roles")]
pub user_roles: Annotated<Array<String>>,

/// Exclusive Time
#[metastructure(field = "sentry.exclusive_time")]
pub exclusive_time: Annotated<Value>,

/// Profile ID
#[metastructure(field = "profile_id", pii = "false")]
#[metastructure(
field = "profile_id",
// This field is not defined in conventions, so we need to set
// PII explicitly.
pii = "false"
)]
pub profile_id: Annotated<Value>,

/// Replay ID
#[metastructure(field = "sentry.replay_id", legacy_alias = "replay_id", pii = "false")]
#[metastructure(field = "sentry.replay_id", legacy_alias = "replay_id")]
pub replay_id: Annotated<Value>,

/// The sentry SDK (see [`crate::protocol::ClientSdkInfo`]).
#[metastructure(field = "sentry.sdk.name", pii = "false")]
#[metastructure(field = "sentry.sdk.name")]
pub sdk_name: Annotated<String>,

/// The sentry SDK version (see [`crate::protocol::ClientSdkInfo`]).
#[metastructure(field = "sentry.sdk.version", pii = "false")]
#[metastructure(field = "sentry.sdk.version")]
pub sdk_version: Annotated<String>,

/// Slow Frames
Expand Down Expand Up @@ -975,7 +992,7 @@ pub struct SpanData {
pub http_query: Annotated<String>,

/// The client's IP address.
#[metastructure(field = "client.address", pii = "true")]
#[metastructure(field = "client.address")]
pub client_address: Annotated<IpAddr>,

/// The current route in the application.
Expand Down Expand Up @@ -1013,7 +1030,6 @@ pub struct SpanData {
/// Other fields in `span.data`.
#[metastructure(
additional_properties,
pii = "true",
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Additional properties lose unconditional PII scrubbing safety net

Medium Severity

The other field (additional properties bag) had its explicit pii = "true" removed and now inherits the struct-level dynamic span_data_pii_from_conventions function. When inner_attrs propagates this dynamic function to child entries, any attribute that exists in sentry-conventions with pii = false will no longer be scrubbed — even for unknown, user-supplied data landing in additional_properties. This is a relaxation of the previous unconditional scrubbing policy for the catch-all bag. Combined with the new PiiMode::Dynamic arm in inner_attrs always returning Some(...), deeply nested values within these entries also inherit the dynamic lookup rather than the previous flat Pii::True.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit e48ff37. Configure here.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As long as we have test coverage for these cases, I don't see that as a problem

retain = true,
skip_serialization = "null" // applies to child elements
)]
Expand Down
3 changes: 3 additions & 0 deletions relay-pii/src/processor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,9 @@ fn apply_regex_to_chunks<'a>(
return;
}

// ALERT: This logic assumes that `regex` doesn't match a capture
// group starting on a null byte. If you get an error in debug mode
// about `replacement_chunks` not being empty, check the regex.
static NULL_SPLIT_RE: OnceLock<Regex> = OnceLock::new();
let regex = NULL_SPLIT_RE.get_or_init(|| {
#[allow(clippy::trivial_regex)]
Expand Down
2 changes: 1 addition & 1 deletion relay-pii/src/regexes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ regex!(
)
)
(
[^/\\\r\n]+
[^/\\\r\n\x00]+
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this regex can match a null byte it causes problems with redactions.

)
"
);
Expand Down
61 changes: 49 additions & 12 deletions tests/integration/test_pii.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,21 +39,53 @@ def test_scrub_span_sentry_tags_advanced_rules(mini_sentry, relay):
assert event["spans"][0]["sentry_tags"]["user.geo.subregion"] == "***"


def test_http_query(mini_sentry, relay):
@pytest.mark.parametrize(
"field,pii",
[
# SpanData field, pii: true in conventions
("url.full", "true"),
# SpanData field, pii: maybe in conventions
("gen_ai.input.messages", "maybe"),
# SpanData field, pii: false in conventions
("sentry.release", "false"),
# SpanData field, missing in conventions, explicitly set on SpanData
("profile_id", "false"),
# Not a SpanData field, pii: true in conventions
("ai.warnings", "true"),
# Not a SpanData field, pii: maybe in conventions
("process.runtime.name", "maybe"),
# Not a SpanData field, pii: false in conventions
("sentry.cancellation_reason", "false"),
# Not a SpanData field and not defined in conventions
("madeup.field", "true"),
],
)
def test_spandata_conventions(mini_sentry, relay, field, pii):
project_id = 42
relay = relay(
mini_sentry,
)
config = mini_sentry.add_basic_project_config(project_id)
config["config"].setdefault(
"datascrubbingSettings",
{
"scrubData": True,
"scrubDefaults": True,
},
)

config["config"]["piiConfig"] = {
"rules": {
"custom_secret": {
"type": "pattern",
"pattern": r"(X-Amz-Algorithm|X-Amz-Credential)=[^& ]+",
"redaction": {"method": "mask"},
}
"pattern": ".*",
"redaction": {"method": "replace", "text": "[REDACTED]"},
},
},
"applications": {
"$string": ["@anything:mask"],
f"'{field}'": ["custom_secret"],
},
"applications": {"$span.data.*": ["custom_secret"]},
}

relay.send_event(
Expand All @@ -64,20 +96,25 @@ def test_http_query(mini_sentry, relay):
"timestamp": 1778131375.142457,
"start_timestamp": 1778131374.296492,
"exclusive_time": 845.965,
"data": {
"http.query": "X-Amz-Algorithm=Some_algorithm&X-Amz-Credential=foobar/19700101/us-east-1/s3/aws4_request&X-Amz-Signature=579375859378367127495789347628374",
},
"data": {field: "secret value"},
}
],
},
)

event = mini_sentry.get_captured_envelope().get_event()

assert (
event["spans"][0]["data"]["http.query"]
== "******************************&**********************************************************&X-Amz-Signature=579375859378367127495789347628374"
)
value = event["spans"][0]["data"][field]

# pii: true fields should get masked
if pii == "true":
assert value == "************"
# pii: maybe fields should get redacted
elif pii == "maybe":
assert value == "[REDACTED]"
# pii: false fields should be left alone
else:
assert value == "secret value"


@pytest.mark.parametrize(
Expand Down
11 changes: 9 additions & 2 deletions tests/integration/test_spansv2.py
Original file line number Diff line number Diff line change
Expand Up @@ -1510,12 +1510,19 @@ def test_spansv2_attribute_normalization(
http_result = spans_by_id[http_span_id]
assert http_result == {
**common,
"_meta": {
"attributes": {
"url.full": {
"value": {"": {"len": 63, "rem": [["@userpath", "s", 29, 35]]}}
}
}
},
Comment thread
Dav1dde marked this conversation as resolved.
"span_id": http_span_id,
"attributes": {
"sentry.category": {"type": "string", "value": "http"},
"sentry.description": {
"type": "string",
"value": "GET https://www.service.io/users/01234-qwerty/settings/98765-adfghj",
"value": "GET https://www.service.io/users/[user]/settings/98765-adfghj",
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This attribute is filled from url.full, hence inherits the redaction.

},
"sentry.op": {"type": "string", "value": "http.client"},
"sentry.observed_timestamp_nanos": {
Expand All @@ -1528,7 +1535,7 @@ def test_spansv2_attribute_normalization(
"sentry.domain": {"type": "string", "value": "*.service.io"},
"url.full": {
"type": "string",
"value": "https://www.service.io/users/01234-qwerty/settings/98765-adfghj",
"value": "https://www.service.io/users/[user]/settings/98765-adfghj",
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This attribute has become pii: "true" in the current conventions version.

},
},
}
Expand Down
Loading