-
Notifications
You must be signed in to change notification settings - Fork 117
feat(pii): Base SpanData PII on relay-conventions #5997
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
eea1bbf
0e8be5e
55ab550
0bbd1e3
e48ff37
677e075
1b71ccb
7fa8c6d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -594,10 +594,11 @@ impl<'a> ProcessingState<'a> { | |
|
|
||
| /// Derives the attrs for recursion. | ||
| pub fn inner_attrs(&self) -> Option<Cow<'_, FieldAttrs>> { | ||
| match self.pii() { | ||
| Pii::True => Some(Cow::Borrowed(&PII_TRUE_FIELD_ATTRS)), | ||
| Pii::False => None, | ||
| Pii::Maybe => Some(Cow::Borrowed(&PII_MAYBE_FIELD_ATTRS)), | ||
| match self.attrs().pii { | ||
| PiiMode::Static(Pii::True) => Some(Cow::Borrowed(&PII_TRUE_FIELD_ATTRS)), | ||
| PiiMode::Static(Pii::False) => None, | ||
| PiiMode::Static(Pii::Maybe) => Some(Cow::Borrowed(&PII_MAYBE_FIELD_ATTRS)), | ||
| PiiMode::Dynamic(f) => Some(Cow::Owned(DEFAULT_FIELD_ATTRS.pii_dynamic(f))), | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Dynamic PII in …
|
||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,7 +8,7 @@ use relay_protocol::{ | |
| Annotated, Array, Empty, Error, FromValue, Getter, IntoValue, Object, Val, Value, | ||
| }; | ||
|
|
||
| use crate::processor::ProcessValue; | ||
| use crate::processor::{Pii, ProcessValue, ProcessingState}; | ||
| use crate::protocol::{ | ||
| EventId, IpAddr, JsonLenientString, LenientString, Measurements, OperationType, OriginType, | ||
| SpanId, SpanStatus, ThreadId, Timestamp, TraceId, | ||
|
|
@@ -454,12 +454,32 @@ impl Getter for SentryTags { | |
| } | ||
| } | ||
|
|
||
| /// Determines the `Pii` value for a field of [`SpanData`] by looking it up in `relay-conventions`. | ||
| /// | ||
| /// If the field is not found in the conventions, this returns `Pii::True` | ||
| /// as a precaution. | ||
| fn span_data_pii_from_conventions(state: &ProcessingState) -> Pii { | ||
| fn inner(state: &ProcessingState) -> Option<Pii> { | ||
| // `state.keys().next()` is the _last_ segment in the state's | ||
| // path, i.e. the field name. | ||
| let key = state.keys().next()?; | ||
|
loewenheim marked this conversation as resolved.
|
||
|
|
||
| match relay_conventions::attribute_info(key)?.pii { | ||
| relay_conventions::Pii::True => Some(Pii::True), | ||
| relay_conventions::Pii::False => Some(Pii::False), | ||
| relay_conventions::Pii::Maybe => Some(Pii::Maybe), | ||
| } | ||
| } | ||
|
|
||
| inner(state).unwrap_or(Pii::True) | ||
|
sentry[bot] marked this conversation as resolved.
cursor[bot] marked this conversation as resolved.
|
||
| } | ||
|
|
||
| /// Arbitrary additional data on a span. | ||
| /// | ||
| /// Besides arbitrary user data, this type also contains SDK-provided fields used by the | ||
| /// product (see <https://develop.sentry.dev/sdk/performance/span-data-conventions/>). | ||
| #[derive(Clone, Debug, Default, PartialEq, Empty, FromValue, IntoValue, ProcessValue)] | ||
| #[metastructure(trim = false, pii = "maybe")] | ||
| #[metastructure(trim = false, pii = "span_data_pii_from_conventions")] | ||
| pub struct SpanData { | ||
| /// Mobile app start variant. | ||
| /// | ||
|
|
@@ -594,11 +614,7 @@ pub struct SpanData { | |
| pub gen_ai_response_object: Annotated<Value>, | ||
|
|
||
| /// Whether or not the AI model call's response was streamed back asynchronously | ||
| #[metastructure( | ||
| field = "gen_ai.response.streaming", | ||
| legacy_alias = "ai.streaming", | ||
| pii = "false" | ||
| )] | ||
| #[metastructure(field = "gen_ai.response.streaming", legacy_alias = "ai.streaming")] | ||
| pub gen_ai_response_streaming: Annotated<Value>, | ||
|
|
||
| /// Total output tokens per second throughput | ||
|
|
@@ -751,15 +767,11 @@ pub struct SpanData { | |
| pub db_collection_name: Annotated<Value>, | ||
|
|
||
| /// The sentry environment. | ||
| #[metastructure( | ||
| field = "sentry.environment", | ||
| legacy_alias = "environment", | ||
| pii = "false" | ||
| )] | ||
| #[metastructure(field = "sentry.environment", legacy_alias = "environment")] | ||
| pub environment: Annotated<String>, | ||
|
|
||
| /// The release version of the project. | ||
| #[metastructure(field = "sentry.release", legacy_alias = "release", pii = "false")] | ||
| #[metastructure(field = "sentry.release", legacy_alias = "release")] | ||
| pub release: Annotated<LenientString>, | ||
|
|
||
| /// The decoded body size of the response (in bytes). | ||
|
|
@@ -811,7 +823,7 @@ pub struct SpanData { | |
| pub thread_name: Annotated<String>, | ||
|
|
||
| /// ID of thread from where the span originated. | ||
| #[metastructure(field = "thread.id", pii = "false")] | ||
| #[metastructure(field = "thread.id")] | ||
| pub thread_id: Annotated<ThreadId>, | ||
|
|
||
| /// Name of the segment that this span belongs to (see `segment_id`). | ||
|
|
@@ -831,19 +843,19 @@ pub struct SpanData { | |
| pub url_scheme: Annotated<Value>, | ||
|
|
||
| /// User Display | ||
| #[metastructure(field = "user", pii = "true")] | ||
| #[metastructure(field = "user")] | ||
| pub user: Annotated<Value>, | ||
|
|
||
| /// User email address. | ||
| /// | ||
| /// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/> | ||
| #[metastructure(field = "user.email", pii = "true")] | ||
| #[metastructure(field = "user.email")] | ||
| pub user_email: Annotated<String>, | ||
|
|
||
| /// User’s full name. | ||
| /// | ||
| /// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/> | ||
| #[metastructure(field = "user.full_name", pii = "true")] | ||
| #[metastructure(field = "user.full_name")] | ||
| pub user_full_name: Annotated<String>, | ||
|
|
||
| /// Two-letter country code (ISO 3166-1 alpha-2). | ||
|
|
@@ -873,45 +885,50 @@ pub struct SpanData { | |
| /// Unique user hash to correlate information for a user in anonymized form. | ||
| /// | ||
| /// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/> | ||
| #[metastructure(field = "user.hash", pii = "true")] | ||
| #[metastructure(field = "user.hash")] | ||
| pub user_hash: Annotated<String>, | ||
|
|
||
| /// Unique identifier of the user. | ||
| /// | ||
| /// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/> | ||
| #[metastructure(field = "user.id", pii = "true")] | ||
| #[metastructure(field = "user.id")] | ||
| pub user_id: Annotated<String>, | ||
|
|
||
| /// Short name or login/username of the user. | ||
| /// | ||
| /// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/> | ||
| #[metastructure(field = "user.name", pii = "true")] | ||
| #[metastructure(field = "user.name")] | ||
| pub user_name: Annotated<String>, | ||
|
|
||
| /// Array of user roles at the time of the event. | ||
| /// | ||
| /// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/> | ||
| #[metastructure(field = "user.roles", pii = "true")] | ||
| #[metastructure(field = "user.roles")] | ||
| pub user_roles: Annotated<Array<String>>, | ||
|
|
||
| /// Exclusive Time | ||
| #[metastructure(field = "sentry.exclusive_time")] | ||
| pub exclusive_time: Annotated<Value>, | ||
|
|
||
| /// Profile ID | ||
| #[metastructure(field = "profile_id", pii = "false")] | ||
| #[metastructure( | ||
| field = "profile_id", | ||
| // This field is not defined in conventions, so we need to set | ||
| // PII explicitly. | ||
| pii = "false" | ||
| )] | ||
| pub profile_id: Annotated<Value>, | ||
|
|
||
| /// Replay ID | ||
| #[metastructure(field = "sentry.replay_id", legacy_alias = "replay_id", pii = "false")] | ||
| #[metastructure(field = "sentry.replay_id", legacy_alias = "replay_id")] | ||
| pub replay_id: Annotated<Value>, | ||
|
|
||
| /// The sentry SDK (see [`crate::protocol::ClientSdkInfo`]). | ||
| #[metastructure(field = "sentry.sdk.name", pii = "false")] | ||
| #[metastructure(field = "sentry.sdk.name")] | ||
| pub sdk_name: Annotated<String>, | ||
|
|
||
| /// The sentry SDK version (see [`crate::protocol::ClientSdkInfo`]). | ||
| #[metastructure(field = "sentry.sdk.version", pii = "false")] | ||
| #[metastructure(field = "sentry.sdk.version")] | ||
| pub sdk_version: Annotated<String>, | ||
|
|
||
| /// Slow Frames | ||
|
|
@@ -975,7 +992,7 @@ pub struct SpanData { | |
| pub http_query: Annotated<String>, | ||
|
|
||
| /// The client's IP address. | ||
| #[metastructure(field = "client.address", pii = "true")] | ||
| #[metastructure(field = "client.address")] | ||
| pub client_address: Annotated<IpAddr>, | ||
|
|
||
| /// The current route in the application. | ||
|
|
@@ -1013,7 +1030,6 @@ pub struct SpanData { | |
| /// Other fields in `span.data`. | ||
| #[metastructure( | ||
| additional_properties, | ||
| pii = "true", | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Additional properties lose unconditional PII scrubbing safety net. Medium Severity. The … Additional Locations (1). Reviewed by Cursor Bugbot for commit e48ff37. Configure here.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As long as we have test coverage for these cases, I don't see that as a problem |
||
| retain = true, | ||
| skip_serialization = "null" // applies to child elements | ||
| )] | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -291,7 +291,7 @@ regex!( | |
| ) | ||
| ) | ||
| ( | ||
| [^/\\\r\n]+ | ||
| [^/\\\r\n\x00]+ | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If this regex can match a null byte, it causes problems with redactions. |
||
| ) | ||
| " | ||
| ); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1510,12 +1510,19 @@ def test_spansv2_attribute_normalization( | |
| http_result = spans_by_id[http_span_id] | ||
| assert http_result == { | ||
| **common, | ||
| "_meta": { | ||
| "attributes": { | ||
| "url.full": { | ||
| "value": {"": {"len": 63, "rem": [["@userpath", "s", 29, 35]]}} | ||
| } | ||
| } | ||
| }, | ||
|
Dav1dde marked this conversation as resolved.
|
||
| "span_id": http_span_id, | ||
| "attributes": { | ||
| "sentry.category": {"type": "string", "value": "http"}, | ||
| "sentry.description": { | ||
| "type": "string", | ||
| "value": "GET https://www.service.io/users/01234-qwerty/settings/98765-adfghj", | ||
| "value": "GET https://www.service.io/users/[user]/settings/98765-adfghj", | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This attribute is filled from |
||
| }, | ||
| "sentry.op": {"type": "string", "value": "http.client"}, | ||
| "sentry.observed_timestamp_nanos": { | ||
|
|
@@ -1528,7 +1535,7 @@ def test_spansv2_attribute_normalization( | |
| "sentry.domain": {"type": "string", "value": "*.service.io"}, | ||
| "url.full": { | ||
| "type": "string", | ||
| "value": "https://www.service.io/users/01234-qwerty/settings/98765-adfghj", | ||
| "value": "https://www.service.io/users/[user]/settings/98765-adfghj", | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This attribute has become |
||
| }, | ||
| }, | ||
| } | ||
|
|
||


There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Previously, when deriving the attributes for recursion, we would eagerly resolve a dynamic PII value function into a static value. Now we instead pass the dynamic function through.