syntax = "proto3";

import "google/protobuf/timestamp.proto";

// A single traced GraphQL operation: timing, the resolver tree, and
// request-level metadata.
message Trace {
  // Cache scope and maximum age attached to a node or to the whole trace.
  message CachePolicy {
    enum Scope {
      UNKNOWN = 0;
      PUBLIC = 1;
      PRIVATE = 2;
    }

    Scope scope = 1;
    int64 max_age_ns = 2; // use 0 for absent, -1 for 0
  }

  message Details {
    // The variables associated with this query (unless the reporting agent is
    // configured to keep them all private). Values are JSON: ie, strings are
    // enclosed in double quotes, etc. The value of a private variable is
    // the empty string.
    map<string, string> variables_json = 4;

    // This is deprecated and only used for legacy applications.
    // Don't include this in traces inside a FullTracesReport; the operation
    // name for these traces comes from the key of the traces_per_query map.
    string operation_name = 3;
  }

  message Error {
    string message = 1; // required
    repeated Location location = 2;
    uint64 time_ns = 3;
    string json = 4;
  }

  message HTTP {
    // Wrapper so that a single header name can carry several values.
    message Values {
      repeated string value = 1;
    }

    enum Method {
      UNKNOWN = 0;
      OPTIONS = 1;
      GET = 2;
      HEAD = 3;
      POST = 4;
      PUT = 5;
      DELETE = 6;
      TRACE = 7;
      CONNECT = 8;
      PATCH = 9;
    }

    Method method = 1;
    string host = 2;
    string path = 3;

    // Should exclude manual blacklist ("Auth" by default)
    map<string, Values> request_headers = 4;
    map<string, Values> response_headers = 5;

    uint32 status_code = 6;

    bool secure = 8; // TLS was used
    string protocol = 9; // by convention "HTTP/1.0", "HTTP/1.1", "HTTP/2" or "h2"
  }

  message Location {
    uint32 line = 1;
    uint32 column = 2;
  }

  // We store information on each resolver execution as a Node on a tree.
  // The structure of the tree corresponds to the structure of the GraphQL
  // response; it does not indicate the order in which resolvers were
  // invoked. Note that nodes representing indexes (and the root node)
  // don't contain all Node fields (eg types and times).
  message Node {
    // The name of the field (for Nodes representing a resolver call) or the
    // index in a list (for intermediate Nodes representing elements of a list).
    // response_name is the name of the field as it appears in the GraphQL
    // response: ie, it may be an alias. (In that case, the original_field_name
    // field holds the actual field name from the schema.) In any context where
    // we're building up a path, we use the response_name rather than the
    // original_field_name.
    oneof id {
      string response_name = 1;
      uint32 index = 2;
    }

    string original_field_name = 14;

    // The field's return type; e.g. "String!" for User.email:String!
    string type = 3;

    // The field's parent type; e.g. "User" for User.email:String!
    string parent_type = 13;

    CachePolicy cache_policy = 5;

    // relative to the trace's start_time, in ns
    uint64 start_time = 8;
    // relative to the trace's start_time, in ns
    uint64 end_time = 9;

    repeated Error error = 11;
    repeated Node child = 12;

    reserved 4;
  }

  // Represents a node in the query plan, under which there is a trace tree for
  // that service fetch. In particular, each fetch node represents a call to an
  // implementing service, and calls to implementing services may not be unique.
  // See https://github.com/apollographql/apollo-server/blob/main/packages/apollo-gateway/src/QueryPlan.ts
  // for more information and details.
  message QueryPlanNode {
    // This represents a set of nodes to be executed sequentially by the
    // Gateway executor
    message SequenceNode {
      repeated QueryPlanNode nodes = 1;
    }

    // This represents a set of nodes to be executed in parallel by the
    // Gateway executor
    message ParallelNode {
      repeated QueryPlanNode nodes = 1;
    }

    // This represents a node to send an operation to an implementing service
    message FetchNode {
      // XXX When we want to include more details about the sub-operation that
      // was executed against this service, we should include that here in each
      // fetch node. This might include an operation signature, requires
      // directive, reference resolutions, etc.
      string service_name = 1;

      bool trace_parsing_failed = 2;

      // This Trace only contains start_time, end_time, duration_ns, and root;
      // all timings were calculated **on the federated service**, and clock skew
      // will be handled by the ingress server.
      Trace trace = 3;

      // relative to the outer trace's start_time, in ns, measured in the gateway.
      uint64 sent_time_offset = 4;

      // Wallclock times measured in the gateway for when this operation was
      // sent and received.
      google.protobuf.Timestamp sent_time = 5;
      google.protobuf.Timestamp received_time = 6;
    }

    // This node represents a way to reach into the response path and attach
    // related entities.
    // XXX Flatten is really not the right name and this node may be renamed in
    // the query planner.
    message FlattenNode {
      repeated ResponsePathElement response_path = 1;
      QueryPlanNode node = 2;
    }

    message ResponsePathElement {
      oneof id {
        string field_name = 1;
        uint32 index = 2;
      }
    }

    oneof node {
      SequenceNode sequence = 1;
      ParallelNode parallel = 2;
      FetchNode fetch = 3;
      FlattenNode flatten = 4;
    }
  }

  // Wallclock time when the trace began.
  google.protobuf.Timestamp start_time = 4; // required
  // Wallclock time when the trace ended.
  google.protobuf.Timestamp end_time = 3; // required
  // High precision duration of the trace; may not equal end_time-start_time
  // (eg, if your machine's clock changed during the trace).
  uint64 duration_ns = 11; // required
  // A tree containing information about all resolvers run directly by this
  // service, including errors.
  Node root = 14;

  // -------------------------------------------------------------------------
  // Fields below this line are *not* included in federated traces (the traces
  // sent from federated services to the gateway).

  // In addition to details.raw_query, we include a "signature" of the query,
  // which can be normalized: for example, you may want to discard aliases, drop
  // unused operations and fragments, sort fields, etc. The most important thing
  // here is that the signature match the signature in StatsReports. In
  // StatsReports signatures show up as the key in the per_query map (with the
  // operation name prepended). The signature should be a valid GraphQL query.
  // All traces must have a signature; if this Trace is in a FullTracesReport
  // that signature is in the key of traces_per_query rather than in this field.
  // Engineproxy provides the signature in legacy_signature_needs_resigning
  // instead.
  string signature = 19;

  // Optional: when GraphQL parsing or validation against the GraphQL schema
  // fails, these fields can include reference to the operation being sent for
  // users to dig into the set of operations that are failing validation.
  string unexecutedOperationBody = 27;
  string unexecutedOperationName = 28;

  Details details = 6;

  string client_name = 7;
  string client_version = 8;

  HTTP http = 10;

  CachePolicy cache_policy = 18;

  // If this Trace was created by a gateway, this is the query plan, including
  // sub-Traces for federated services. Note that the 'root' tree on the
  // top-level Trace won't contain any resolvers (though it could contain errors
  // that occurred in the gateway itself).
  QueryPlanNode query_plan = 26;

  // Was this response served from a full query response cache? (In that case
  // the node tree will have no resolvers.)
  bool full_query_cache_hit = 20;

  // Was this query specified successfully as a persisted query hash?
  bool persisted_query_hit = 21;
  // Did this query contain both a full query string and a persisted query hash?
  // (This typically means that a previous request was rejected as an unknown
  // persisted query.)
  bool persisted_query_register = 22;

  // Was this operation registered and a part of the safelist?
  bool registered_operation = 24;

  // Was this operation forbidden due to lack of safelisting?
  bool forbidden_operation = 25;

  // Some servers don't do field-level instrumentation for every request and
  // assign a "weight" to each request that they do instrument. When this
  // trace is aggregated into field usage stats, it should count as this value
  // towards the estimated_execution_count rather than just 1. This value should
  // typically be at least 1.
  //
  // 0 is treated as 1 for backwards compatibility.
  double field_execution_weight = 31;

  // removed: Node parse = 12; Node validate = 13;
  //          Id128 server_id = 1; Id128 client_id = 2;
  //          String client_reference_id = 23; String client_address = 9;
  reserved 1, 2, 9, 12, 13, 23;
}

// The `service` value embedded within the header key is not guaranteed to
// contain an actual service, and, in most cases, the service information is
// trusted to come from upstream processing. If the service _is_ specified in
// this header, then it is checked to match the context that is reporting it.
// Otherwise, the service information is deduced from the token context of the
// reporter and then sent along via other mechanisms (in Kafka, the
// `ReportKafkaKey`). The other information (hostname, agent_version, etc.) is
// sent by the Apollo Engine Reporting agent, but we do not currently save that
// information to any of our persistent storage.
message ReportHeader {
  // eg "mygraph@myvariant"
  string graph_ref = 12;

  // eg "host-01.example.com"
  string hostname = 5;

  // eg "engineproxy 0.1.0"
  string agent_version = 6; // required
  // eg "prod-4279-20160804T065423Z-5-g3cf0aa8" (taken from `git describe --tags`)
  string service_version = 7;
  // eg "node v4.6.0"
  string runtime_version = 8;
  // eg "Linux box 4.6.5-1-ec2 #1 SMP Mon Aug 1 02:31:38 PDT 2016 x86_64 GNU/Linux"
  string uname = 9;
  // An id that is used to represent the schema to Apollo Graph Manager
  // Using this in place of what used to be schema_hash, since that is no longer
  // attached to a schema in the backend.
  string executable_schema_id = 11;

  reserved 3; // removed string service = 3;
}

// Error counts for one element of a response path, with nested counts for
// its children.
message PathErrorStats {
  // NOTE(review): key is presumably the next response-path segment; confirm
  // against the producer.
  map<string, PathErrorStats> children = 1;
  uint64 errors_count = 4;
  uint64 requests_with_errors_count = 5;
}

// NOTE(review): `(js_use_toArray)` and `(js_preEncoded)` used in this file are
// custom field options whose definitions are not visible here; presumably they
// are supplied by the code generator in use. Confirm before compiling with a
// stock protoc.
message QueryLatencyStats {
  repeated sint64 latency_count = 13 [(js_use_toArray)=true];
  uint64 request_count = 2;
  uint64 cache_hits = 3;
  uint64 persisted_query_hits = 4;
  uint64 persisted_query_misses = 5;
  repeated sint64 cache_latency_count = 14 [(js_use_toArray)=true];
  PathErrorStats root_error_stats = 7;
  uint64 requests_with_errors_count = 8;
  repeated sint64 public_cache_ttl_count = 15 [(js_use_toArray)=true];
  repeated sint64 private_cache_ttl_count = 16 [(js_use_toArray)=true];
  uint64 registered_operation_count = 11;
  uint64 forbidden_operation_count = 12;
  // The number of requests that were executed without field-level
  // instrumentation (and thus do not contribute to `observed_execution_count`
  // fields on this message's cousin-twice-removed FieldStats).
  uint64 requests_without_field_instrumentation = 17;
  // 1, 6, 9, and 10 were old int64 histograms
  reserved 1, 6, 9, 10;
}

message StatsContext {
  // string client_reference_id = 1;
  reserved 1;
  string client_name = 2;
  string client_version = 3;
}

message ContextualizedQueryLatencyStats {
  QueryLatencyStats query_latency_stats = 1;
  StatsContext context = 2;
}

message ContextualizedTypeStats {
  StatsContext context = 1;
  // NOTE(review): key is presumably the type name, as on
  // ContextualizedStats.per_type_stat; confirm.
  map<string, TypeStat> per_type_stat = 2;
}

message FieldStat {
  string return_type = 3; // required; eg "String!" for User.email:String!
  // Number of errors whose path is this field. Note that we assume that error
  // tracking does *not* require field-level instrumentation so this *will*
  // include errors from requests that don't contribute to the
  // `observed_execution_count` field (and does not need to be scaled by
  // field_execution_weight).
  uint64 errors_count = 4;
  // Number of times that the resolver for this field is directly observed being
  // executed.
  uint64 observed_execution_count = 5;
  // Same as `observed_execution_count` but potentially scaled upwards if the
  // server was only performing field-level instrumentation on a sampling of
  // operations. For example, if the server randomly instruments 1% of requests
  // for this operation, this number will be 100 times greater than
  // `observed_execution_count`. (When aggregating a Trace into FieldStats,
  // this number goes up by the trace's `field_execution_weight` for each
  // observed field execution, while `observed_execution_count` above goes
  // up by 1.)
  uint64 estimated_execution_count = 10;
  // Number of times the resolver for this field is executed that resulted in
  // at least one error. "Request" is a misnomer here as this corresponds to
  // resolver calls, not overall operations. Like `errors_count` above, this
  // includes all requests rather than just requests with field-level
  // instrumentation.
  uint64 requests_with_errors_count = 6;
  // Duration histogram for the latency of this field. Note that it is scaled in
  // the same way as estimated_execution_count so its "total count" might be
  // greater than `observed_execution_count` and may not exactly equal
  // `estimated_execution_count` due to rounding.
  repeated sint64 latency_count = 9 [(js_use_toArray)=true];
  reserved 1, 2, 7, 8;
}

message TypeStat {
  // Key is (eg) "email" for User.email:String!
  map<string, FieldStat> per_field_stat = 3;
  reserved 1, 2;
}

message ReferencedFieldsForType {
  // Contains (eg) "email" for User.email:String!
  repeated string field_names = 1;
  // True if this type is an interface.
  bool is_interface = 2;
}

// This is the top-level message used by the new traces ingress. This
// is designed for the apollo-engine-reporting TypeScript agent and will
// eventually be documented as a public ingress API. This message consists
// solely of traces; the equivalent of the StatsReport is automatically
// generated server-side from this message. Agent should either send a trace or
// include it in the stats for every request in this report. Generally,
// buffering up until a large size has been reached (say, 4MB) or 5-10 seconds
// has passed is appropriate.
// This message used to be known as FullTracesReport, but got renamed since it
// isn't just for traces anymore.
message Report {
  ReportHeader header = 1;

  // key is statsReportKey (# operationName\nsignature) Note that the nested
  // traces will *not* have a signature or details.operationName (because the
  // key is adequate).
  //
  // We also assume that traces don't have
  // legacy_per_query_implicit_operation_name, and we don't require them to have
  // details.raw_query (which would consume a lot of space and has privacy/data
  // access issues, and isn't currently exposed by our app anyway).
  map<string, TracesAndStats> traces_per_query = 5;

  // This is the time that the requests in this trace are considered to have
  // taken place. If this field is not present the max of the end_time of each
  // trace will be used instead. If there are no traces and no end_time present
  // the report will not be able to be processed.
  // Note: This will override the end_time from traces.
  google.protobuf.Timestamp end_time = 2; // required if no traces in this message

  // Total number of operations processed during this period.
  uint64 operation_count = 6;
}

message ContextualizedStats {
  StatsContext context = 1;
  QueryLatencyStats query_latency_stats = 2;
  // Key is type name. This structure provides data for the count and latency of
  // individual field executions and thus only reflects operations for which
  // field-level tracing occurred.
  map<string, TypeStat> per_type_stat = 3;
}

// A sequence of traces and stats. An individual operation should either be
// described as a trace or as part of stats, but not both.
message TracesAndStats {
  repeated Trace trace = 1 [(js_preEncoded)=true];
  repeated ContextualizedStats stats_with_context = 2 [(js_use_toArray)=true];
  // This describes the fields referenced in the operation. Note that this may
  // include fields that don't show up in FieldStats (due to being interface
  // fields, being nested under null fields or empty lists or non-matching
  // fragments or `@include` or `@skip`, etc). It also may be missing fields
  // that show up in FieldStats (as FieldStats will include the concrete object
  // type for fields referenced via an interface type).
  // NOTE(review): key is presumably the type name; confirm.
  map<string, ReferencedFieldsForType> referenced_fields_by_type = 4;
  // This field is used to validate that the algorithm used to construct
  // `stats_with_context` matches similar algorithms in Apollo's servers. It is
  // otherwise ignored and should not be included in reports.
  repeated Trace internal_traces_contributing_to_stats = 3 [(js_preEncoded)=true];
}