sourcegraph/internal/requestclient/client.go
Robert Lin 38d4e83e59
feat/requestclient: propagate original User-Agent as X-Forwarded-For-User-Agent (#64113)
Propagates a for-reference-only record of the first `User-Agent` seen
when a request gets into Sourcegraph across services and contexts. This
allows telemetry to try and indicate where a request originates from
(https://github.com/sourcegraph/sourcegraph/pull/64112), rather than
only having the most recent user-agent.

A new header and `requestclient.Client` property
`X-Forwarded-For-User-Agent` and `ForwardedForUserAgent` is used to
explicitly forward this. Strictly speaking I think we're supposed to
just forward `User-Agent` but it looks like in multiple places we
add/clobber the `User-Agent` ourselves.

The gRPC propagator currently sets user-agent on outgoing requests, this
change also makes that consistent with the HTTP transport, such that
both only explicitly propagate `X-Forwarded-For-User-Agent`

## Test plan

Unit tests
2024-07-29 14:17:25 -07:00

120 lines
3.7 KiB
Go

package requestclient
import (
"context"
"strings"
"sync"
"github.com/sourcegraph/log"
"github.com/sourcegraph/sourcegraph/internal/requestclient/geolocation"
"github.com/sourcegraph/sourcegraph/lib/errors"
)
type clientKey struct{}
// Client carries information about the original client of a request.
type Client struct {
// IP identifies the IP of the client.
IP string
// ForwardedFor identifies the originating IP address of a client. It can
// be a comma-separated list of IP addresses.
//
// Note: This header can be spoofed and relies on trusted clients/proxies.
// For sourcegraph.com we use cloudflare headers to avoid spoofing.
ForwardedFor string
// UserAgent is current value of the User-Agent header:
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
UserAgent string
// ForwardedForUserAgent is first known value of the User-Agent header
// from the original request:
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
//
// It may be equal to UserAgent if no previous ForwardedForUserAgent was
// provided.
//
// Note: This header can be spoofed, and should only be used for reference.
ForwardedForUserAgent string
// wafIPCountryCode is a ISO 3166-1 alpha-2 country code for the
// request client as provided by a WAF (typically Cloudlfare) behind which
// Sourcegraph is hosted.
wafIPCountryCode string
// countryCode and friends are lazily hydrated once by
// (*Client).OriginCountryCode().
countryCode string
countryCodeError error
countryCodeOnce sync.Once
}
// FromContext retrieves the client IP, if available, from context.
func FromContext(ctx context.Context) *Client {
ip, ok := ctx.Value(clientKey{}).(*Client)
if !ok || ip == nil {
return nil
}
return ip
}
// WithClient adds client IP information to context for propagation.
func WithClient(ctx context.Context, client *Client) context.Context {
return context.WithValue(ctx, clientKey{}, client)
}
func (c *Client) LogFields() []log.Field {
if c == nil {
return []log.Field{log.String("requestClient", "<nil>")}
}
var ccField log.Field
if cc, err := c.OriginCountryCode(); err != nil {
ccField = log.NamedError("requestClient.countryCode.error", err)
} else {
ccField = log.String("requestClient.countryCode", cc)
}
return []log.Field{
log.String("requestClient.ip", c.IP),
log.String("requestClient.forwardedFor", c.ForwardedFor),
log.String("requestClient.userAgent", c.UserAgent),
log.String("requestClient.forwardedForUserAgent", c.ForwardedForUserAgent),
ccField,
}
}
// OriginCountryCode returns a best-effort inference of the ISO 3166-1 alpha-2
// country code indicating the geolocation of the request client.
func (c *Client) OriginCountryCode() (string, error) {
c.countryCodeOnce.Do(func() {
c.countryCode, c.countryCodeError = inferOriginCountryCode(c)
})
return c.countryCode, c.countryCodeError
}
func inferOriginCountryCode(c *Client) (string, error) {
// If we have a trusted value already, use that directly.
if c.wafIPCountryCode != "" {
return c.wafIPCountryCode, nil
}
// If we're able to infer a country code from the forwarded-for header,
// use that. We're not too worried about spoofing here because country codes
// are purely for reference and analytics.
if c.ForwardedFor != "" {
// Forwarded-for is a comma-separated list of IP addresses, we only
// want the first one.
ips := strings.Split(c.ForwardedFor, ",")
if cc, err := geolocation.InferCountryCode(ips[0]); err == nil {
return cc, nil
}
}
// Otherwise, we must infer a country code from the IP address.
cc, err := geolocation.InferCountryCode(c.IP)
if err != nil {
return "", errors.Wrap(err, "geolocation.InferCountryCode")
}
return cc, nil
}