From 9af4dd369278aa67c3ae4cdd7725e526ef6d6ba5 Mon Sep 17 00:00:00 2001 From: David Cowden Date: Wed, 6 May 2020 07:39:13 -0700 Subject: [PATCH] acme: Retry challenge validation attempts Section 8.2 of RFC 8555 explains how retries apply to the validation process. However, much is left up to the implementer. Add retries every 12 seconds for 2 minutes after a client requests a validation. The challenge status remains "processing" indefinitely until a distinct conclusion is reached. This allows a client to continually re-request a validation by sending a post-get to the challenge resource until the process fails or succeeds. Challenges in the processing state include information about why a validation did not complete in the error field. The server also includes a Retry-After header to help clients and servers coordinate. Retries are inherently stateful because they're part of the public API. When running step-ca in a highly available setup with replicas, care must be taken to maintain a persistent identifier for each instance "slot". In kubernetes, this implies a *stateful set*. --- acme/api/handler.go | 75 ++++++--- acme/authority.go | 231 +++++++++++++++++++++------ acme/authz.go | 2 +- acme/challenge.go | 378 ++++++++++++++++++-------------------------- acme/common.go | 2 + api/utils.go | 5 - 6 files changed, 401 insertions(+), 292 deletions(-) diff --git a/acme/api/handler.go b/acme/api/handler.go index 31e19b7b..728244d0 100644 --- a/acme/api/handler.go +++ b/acme/api/handler.go @@ -148,51 +148,90 @@ func (h *Handler) GetAuthz(w http.ResponseWriter, r *http.Request) { api.JSON(w, authz) } -// GetChallenge ACME api for retrieving a Challenge. +// ACME api for retrieving the Challenge resource. +// +// Potential Challenges are requested by the client when creating an order. +// Once the client knows the appropriate validation resources are provisioned, +// it makes a POST-as-GET request to this endpoint in order to initiate the +// validation flow. +// +// The validation state machine describes the flow for a challenge. +// +// https://tools.ietf.org/html/rfc8555#section-7.1.6 +// +// Once a validation attempt has completed without error, the challenge's +// status is updated depending on the result (valid|invalid) of the server's +// validation attempt. Once this is the case, a challenge cannot be reset. +// +// If a challenge cannot be completed because no suitable data can be +// acquired the server (whilst communicating retry information) and the +// client (whilst respecting the information from the server) may request +// retries of the validation. +// +// https://tools.ietf.org/html/rfc8555#section-8.2 +// +// Retry status is communicated using the error field and by sending a +// Retry-After header back to the client. +// +// The request body is challenge-specific. The current challenges (http-01, +// dns-01, tls-alpn-01) simply expect an empty object ("{}") in the payload +// of the JWT sent by the client. We don't gain anything by stricly enforcing +// nonexistence of unknown attributes, or, in these three cases, enforcing +// an empty payload. And the spec also says to just ignore it: +// +// > The server MUST ignore any fields in the response object +// > that are not specified as response fields for this type of challenge. +// +// https://tools.ietf.org/html/rfc8555#section-7.5.1 +// func (h *Handler) GetChallenge(w http.ResponseWriter, r *http.Request) { prov, err := provisionerFromContext(r) if err != nil { api.WriteError(w, err) return } + acc, err := accountFromContext(r) if err != nil { api.WriteError(w, err) return } - // Just verify that the payload was set, since we're not strictly adhering - // to ACME V2 spec for reasons specified below. + + // Just verify that the payload was set since the client is required + // to send _something_. _, err = payloadFromContext(r) if err != nil { api.WriteError(w, err) return } - // NOTE: We should be checking that the request is either a POST-as-GET, or - // that the payload is an empty JSON block ({}). However, older ACME clients - // still send a vestigial body (rather than an empty JSON block) and - // strict enforcement would render these clients broken. For the time being - // we'll just ignore the body. var ( - ch *acme.Challenge + ch *acme.Challenge chID = chi.URLParam(r, "chID") ) ch, err = h.Auth.ValidateChallenge(prov, acc.GetID(), chID, acc.GetKey()) if err != nil { api.WriteError(w, err) - } else if ch.Retry.Active { - retryAfter, err := h.Auth.BackoffChallenge(prov, acc.GetID(), chID, acc.GetKey()) - if err != nil { - api.WriteError(w, err) - } else { - w.Header().Add("Retry-After", retryAfter.String()) - api.WriteProcessing(w, ch) - } - } else { + } + + switch ch.Status { + case acme.StatusPending: + panic("validation attempt did not move challenge to the processing state") + // When a transient error occurs, the challenge will not be progressed to the `invalid` state. + // Add a Retry-After header to indicate that the client should check again in the future. + case acme.StatusProcessing: + w.Header().Add("Retry-After", ch.RetryAfter) + w.Header().Add("Cache-Control", "no-cache") + api.JSON(w, ch) + case acme.StatusInvalid: + api.JSON(w, ch) + case acme.StatusValid: getLink := h.Auth.GetLink w.Header().Add("Link", link(getLink(acme.AuthzLink, acme.URLSafeProvisionerName(prov), true, ch.GetAuthzID()), "up")) w.Header().Set("Location", getLink(acme.ChallengeLink, acme.URLSafeProvisionerName(prov), true, ch.GetID())) api.JSON(w, ch) + default: + panic("unexpected challenge state" + ch.Status) } } diff --git a/acme/authority.go b/acme/authority.go index b58dc147..63998acf 100644 --- a/acme/authority.go +++ b/acme/authority.go @@ -5,10 +5,12 @@ import ( "crypto/tls" "crypto/x509" "encoding/base64" - "math" + "log" "net" "net/http" "net/url" + "os" + "strconv" "time" "github.com/pkg/errors" @@ -20,7 +22,6 @@ import ( // Interface is the acme authority interface. type Interface interface { - BackoffChallenge(provisioner.Interface, string, string, *jose.JSONWebKey) (time.Duration, error) DeactivateAccount(provisioner.Interface, string) (*Account, error) FinalizeOrder(provisioner.Interface, string, string, *x509.CertificateRequest) (*Order, error) GetAccount(provisioner.Interface, string) (*Account, error) @@ -56,8 +57,24 @@ var ( orderTable = []byte("acme_orders") ordersByAccountIDTable = []byte("acme_account_orders_index") certTable = []byte("acme_certs") + ordinal int ) +// Ordinal is used during challenge retries to indicate ownership. +func init() { + ordstr := os.Getenv("STEP_CA_ORDINAL"); + if ordstr == "" { + ordinal = 0 + } else { + ord, err := strconv.Atoi(ordstr) + if err != nil { + log.Fatal("Unrecognized ordinal ingeter value.") + panic(nil) + } + ordinal = ord + } +} + // NewAuthority returns a new Authority that implements the ACME interface. func NewAuthority(db nosql.DB, dns, prefix string, signAuth SignAuthority) (*Authority, error) { if _, ok := db.(*database.SimpleDB); !ok { @@ -256,50 +273,192 @@ func (a *Authority) GetAuthz(p provisioner.Interface, accID, authzID string) (*A return az.toACME(a.db, a.dir, p) } -// ValidateChallenge attempts to validate the challenge. +// The challenge validation state machine looks like: +// +// * https://tools.ietf.org/html/rfc8555#section-7.1.6 +// +// While in the processing state, the server may retry as it sees fit. The challenge validation strategy +// needs to be rather specific in order for retries to work in a replicated, crash-proof deployment. +// In general, the goal is to allow requests to hit arbitrary instances of step-ca while managing retry +// responsibility such that multiple instances agree on an owner. Additionally, when a deployment of the +// CA is in progress, the ownership should be carried forward and new, updated (or in general, restarted), +// instances should pick back up where the crashed instance left off. +// +// The steps are: +// +// 1. Upon incoming request to the challenge endpoint, take ownership of the retry responsibility. +// (a) Set Retry.Owner to this instance's ordinal (STEP_CA_ORDINAL). +// (b) Set Retry.NumAttempts to 0 and Retry.MaxAttempts to the desired max. +// (c) Set Challenge.Status to "processing" +// (d) Set retry_after to a time (retryInterval) in the future. +// 2. Perform the validation attempt. +// 3. If the validation attempt results in a challenge that is still processing, schedule a retry. +// +// It's possible that another request to re-attempt the challenge comes in while a retry attempt is +// pending from a previous request. In general, these old attempts will see that Retry.NextAttempt +// is in the future and drop their task. But this also might have happened on another instance, etc. +// +// 4. When the retry timer fires, check to make sure the retry should still process. +// (a) Refresh the challenge from the DB. +// (a) Check that Retry.Owner is equal to this instance's ordinal. +// (b) Check that Retry.NextAttempt is in the past. +// 5. If the retry will commence, immediately update Retry.NextAttempt and save the challenge. +// +// Finally, if this instance is terminated, retries need to be reschedule when the instance restarts. This +// is handled in the acme provisioner (authority/provisioner/acme.go) initialization. +// +// Note: the default ordinal does not need to be changed unless step-ca is running in a replicated scenario. +// func (a *Authority) ValidateChallenge(p provisioner.Interface, accID, chID string, jwk *jose.JSONWebKey) (*Challenge, error) { ch, err := getChallenge(a.db, chID) + + // Validate the challenge belongs to the account owned by the requester. if err != nil { return nil, err } if accID != ch.getAccountID() { return nil, UnauthorizedErr(errors.New("account does not own challenge")) } - retry := ch.getRetry() - if retry.Active { - return ch.toACME(a.db, a.dir, p) + + // Take ownership of the challenge status and retry state. The values must be reset. + up := ch.clone() + up.Status = StatusProcessing + up.Retry = &Retry { + Owner: ordinal, + ProvisionerID: p.GetID(), + NumAttempts: 0, + MaxAttempts: 10, + NextAttempt: time.Now().Add(retryInterval).UTC().Format(time.RFC3339), + + } + err = up.save(a.db, ch) + if err != nil { + return nil, Wrap(err, "error saving challenge") } - retry.Mux.Lock() - defer retry.Mux.Unlock() + ch = up + v, err := a.validate(ch, jwk) + // An error here is non-recoverable. Recoverable errors are set on the challenge object + // and should not be returned directly. + if err != nil { + return nil, Wrap(err, "error attempting challenge validation") + } + err = v.save(a.db, ch) + if err != nil { + return nil, Wrap(err, "error saving challenge") + } + ch = v + + switch ch.getStatus() { + case StatusValid, StatusInvalid: + break + case StatusProcessing: + if ch.getRetry().Active() { + time.AfterFunc(retryInterval, func() { + a.RetryChallenge(ch.getID()) + }) + } + default: + panic("post-validation challenge in unexpected state" + ch.getStatus()) + } + return ch.toACME(a.dir, p) +} + +// The challenge validation process is specific to the type of challenge (dns-01, http-01, tls-alpn-01). +// But, we still pass generic "options" to the polymorphic validate call. +func (a *Authority) validate(ch challenge, jwk *jose.JSONWebKey) (challenge, error) { client := http.Client{ Timeout: time.Duration(30 * time.Second), } dialer := &net.Dialer{ Timeout: 30 * time.Second, } - for ch.getRetry().Active { - ch, err = ch.validate(a.db, jwk, validateOptions{ - httpGet: client.Get, - lookupTxt: net.LookupTXT, - tlsDial: func(network, addr string, config *tls.Config) (*tls.Conn, error) { - return tls.DialWithDialer(dialer, network, addr, config) - }, - }) - if err != nil { - return nil, Wrap(err, "error attempting challenge validation") - } - if ch.getStatus() == StatusValid { - break - } - if ch.getStatus() == StatusInvalid { - return ch.toACME(a.db, a.dir, p) + return ch.validate(jwk, validateOptions{ + httpGet: client.Get, + lookupTxt: net.LookupTXT, + tlsDial: func(network, addr string, config *tls.Config) (*tls.Conn, error) { + return tls.DialWithDialer(dialer, network, addr, config) + }, + }) +} + + +const retryInterval = 12 * time.Second + +// see: ValidateChallenge +func (a *Authority) RetryChallenge(chID string) { + ch, err := getChallenge(a.db, chID) + if err != nil { + return + } + switch ch.getStatus() { + case StatusPending: + panic("pending challenges must first be moved to the processing state") + case StatusInvalid, StatusValid: + return + case StatusProcessing: + break + default: + panic("unknown challenge state: " + ch.getStatus()) + } + + // When retrying, check to make sure the ordinal has not changed. + // Make sure there are still retries left. + // Then check to make sure Retry.NextAttempt is in the past. + retry := ch.getRetry() + switch { + case retry.Owner != ordinal: + return + case !retry.Active(): + return + } + t, err := time.Parse(time.RFC3339, retry.NextAttempt) + now := time.Now().UTC() + switch { + case err != nil: + return + case t.Before(now): + return + } + + // Update the db so that other retries simply drop when their timer fires. + up := ch.clone() + up.Retry.NextAttempt = now.Add(retryInterval).UTC().Format(time.RFC3339) + up.Retry.NumAttempts += 1 + err = up.save(a.db, ch) + if err != nil { + return + } + ch = up + + p, err := a.LoadProvisionerByID(retry.ProvisionerID) + acc, err := a.GetAccount(p, ch.getAccountID()) + + v, err := a.validate(up, acc.Key) + if err != nil { + return + } + err = v.save(a.db, ch) + if err != nil { + return + } + ch = v + + switch ch.getStatus() { + case StatusValid, StatusInvalid: + break + case StatusProcessing: + if ch.getRetry().Active() { + time.AfterFunc(retryInterval, func() { + a.RetryChallenge(ch.getID()) + }) } - time.Sleep(ch.getBackoff()) + default: + panic("post-validation challenge in unexpected state " + ch.getStatus()) } - return ch.toACME(a.db, a.dir, p) } + // GetCertificate retrieves the Certificate by ID. func (a *Authority) GetCertificate(accID, certID string) ([]byte, error) { cert, err := getCert(a.db, certID) @@ -312,23 +471,3 @@ func (a *Authority) GetCertificate(accID, certID string) ([]byte, error) { return cert.toACME(a.db, a.dir) } -// BackoffChallenge returns the total time to wait until the next sequence of validation attempts completes -func (a *Authority) BackoffChallenge(p provisioner.Interface, accID, chID string, jwk *jose.JSONWebKey) (time.Duration, error) { - ch, err := getChallenge(a.db, chID) - if err != nil { - return -1, err - } - if accID != ch.getAccountID() { - return -1, UnauthorizedErr(errors.New("account does not own challenge")) - } - - remCalls := ch.getRetry().MaxAttempts - math.Mod(ch.getRetry().Called, ch.getRetry().MaxAttempts) - totBackoff := 0 * time.Second - for i := 0; i < int(remCalls); i++ { - clone := ch.clone() - clone.Retry.Called += float64(i) - totBackoff += clone.getBackoff() - } - - return totBackoff, nil -} diff --git a/acme/authz.go b/acme/authz.go index cdcb15e5..789dcab2 100644 --- a/acme/authz.go +++ b/acme/authz.go @@ -148,7 +148,7 @@ func (ba *baseAuthz) toACME(db nosql.DB, dir *directory, p provisioner.Interface if err != nil { return nil, err } - chs[i], err = ch.toACME(db, dir, p) + chs[i], err = ch.toACME(dir, p) if err != nil { return nil, err } diff --git a/acme/challenge.go b/acme/challenge.go index 9b47c7a6..031dcc7c 100644 --- a/acme/challenge.go +++ b/acme/challenge.go @@ -11,12 +11,9 @@ import ( "encoding/json" "fmt" "io/ioutil" - "math" - "math/rand" "net" "net/http" "strings" - "sync" "time" "github.com/pkg/errors" @@ -34,22 +31,11 @@ type Challenge struct { Validated string `json:"validated,omitempty"` URL string `json:"url"` Error *AError `json:"error,omitempty"` - Retry *Retry `json:"retry"` + RetryAfter string `json:"retry_after,omitempty"` ID string `json:"-"` AuthzID string `json:"-"` } -type Retry struct { - Called float64 `json:"id"` - BaseDelay time.Duration `json:"basedelay"` - MaxDelay time.Duration `json:"maxdelay"` - Multiplier float64 `json:"multiplier"` - Jitter float64 `json:"jitter"` - MaxAttempts float64 `json:"maxattempts"` - Active bool `json:"active"` - Mux sync.Mutex `json:"mux"` -} - // ToLog enables response logging. func (c *Challenge) ToLog() (interface{}, error) { b, err := json.Marshal(c) @@ -82,7 +68,7 @@ type validateOptions struct { // challenge is the interface ACME challenege types must implement. type challenge interface { save(db nosql.DB, swap challenge) error - validate(nosql.DB, *jose.JSONWebKey, validateOptions) (challenge, error) + validate(*jose.JSONWebKey, validateOptions) (challenge, error) getType() string getError() *AError getValue() string @@ -91,18 +77,18 @@ type challenge interface { getAuthzID() string getToken() string getRetry() *Retry - getBackoff() time.Duration clone() *baseChallenge getAccountID() string getValidated() time.Time getCreated() time.Time - toACME(nosql.DB, *directory, provisioner.Interface) (*Challenge, error) + toACME(*directory, provisioner.Interface) (*Challenge, error) } // ChallengeOptions is the type used to created a new Challenge. type ChallengeOptions struct { AccountID string AuthzID string + ProvisionerID string Identifier Identifier } @@ -115,10 +101,10 @@ type baseChallenge struct { Status string `json:"status"` Token string `json:"token"` Value string `json:"value"` - Validated time.Time `json:"validated"` Created time.Time `json:"created"` + Validated time.Time `json:"validated"` Error *AError `json:"error"` - Retry *Retry `json:"retry"` + Retry *Retry `json:"retry"` } func newBaseChallenge(accountID, authzID string) (*baseChallenge, error) { @@ -138,7 +124,6 @@ func newBaseChallenge(accountID, authzID string) (*baseChallenge, error) { Status: StatusPending, Token: token, Created: clock.Now(), - Retry: &Retry{Called:0, BaseDelay:1, MaxDelay: 30, Jitter: 1, MaxAttempts:10, Active:false}, }, nil } @@ -197,28 +182,9 @@ func (bc *baseChallenge) getError() *AError { return bc.Error } -// getBackoff returns the backoff time of the baseChallenge. -func (bc *baseChallenge) getBackoff() time.Duration { - if bc.Retry.Called == 0 { - return bc.Retry.BaseDelay - } - - backoff, max := float64(bc.Retry.BaseDelay), float64(bc.Retry.MaxDelay) - backoff *= math.Pow(bc.Retry.Multiplier, bc.Retry.Called) - if backoff > max { - backoff = max - } - // Introduce Jitter to ensure that clustered requests wont operate in unison - backoff *= 1 + bc.Retry.Jitter*(rand.Float64()*2-1) - if backoff < 0 { - return 0 - } - return time.Duration(backoff) -} - // toACME converts the internal Challenge type into the public acmeChallenge // type for presentation in the ACME protocol. -func (bc *baseChallenge) toACME(db nosql.DB, dir *directory, p provisioner.Interface) (*Challenge, error) { +func (bc *baseChallenge) toACME(dir *directory, p provisioner.Interface) (*Challenge, error) { ac := &Challenge{ Type: bc.getType(), Status: bc.getStatus(), @@ -233,8 +199,8 @@ func (bc *baseChallenge) toACME(db nosql.DB, dir *directory, p provisioner.Inter if bc.Error != nil { ac.Error = bc.Error } - if bc.Retry != nil { - ac.Retry = bc.Retry + if bc.Retry != nil && bc.Status == StatusProcessing { + ac.RetryAfter = bc.Retry.NextAttempt } return ac, nil } @@ -274,10 +240,12 @@ func (bc *baseChallenge) save(db nosql.DB, old challenge) error { func (bc *baseChallenge) clone() *baseChallenge { u := *bc + r := *bc.Retry + u.Retry = &r return &u } -func (bc *baseChallenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo validateOptions) (challenge, error) { +func (bc *baseChallenge) validate(jwk *jose.JSONWebKey, vo validateOptions) (challenge, error) { return nil, ServerInternalErr(errors.New("unimplemented")) } @@ -323,6 +291,21 @@ func unmarshalChallenge(data []byte) (challenge, error) { } } +// Challenge retry information is internally relevant and needs to be stored in the DB, but should not be part +// of the public challenge API apart from the Retry-After header. +type Retry struct { + Owner int `json:"owner"` + ProvisionerID string `json:"provisionerid"` + NumAttempts int `json:"numattempts"` + MaxAttempts int `json:"maxattempts"` + NextAttempt string `json:"nextattempt"` +} + +func (r *Retry) Active() bool { + return r.NumAttempts < r.MaxAttempts +} + + // http01Challenge represents an http-01 acme challenge. type http01Challenge struct { *baseChallenge @@ -347,78 +330,61 @@ func newHTTP01Challenge(db nosql.DB, ops ChallengeOptions) (challenge, error) { // Validate attempts to validate the challenge. If the challenge has been // satisfactorily validated, the 'status' and 'validated' attributes are // updated. -func (hc *http01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo validateOptions) (challenge, error) { +func (hc *http01Challenge) validate(jwk *jose.JSONWebKey, vo validateOptions) (challenge, error) { // If already valid or invalid then return without performing validation. - if hc.getStatus() == StatusValid { - return hc, nil - } - if hc.getStatus() == StatusInvalid { - // TODO: Resolve segfault on upd.save - upd := hc.clone() - upd.Status = StatusPending - if err := upd.save(db, hc); err != nil { - fmt.Printf("Error in save: %s\n\n\n", err) - return nil, err - } - fmt.Print("Through Save\n\n") + switch hc.getStatus() { + case StatusPending: + panic("pending challenges must first be moved to the processing state") + case StatusProcessing: + break + case StatusValid, StatusInvalid: return hc, nil + default: + panic("unknown challenge state: " + hc.getStatus()) } - url := fmt.Sprintf("http://%s/.well-known/acme-challenge/%s", hc.Value, hc.Token) + up := &http01Challenge{hc.baseChallenge.clone()} + + url := fmt.Sprintf("http://%s/.well-known/acme-challenge/%s", hc.Value, hc.Token) resp, err := vo.httpGet(url) if err != nil { - if err = hc.iterateRetry(db, ConnectionErr(errors.Wrapf(err, - "error doing http GET for url %s", url))); err != nil { - return nil, err - } - return hc, nil + e := errors.Wrapf(err, "error doing http GET for url %s", url) + up.Error = ConnectionErr(e).ToACME() + return up, nil } + defer resp.Body.Close() + if resp.StatusCode >= 400 { - if err = hc.iterateRetry(db, - ConnectionErr(errors.Errorf("error doing http GET for url %s with status code %d", - url, resp.StatusCode))); err != nil { - return nil, err - } - return hc, nil + e := errors.Errorf("error doing http GET for url %s with status code %d", url, resp.StatusCode) + up.Error = ConnectionErr(e).ToACME() + return up, nil } - defer resp.Body.Close() body, err := ioutil.ReadAll(resp.Body) if err != nil { - return nil, ServerInternalErr(errors.Wrapf(err, "error reading "+ - "response body for url %s", url)) + e := errors.Wrapf(err, "error reading response body for url %s", url) + up.Error = ServerInternalErr(e).ToACME() + return up, nil } - keyAuth := strings.Trim(string(body), "\r\n") + keyAuth := strings.Trim(string(body), "\r\n") expected, err := KeyAuthorization(hc.Token, jwk) if err != nil { - // TODO retry? return nil, err } if keyAuth != expected { - // TODO should we just bail here, this is a "successful" failure. - if err = hc.iterateRetry(db, RejectedIdentifierErr(errors.Errorf("keyAuthorization does not match; "+ - "expected %s, but got %s", expected, keyAuth))); err != nil { - // TODO what's the difference between returning nil with an error, and returning a retry- - // incremented challenge? I think we should probably only hard error if there are no more - // retries left. - return nil, err - } - return hc, err + // add base challenge fail validation + e := errors.Errorf("keyAuthorization does not match; expected %s, but got %s", expected, keyAuth) + up.Error = RejectedIdentifierErr(e).ToACME() + up.Status = StatusInvalid + return up, nil } - // Update and store the challenge. - upd := &http01Challenge{hc.baseChallenge.clone()} - upd.Status = StatusValid - upd.Error = nil - upd.Validated = clock.Now() - upd.Retry.Active = false - upd.Retry.Called = 0 - - if err := upd.save(db, hc); err != nil { - return nil, err - } - return upd, nil + up.Status = StatusValid + up.Validated = clock.Now() + up.Error = nil + up.Retry = nil + return up, nil } type tlsALPN01Challenge struct { @@ -434,34 +400,39 @@ func newTLSALPN01Challenge(db nosql.DB, ops ChallengeOptions) (challenge, error) bc.Type = "tls-alpn-01" bc.Value = ops.Identifier.Value - hc := &tlsALPN01Challenge{bc} - if err := hc.save(db, nil); err != nil { + tc := &tlsALPN01Challenge{bc} + if err := tc.save(db, nil); err != nil { return nil, err } - return hc, nil + return tc, nil } -func (tc *tlsALPN01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo validateOptions) (challenge, error) { +func (tc *tlsALPN01Challenge) validate(jwk *jose.JSONWebKey, vo validateOptions) (challenge, error) { // If already valid or invalid then return without performing validation. - if tc.getStatus() == StatusValid || tc.getStatus() == StatusInvalid { + switch tc.getStatus() { + case StatusPending: + panic("pending challenges must first be moved to the processing state") + case StatusProcessing: + break + case StatusValid, StatusInvalid: return tc, nil + default: + panic("unknown challenge state: " + tc.getStatus()) } + up := &tlsALPN01Challenge{tc.baseChallenge.clone()} + config := &tls.Config{ NextProtos: []string{"acme-tls/1"}, ServerName: tc.Value, InsecureSkipVerify: true, // we expect a self-signed challenge certificate } - hostPort := net.JoinHostPort(tc.Value, "443") - conn, err := vo.tlsDial("tcp", hostPort, config) if err != nil { - if err = tc.iterateRetry(db, - ConnectionErr(errors.Wrapf(err, "error doing TLS dial for %s", hostPort))); err != nil { - return nil, err - } - return tc, nil + e := errors.Wrapf(err, "error doing TLS dial for %s", hostPort) + up.Error = ConnectionErr(e).ToACME() + return up, nil } defer conn.Close() @@ -469,31 +440,22 @@ func (tc *tlsALPN01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo val certs := cs.PeerCertificates if len(certs) == 0 { - if err = tc.iterateRetry(db, - RejectedIdentifierErr(errors.Errorf("%s challenge for %s resulted in no certificates", - tc.Type, tc.Value))); err != nil { - return nil, err - } - return tc, nil + e := errors.Errorf("%s challenge for %s resulted in no certificates", tc.Type, tc.Value) + up.Error = RejectedIdentifierErr(e).ToACME() + return up, nil } - if !cs.NegotiatedProtocolIsMutual || cs.NegotiatedProtocol != "acme-tls/1" { - if err = tc.iterateRetry(db, RejectedIdentifierErr(errors.Errorf("cannot negotiate ALPN acme-tls/1 protocol for "+ - "tls-alpn-01 challenge"))); err != nil { - return nil, err - } - return tc, nil + e := errors.Errorf("cannot negotiate ALPN acme-tls/1 protocol for tls-alpn-01 challenge") + up.Error = RejectedIdentifierErr(e).ToACME() + return up, nil } leafCert := certs[0] - if len(leafCert.DNSNames) != 1 || !strings.EqualFold(leafCert.DNSNames[0], tc.Value) { - if err = tc.iterateRetry(db, - RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: "+ - "leaf certificate must contain a single DNS name, %v", tc.Value))); err != nil { - return nil, err - } - return tc, nil + e := errors.Errorf("incorrect certificate for tls-alpn-01 challenge: " + + "leaf certificate must contain a single DNS name, %v", tc.Value) + up.Error = RejectedIdentifierErr(e).ToACME() + return up, nil } idPeAcmeIdentifier := asn1.ObjectIdentifier{1, 3, 6, 1, 5, 5, 7, 1, 31} @@ -509,46 +471,37 @@ func (tc *tlsALPN01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo val for _, ext := range leafCert.Extensions { if idPeAcmeIdentifier.Equal(ext.Id) { if !ext.Critical { - if err = tc.iterateRetry(db, - RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: " + - "acmeValidationV1 extension not critical"))); err != nil { - return nil, err - } - return tc, nil + e := errors.Errorf("incorrect certificate for tls-alpn-01 challenge: " + + "acmeValidationV1 extension not critical") + up.Error = IncorrectResponseErr(e).ToACME() + return up, nil } var extValue []byte rest, err := asn1.Unmarshal(ext.Value, &extValue) if err != nil || len(rest) > 0 || len(hashedKeyAuth) != len(extValue) { - if err = tc.iterateRetry(db, - RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: "+ - "malformed acmeValidationV1 extension value"))); err != nil { - return nil, err - } - return tc, nil + e := errors.Errorf("incorrect certificate for tls-alpn-01 challenge: " + + "malformed acmeValidationV1 extension value") + up.Error = IncorrectResponseErr(e).ToACME() + return up, nil } if subtle.ConstantTimeCompare(hashedKeyAuth[:], extValue) != 1 { - if err = tc.iterateRetry(db, - RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: "+ + e := errors.Errorf("incorrect certificate for tls-alpn-01 challenge: " + "expected acmeValidationV1 extension value %s for this challenge but got %s", - hex.EncodeToString(hashedKeyAuth[:]), hex.EncodeToString(extValue)))); err != nil { - return nil, err - } - return tc, nil + hex.EncodeToString(hashedKeyAuth[:]), hex.EncodeToString(extValue)) + up.Error = IncorrectResponseErr(e).ToACME() + // There is an appropriate value, but it doesn't match. + up.Status = StatusInvalid + return up, nil } - upd := &tlsALPN01Challenge{tc.baseChallenge.clone()} - upd.Status = StatusValid - upd.Error = nil - upd.Validated = clock.Now() - upd.Retry.Active = false - - if err := upd.save(db, tc); err != nil { - return nil, err - } - return upd, nil + up.Validated = clock.Now() + up.Status = StatusValid + up.Error = nil + up.Retry = nil + return up, nil } if idPeAcmeIdentifierV1Obsolete.Equal(ext.Id) { @@ -557,20 +510,15 @@ func (tc *tlsALPN01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo val } if foundIDPeAcmeIdentifierV1Obsolete { - // TODO this isn't likely going to change without user action, this is probably a good case for a hard failure... - if err = tc.iterateRetry(db, - RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: "+ - "obsolete id-pe-acmeIdentifier in acmeValidationV1 extension"))); err != nil { - return nil, err - } - return tc, nil + e := errors.Errorf("incorrect certificate for tls-alpn-01 challenge: " + + "obsolete id-pe-acmeIdentifier in acmeValidationV1 extension") + up.Error = IncorrectResponseErr(e).ToACME() + return up, nil } - if err = tc.iterateRetry(db, - RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: "+ - "missing acmeValidationV1 extension"))); err != nil { - return nil, err - } + e := errors.Errorf("incorrect certificate for tls-alpn-01 challenge: "+ + "missing acmeValidationV1 extension") + up.Error = IncorrectResponseErr(e).ToACME() return tc, nil } @@ -595,39 +543,35 @@ func newDNS01Challenge(db nosql.DB, ops ChallengeOptions) (challenge, error) { return dc, nil } -// KeyAuthorization creates the ACME key authorization value from a token -// and a jwk. -func KeyAuthorization(token string, jwk *jose.JSONWebKey) (string, error) { - thumbprint, err := jwk.Thumbprint(crypto.SHA256) - if err != nil { - return "", ServerInternalErr(errors.Wrap(err, "error generating JWK thumbprint")) - } - encPrint := base64.RawURLEncoding.EncodeToString(thumbprint) - return fmt.Sprintf("%s.%s", token, encPrint), nil -} - // validate attempts to validate the challenge. If the challenge has been // satisfactorily validated, the 'status' and 'validated' attributes are // updated. -func (dc *dns01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo validateOptions) (challenge, error) { +func (dc *dns01Challenge) validate(jwk *jose.JSONWebKey, vo validateOptions) (challenge, error) { // If already valid or invalid then return without performing validation. - if dc.getStatus() == StatusValid || dc.getStatus() == StatusInvalid { + switch dc.getStatus() { + case StatusPending: + panic("pending challenges must first be moved to the processing state") + case StatusProcessing: + break + case StatusValid, StatusInvalid: return dc, nil + default: + panic("unknown challenge state: " + dc.getStatus()) } + up := &dns01Challenge{dc.baseChallenge.clone()} + // Normalize domain for wildcard DNS names // This is done to avoid making TXT lookups for domains like // _acme-challenge.*.example.com // Instead perform txt lookup for _acme-challenge.example.com domain := strings.TrimPrefix(dc.Value, "*.") + record := "_acme-challenge." + domain - txtRecords, err := vo.lookupTxt("_acme-challenge." + domain) + txtRecords, err := vo.lookupTxt(record) if err != nil { - dnsErr := DNSErr(errors.Wrapf(err, "error looking up TXT "+ - "records for domain %s", domain)) - if err = dc.iterateRetry(db, dnsErr); err != nil { - return nil, err - } + e := errors.Wrapf(err, "error looking up TXT records for domain %s", domain) + up.Error = DNSErr(e).ToACME() return dc, nil } @@ -637,33 +581,39 @@ func (dc *dns01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo validat } h := sha256.Sum256([]byte(expectedKeyAuth)) expected := base64.RawURLEncoding.EncodeToString(h[:]) - var found bool + + if len(txtRecords) == 0 { + e := errors.Errorf("no TXT record found at '%s'", record) + up.Error = DNSErr(e).ToACME() + return up, nil + } + for _, r := range txtRecords { if r == expected { - found = true - break - } - } - if !found { - rejectedErr := RejectedIdentifierErr(errors.Errorf("keyAuthorization "+ - "does not match; expected %s, but got %s", expectedKeyAuth, txtRecords)) - if err = dc.iterateRetry(db, rejectedErr); err != nil { - return nil, err + up.Validated = time.Now().UTC() + up.Status = StatusValid + up.Error = nil + up.Retry = nil + return up, nil } - return dc, nil } - // Update and store the challenge. - upd := &dns01Challenge{dc.baseChallenge.clone()} - upd.Status = StatusValid - upd.Error = nil - upd.Validated = time.Now().UTC() - upd.Retry.Active = false - upd.Retry.Called = 0 - if err := upd.save(db, dc); err != nil { - return nil, err + up.Status = StatusInvalid + e := errors.Errorf("keyAuthorization does not match; expected %s, but got %s", + expectedKeyAuth, txtRecords) + up.Error = IncorrectResponseErr(e).ToACME() + return up, nil +} + +// KeyAuthorization creates the ACME key authorization value from a token +// and a jwk. +func KeyAuthorization(token string, jwk *jose.JSONWebKey) (string, *Error) { + thumbprint, err := jwk.Thumbprint(crypto.SHA256) + if err != nil { + return "", ServerInternalErr(errors.Wrap(err, "error generating JWK thumbprint")) } - return upd, nil + encPrint := base64.RawURLEncoding.EncodeToString(thumbprint) + return fmt.Sprintf("%s.%s", token, encPrint), nil } // getChallenge retrieves and unmarshals an ACME challenge type from the database. @@ -681,19 +631,3 @@ func getChallenge(db nosql.DB, id string) (challenge, error) { return ch, nil } -// iterateRetry iterates a challenge's retry and error objects upon a failed validation attempt -func (bc *baseChallenge) iterateRetry(db nosql.DB, error *Error) error { - upd := bc.clone() - upd.Error = error.ToACME() - upd.Error.Subproblems = append(upd.Error.Subproblems, error) - upd.Retry.Called ++ - upd.Retry.Active = true - if math.Mod(upd.Retry.Called , upd.Retry.MaxAttempts) == 0 { - upd.Status = StatusInvalid - upd.Retry.Active = false - } - if err := upd.save(db, bc); err != nil { - return err - } - return nil -} diff --git a/acme/common.go b/acme/common.go index ecdd371a..8b6b2e82 100644 --- a/acme/common.go +++ b/acme/common.go @@ -29,6 +29,8 @@ var ( StatusInvalid = "invalid" // StatusPending -- pending; e.g. an Order that is not ready to be finalized. StatusPending = "pending" + // processing -- e.g. a Challenge that is in the process of being validated. + StatusProcessing = "processing" // StatusDeactivated -- deactivated; e.g. for an Account that is not longer valid. StatusDeactivated = "deactivated" // StatusReady -- ready; e.g. for an Order that is ready to be finalized. diff --git a/api/utils.go b/api/utils.go index 154023d0..0d87a065 100644 --- a/api/utils.go +++ b/api/utils.go @@ -52,11 +52,6 @@ func JSON(w http.ResponseWriter, v interface{}) { JSONStatus(w, v, http.StatusOK) } -// JSON writes the passed value into the http.ResponseWriter. -func WriteProcessing(w http.ResponseWriter, v interface{}) { - JSONStatus(w, v, http.StatusProcessing) -} - // JSONStatus writes the given value into the http.ResponseWriter and the // given status is written as the status code of the response. func JSONStatus(w http.ResponseWriter, v interface{}, status int) {