Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 15 additions & 5 deletions net/dns/manager_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,15 +169,25 @@ func (m *windowsManager) setHosts(hosts []*HostEntry) error {
}
const fileMode = 0 // ignored on windows.

// This can fail spuriously with an access denied error, so retry it a
// few times.
for i := 0; i < 5; i++ {
// The hosts file can be locked by antivirus, endpoint security
// (e.g., GlobalProtect, CrowdStrike), or Windows Defender for
// several seconds. Use exponential backoff to tolerate this.
backoff := 50 * time.Millisecond
const maxBackoff = 5 * time.Second
deadline := time.Now().Add(30 * time.Second)
for attempt := 0; ; attempt++ {
if err = atomicfile.WriteFile(hostsFile, outB, fileMode); err == nil {
return nil
}
time.Sleep(10 * time.Millisecond)
if time.Now().After(deadline) {
return fmt.Errorf("setHosts: failed after %d attempts over 30s: %w", attempt+1, err)
}
m.logf("setHosts: attempt %d failed, retrying in %v: %v", attempt+1, backoff, err)
time.Sleep(backoff)
if backoff *= 2; backoff > maxBackoff {
backoff = maxBackoff
}
}
return err
}

// setPrimaryDNS sets the given resolvers and domains as the Tailscale
Expand Down
33 changes: 26 additions & 7 deletions net/dns/nrpt_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ type nrptRuleDatabase struct {
isGPRefreshPending atomic.Bool
mu sync.Mutex // protects the fields below
ruleIDs []string
staleRuleIDs []string // rule IDs inherited from a previous run, cleaned up on first WriteSplitDNSConfig
isGPDirty bool
writeAsGP bool
}
Expand All @@ -76,13 +77,17 @@ func newNRPTRuleDatabase(logf logger.Logf) *nrptRuleDatabase {
ret.loadRuleSubkeyNames()
ret.detectWriteAsGP()
ret.watchForGPChanges()
// Best-effort: if our NRPT rule exists, try to delete it. Unlike
// per-interface configuration, NRPT rules survive the unclean
// termination of the Tailscale process, and depending on the
// rule, it may prevent us from reaching login.tailscale.com to
// boot up. The bootstrap resolver logic will save us, but it
// slows down start-up a bunch.
ret.DelAllRuleKeys()
// Track existing NRPT rules as stale rather than deleting them
// immediately. This preserves DNS routing for .coder queries
// during the gap between engine startup and the first successful
// DNS configuration. Stale rules are cleaned up in the first
// call to WriteSplitDNSConfig, after replacement rules are
// confirmed written.
if len(ret.ruleIDs) > 0 {
ret.staleRuleIDs = make([]string, len(ret.ruleIDs))
copy(ret.staleRuleIDs, ret.ruleIDs)
logf("preserved %d existing NRPT rule(s) as stale, will clean up on first config", len(ret.staleRuleIDs))
}
return ret
}

Expand Down Expand Up @@ -180,6 +185,12 @@ func (db *nrptRuleDatabase) DelAllRuleKeys() error {
db.mu.Lock()
defer db.mu.Unlock()

// Also clean up any stale rules from a previous instance.
if len(db.staleRuleIDs) > 0 {
db.delRuleKeys(db.staleRuleIDs)
db.staleRuleIDs = nil
}

if err := db.delRuleKeys(db.ruleIDs); err != nil {
return err
}
Expand Down Expand Up @@ -251,6 +262,14 @@ func (db *nrptRuleDatabase) WriteSplitDNSConfig(servers []string, domains []dnsn
db.mu.Lock()
defer db.mu.Unlock()

// Clean up stale rules from a previous engine instance, now that
// we have new rules to replace them.
if len(db.staleRuleIDs) > 0 {
db.logf("cleaning up %d stale NRPT rule(s) from previous session", len(db.staleRuleIDs))
db.delRuleKeys(db.staleRuleIDs)
db.staleRuleIDs = nil
}

// NRPT has an undocumented restriction that each rule may only be associated
// with a maximum of 50 domains. If we are setting rules for more domains
// than that, we need to split domains into chunks and write out a rule per chunk.
Expand Down
Loading