Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Changes

- Add automatic DA retriever walkback when P2P stalls and DA blocks are too far ahead of the node height [#3262](https://github.com/evstack/ev-node/pull/3262)
- Add `sequencer_blocks_synchronized_total` Prometheus counter metric tracking blocks synced by source (DA/P2P) [#3259](https://github.com/evstack/ev-node/pull/3259)
- Make it easier to override `DefaultMaxBlobSize` by ldflags [#3235](https://github.com/evstack/ev-node/pull/3235)
- Add solo sequencer (simple in memory single sequencer without force inclusion) [#3235](https://github.com/evstack/ev-node/pull/3235)
Expand Down
17 changes: 17 additions & 0 deletions block/internal/da/subscriber.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,23 @@ func (s *Subscriber) HasReachedHead() bool {
return s.headReached.Load()
}

// RewindTo lowers localDAHeight to daHeight so the catchup loop fetches the
// DA heights from there again. It exists for the case where the primary
// source (P2P) stalls and DA has to cover the missing range.
//
// The call is a no-op when daHeight is not strictly below the current local
// height. On a successful rewind the head-reached flag is cleared and the
// catchup loop is signalled.
func (s *Subscriber) RewindTo(daHeight uint64) {
	// Re-read the current height after every lost compare-and-swap; stop as
	// soon as the target is no longer below it.
	for current := s.localDAHeight.Load(); daHeight < current; current = s.localDAHeight.Load() {
		if !s.localDAHeight.CompareAndSwap(current, daHeight) {
			continue
		}
		s.headReached.Store(false)
		s.signalCatchup()
		return
	}
}

// signalCatchup sends a non-blocking signal to wake catchupLoop.
func (s *Subscriber) signalCatchup() {
select {
Expand Down
38 changes: 38 additions & 0 deletions block/internal/da/subscriber_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,44 @@ func TestSubscriber_RunCatchup(t *testing.T) {
})
}

func TestSubscriber_RewindTo(t *testing.T) {
t.Run("no_op_when_target_is_equal_or_higher", func(t *testing.T) {
sub := NewSubscriber(SubscriberConfig{
Client: testmocks.NewMockClient(t),
Logger: zerolog.Nop(),
Handler: new(MockSubscriberHandler),
Namespaces: [][]byte{[]byte("ns")},
StartHeight: 100,
DABlockTime: time.Millisecond,
})
sub.localDAHeight.Store(100)

sub.RewindTo(100)
assert.Equal(t, uint64(100), sub.LocalDAHeight())

sub.RewindTo(200)
assert.Equal(t, uint64(100), sub.LocalDAHeight())
})

t.Run("rewinds_local_height_and_clears_head", func(t *testing.T) {
sub := NewSubscriber(SubscriberConfig{
Client: testmocks.NewMockClient(t),
Logger: zerolog.Nop(),
Handler: new(MockSubscriberHandler),
Namespaces: [][]byte{[]byte("ns")},
StartHeight: 100,
DABlockTime: time.Millisecond,
})
sub.localDAHeight.Store(150)
sub.headReached.Store(true)

sub.RewindTo(120)

assert.Equal(t, uint64(120), sub.LocalDAHeight())
assert.False(t, sub.HasReachedHead())
})
}

func TestSubscriber_RunSubscription_InlineDoesNotPrematurelyReachHead(t *testing.T) {
ctx, cancel := context.WithCancel(t.Context())
defer cancel()
Expand Down
87 changes: 74 additions & 13 deletions block/internal/syncing/da_follower.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"errors"
"slices"
"sync"
"sync/atomic"
"time"

"github.com/rs/zerolog"
Expand All @@ -26,10 +27,19 @@ type DAFollower interface {

// daFollower is the concrete implementation of DAFollower.
type daFollower struct {
subscriber *da.Subscriber
retriever DARetriever
eventSink common.EventSink
logger zerolog.Logger
subscriber *da.Subscriber
retriever DARetriever
eventSink common.EventSink
logger zerolog.Logger
nodeHeightFn func() uint64
p2pStalledFn func() bool
startDAHeight uint64

// walkbackActive is set when the follower detects a gap between the
// DA events it just processed and the node's current block height.
// While active, every DA height (even empty ones) triggers a rewind
// so the subscriber walks backwards until the gap is filled.
walkbackActive atomic.Bool

// Priority queue for P2P hint heights (absorbed from DARetriever refactoring #2).
priorityMu sync.Mutex
Expand All @@ -48,6 +58,12 @@ type DAFollowerConfig struct {
DataNamespace []byte // may be nil or equal to Namespace
StartDAHeight uint64
DABlockTime time.Duration
// NodeHeight returns the node's current block height. Used together
// with P2PStalled to detect gaps that need a DA walkback.
NodeHeight func() uint64
// P2PStalled returns true when the P2P sync worker has failed to
// deliver blocks. The follower only walks back when P2P is stalled.
P2PStalled func() bool
}

// NewDAFollower creates a new daFollower.
Expand All @@ -61,6 +77,9 @@ func NewDAFollower(cfg DAFollowerConfig) DAFollower {
retriever: cfg.Retriever,
eventSink: cfg.EventSink,
logger: cfg.Logger.With().Str("component", "da_follower").Logger(),
nodeHeightFn: cfg.NodeHeight,
p2pStalledFn: cfg.P2PStalled,
startDAHeight: cfg.StartDAHeight,
priorityHeights: make([]uint64, 0),
}

Expand Down Expand Up @@ -123,6 +142,13 @@ func (f *daFollower) HandleEvent(ctx context.Context, ev datypes.SubscriptionEve

// HandleCatchup retrieves events at a single DA height and pipes them
// to the event sink. Checks priority heights first.
//
// When both the node-height and P2P-stalled callbacks are configured and P2P
// reports a stall, HandleCatchup detects gaps between the block heights it
// just fetched and the node's current height. If the smallest block height is
// above nodeHeight+1 the subscriber is rewound by one DA height so it
// re-fetches the previous height on the next iteration. This "walk-back"
// continues automatically through empty DA heights until blocks contiguous
// with the node are found, and never rewinds below the configured start DA height.
func (f *daFollower) HandleCatchup(ctx context.Context, daHeight uint64) error {
// 1. Drain stale or future priority heights from P2P hints
for priorityHeight := f.popPriorityHeight(); priorityHeight != 0; priorityHeight = f.popPriorityHeight() {
Expand All @@ -134,46 +160,81 @@ func (f *daFollower) HandleCatchup(ctx context.Context, daHeight uint64) error {
Uint64("da_height", priorityHeight).
Msg("fetching priority DA height from P2P hint")

if err := f.fetchAndPipeHeight(ctx, priorityHeight); err != nil {
if _, err := f.fetchAndPipeHeight(ctx, priorityHeight); err != nil {
if errors.Is(err, datypes.ErrHeightFromFuture) {
// Priority hint points to a future height — silently ignore.
f.logger.Debug().Uint64("priority_da_height", priorityHeight).
Msg("priority hint is from future, ignoring")
continue
}
// Roll back so daHeight is attempted again next cycle after backoff.
return err
}
break // continue with daHeight
}

// 2. Normal sequential fetch
if err := f.fetchAndPipeHeight(ctx, daHeight); err != nil {
events, err := f.fetchAndPipeHeight(ctx, daHeight)
if err != nil {
return err
}

// 3. Self-correction: walk back when P2P has stalled and DA blocks skip
// past the node height. Only active when P2P is confirmed stalled to
// avoid unnecessary rewinds during normal DA catchup.
p2pStalled := f.p2pStalledFn != nil && f.p2pStalledFn()
if p2pStalled && f.nodeHeightFn != nil && daHeight > f.startDAHeight {
nodeHeight := f.nodeHeightFn()

needsWalkback := f.walkbackActive.Load()
if len(events) > 0 {
minHeight := events[0].Header.Height()
for _, e := range events[1:] {
if e.Header.Height() < minHeight {
minHeight = e.Header.Height()
}
}
if minHeight <= nodeHeight+1 {
f.walkbackActive.Store(false)
return nil
}
needsWalkback = true
}

if needsWalkback {
f.walkbackActive.Store(true)
f.logger.Info().
Uint64("da_height", daHeight).
Uint64("node_height", nodeHeight).
Int("events", len(events)).
Msg("P2P stalled with gap between DA blocks and node height, walking DA follower back")
f.subscriber.RewindTo(daHeight - 1)
}
} else if !p2pStalled {
f.walkbackActive.Store(false)
}

return nil
}

// fetchAndPipeHeight retrieves events at a single DA height and pipes them.
// It does NOT handle ErrHeightFromFuture — callers must decide how to react
// because the correct response depends on whether this is a normal sequential
// catchup or a priority-hint fetch.
func (f *daFollower) fetchAndPipeHeight(ctx context.Context, daHeight uint64) error {
func (f *daFollower) fetchAndPipeHeight(ctx context.Context, daHeight uint64) ([]common.DAHeightEvent, error) {
events, err := f.retriever.RetrieveFromDA(ctx, daHeight)
if err != nil {
if errors.Is(err, datypes.ErrBlobNotFound) {
return nil
return nil, nil
}
return err
return nil, err
}

for _, event := range events {
if err := f.eventSink.PipeEvent(ctx, event); err != nil {
return err
return nil, err
}
}

return nil
return events, nil
}

// QueuePriorityHeight queues a DA height for priority retrieval.
Expand Down
Loading
Loading