Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(indexer): add to modules and implement proto fields #22544

Merged
merged 29 commits into from
Nov 28, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
24879d0
progress
facundomedica Nov 18, 2024
8047466
progress
facundomedica Nov 18, 2024
5a52970
progress
facundomedica Nov 18, 2024
ce8b931
progress, need to revisit enums
facundomedica Nov 19, 2024
86fc09a
progress
facundomedica Nov 19, 2024
7e5a665
remove debug prints
facundomedica Nov 19, 2024
4ebf63f
small fix
facundomedica Nov 19, 2024
38fcf4c
add enums
facundomedica Nov 21, 2024
ee06bf2
add x/auth + CollInterfaceValue schemaCodec
facundomedica Nov 22, 2024
5381823
add some more modules just to make sure these things work
facundomedica Nov 22, 2024
748bac9
added mint
facundomedica Nov 22, 2024
3cd0277
handle nil schematype
facundomedica Nov 25, 2024
4b3b800
remove print
facundomedica Nov 25, 2024
0a73e70
merge main
facundomedica Nov 25, 2024
ebd7397
return errors instead of os.Exit
facundomedica Nov 26, 2024
5196e56
remove toolchain updates
facundomedica Nov 26, 2024
6ed03b5
add cl
facundomedica Nov 26, 2024
6c03930
lint fix
facundomedica Nov 26, 2024
f4a3aa0
fix tests
facundomedica Nov 26, 2024
5bb37a3
fixing stuff
facundomedica Nov 26, 2024
de642e1
fix more tests
facundomedica Nov 26, 2024
668838f
rollback instead
facundomedica Nov 27, 2024
f3cd58f
fix more stuff
facundomedica Nov 27, 2024
80d762e
Merge branch 'main' into facu/staking-indexer1
facundomedica Nov 27, 2024
91674b1
lint
facundomedica Nov 27, 2024
228ffb5
rollback some go mod changes
facundomedica Nov 27, 2024
e320f48
go mod
facundomedica Nov 27, 2024
d94bde6
suggestions from Julien
facundomedica Nov 28, 2024
dc20c04
Merge branch 'main' into facu/staking-indexer1
facundomedica Nov 28, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions baseapp/abci.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"errors"
"fmt"
"os"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -905,6 +906,9 @@ func (app *BaseApp) FinalizeBlock(req *abci.FinalizeBlockRequest) (res *abci.Fin
for _, streamingListener := range app.streamingManager.ABCIListeners {
if err := streamingListener.ListenFinalizeBlock(app.finalizeBlockState.Context(), *req, *res); err != nil {
app.logger.Error("ListenFinalizeBlock listening hook failed", "height", req.Height, "err", err)
if app.streamingManager.StopNodeOnErr {
os.Exit(1)
facundomedica marked this conversation as resolved.
Show resolved Hide resolved
}
}
}
}()
Expand Down Expand Up @@ -976,8 +980,6 @@ func (app *BaseApp) Commit() (*abci.CommitResponse, error) {
rms.SetCommitHeader(header)
}

app.cms.Commit()

resp := &abci.CommitResponse{
RetainHeight: retainHeight,
}
Expand All @@ -991,10 +993,17 @@ func (app *BaseApp) Commit() (*abci.CommitResponse, error) {
for _, abciListener := range abciListeners {
if err := abciListener.ListenCommit(ctx, *resp, changeSet); err != nil {
app.logger.Error("Commit listening hook failed", "height", blockHeight, "err", err)
if app.streamingManager.StopNodeOnErr {
os.Exit(1)
facundomedica marked this conversation as resolved.
Show resolved Hide resolved
}
}
}
}

// Commit after all listeners have been called, in case they error and we
// need to stop before committing.
app.cms.Commit()

// Reset the CheckTx state to the latest committed.
//
// NOTE: This is safe because CometBFT holds a lock on the mempool for
Expand Down
224 changes: 223 additions & 1 deletion codec/collections.go
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
package codec

import (
"encoding/json"
"fmt"
"reflect"

"github.com/cosmos/gogoproto/proto"
gogotypes "github.com/cosmos/gogoproto/types"
gogoprotoany "github.com/cosmos/gogoproto/types/any"
"google.golang.org/protobuf/encoding/protojson"
protov2 "google.golang.org/protobuf/proto"
"google.golang.org/protobuf/reflect/protoreflect"
"google.golang.org/protobuf/types/dynamicpb"
"google.golang.org/protobuf/types/known/durationpb"
"google.golang.org/protobuf/types/known/timestamppb"

"cosmossdk.io/collections"
collcodec "cosmossdk.io/collections/codec"
"cosmossdk.io/schema"
)

// BoolValue implements a ValueCodec that saves the bool value
Expand Down Expand Up @@ -51,12 +58,17 @@ type protoMessage[T any] interface {
proto.Message
}

type protoCollValueCodec[T any] interface {
collcodec.HasSchemaCodec[T]
collcodec.ValueCodec[T]
}

// CollValue inits a collections.ValueCodec for a generic gogo protobuf message.
func CollValue[T any, PT protoMessage[T]](cdc interface {
Marshal(proto.Message) ([]byte, error)
Unmarshal([]byte, proto.Message) error
},
) collcodec.ValueCodec[T] {
) protoCollValueCodec[T] {
return &collValue[T, PT]{cdc.(Codec), proto.MessageName(PT(new(T)))}
}

Expand Down Expand Up @@ -91,6 +103,150 @@ func (c collValue[T, PT]) ValueType() string {
return "github.com/cosmos/gogoproto/" + c.messageName
}

// hasUnpackInterfaces is implemented by proto-generated types whose nested
// Any fields must be resolved against an interface registry before use.
// NOTE(review): the previous inline comment here was a leftover editing
// instruction ("Replace `AnyUnpacker` with the actual type from
// gogoprotoany") — confirm gogoprotoany.AnyUnpacker is the intended type.
type hasUnpackInterfaces interface {
	UnpackInterfaces(unpacker gogoprotoany.AnyUnpacker) error
}

// SchemaCodec derives a collcodec.SchemaCodec for T from its protobuf
// descriptor, for use by the indexer.
//
// If T's Go value already maps directly to a valid schema kind (per
// schema.KindForGoValue), a single unnamed field with that kind is returned
// and no conversion functions are needed. Otherwise the message's fields are
// flattened into one schema field per proto field (see protoCols), and
// ToSchemaType converts a T into the corresponding slice of schema values by
// round-tripping the value through a dynamicpb message.
//
// Returns an error if no descriptor can be resolved for T's message name.
func (c collValue[T, PT]) SchemaCodec() (collcodec.SchemaCodec[T], error) {
	var (
		t  T
		pt PT
	)
	msgName := proto.MessageName(pt)
	desc, err := proto.HybridResolver.FindDescriptorByName(protoreflect.FullName(msgName))
	if err != nil {
		return collcodec.SchemaCodec[T]{}, fmt.Errorf("could not find descriptor for %s: %w", msgName, err)
	}
	schemaFields := protoCols(desc.(protoreflect.MessageDescriptor))

	kind := schema.KindForGoValue(t)
	if err := kind.Validate(); err == nil {
		return collcodec.SchemaCodec[T]{
			Fields: []schema.Field{{
				// we don't set any name so that this can be set to a good default by the caller
				Name: "",
				Kind: kind,
			}},
			// these can be nil because T maps directly to a schema value for this kind
			ToSchemaType:   nil,
			FromSchemaType: nil,
		}, nil
	} else {
		// we default to encoding everything to JSON String
		return collcodec.SchemaCodec[T]{
			Fields: schemaFields,
			ToSchemaType: func(t T) (any, error) {
				values := []interface{}{}
				msgDesc, ok := desc.(protoreflect.MessageDescriptor)
				if !ok {
					return nil, fmt.Errorf("expected message descriptor, got %T", desc)
				}

				// Round-trip t through the codec into a dynamic message so
				// that fields can be read generically via protoreflect.
				nm := dynamicpb.NewMessage(msgDesc)
				bz, err := c.cdc.Marshal(any(&t).(PT))
				if err != nil {
					return nil, err
				}

				err = c.cdc.Unmarshal(bz, nm)
				if err != nil {
					return nil, err
				}

				// Extract one schema value per flattened field, in the same
				// order as schemaFields.
				for _, field := range schemaFields {
					// Find the field descriptor by the Protobuf field name
					fieldDesc := msgDesc.Fields().ByName(protoreflect.Name(field.Name))
					if fieldDesc == nil {
						return nil, fmt.Errorf("field %q not found in message %s", field.Name, desc.FullName())
					}

					val := nm.ProtoReflect().Get(fieldDesc)

					// if the field is a map or list, we need to convert it to a slice of values
					if fieldDesc.IsList() {
						repeatedVals := []interface{}{}
						list := val.List()
						for i := 0; i < list.Len(); i++ {
							repeatedVals = append(repeatedVals, list.Get(i).Interface())
						}
						values = append(values, repeatedVals)
						continue
					}

					// Scalar kinds map directly onto Go values; message kinds
					// are special-cased below for well-known types.
					// NOTE(review): kinds with no case here (e.g. GroupKind)
					// append nothing, which would misalign values with
					// schemaFields — confirm such kinds cannot occur.
					switch fieldDesc.Kind() {
					case protoreflect.BoolKind:
						values = append(values, val.Bool())
					case protoreflect.Int32Kind, protoreflect.Sint32Kind, protoreflect.Sfixed32Kind,
						protoreflect.Int64Kind, protoreflect.Sint64Kind, protoreflect.Sfixed64Kind:
						values = append(values, val.Int())
					case protoreflect.Uint32Kind, protoreflect.Fixed32Kind, protoreflect.Uint64Kind,
						protoreflect.Fixed64Kind:
						values = append(values, val.Uint())
					case protoreflect.FloatKind, protoreflect.DoubleKind:
						values = append(values, val.Float())
					case protoreflect.StringKind:
						values = append(values, val.String())
					case protoreflect.BytesKind:
						values = append(values, val.Bytes())
					case protoreflect.EnumKind:
						// TODO: support enums better, with actual enums
						values = append(values, string(fieldDesc.Enum().Values().ByNumber(val.Enum()).Name()))
					case protoreflect.MessageKind:
						// Re-marshal the nested message so it can be decoded
						// as a well-known type (Timestamp/Duration) or kept
						// as raw JSON.
						msg := val.Interface().(*dynamicpb.Message)
						msgbz, err := c.cdc.Marshal(msg)
						if err != nil {
							return nil, err
						}

						if field.Kind == schema.TimeKind {
							// make it a time.Time
							ts := &timestamppb.Timestamp{}
							err = c.cdc.Unmarshal(msgbz, ts)
							if err != nil {
								return nil, fmt.Errorf("error unmarshalling timestamp: %w %x %s", err, msgbz, fieldDesc.FullName())
							}
							values = append(values, ts.AsTime())
						} else if field.Kind == schema.DurationKind {
							// make it a time.Duration
							dur := &durationpb.Duration{}
							err = c.cdc.Unmarshal(msgbz, dur)
							if err != nil {
								return nil, fmt.Errorf("error unmarshalling duration: %w", err)
							}
							values = append(values, dur.AsDuration())
						} else {
							// if not a time or duration, just keep it as a JSON object
							// we might want to change this to include the entire object as separate fields
							bz, err := c.cdc.MarshalJSON(msg)
							if err != nil {
								return nil, fmt.Errorf("error marshalling message: %w", err)
							}

							values = append(values, json.RawMessage(bz))
						}
					}

				}

				// if there's only one value, return it directly
				if len(values) == 1 {
					return values[0], nil
				}
				return values, nil
			},
			// NOTE(review): FromSchemaType expects a JSON string, which is
			// not the multi-value form ToSchemaType produces — confirm the
			// caller only uses this for the JSON-string encoding path.
			FromSchemaType: func(a any) (T, error) {
				var t T
				sz, ok := a.(string)
				if !ok {
					return t, fmt.Errorf("expected string, got %T", a)
				}
				err := json.Unmarshal([]byte(sz), &t)
				return t, err
			},
		}, nil
	}
}

type protoMessageV2[T any] interface {
*T
protov2.Message
Expand Down Expand Up @@ -179,3 +335,69 @@ func (c collInterfaceValue[T]) ValueType() string {
var t T
return fmt.Sprintf("%T", t)
}

// protoCols builds one schema.Field per top-level field of the given
// protobuf message descriptor, preserving declaration order.
func protoCols(desc protoreflect.MessageDescriptor) []schema.Field {
	fields := desc.Fields()
	cols := make([]schema.Field, fields.Len())
	for i := range cols {
		cols[i] = protoCol(fields.Get(i))
	}
	return cols
}

// protoCol maps a single protobuf field descriptor to a schema.Field.
// Repeated and map fields, as well as unknown message types, are stored as
// JSON; well-known Timestamp/Duration messages get dedicated kinds; fields
// with explicit presence are marked nullable.
func protoCol(f protoreflect.FieldDescriptor) schema.Field {
	field := schema.Field{Name: string(f.Name())}

	// Maps and repeated fields are kept as opaque, nullable JSON blobs.
	if f.IsMap() || f.IsList() {
		field.Kind = schema.JSONKind
		field.Nullable = true
		return field
	}

	switch f.Kind() {
	case protoreflect.BoolKind:
		field.Kind = schema.BoolKind
	case protoreflect.Int32Kind, protoreflect.Sint32Kind, protoreflect.Sfixed32Kind:
		field.Kind = schema.Int32Kind
	case protoreflect.Int64Kind, protoreflect.Sint64Kind, protoreflect.Sfixed64Kind:
		field.Kind = schema.Int64Kind
	case protoreflect.Uint32Kind, protoreflect.Fixed32Kind:
		// uint32 values always fit in a signed 64-bit integer.
		field.Kind = schema.Int64Kind
	case protoreflect.Uint64Kind, protoreflect.Fixed64Kind:
		field.Kind = schema.Uint64Kind
	case protoreflect.FloatKind:
		field.Kind = schema.Float32Kind
	case protoreflect.DoubleKind:
		field.Kind = schema.Float64Kind
	case protoreflect.StringKind:
		field.Kind = schema.StringKind
	case protoreflect.BytesKind:
		field.Kind = schema.BytesKind
	case protoreflect.EnumKind:
		// TODO: represent enums with a dedicated enum kind; for now they are
		// indexed by their value name as a string.
		field.Kind = schema.StringKind
	case protoreflect.MessageKind:
		field.Nullable = true
		switch f.Message().FullName() {
		case "google.protobuf.Timestamp":
			field.Kind = schema.TimeKind
		case "google.protobuf.Duration":
			field.Kind = schema.DurationKind
		default:
			field.Kind = schema.JSONKind
		}
	}

	if f.HasPresence() {
		field.Nullable = true
	}

	return field
}
4 changes: 3 additions & 1 deletion collections/codec/indexing.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,9 @@ func FallbackSchemaCodec[T any]() SchemaCodec[T] {
Kind: kind,
}},
// these can be nil because T maps directly to a schema value for this kind
ToSchemaType: nil,
ToSchemaType: func(t T) (any, error) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead can we just have the nil case be handled properly?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, it was already there for keys

return t, nil
},
FromSchemaType: nil,
}
} else {
Expand Down
2 changes: 1 addition & 1 deletion indexer/postgres/create_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ func (tm *objectIndexer) createTable(ctx context.Context, conn dbConn) error {

sqlStr := buf.String()
if tm.options.logger != nil {
tm.options.logger.Debug("Creating table %s", "table", tm.tableName(), "sql", sqlStr)
tm.options.logger.Debug("Creating table", "table", tm.tableName(), "sql", sqlStr)
}
_, err = conn.ExecContext(ctx, sqlStr)
return err
Expand Down
21 changes: 20 additions & 1 deletion indexer/postgres/listener.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,26 @@ func (i *indexerImpl) listener() appdata.Listener {
return mm.initializeSchema(i.ctx, i.tx)
},
StartBlock: func(data appdata.StartBlockData) error {
_, err := i.tx.Exec("INSERT INTO block (number) VALUES ($1)", data.Height)
var (
headerBz []byte
err error
)

if data.HeaderJSON != nil {
headerBz, err = data.HeaderJSON()
if err != nil {
return err
}
} else if data.HeaderBytes != nil {
headerBz, err = data.HeaderBytes()
if err != nil {
return err
}
}
Comment on lines +25 to +40
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Add validation for header data presence

The current implementation doesn't validate that at least one of HeaderJSON or HeaderBytes is present. This could lead to nil being inserted into the database.

Consider adding this validation at the start:

 var (
   headerBz []byte
   err     error
 )
+
+if data.HeaderJSON == nil && data.HeaderBytes == nil {
+  return fmt.Errorf("either HeaderJSON or HeaderBytes must be provided")
+}

 if data.HeaderJSON != nil {
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
var (
headerBz []byte
err error
)
if data.HeaderJSON != nil {
headerBz, err = data.HeaderJSON()
if err != nil {
return err
}
} else if data.HeaderBytes != nil {
headerBz, err = data.HeaderBytes()
if err != nil {
return err
}
}
var (
headerBz []byte
err error
)
if data.HeaderJSON == nil && data.HeaderBytes == nil {
return fmt.Errorf("either HeaderJSON or HeaderBytes must be provided")
}
if data.HeaderJSON != nil {
headerBz, err = data.HeaderJSON()
if err != nil {
return err
}
} else if data.HeaderBytes != nil {
headerBz, err = data.HeaderBytes()
if err != nil {
return err
}
}


// TODO: verify the format of headerBz, otherwise we'll get `ERROR: invalid input syntax for type json (SQLSTATE 22P02)`
_, err = i.tx.Exec("INSERT INTO block (number, header) VALUES ($1, $2)", data.Height, headerBz)

Comment on lines +25 to +44
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Add validation for headerBz to prevent SQL errors

The current implementation lacks validation for the header data before inserting it into the database, which could lead to SQL errors as noted in the TODO comment.

Consider adding validation before the SQL execution:

+ if !json.Valid(headerBz) {
+   return fmt.Errorf("invalid JSON format in header data at height %d", data.Height)
+ }
_, err = i.tx.Exec("INSERT INTO block (number, header) VALUES ($1, $2)", data.Height, headerBz)

Additionally, consider adding debug logging to help with troubleshooting:

+ if i.logger != nil {
+   i.logger.Debug("StartBlock", "height", data.Height, "header_size", len(headerBz))
+ }

Committable suggestion skipped: line range outside the PR's diff.

Comment on lines +42 to +44
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Enhance error handling and use prepared statements

The TODO comment indicates a potential SQL error due to invalid JSON format. Additionally, the query could benefit from using prepared statements for better security and performance.

Consider this implementation:

-// TODO: verify the format of headerBz, otherwise we'll get `ERROR: invalid input syntax for type json (SQLSTATE 22P02)`
-_, err = i.tx.Exec("INSERT INTO block (number, header) VALUES ($1, $2)", data.Height, headerBz)
+// Validate JSON format before insertion
+if !json.Valid(headerBz) {
+    return fmt.Errorf("invalid JSON format in header data")
+}
+
+const query = `INSERT INTO block (number, header) VALUES ($1, $2::jsonb)`
+_, err = i.tx.Exec(query, data.Height, headerBz)
+if err != nil {
+    return fmt.Errorf("failed to insert block data: %w", err)
+}

This change:

  1. Validates JSON format before insertion
  2. Uses the jsonb type cast for better type safety
  3. Provides more specific error context
  4. Uses a constant for the query string
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
// TODO: verify the format of headerBz, otherwise we'll get `ERROR: invalid input syntax for type json (SQLSTATE 22P02)`
_, err = i.tx.Exec("INSERT INTO block (number, header) VALUES ($1, $2)", data.Height, headerBz)
// Validate JSON format before insertion
if !json.Valid(headerBz) {
return fmt.Errorf("invalid JSON format in header data")
}
const query = `INSERT INTO block (number, header) VALUES ($1, $2::jsonb)`
_, err = i.tx.Exec(query, data.Height, headerBz)
if err != nil {
return fmt.Errorf("failed to insert block data: %w", err)
}

return err
},
OnObjectUpdate: func(data appdata.ObjectUpdateData) error {
Expand Down
11 changes: 7 additions & 4 deletions schema/appdata/async.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ func AsyncListener(opts AsyncListenerOptions, listener Listener) Listener {
done := ctx.Done()

go func() {
defer func() {
close(packetChan)
if opts.DoneWaitGroup != nil {
opts.DoneWaitGroup.Done()
}
}()

if opts.DoneWaitGroup != nil {
opts.DoneWaitGroup.Add(1)
}
Expand Down Expand Up @@ -74,10 +81,6 @@ func AsyncListener(opts AsyncListenerOptions, listener Listener) Listener {
}

case <-done:
close(packetChan)
if opts.DoneWaitGroup != nil {
opts.DoneWaitGroup.Done()
}
return
}
}
Expand Down
4 changes: 4 additions & 0 deletions schema/field.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ type Field struct {
// Validate validates the field.
func (c Field) Validate(typeSet TypeSet) error {
// valid name
if c.Name == "" {
return fmt.Errorf("field name cannot be empty, might be missing the named key codec")
}

if !ValidateName(c.Name) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does ValidateName not handle this case?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it does handle it but I thought it was nice to have a more descriptive error for the empty case which would be the most common while implementing

return fmt.Errorf("invalid field name %q", c.Name)
}
Expand Down
Loading
Loading