Skip to content

Commit

Permalink
feat: implement batch map
Browse files Browse the repository at this point in the history
Signed-off-by: Sidhant Kohli <[email protected]>
  • Loading branch information
kohlisid committed Jun 28, 2024
1 parent c103eea commit 46a2caf
Show file tree
Hide file tree
Showing 16 changed files with 636 additions and 51 deletions.
264 changes: 218 additions & 46 deletions pkg/apis/proto/map/v1/map.pb.go

Large diffs are not rendered by default.

17 changes: 17 additions & 0 deletions pkg/apis/proto/map/v1/map.proto
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ service Map {

// IsReady is the heartbeat endpoint for gRPC.
rpc IsReady(google.protobuf.Empty) returns (ReadyResponse);

// BatchMapFn applies a
rpc BatchMapFn(stream MapRequest) returns (stream BatchMapResponse);
}

/**
Expand All @@ -24,6 +27,7 @@ message MapRequest {
google.protobuf.Timestamp event_time = 3;
google.protobuf.Timestamp watermark = 4;
map<string, string> headers = 5;
string id = 6;
}

/**
Expand All @@ -38,6 +42,19 @@ message MapResponse {
repeated Result results = 1;
}

/**
* MapResponse represents a response element.
*/
message BatchMapResponse {
message Result {
repeated string keys = 1;
bytes value = 2;
repeated string tags = 3;
}
repeated Result results = 1;
string id = 2;
}

/**
* ReadyResponse is the health check result.
*/
Expand Down
75 changes: 73 additions & 2 deletions pkg/apis/proto/map/v1/map_grpc.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions pkg/apis/proto/map/v1/mapmock/mapmock.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions pkg/mapper/examples/batchmap/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
####################################################################################################
# base
####################################################################################################
FROM alpine:3.12.3 as base
RUN apk update && apk upgrade && \
apk add ca-certificates && \
apk --no-cache add tzdata

COPY dist/flatmap-example /bin/flatmap-example
RUN chmod +x /bin/flatmap-example

####################################################################################################
# flatmap
####################################################################################################
FROM scratch as flatmap
ARG ARCH
COPY --from=base /usr/share/zoneinfo /usr/share/zoneinfo
COPY --from=base /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
COPY --from=base /bin/flatmap-example /bin/flatmap-example
ENTRYPOINT [ "/bin/flatmap-example" ]
18 changes: 18 additions & 0 deletions pkg/mapper/examples/batchmap/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
TAG ?= mapbatchv3
PUSH ?= true

.PHONY: build
build:
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -v -o ./dist/flatmap-example main.go

.PHONY: image-push
image-push: build
docker buildx build -t "quay.io/kohlisid/numaflow-go/map-flatmap:${TAG}" --platform linux/amd64,linux/arm64 --target flatmap . --push

.PHONY: image
image: build
docker build -t "quay.io/kohlisid/numaflow-go/map-flatmap:${TAG}" --target flatmap .
@if [ "$(PUSH)" = "true" ]; then docker push "quay.io/kohlisid/numaflow-go/map-flatmap:${TAG}"; fi

clean:
-rm -rf ./dist
3 changes: 3 additions & 0 deletions pkg/mapper/examples/batchmap/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Flatmap

An example User Defined Function that demonstrates how to write a `flatmap` User Defined Function.
17 changes: 17 additions & 0 deletions pkg/mapper/examples/batchmap/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
module flatmap

go 1.20

replace github.com/numaproj/numaflow-go => ../../../..

require github.com/numaproj/numaflow-go v0.7.0-rc2

require (
github.com/golang/protobuf v1.5.3 // indirect
golang.org/x/net v0.9.0 // indirect
golang.org/x/sys v0.7.0 // indirect
golang.org/x/text v0.9.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20230525234030-28d5490b6b19 // indirect
google.golang.org/grpc v1.57.0 // indirect
google.golang.org/protobuf v1.31.0 // indirect
)
24 changes: 24 additions & 0 deletions pkg/mapper/examples/batchmap/go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM=
golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU=
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/genproto/googleapis/rpc v0.0.0-20230525234030-28d5490b6b19 h1:0nDDozoAU19Qb2HwhXadU8OcsiO/09cnTqhUtq2MEOM=
google.golang.org/genproto/googleapis/rpc v0.0.0-20230525234030-28d5490b6b19/go.mod h1:66JfowdXAEgad5O9NnYcsNPLCPZJD++2L9X0PCMODrA=
google.golang.org/grpc v1.57.0 h1:kfzNeI/klCGD2YPMUlaGNT3pxvYfga7smW3Vth8Zsiw=
google.golang.org/grpc v1.57.0/go.mod h1:Sd+9RMTACXwmub0zcNY2c4arhtrbBYD1AUHI/dt16Mo=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8=
google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
34 changes: 34 additions & 0 deletions pkg/mapper/examples/batchmap/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package main

import (
"context"
"log"
"strings"

"github.com/numaproj/numaflow-go/pkg/mapper"
)

func mapFn(_ context.Context, datums []mapper.Datum) mapper.BatchResponses {
batchResponses := mapper.BatchResponsesBuilder()
log.Println("MYDEBUG: length of input ", len(datums))
for _, d := range datums {
msg := d.Value()
_ = d.EventTime() // Event time is available
_ = d.Watermark() // Watermark is available
// Split the msg into an array with comma.
strs := strings.Split(string(msg), ",")
results := mapper.NewBatchResponse(d.Id())
for _, s := range strs {
results = results.Append(mapper.NewMessage([]byte(s)))
}
batchResponses = batchResponses.Append(results)
}
return batchResponses
}

func main() {
err := mapper.NewBatchMapServer(mapper.BatchMapperFunc(mapFn)).Start(context.Background())
if err != nil {
log.Panic("Failed to start map function server: ", err)
}
}
64 changes: 64 additions & 0 deletions pkg/mapper/examples/batchmap/pipe.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
apiVersion: numaflow.numaproj.io/v1alpha1
kind: Pipeline
metadata:
name: simple-pipeline
spec:
watermark:
disabled: true # Optional, defaults to false.
limits:
readBatchSize: 500
vertices:
# - name: in d
# source:
# http: { }
- name: in
scale:
min: 1
source:
# A self data generating source
generator:
rpu: 10
duration: 1s
- name: batchmap
scale:
min: 1
max: 1
metadata:
annotations:
numaflow.numaproj.io/batch-map: "true"
limits:
readBatchSize: 500
udf:
container:
# image: "quay.io/numaio/numaflow-go/map-flatmap:stable"
image: "quay.io/kohlisid/numaflow-go/map-flatmap:mapbatchv3"
imagePullPolicy: Always
containerTemplate:
resources:
limits:
memory: 1Gi
requests:
memory: 1Gi
env:
- name: NUMAFLOW_DEBUG
value: "true" # DO NOT forget the double quotes!!!
# - name: cat
# udf:
# builtin:
# name: cat # A built-in UDF which simply cats the message
- name: out
scale:
min: 1
max: 1
sink:
# A simple log printing sink
log: {}
edges:
# - from: in
# to: cat
# - from: cat
# to: out
- from: in
to: batchmap
- from: batchmap
to: out
Loading

0 comments on commit 46a2caf

Please sign in to comment.