Skip to content

Implemented KVCacheAwareScorer #34

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,47 @@ FROM quay.io/projectquay/golang:1.24 AS builder
ARG TARGETOS
ARG TARGETARCH

# ENV GOPROXY=https://goproxy.io,direct
# Install build tools
RUN dnf install -y gcc-c++ libstdc++ libstdc++-devel && dnf clean all

WORKDIR /workspace

## NeuralMagic internal repos pull config
ARG GIT_NM_USER
ARG NM_TOKEN
### use git token
RUN echo -e "machine github.com\n\tlogin ${GIT_NM_USER}\n\tpassword ${NM_TOKEN}" >> ~/.netrc
ENV GOPRIVATE=github.com/neuralmagic
ENV GIT_TERMINAL_PROMPT=1

# Copy the Go Modules manifests
COPY go.mod go.mod
COPY go.sum go.sum
# cache deps before building and copying source so that we don't need to re-download as much
# and so that source changes don't invalidate our downloaded layer
RUN go mod download
RUN rm -rf ~/.netrc # remove git token

# Copy the go source
COPY cmd ./cmd
COPY pkg ./pkg
COPY internal ./internal
COPY api ./api

# HuggingFace tokenizer bindings
RUN mkdir -p lib
RUN curl -L https://github.com/daulet/tokenizers/releases/download/v1.20.2/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib
RUN ranlib lib/*.a

# Build
# GOARCH deliberately has no default value, so that the binary is built for the platform of the host where
# the command was invoked. For example, if we call make image-build in a local env running on an Apple Silicon (M1) OS,
# the docker BUILDPLATFORM arg will be linux/arm64, whereas on an x86 Apple machine it will be linux/amd64. Therefore,
# by leaving it empty we can ensure that the container and the binary shipped in it will have the same platform.
RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -o bin/epp cmd/epp/main.go cmd/epp/health.go
ENV CGO_ENABLED=1
ENV GOOS=${TARGETOS:-linux}
ENV GOARCH=${TARGETARCH}
RUN go build -o bin/epp -ldflags="-extldflags '-L$(pwd)/lib'" cmd/epp/main.go cmd/epp/health.go

# Use distroless as minimal base image to package the manager binary
# Refer to https://github.com/GoogleContainerTools/distroless for more details
Expand Down
159 changes: 146 additions & 13 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,7 @@ DEV_VERSION ?= 0.0.1
PROD_VERSION ?= 0.0.0
IMAGE_TAG_BASE ?= quay.io/vllm-d/$(PROJECT_NAME)/epp
IMG = $(IMAGE_TAG_BASE):$(DEV_VERSION)
NAMESPACE ?= hc4ai-operator

# CONTAINER_TOOL := $(shell command -v docker >/dev/null 2>&1 && echo docker || command -v podman >/dev/null 2>&1 && echo podman || echo "")
BUILDER := $(shell command -v buildah >/dev/null 2>&1 && echo buildah || echo $(CONTAINER_TOOL))
Expand Down Expand Up @@ -448,22 +449,15 @@ buildah-build: check-builder load-version-json ## Build and push image (multi-ar
@echo "✅ Using builder: $(BUILDER)"
@if [ "$(BUILDER)" = "buildah" ]; then \
echo "🔧 Buildah detected: Performing multi-arch build..."; \
FINAL_TAG=$(IMG); \
for arch in amd64; do \
ARCH_TAG=$$FINAL_TAG-$$arch; \
echo "📦 Building for architecture: $$arch"; \
buildah build --arch=$$arch --os=linux -t $(IMG)-$$arch . || exit 1; \
echo "🚀 Pushing image: $(IMG)-$$arch"; \
buildah push $(IMG)-$$arch docker://$(IMG)-$$arch || exit 1; \
done; \
echo "🧼 Removing existing manifest (if any)..."; \
buildah manifest rm $$FINAL_TAG || true; \
echo "🧱 Creating and pushing manifest list: $(IMG)"; \
buildah manifest create $(IMG); \
for arch in amd64; do \
ARCH_TAG=$$FINAL_TAG-$$arch; \
buildah manifest add $$FINAL_TAG $$ARCH_TAG; \
done; \
buildah manifest add $(IMG) $(IMG)-amd64; \
buildah manifest push --all $(IMG) docker://$(IMG); \
elif [ "$(BUILDER)" = "docker" ]; then \
echo "🐳 Docker detected: Building with buildx..."; \
Expand All @@ -485,7 +479,12 @@ buildah-build: check-builder load-version-json ## Build and push image (multi-ar
.PHONY: image-build
image-build: check-container-tool load-version-json ## Build container image using $(CONTAINER_TOOL)
@printf "\033[33;1m==== Building container image $(IMG) ====\033[0m\n"
$(CONTAINER_TOOL) build --build-arg TARGETOS=$(TARGETOS) --build-arg TARGETARCH=$(TARGETARCH) -t $(IMG) .
$(CONTAINER_TOOL) build --platform=$(TARGETOS)/$(TARGETARCH) \
--build-arg TARGETOS=$(TARGETOS) \
--build-arg TARGETARCH=$(TARGETARCH) \
--build-arg GIT_NM_USER=$(GIT_NM_USER)\
--build-arg NM_TOKEN=$(NM_TOKEN) \
-t $(IMG) .

.PHONY: image-push
image-push: check-container-tool load-version-json ## Push container image $(IMG) to registry
Expand Down Expand Up @@ -517,6 +516,142 @@ uninstall-docker: check-container-tool ## Uninstall app from $(CONTAINER_TOOL)
-$(CONTAINER_TOOL) stop $(PROJECT_NAME)-container && $(CONTAINER_TOOL) rm $(PROJECT_NAME)-container
@echo "$(CONTAINER_TOOL) uninstallation complete. Remove alias if set: unalias $(PROJECT_NAME)"

### Kubernetes Targets (kubectl)

# TODO: currently incorrect because it depends on OpenShift APIs.
# See: https://github.com/neuralmagic/gateway-api-inference-extension/issues/14
# install-k8s: create $(NAMESPACE) if needed, point the current kubectl context
# at it, apply the kustomize output of deploy/environments/openshift, and print
# an alias for exec-ing into the resulting pod.
#
# PROJECT_NAME and NAMESPACE are passed to envsubst on its own command line.
# Each recipe line normally runs in a separate shell (unless .ONESHELL is
# enabled elsewhere in this Makefile), so a standalone `export VAR=...` line
# would not be visible to the later envsubst invocation.
.PHONY: install-k8s
install-k8s: check-kubectl check-kustomize check-envsubst ## Install on Kubernetes
	@echo "Creating namespace (if needed) and setting context to $(NAMESPACE)..."
	kubectl create namespace $(NAMESPACE) 2>/dev/null || true
	kubectl config set-context --current --namespace=$(NAMESPACE)
	@echo "Deploying resources from deploy/ ..."
	# Build the kustomization from deploy, substitute variables, and apply the YAML
	kustomize build deploy/environments/openshift | \
		PROJECT_NAME='$(PROJECT_NAME)' NAMESPACE='$(NAMESPACE)' envsubst | \
		kubectl apply -f -
	@echo "Waiting for pod to become ready..."
	sleep 5
	@# Look the pod up in $(NAMESPACE) explicitly rather than relying on the context.
	@POD=$$(kubectl get pod -n $(NAMESPACE) -l app=$(PROJECT_NAME)-statefulset -o jsonpath='{.items[0].metadata.name}'); \
	echo "Kubernetes installation complete."; \
	echo "To use the app, run:"; \
	echo "alias $(PROJECT_NAME)='kubectl exec -n $(NAMESPACE) -it $$POD -- /app/$(PROJECT_NAME)'"

# TODO: currently incorrect because it depends on OpenShift APIs.
# See: https://github.com/neuralmagic/gateway-api-inference-extension/issues/14
# uninstall-k8s: best-effort removal of everything install-k8s applied, then a
# forced delete of the statefulset pod if one still exists.
#
# PROJECT_NAME and NAMESPACE are passed to envsubst on its own command line.
# Each recipe line normally runs in a separate shell (unless .ONESHELL is
# enabled elsewhere in this Makefile), so a standalone `export VAR=...` line
# would not be visible to the later envsubst invocation.
.PHONY: uninstall-k8s
uninstall-k8s: check-kubectl check-kustomize check-envsubst ## Uninstall from Kubernetes
	@echo "Removing resources from Kubernetes..."
	kustomize build deploy/environments/openshift | \
		PROJECT_NAME='$(PROJECT_NAME)' NAMESPACE='$(NAMESPACE)' envsubst | \
		kubectl delete --force -f - || true
	@# The lookup is namespaced and tolerant of "no pod found": the jsonpath
	@# index fails on an empty list, and deleting an empty pod name is pointless.
	POD=$$(kubectl get pod -n $(NAMESPACE) -l app=$(PROJECT_NAME)-statefulset -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true); \
	if [ -n "$$POD" ]; then \
		echo "Deleting pod: $$POD"; \
		kubectl delete pod "$$POD" -n $(NAMESPACE) --force --grace-period=0 || true; \
	fi; \
	echo "Kubernetes uninstallation complete. Remove alias if set: unalias $(PROJECT_NAME)"

### OpenShift Targets (oc)

# ------------------------------------------------------------------------------
# OpenShift Infrastructure Installer
#
# This target deploys infrastructure requirements for the entire cluster.
# Among other things, this includes CRDs and operators which all users of the
# cluster need for development (e.g. Gateway API, Istio, etc).
#
# **Warning**: Only run this if you're certain you should be running it. It
# has implications for all users of the cluster!
# ------------------------------------------------------------------------------
# Guarded target: unless INFRASTRUCTURE_OVERRIDE (whitespace-stripped) is
# exactly "true", the only thing this rule contains is an $(error ...) call.
# When the guard passes, three kustomize builds are applied in order:
# CRDs, the Sail operator (built with --enable-helm), then the Istio control
# plane. The first two use server-side apply with --force-conflicts.
.PHONY: install-openshift-infrastructure
install-openshift-infrastructure:
ifneq ($(strip $(INFRASTRUCTURE_OVERRIDE)),true)
	$(error "Error: The environment variable INFRASTRUCTURE_OVERRIDE must be set to true in order to run this target.")
else
	@echo "INFRASTRUCTURE_OVERRIDE is set to true, deploying infrastructure components"
	@echo "Installing CRDs for Gateway API & GIE"
	kustomize build deploy/components/crds | kubectl apply --server-side --force-conflicts -f -
	@echo "Installing the Istio Sail Operator and CRDs for Istio"
	kustomize build --enable-helm deploy/components/sail-operator | kubectl apply --server-side --force-conflicts -f -
	@echo "Installing the Istio Control Plane"
	kustomize build deploy/components/istio-control-plane | kubectl apply -f -
endif

# ------------------------------------------------------------------------------
# OpenShift Infrastructure Uninstaller
#
# This target removes all infrastructure components (e.g. CRDs, operators,
# etc) for the entire cluster.
#
# **Warning**: Only run this if you're certain you should be running it. **This
# will disrupt everyone using the cluster**. Generally this should only be run
# when the infrastructure components have undergone very significant change, and
# you need to do a hard cleanup and re-deploy.
# ------------------------------------------------------------------------------
# Guarded target: unless INFRASTRUCTURE_OVERRIDE (whitespace-stripped) is
# exactly "true", the only thing this rule contains is an $(error ...) call.
# When the guard passes, components are deleted in the reverse of the install
# order: Istio control plane, Sail operator, then the CRDs. Every delete is
# followed by `|| true`, so a failed step does not stop the remaining ones.
.PHONY: uninstall-openshift-infrastructure
uninstall-openshift-infrastructure:
ifneq ($(strip $(INFRASTRUCTURE_OVERRIDE)),true)
	$(error "Error: The environment variable INFRASTRUCTURE_OVERRIDE must be set to true in order to run this target.")
else
	@echo "INFRASTRUCTURE_OVERRIDE is set to true, removing infrastructure components"
	@echo "Uninstalling the Istio Control Plane"
	kustomize build deploy/components/istio-control-plane | kubectl delete -f - || true
	@echo "Uninstalling the Istio Sail Operator and CRDs for Istio"
	kustomize build --enable-helm deploy/components/sail-operator | kubectl delete -f - || true
	@echo "Uninstalling CRDs for Gateway API & GIE"
	kustomize build deploy/components/crds | kubectl delete -f - || true
endif

# ------------------------------------------------------------------------------
# OpenShift Installer
#
# This target deploys components in a namespace on an OpenShift cluster for
# a developer to do development and testing cycles.
# ------------------------------------------------------------------------------
# install-openshift: create $(NAMESPACE) if needed, render the manifests in
# deploy/environments/openshift, substitute the four placeholders listed in the
# envsubst argument, and apply the result into $(NAMESPACE).
#
# The make values are handed to envsubst explicitly so the substituted
# manifests use the same NAMESPACE (and friends) that the kubectl commands in
# this recipe use, whether or not those variables are exported to the recipe
# environment elsewhere in this Makefile.
.PHONY: install-openshift
install-openshift: check-kubectl check-kustomize check-envsubst ## Install on OpenShift
	@echo "$(PROJECT_NAME) $(NAMESPACE) $(IMAGE_TAG_BASE) $(VERSION)"
	@echo "Creating namespace $(NAMESPACE)..."
	kubectl create namespace $(NAMESPACE) 2>/dev/null || true
	@echo "Deploying common resources from deploy/ ..."
	# Build and substitute the base manifests from deploy, then apply them
	kustomize build deploy/environments/openshift | \
		PROJECT_NAME='$(PROJECT_NAME)' NAMESPACE='$(NAMESPACE)' IMAGE_TAG_BASE='$(IMAGE_TAG_BASE)' VERSION='$(VERSION)' \
		envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' | \
		kubectl apply -n $(NAMESPACE) -f -
	@echo "Waiting for pod to become ready..."
	sleep 5
	@POD=$$(kubectl get pod -l app=$(PROJECT_NAME)-statefulset -n $(NAMESPACE) -o jsonpath='{.items[0].metadata.name}'); \
	echo "OpenShift installation complete."; \
	echo "To use the app, run:"; \
	echo "alias $(PROJECT_NAME)='kubectl exec -n $(NAMESPACE) -it $$POD -- /app/$(PROJECT_NAME)'"

# ------------------------------------------------------------------------------
# OpenShift Uninstaller
#
# This target cleans up a developer's testing and development namespace,
# removing all components therein.
# ------------------------------------------------------------------------------
# uninstall-openshift: best-effort removal of the manifests rendered from
# deploy/environments/openshift, followed by a forced delete of the
# statefulset pod if one is still present.
#
# The delete is scoped with -n $(NAMESPACE) to mirror the `kubectl apply -n`
# used by install-openshift; without it, manifests that carry no namespace of
# their own would be deleted from whatever namespace the current context
# points at. The make values are passed to envsubst explicitly so the rendered
# manifests match the ones that were applied.
.PHONY: uninstall-openshift
uninstall-openshift: check-kubectl check-kustomize check-envsubst ## Uninstall from OpenShift
	@echo "Removing resources from OpenShift..."
	kustomize build deploy/environments/openshift | \
		PROJECT_NAME='$(PROJECT_NAME)' NAMESPACE='$(NAMESPACE)' IMAGE_TAG_BASE='$(IMAGE_TAG_BASE)' VERSION='$(VERSION)' \
		envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' | \
		kubectl delete --force -n $(NAMESPACE) -f - || true
# Disabled Route cleanup, kept for reference:
# @if kubectl api-resources --api-group=route.openshift.io | grep -q Route; then \
# envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' < deploy/openshift/route.yaml | kubectl delete --force -f - || true; \
# fi
	@# Tolerate "no pod found": the jsonpath index fails on an empty list, and
	@# deleting an empty pod name is pointless.
	@POD=$$(kubectl get pod -l app=$(PROJECT_NAME)-statefulset -n $(NAMESPACE) -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true); \
	if [ -n "$$POD" ]; then \
		echo "Deleting pod: $$POD"; \
		kubectl delete pod "$$POD" -n $(NAMESPACE) --force --grace-period=0 || true; \
	fi; \
	echo "OpenShift uninstallation complete. Remove alias if set: unalias $(PROJECT_NAME)"

### RBAC Targets (using kustomize and envsubst)

# Render the RBAC kustomization, let envsubst fill in the four listed
# placeholders from the recipe environment, and apply the result.
.PHONY: install-rbac
install-rbac: check-kubectl check-kustomize check-envsubst ## Install RBAC
	@echo "Applying RBAC configuration from deploy/rbac..."
	kustomize build deploy/environments/openshift/rbac | \
		envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' | \
		kubectl apply -f -

# Render the same RBAC kustomization that install-rbac applies and delete it.
# The trailing `|| true` keeps the target from failing when the delete does.
.PHONY: uninstall-rbac
uninstall-rbac: check-kubectl check-kustomize check-envsubst ## Uninstall RBAC
	@echo "Removing RBAC configuration from deploy/rbac..."
	kustomize build deploy/environments/openshift/rbac | \
		envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' | \
		kubectl delete -f - || true


##@ Version Extraction
.PHONY: version dev-registry prod-registry extract-version-info
Expand Down Expand Up @@ -652,11 +787,9 @@ check-alias: check-container-tool
echo "✅ Alias is likely to work: alias $(PROJECT_NAME)='$(CONTAINER_TOOL) exec -it $(PROJECT_NAME)-container /app/$(PROJECT_NAME)'"; \
fi

# This is being used for tekton builds in the CI/CD pipeline, to provide a
# default namespace to do a test deployment of the Kubernetes dev environment.
.PHONY: print-namespace
print-namespace:
@echo "hc4ai-operator"
print-namespace: ## Print the current namespace
@echo "$(NAMESPACE)"

.PHONY: print-project-name
print-project-name: ## Print the current project name
Expand Down
21 changes: 14 additions & 7 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
module sigs.k8s.io/gateway-api-inference-extension

go 1.24.0
go 1.24.1

toolchain go1.24.2

require (
github.com/elastic/crd-ref-docs v0.1.0
github.com/envoyproxy/go-control-plane/envoy v1.32.4
github.com/go-logr/logr v1.4.2
github.com/google/go-cmp v0.7.0
github.com/google/uuid v1.6.0
github.com/neuralmagic/kvcache-manager v0.0.0-20250422070607-db465f8aaa71
github.com/onsi/ginkgo/v2 v2.23.4
github.com/onsi/gomega v1.37.0
github.com/prometheus/client_golang v1.22.0
Expand Down Expand Up @@ -41,7 +45,9 @@ require (
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/cncf/xds/go v0.0.0-20241223141626-cff3c89139a3 // indirect
github.com/daulet/tokenizers v1.20.2 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
github.com/evanphx/json-patch/v5 v5.9.11 // indirect
Expand All @@ -66,9 +72,9 @@ require (
github.com/google/gnostic-models v0.6.8 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/gorilla/websocket v1.5.0 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
github.com/huandu/xstrings v1.3.3 // indirect
github.com/imdario/mergo v0.3.11 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
Expand All @@ -90,6 +96,7 @@ require (
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/redis/go-redis/v9 v9.7.3 // indirect
github.com/spf13/cobra v1.8.1 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stoewer/go-strcase v1.3.0 // indirect
Expand All @@ -104,15 +111,15 @@ require (
go.opentelemetry.io/otel/trace v1.34.0 // indirect
go.opentelemetry.io/proto/otlp v1.3.1 // indirect
go.uber.org/automaxprocs v1.6.0 // indirect
golang.org/x/crypto v0.36.0 // indirect
golang.org/x/crypto v0.37.0 // indirect
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect
golang.org/x/mod v0.24.0 // indirect
golang.org/x/net v0.38.0 // indirect
golang.org/x/net v0.39.0 // indirect
golang.org/x/oauth2 v0.27.0 // indirect
golang.org/x/sync v0.12.0 // indirect
golang.org/x/sync v0.13.0 // indirect
golang.org/x/sys v0.32.0 // indirect
golang.org/x/term v0.30.0 // indirect
golang.org/x/text v0.23.0 // indirect
golang.org/x/term v0.31.0 // indirect
golang.org/x/text v0.24.0 // indirect
golang.org/x/time v0.7.0 // indirect
golang.org/x/tools v0.31.0 // indirect
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect
Expand Down
Loading
Loading