•   about 2 years ago

Databricks RAG tutorial pipeline - 404 Client Error

Hi,

I am running the RAG tutorial notebooks and got a 404 error while running this notebook: 04-Deploy-Model-as-Endpoint.
I could successfully run the first, second, and third notebooks. I have set up my environment in Oregon (us-west-2), and I verified the URI for the previous notebooks. I am not sure how to solve this error.

Code:

import urllib
import json
import mlflow

mlflow.set_registry_uri('databricks-uc')
client = MlflowClient()
model_name = f"{catalog}.{db}.dbdemos_advanced_chatbot_model"
serving_endpoint_name = f"dbdemos_endpoint_advanced_{catalog}_{db}"[:63]
latest_model = client.get_model_version_by_alias(model_name, "prod")

#TODO: use the sdk once model serving is available.
serving_client = EndpointApiClient()
# Start the endpoint using the REST API (you can do it using the UI directly)
auto_capture_config = {
"catalog_name": catalog,
"schema_name": db,
"table_name_prefix": serving_endpoint_name
}
environment_vars={"DATABRICKS_TOKEN": "{{secrets/dbdemos/rag_sp_token}}"}
serving_client.create_endpoint_if_not_exists(serving_endpoint_name, model_name=model_name, model_version = latest_model.version, workload_size="Small", scale_to_zero_enabled=True, wait_start = True, auto_capture_config=auto_capture_config, environment_vars=environment_vars)

Error:

HTTPError: 404 Client Error: Not Found for url: https://oregon.cloud.databricks.com/api/2.0/serving-endpoints
File , line 20
14 auto_capture_config = {
15 "catalog_name": catalog,
16 "schema_name": db,
17 "table_name_prefix": serving_endpoint_name
18 }
19 environment_vars={"DATABRICKS_TOKEN": "{{secrets/dbdemos/rag_sp_token}}"}
---> 20 serving_client.create_endpoint_if_not_exists(serving_endpoint_name, model_name=model_name, model_version = latest_model.version, workload_size="Small", scale_to_zero_enabled=True, wait_start = True, auto_capture_config=auto_capture_config, environment_vars=environment_vars)
File , line 32, in EndpointApiClient.create_endpoint_if_not_exists(self, endpoint_name, model_name, model_version, workload_size, scale_to_zero_enabled, wait_start, auto_capture_config, environment_vars)
24 models = [{
25 "model_name": model_name,
26 "model_version": model_version,
(...)
29 "environment_vars": environment_vars
30 }]
31 if not self.inference_endpoint_exists(endpoint_name):
---> 32 r = self.create_inference_endpoint(endpoint_name, models, auto_capture_config)
33 #Make sure we have the proper version deployed
34 else:
35 ep = self.get_inference_endpoint(endpoint_name)
File , line 9, in EndpointApiClient.create_inference_endpoint(self, endpoint_name, served_models, auto_capture_config)
7 def create_inference_endpoint(self, endpoint_name, served_models, auto_capture_config = None):
8 data = {"name": endpoint_name, "config": {"served_models": served_models, "auto_capture_config": auto_capture_config}}
----> 9 return self._post("api/2.0/serving-endpoints", data)
File , line 97, in EndpointApiClient._post(self, uri, data, allow_error)
96 def _post(self, uri, data = {}, allow_error = False):
---> 97 return self._process(requests.post(f"{self.base_url}/{uri}", json=data, headers=self.headers), allow_error)
File , line 107, in EndpointApiClient._process(self, r, allow_error)
105 def _process(self, r, allow_error = False):
106 if r.status_code == 500 or r.status_code == 403 or not allow_error:
--> 107 r.raise_for_status()
108 return r.json()
File /databricks/python/lib/python3.10/site-packages/requests/models.py:1021, in Response.raise_for_status(self)
1016 http_error_msg = (
1017 f"{self.status_code} Server Error: {reason} for url: {self.url}"
1018 )
1020 if http_error_msg:
-> 1021 raise HTTPError(http_error_msg, response=self)

Comments are closed.