Skip to content

Commit 051202a

Browse files
committed
fix: use platform namespace for http route
1 parent 1d96cb2 commit 051202a

File tree

1 file changed

+64
-21
lines changed

1 file changed

+64
-21
lines changed

src/codeflare_sdk/ray/cluster/cluster.py

Lines changed: 64 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -387,6 +387,8 @@ def is_dashboard_ready(self) -> bool:
387387
388388
This method attempts to send a GET request to the cluster dashboard URI.
389389
If the request is successful (HTTP status code 200), it returns True.
390+
For OAuth-protected dashboards, a 302 redirect to the OAuth login page
391+
also indicates the dashboard is ready (the OAuth proxy is working).
390392
If an SSL error occurs, it returns False, indicating the dashboard is not ready.
391393
392394
Returns:
@@ -399,11 +401,14 @@ def is_dashboard_ready(self) -> bool:
399401
return False
400402

401403
try:
404+
# Don't follow redirects - we want to see the redirect response
405+
# A 302 redirect from OAuth proxy indicates the dashboard is ready
402406
response = requests.get(
403407
dashboard_uri,
404408
headers=self._client_headers,
405409
timeout=5,
406410
verify=self._client_verify_tls,
411+
allow_redirects=False,
407412
)
408413
except requests.exceptions.SSLError: # pragma no cover
409414
# SSL exception occurs when oauth ingress has been created but cluster is not up
@@ -412,7 +417,11 @@ def is_dashboard_ready(self) -> bool:
412417
# Any other exception (connection errors, timeouts, etc.)
413418
return False
414419

415-
if response.status_code == 200:
420+
# Dashboard is ready if:
421+
# - 200: Dashboard is accessible (no auth required or already authenticated)
422+
# - 302: OAuth redirect - dashboard and OAuth proxy are ready, just needs authentication
423+
# - 401/403: OAuth is working and blocking unauthenticated requests - dashboard is ready
424+
if response.status_code in (200, 302, 401, 403):
416425
return True
417426
else:
418427
return False
@@ -1156,33 +1165,73 @@ def _get_dashboard_url_from_httproute(
11561165
Attempts to get the Ray dashboard URL from an HTTPRoute resource.
11571166
This is used for RHOAI v3.0+ clusters that use Gateway API.
11581167
1168+
In new RHOAI versions, Ray clusters have HTTPRoutes in the KubeRay namespace.
1169+
If no HTTPRoute is found, the cluster uses OpenShift Routes (old RHOAI) or
1170+
Ingresses (Kind), and this function should return None to allow fallback.
1171+
1172+
HTTPRoutes are labeled with ray.io/cluster-name and ray.io/cluster-namespace.
1173+
We search for the HTTPRoute directly by these labels.
1174+
11591175
Args:
11601176
cluster_name: Name of the Ray cluster
11611177
namespace: Namespace of the Ray cluster
11621178
11631179
Returns:
1164-
Dashboard URL if HTTPRoute is found, None otherwise
1180+
Dashboard URL if HTTPRoute is found, None otherwise (triggers fallback to Routes/Ingress)
11651181
"""
11661182
try:
11671183
config_check()
11681184
api_instance = client.CustomObjectsApi(get_api_client())
11691185

1170-
# Try to get HTTPRoute for this Ray cluster
1186+
label_selector = (
1187+
f"ray.io/cluster-name={cluster_name},ray.io/cluster-namespace={namespace}"
1188+
)
1189+
1190+
# Try cluster-wide search first (if permissions allow)
11711191
try:
1172-
httproute = api_instance.get_namespaced_custom_object(
1192+
httproutes = api_instance.list_cluster_custom_object(
11731193
group="gateway.networking.k8s.io",
11741194
version="v1",
1175-
namespace=namespace,
11761195
plural="httproutes",
1177-
name=cluster_name,
1196+
label_selector=label_selector,
11781197
)
1179-
except client.exceptions.ApiException as e:
1180-
if e.status == 404:
1181-
# HTTPRoute not found - this is expected for SDK v0.31.1 and below or Kind clusters
1198+
items = httproutes.get("items", [])
1199+
if items:
1200+
httproute = items[0]
1201+
else:
1202+
# No HTTPRoute found
1203+
return None
1204+
except Exception:
1205+
# No cluster-wide permissions, try namespace-specific search
1206+
search_namespaces = [
1207+
"redhat-ods-applications",
1208+
"opendatahub",
1209+
"default",
1210+
"ray-system",
1211+
]
1212+
1213+
httproute = None
1214+
for ns in search_namespaces:
1215+
try:
1216+
httproutes = api_instance.list_namespaced_custom_object(
1217+
group="gateway.networking.k8s.io",
1218+
version="v1",
1219+
namespace=ns,
1220+
plural="httproutes",
1221+
label_selector=label_selector,
1222+
)
1223+
items = httproutes.get("items", [])
1224+
if items:
1225+
httproute = items[0]
1226+
break
1227+
except client.ApiException:
1228+
continue
1229+
1230+
if not httproute:
1231+
# No HTTPRoute found
11821232
return None
1183-
raise
11841233

1185-
# Get the Gateway reference from HTTPRoute
1234+
# Extract Gateway reference and construct dashboard URL
11861235
parent_refs = httproute.get("spec", {}).get("parentRefs", [])
11871236
if not parent_refs:
11881237
return None
@@ -1203,7 +1252,6 @@ def _get_dashboard_url_from_httproute(
12031252
name=gateway_name,
12041253
)
12051254

1206-
# Extract hostname from Gateway listeners
12071255
listeners = gateway.get("spec", {}).get("listeners", [])
12081256
if not listeners:
12091257
return None
@@ -1212,14 +1260,9 @@ def _get_dashboard_url_from_httproute(
12121260
if not hostname:
12131261
return None
12141262

1215-
# Construct the dashboard URL using RHOAI v3.0+ Gateway API pattern
1216-
# The HTTPRoute existence confirms v3.0+, so we use the standard path pattern
1217-
# Format: https://{hostname}/ray/{namespace}/{cluster-name}
1218-
protocol = "https" # Gateway API uses HTTPS
1219-
dashboard_url = f"{protocol}://{hostname}/ray/{namespace}/{cluster_name}"
1220-
1221-
return dashboard_url
1263+
# Construct dashboard URL: https://{hostname}/ray/{namespace}/{cluster-name}
1264+
return f"https://{hostname}/ray/{namespace}/{cluster_name}"
12221265

1223-
except Exception as e: # pragma: no cover
1224-
# If any error occurs, return None to fall back to OpenShift Route
1266+
except Exception: # pragma: no cover
1267+
# Any error means no HTTPRoute - fall back to Routes/Ingress
12251268
return None

0 commit comments

Comments
 (0)