fix: add retry limit to startup wait loops to prevent infinite hang

Each service wait loop now fails after 30 retries (~60s) instead of
spinning forever when a port is occupied by a stuck process.
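Also added cleanup that kills partially-started services on failure: a shared :cleanup label in the script that starts all three services, and inline taskkill commands in the other script.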
This commit is contained in:
2026-05-23 09:20:55 +08:00
parent 40adf50702
commit 9a4f51d378
2 changed files with 51 additions and 4 deletions
+12 -1
View File
@@ -29,10 +29,21 @@ start "jrxml-validator" /MIN .venv\Scripts\python.exe -c "import uvicorn; uvicor
:: Wait for the validation service to become ready (probed via PowerShell).
:: Polls http://localhost:8001/health and gives up after 30 failed probes,
:: killing the validator and exiting with code 1.
echo [等待] 验证服务就绪...
set /a RETRY=0
:wait_val
:: ping to loopback is used here as a short delay between probes.
ping -n 2 127.0.0.1 >nul
:: The probe exits 0 when the request succeeds and 1 when it throws.
:: -NoProfile skips loading the user's PowerShell profile on every probe.
powershell -NoProfile -Command "try{$r=Invoke-WebRequest -Uri http://localhost:8001/health -TimeoutSec 2 -UseBasicParsing;exit 0}catch{exit 1}" >nul 2>&1
:: Success leaves the loop; failure must fall through to the retry counter
:: (an unconditional jump back to wait_val here would bypass the limit).
if not errorlevel 1 goto val_ok
set /a RETRY+=1
if %RETRY% GEQ 30 (
    echo [失败] 验证服务启动超时,请检查端口 8001 是否被占用
    taskkill /F /FI "WINDOWTITLE eq jrxml-validator*" >nul 2>&1
    for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":8001.*LISTENING"') do taskkill /F /PID %%a >nul 2>&1
    pause
    exit /b 1
)
goto wait_val
:val_ok
echo :8001 就绪
:: Start the API service (foreground; Ctrl+C to exit)
+39 -3
View File
@@ -23,19 +23,35 @@ echo.
:: 1. Validation service
:: Starts uvicorn for validation_service.main:app on port 8001 in a minimized
:: window titled "jrxml-validator", then polls /health until it answers.
:: After 30 failed probes the script jumps to the cleanup label.
echo [1/3] 验证服务 :8001
start "jrxml-validator" /MIN .venv\Scripts\python.exe -c "import uvicorn; uvicorn.run('validation_service.main:app',host='0.0.0.0',port=8001,reload=False)"
set /a RETRY=0
:wait_val
:: ping to loopback is used here as a short delay between probes.
ping -n 2 127.0.0.1 >nul
:: The probe exits 0 when the request succeeds and 1 when it throws.
:: -NoProfile skips loading the user's PowerShell profile on every probe.
powershell -NoProfile -Command "try{$r=Invoke-WebRequest -Uri http://localhost:8001/health -TimeoutSec 2 -UseBasicParsing;exit 0}catch{exit 1}" >nul 2>&1
:: Success leaves the loop; failure must fall through to the retry counter
:: (an unconditional jump back to wait_val here would bypass the limit).
if not errorlevel 1 goto val_ok
set /a RETRY+=1
if %RETRY% GEQ 30 (
    echo [失败] 验证服务启动超时,请检查端口 8001 是否被占用
    goto cleanup
)
goto wait_val
:val_ok
echo :8001 就绪
:: 2. API service
:: Starts uvicorn for api_server:app on port 8000 in a minimized window
:: titled "jrxml-api", then polls /api/health until it answers.
:: After 30 failed probes the script jumps to the cleanup label.
echo [2/3] API 服务 :8000
start "jrxml-api" /MIN .venv\Scripts\python.exe -c "import uvicorn; uvicorn.run('api_server:app',host='0.0.0.0',port=8000,reload=False)"
set /a RETRY=0
:wait_api
:: ping to loopback is used here as a short delay between probes.
ping -n 2 127.0.0.1 >nul
:: The probe exits 0 when the request succeeds and 1 when it throws.
:: -NoProfile skips loading the user's PowerShell profile on every probe.
powershell -NoProfile -Command "try{$r=Invoke-WebRequest -Uri http://localhost:8000/api/health -TimeoutSec 2 -UseBasicParsing;exit 0}catch{exit 1}" >nul 2>&1
:: Success leaves the loop; failure must fall through to the retry counter
:: (an unconditional jump back to wait_api here would bypass the limit).
if not errorlevel 1 goto api_ok
set /a RETRY+=1
if %RETRY% GEQ 30 (
    echo [失败] API 服务启动超时,请检查端口 8000 是否被占用
    goto cleanup
)
goto wait_api
:api_ok
echo :8000 就绪
:: 3. 前端
@@ -49,10 +65,18 @@ if not exist "%~dp0frontend\node_modules" (
:: Launch the frontend dev server from the frontend directory in a minimized
:: window titled "jrxml-frontend", return to the script directory, then poll
:: http://localhost:5173 until it answers. After 30 failed probes the script
:: jumps to the cleanup label.
cd /d "%~dp0frontend"
start "jrxml-frontend" /MIN cmd /c "npm run dev"
cd /d "%~dp0"
set /a RETRY=0
:wait_fe
:: ping to loopback is used here as a short delay between probes.
ping -n 3 127.0.0.1 >nul
:: The probe exits 0 when the request succeeds and 1 when it throws.
:: -NoProfile skips loading the user's PowerShell profile on every probe.
powershell -NoProfile -Command "try{$r=Invoke-WebRequest -Uri http://localhost:5173 -TimeoutSec 3 -UseBasicParsing;exit 0}catch{exit 1}" >nul 2>&1
:: Success leaves the loop; failure must fall through to the retry counter
:: (an unconditional jump back to wait_fe here would bypass the limit).
if not errorlevel 1 goto fe_ok
set /a RETRY+=1
if %RETRY% GEQ 30 (
    echo [失败] 前端启动超时,请检查端口 5173 是否被占用
    goto cleanup
)
goto wait_fe
:fe_ok
echo :5173 就绪
echo.
@@ -64,3 +88,15 @@ echo 验证: http://localhost:8001/health
echo 运行 stop.bat 停止所有服务
echo ================================================
pause
exit /b 0
:: ---------------------------------------------------------------------
:: cleanup - reached via "goto cleanup" when a readiness wait gives up.
:: Stops every service this script may already have started, then exits
:: with code 1. All kill commands are best-effort: their output and errors
:: are discarded because some of the targets may not exist.
:: ---------------------------------------------------------------------
:cleanup
echo [清理] 停止已启动的服务...
:: First pass: kill by the window titles given to the "start" commands.
:: /T also ends child processes, which matters for the frontend because it
:: is launched through a "cmd /c" wrapper.
taskkill /F /T /FI "WINDOWTITLE eq jrxml-validator*" >nul 2>&1
taskkill /F /T /FI "WINDOWTITLE eq jrxml-api*" >nul 2>&1
taskkill /F /T /FI "WINDOWTITLE eq jrxml-frontend*" >nul 2>&1
:: Second pass: kill whatever is still listening on the service ports so a
:: retry can bind them. Token 5 of a "netstat -ano" row is the PID. The
:: space after the port number stops e.g. ":5173" from matching ":51730".
for /f "tokens=5" %%a in ('netstat -ano ^| findstr /R /C:":8001 .*LISTENING"') do taskkill /F /PID %%a >nul 2>&1
for /f "tokens=5" %%a in ('netstat -ano ^| findstr /R /C:":8000 .*LISTENING"') do taskkill /F /PID %%a >nul 2>&1
for /f "tokens=5" %%a in ('netstat -ano ^| findstr /R /C:":5173 .*LISTENING"') do taskkill /F /PID %%a >nul 2>&1
echo 已清理,请重试
pause
exit /b 1