fix: add retry limit to startup wait loops to prevent infinite hang

Each service wait loop now gives up after 30 failed health checks (roughly
60 s) instead of spinning forever when a port is occupied by a stuck process.
Also adds a cleanup label that kills partially started services on failure.
This commit is contained in:
2026-05-23 09:20:55 +08:00
parent 40adf50702
commit 9a4f51d378
2 changed files with 51 additions and 4 deletions
+12 -1
View File
@@ -29,10 +29,21 @@ start "jrxml-validator" /MIN .venv\Scripts\python.exe -c "import uvicorn; uvicor
:: 等待验证服务就绪 (用 PowerShell 检测) :: 等待验证服务就绪 (用 PowerShell 检测)
echo [等待] 验证服务就绪... echo [等待] 验证服务就绪...
set /a RETRY=0
:wait_val :wait_val
ping -n 2 127.0.0.1 >nul ping -n 2 127.0.0.1 >nul
powershell -Command "try{$r=Invoke-WebRequest -Uri http://localhost:8001/health -TimeoutSec 2 -UseBasicParsing;exit 0}catch{exit 1}" >nul 2>&1 powershell -Command "try{$r=Invoke-WebRequest -Uri http://localhost:8001/health -TimeoutSec 2 -UseBasicParsing;exit 0}catch{exit 1}" >nul 2>&1
if errorlevel 1 goto wait_val if not errorlevel 1 goto val_ok
set /a RETRY+=1
if %RETRY% GEQ 30 (
echo [失败] 验证服务启动超时,请检查端口 8001 是否被占用
taskkill /F /FI "WINDOWTITLE eq jrxml-validator*" >nul 2>&1
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":8001.*LISTENING"') do taskkill /F /PID %%a >nul 2>&1
pause
exit /b 1
)
goto wait_val
:val_ok
echo :8001 就绪 echo :8001 就绪
:: 启动 API 服务 (前台,Ctrl+C 退出) :: 启动 API 服务 (前台,Ctrl+C 退出)
+39 -3
View File
@@ -23,19 +23,35 @@ echo.
:: 1. 验证服务 :: 1. 验证服务
echo [1/3] 验证服务 :8001 echo [1/3] 验证服务 :8001
start "jrxml-validator" /MIN .venv\Scripts\python.exe -c "import uvicorn; uvicorn.run('validation_service.main:app',host='0.0.0.0',port=8001,reload=False)" start "jrxml-validator" /MIN .venv\Scripts\python.exe -c "import uvicorn; uvicorn.run('validation_service.main:app',host='0.0.0.0',port=8001,reload=False)"
set /a RETRY=0
:wait_val :wait_val
ping -n 2 127.0.0.1 >nul ping -n 2 127.0.0.1 >nul
powershell -Command "try{$r=Invoke-WebRequest -Uri http://localhost:8001/health -TimeoutSec 2 -UseBasicParsing;exit 0}catch{exit 1}" >nul 2>&1 powershell -Command "try{$r=Invoke-WebRequest -Uri http://localhost:8001/health -TimeoutSec 2 -UseBasicParsing;exit 0}catch{exit 1}" >nul 2>&1
if errorlevel 1 goto wait_val if not errorlevel 1 goto val_ok
set /a RETRY+=1
if %RETRY% GEQ 30 (
echo [失败] 验证服务启动超时,请检查端口 8001 是否被占用
goto cleanup
)
goto wait_val
:val_ok
echo :8001 就绪 echo :8001 就绪
:: 2. API 服务 :: 2. API 服务
echo [2/3] API 服务 :8000 echo [2/3] API 服务 :8000
start "jrxml-api" /MIN .venv\Scripts\python.exe -c "import uvicorn; uvicorn.run('api_server:app',host='0.0.0.0',port=8000,reload=False)" start "jrxml-api" /MIN .venv\Scripts\python.exe -c "import uvicorn; uvicorn.run('api_server:app',host='0.0.0.0',port=8000,reload=False)"
set /a RETRY=0
:wait_api :wait_api
ping -n 2 127.0.0.1 >nul ping -n 2 127.0.0.1 >nul
powershell -Command "try{$r=Invoke-WebRequest -Uri http://localhost:8000/api/health -TimeoutSec 2 -UseBasicParsing;exit 0}catch{exit 1}" >nul 2>&1 powershell -Command "try{$r=Invoke-WebRequest -Uri http://localhost:8000/api/health -TimeoutSec 2 -UseBasicParsing;exit 0}catch{exit 1}" >nul 2>&1
if errorlevel 1 goto wait_api if not errorlevel 1 goto api_ok
set /a RETRY+=1
if %RETRY% GEQ 30 (
echo [失败] API 服务启动超时,请检查端口 8000 是否被占用
goto cleanup
)
goto wait_api
:api_ok
echo :8000 就绪 echo :8000 就绪
:: 3. 前端 :: 3. 前端
@@ -49,10 +65,18 @@ if not exist "%~dp0frontend\node_modules" (
cd /d "%~dp0frontend" cd /d "%~dp0frontend"
start "jrxml-frontend" /MIN cmd /c "npm run dev" start "jrxml-frontend" /MIN cmd /c "npm run dev"
cd /d "%~dp0" cd /d "%~dp0"
set /a RETRY=0
:wait_fe :wait_fe
ping -n 3 127.0.0.1 >nul ping -n 3 127.0.0.1 >nul
powershell -Command "try{$r=Invoke-WebRequest -Uri http://localhost:5173 -TimeoutSec 3 -UseBasicParsing;exit 0}catch{exit 1}" >nul 2>&1 powershell -Command "try{$r=Invoke-WebRequest -Uri http://localhost:5173 -TimeoutSec 3 -UseBasicParsing;exit 0}catch{exit 1}" >nul 2>&1
if errorlevel 1 goto wait_fe if not errorlevel 1 goto fe_ok
set /a RETRY+=1
if %RETRY% GEQ 30 (
echo [失败] 前端启动超时,请检查端口 5173 是否被占用
goto cleanup
)
goto wait_fe
:fe_ok
echo :5173 就绪 echo :5173 就绪
echo. echo.
@@ -64,3 +88,15 @@ echo 验证: http://localhost:8001/health
echo 运行 stop.bat 停止所有服务 echo 运行 stop.bat 停止所有服务
echo ================================================ echo ================================================
pause pause
exit /b 0
:: cleanup - failure path reached via "goto cleanup" when a startup wait loop
:: gives up. Stops whatever has been started so far, then exits with code 1.
:cleanup
echo [清理] 停止已启动的服务...
:: Kill the service windows by title prefix. Output and errors are discarded
:: because some of these windows may never have been started.
taskkill /F /FI "WINDOWTITLE eq jrxml-validator*" >nul 2>&1
taskkill /F /FI "WINDOWTITLE eq jrxml-api*" >nul 2>&1
:: The frontend window runs cmd /c "npm run dev"; /T also terminates the child
:: processes started from it, so the dev server does not outlive its window.
taskkill /F /T /FI "WINDOWTITLE eq jrxml-frontend*" >nul 2>&1
:: Sweep by port as well: token 5 of each "netstat -ano" LISTENING row is the
:: owning PID. All three service ports are covered.
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":8001.*LISTENING"') do taskkill /F /PID %%a >nul 2>&1
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":8000.*LISTENING"') do taskkill /F /PID %%a >nul 2>&1
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":5173.*LISTENING"') do taskkill /F /PID %%a >nul 2>&1
echo 已清理,请重试
pause
exit /b 1