3fab33a8d4
The original method used only linear regression for simple trend extrapolation, which led to insufficient prediction accuracy. This optimization adopts a time series model instead: it uses the auto_arima method of pmdarima to automatically select appropriate model parameters (including p, d, q and seasonal parameters) from the historical data, which significantly improves the suitability of the model for time series modeling. As a result, the model can better capture the trend and periodicity of the data and predict future heat more reasonably and accurately.
70 lines
2.6 KiB
Python
70 lines
2.6 KiB
Python
import numpy as np
|
|
import datetime
|
|
import pandas as pd
|
|
from pmdarima import auto_arima
|
|
|
|
def datetime_to_number(date: str):
    """Return the number of days between 2024-01-01 and *date*.

    ``date`` is parsed with the ``"%Y-%m-%d"`` format. Dates earlier than
    the reference day produce negative numbers; an unparsable string makes
    ``strptime`` raise ``ValueError``.
    """
    fmt = "%Y-%m-%d"
    elapsed = (
        datetime.datetime.strptime(date, fmt)
        - datetime.datetime.strptime("2024-1-1", fmt)
    )
    return elapsed.days
|
|
|
|
def predict_future_values(data, forecast_days=5):
    """Forecast daily counts for the days following the last observed date.

    The observations are laid out on a continuous daily index (days with no
    entry count as 0), lightly smoothed with a 3-day trailing mean, and then
    fitted with a non-seasonal ARIMA model whose order is chosen by
    ``pmdarima.auto_arima``.

    Parameters:
        data: dict mapping date strings 'YYYY-MM-DD' to integer counts.
        forecast_days: int, number of days to predict into the future.

    Returns:
        dict mapping future date strings 'YYYY-MM-DD' to predicted counts,
        rounded to integers and clipped at 0. Empty when ``data`` is empty
        or ``forecast_days`` is not positive.

    Raises:
        ValueError: if a key of ``data`` does not match 'YYYY-MM-DD'.
    """
    # Nothing to forecast: return before the comparatively expensive model
    # search, and never ask the model for a zero/negative-length forecast.
    if not data or forecast_days <= 0:
        return {}

    # Parse every key exactly once; strptime also validates the format.
    observed = {
        pd.Timestamp(datetime.datetime.strptime(day, "%Y-%m-%d")): count
        for day, count in data.items()
    }
    first_day = min(observed)
    last_day = max(observed)

    # Build a gap-free daily series; days without an observation become 0.
    full_range = pd.date_range(start=first_day, end=last_day, freq='D')
    ts = pd.Series(observed, dtype=float).reindex(full_range, fill_value=0.0)

    # Mild noise reduction: trailing 3-day mean (min_periods=1 keeps the
    # first days, which have fewer than 3 predecessors, from becoming NaN).
    ts_smoothed = ts.rolling(window=3, min_periods=1).mean()

    # Stepwise search over non-seasonal ARIMA orders; failing candidate
    # models are skipped rather than aborting the search.
    model = auto_arima(ts_smoothed,
                       start_p=1, start_q=1,
                       max_p=5, max_q=5,
                       seasonal=False,
                       trace=False, error_action='ignore',
                       suppress_warnings=True, stepwise=True)

    forecast = model.predict(n_periods=forecast_days)

    # The forecast covers the days immediately after the last observation.
    future_dates = [
        last_day + datetime.timedelta(days=offset)
        for offset in range(1, forecast_days + 1)
    ]

    # Counts cannot be negative, so round to int and clip at zero.
    return {
        day.strftime("%Y-%m-%d"): max(int(round(value)), 0)
        for day, value in zip(future_dates, forecast)
    }
|
|
|
|
if __name__ == '__main__':
    # Demo run: sparse daily counts that rise sharply on the last day.
    data = {
        '2024-06-15': 1,
        '2024-06-18': 1,
        '2024-06-22': 1,
        '2024-06-23': 1,
        '2024-07-01': 3,
        '2024-07-02': 4,
        '2024-07-03': 4,
        '2024-07-04': 14,
    }
    # Forecast with the default horizon and show the resulting dict.
    preds = predict_future_values(data)
    print(preds)
|