🐛 — Fix worker timeout for old results cleaning in recurring tasks (fix #84)

💥 Old results are now removed based on their age, not on their number.

💥 Warning: `max_results` setting has been replaced by `max_results_age`, which is a duration.
Use `argos server generate-config > /etc/argos/config.yaml-dist` to generate
a new example configuration file.
Author: Luc Didry
Date: 2025-02-18 17:04:26 +01:00
Parent: 32f2518294
Commit: 211ac32028
GPG key ID: EA868E12D0257E3C
8 changed files with 48 additions and 52 deletions
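
For illustration only (not part of the commit), the retention rule described above boils down to an age check: a result survives the cleanup as long as it was submitted after now minus `max_results_age`. A minimal sketch, with the age already expressed in seconds the way the server stores it:

# Illustration only, not part of the commit: the new age-based retention rule.
from datetime import datetime, timedelta

def is_kept(submitted_at: datetime, max_results_age: float) -> bool:
    """A result survives the cleanup iff it is younger than max_results_age seconds."""
    cutoff = datetime.now() - timedelta(seconds=max_results_age)
    return submitted_at >= cutoff

one_day = 24 * 3600.0  # what a max_results_age of "1d" amounts to, in seconds
print(is_kept(datetime.now() - timedelta(hours=1), one_day))  # True: one hour old
print(is_kept(datetime.now() - timedelta(days=2), one_day))   # False: two days old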


@@ -2,6 +2,14 @@
 ## [Unreleased]
 
+- 🐛 — Fix worker timeout for old results cleaning in recurring tasks (#84)
+
+  💥 Old results are now removed based on their age, not on their number.
+
+  💥 Warning: `max_results` setting has been replaced by `max_results_age`, which is a duration.
+  Use `argos server generate-config > /etc/argos/config.yaml-dist` to generate
+  a new example configuration file.
+
 ## 0.8.2
 
 Date: 2025-02-18
@@ -26,7 +34,7 @@ Date: 2025-02-18
 - 🐛 — Automatically reconnect to LDAP if unreachable (#81)
 - 🐛 — Better httpx.RequestError handling (#83)
 
-💥 Warning: there is now new settings to add to your configuration file.
+💥 Warning: there is new settings to add to your configuration file.
 Use `argos server generate-config > /etc/argos/config.yaml-dist` to generate
 a new example configuration file.


@@ -152,9 +152,11 @@ ssl:
 # Argos will execute some tasks in the background for you
 # every 2 minutes and needs some configuration for that
 recurring_tasks:
-  # Max number of results per tasks you want to keep
-  # Minimum value is 1, default is 100
-  max_results: 100
+  # Maximum age of results
+  # Use m for minutes, h for hours, d for days
+  # w for weeks, M for months, y for years
+  # See https://github.com/timwedde/durations_nlp#scales-reference for details
+  max_results_age: "1d"
   # Max number of seconds a task can be locked
   # Minimum value is 61, default is 100
   max_lock_seconds: 100
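
As a rough illustration of the duration syntax described above: such strings convert to seconds via the durations_nlp library linked in the comment. The `durations_nlp` import path below is an assumption; the diff itself only shows `Duration(value).to_seconds()` being called.

# Hedged sketch: converting max_results_age duration strings into seconds.
# Assumes the durations_nlp package linked above; the import path is assumed.
from durations_nlp import Duration

for value in ("30m", "6h", "1d", "2w"):
    print(value, "->", Duration(value).to_seconds(), "seconds")
# "1d" -> 86400.0, for instance, so the example value keeps one day of results.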


@@ -49,17 +49,14 @@ class SSL(BaseModel):
 class RecurringTasks(BaseModel):
-    max_results: int
+    max_results_age: float
     max_lock_seconds: int
     time_without_agent: int
 
-    @field_validator("max_results", mode="before")
-    def parse_max_results(cls, value):
-        """Ensure that max_results is higher than 0"""
-        if value >= 1:
-            return value
-        return 100
+    @field_validator("max_results_age", mode="before")
+    def parse_max_results_age(cls, value):
+        """Convert the configured maximum results age to seconds"""
+        return Duration(value).to_seconds()
 
     @field_validator("max_lock_seconds", mode="before")
     def parse_max_lock_seconds(cls, value):
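
For context, here is a self-contained sketch of how a model along these lines behaves once the `before` validator has run. Pydantic v2, the `@classmethod` decoration and the `durations_nlp` import path are assumptions, and the class below is a stand-in rather than the project's actual schema:

# Standalone sketch, not the project's code (assumes pydantic v2 and durations_nlp).
from durations_nlp import Duration
from pydantic import BaseModel, field_validator


class RecurringTasks(BaseModel):
    max_results_age: float
    max_lock_seconds: int
    time_without_agent: int

    @field_validator("max_results_age", mode="before")
    @classmethod
    def parse_max_results_age(cls, value):
        """Convert the configured maximum results age to seconds."""
        return Duration(value).to_seconds()


config = RecurringTasks(max_results_age="1d", max_lock_seconds=100, time_without_agent=300)
print(config.max_results_age)  # 86400.0, a plain float, ready for timedelta(seconds=...)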


@@ -133,15 +133,17 @@ async def recurring_tasks() -> None:
     with app.state.SessionLocal() as db:
         config = app.state.config.recurring_tasks
 
-        removed = await queries.remove_old_results(db, config.max_results)
-        logger.info("%i result(s) removed", removed)
-
-        updated = await queries.release_old_locks(db, config.max_lock_seconds)
-        logger.info("%i lock(s) released", updated)
-
         agents = await queries.get_recent_agents_count(db, config.time_without_agent)
         if agents == 0:
             no_agent_alert(app.state.config)
+        logger.info("Agent presence checked")
+
+        removed = await queries.remove_old_results(db, config.max_results_age)
+        logger.info("%i result(s) removed", removed)
+
+        updated = await queries.release_old_locks(db, config.max_lock_seconds)
+        logger.info("%i lock(s) released", updated)
 
         processed_jobs = await queries.process_jobs(db)
         logger.info("%i job(s) processed", processed_jobs)


@@ -4,7 +4,7 @@ from hashlib import sha256
 from typing import List
 from urllib.parse import urljoin
 
-from sqlalchemy import asc, desc, func, Select
+from sqlalchemy import asc, func, Select
 from sqlalchemy.orm import Session
 
 from argos import schemas
@@ -409,28 +409,13 @@
     db.commit()
 
 
-async def remove_old_results(db: Session, max_results: int):
-    tasks = db.query(Task).all()
-    deleted = 0
-    for task in tasks:
-        # Get the id of the oldest result to keep
-        subquery = (
-            db.query(Result.id)
-            .filter(Result.task_id == task.id)
-            .order_by(desc(Result.id))
-            .limit(max_results)
-            .subquery()
-        )
-        min_id = db.query(func.min(subquery.c.id)).scalar()  # pylint: disable-msg=not-callable
-        # Delete all the results older than min_id
-        if min_id:
-            deleted += (
-                db.query(Result)
-                .where(Result.id < min_id, Result.task_id == task.id)
-                .delete()
-            )
-            db.commit()
+async def remove_old_results(db: Session, max_results_age: float):
+    """Remove old results, based on age"""
+    max_acceptable_time = datetime.now() - timedelta(seconds=max_results_age)
+    deleted = (
+        db.query(Result).filter(Result.submitted_at < max_acceptable_time).delete()
+    )
+    db.commit()
 
     return deleted
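
To see this single-query, age-based deletion in isolation, here is a hedged, self-contained sketch against an in-memory SQLite database. The `Result` model below is a stripped-down stand-in for the project's own model, not its real schema:

# Standalone sketch of the age-based bulk delete, not the project's code.
from datetime import datetime, timedelta

from sqlalchemy import DateTime, create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column


class Base(DeclarativeBase):
    pass


class Result(Base):
    """Stand-in for the Result model: only the columns the query needs."""
    __tablename__ = "results"
    id: Mapped[int] = mapped_column(primary_key=True)
    submitted_at: Mapped[datetime] = mapped_column(DateTime)


def remove_old_results(db: Session, max_results_age: float) -> int:
    """Delete every result older than max_results_age seconds, in one query."""
    max_acceptable_time = datetime.now() - timedelta(seconds=max_results_age)
    deleted = db.query(Result).filter(Result.submitted_at < max_acceptable_time).delete()
    db.commit()
    return deleted


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)
with Session(engine) as db:
    now = datetime.now()
    db.add_all([Result(submitted_at=now - timedelta(seconds=age)) for age in (0, 2, 4, 6, 8)])
    db.commit()
    print(remove_old_results(db, 6))  # 2: only the 6 s and 8 s old rows are gone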


@@ -60,7 +60,7 @@ Options:
   --max-tasks INTEGER             Number of concurrent tasks this agent can run
   --wait-time INTEGER             Waiting time between two polls on the server
                                   (seconds)
-  --log-level [DEBUG|INFO|WARNING|ERROR|CRITICAL]
+  --log-level [debug|info|warning|error|critical]
   --user-agent TEXT               A custom string to append to the User-Agent
                                   header
   --help                          Show this message and exit.


@@ -43,9 +43,11 @@ ssl:
 # Argos will execute some tasks in the background for you
 # every 2 minutes and needs some configuration for that
 recurring_tasks:
-  # Max number of results per tasks you want to keep
-  # Minimum value is 1, default is 100
-  max_results: 100
+  # Maximum age of results
+  # Use m for minutes, h for hours, d for days
+  # w for weeks, M for months, y for years
+  # See https://github.com/timwedde/durations_nlp#scales-reference for details
+  max_results_age: "1d"
   # Max number of seconds a task can be locked
   # Minimum value is 61, default is 100
   max_lock_seconds: 100


@@ -10,9 +10,9 @@ from argos.server.models import Result, Task, User
 @pytest.mark.asyncio
 async def test_remove_old_results(db, ten_tasks):  # pylint: disable-msg=redefined-outer-name
     for _task in ten_tasks:
-        for _ in range(5):
+        for iterator in range(5):
             result = Result(
-                submitted_at=datetime.now(),
+                submitted_at=datetime.now() - timedelta(seconds=iterator * 2),
                 status="success",
                 context={"foo": "bar"},
                 task=_task,
@@ -24,12 +24,12 @@ async def test_remove_old_results(db, ten_tasks):  # pylint: disable-msg=redefi
     # So we have 5 results per tasks
     assert db.query(Result).count() == 50
 
-    # Keep only 2
-    deleted = await queries.remove_old_results(db, 2)
-    assert deleted == 30
-    assert db.query(Result).count() == 20
+    # Keep only those newer than 6 seconds ago
+    deleted = await queries.remove_old_results(db, 6)
+    assert deleted == 20
+    assert db.query(Result).count() == 30
     for _task in ten_tasks:
-        assert db.query(Result).filter(Result.task == _task).count() == 2
+        assert db.query(Result).filter(Result.task == _task).count() == 3
 
 
 @pytest.mark.asyncio
@@ -245,7 +245,7 @@ def empty_config():
         ),
         ssl=schemas.config.SSL(thresholds=[]),
         recurring_tasks=schemas.config.RecurringTasks(
-            max_results=100,
+            max_results_age="6s",
             max_lock_seconds=120,
             time_without_agent=300,
         ),
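
As a quick, illustrative sanity check on the updated expectations (not part of the commit): each task gets five results aged 0, 2, 4, 6 and 8 seconds, and by the time the cleanup runs the 6-second-old one has drifted just past the 6-second cutoff, so two results per task go away:

# Back-of-the-envelope check of the test's numbers, illustration only.
ages = [i * 2 for i in range(5)]                       # 0, 2, 4, 6, 8 seconds per task
removed_per_task = sum(1 for age in ages if age >= 6)  # the 6 s and 8 s old results
print(removed_per_task * 10)                           # 20 results deleted across ten tasks
print((5 - removed_per_task) * 10)                     # 30 results (3 per task) remain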