— No need cron tasks for agents watching (fix #76)

This commit is contained in:
Luc Didry 2025-02-17 14:54:25 +01:00
parent fd0c68cd4c
commit 8d82f7f9d6
No known key found for this signature in database
GPG key ID: EA868E12D0257E3C
10 changed files with 142 additions and 128 deletions

View file

@ -5,6 +5,7 @@
- ✨ — Allow to customize agent User-Agent header (#78)
- 📝 — Document how to add data to requests (#77)
- ✨ — No need cron tasks for DB cleaning anymore (#74 and #75)
- ✨ — No need cron tasks for agents watching (#76)
## 0.7.4

View file

@ -140,47 +140,6 @@ def start(host, port, config, reload):
uvicorn.run("argos.server:app", host=host, port=port, reload=reload)
def validate_time_without_agent(ctx, param, value):
    """Click option callback rejecting zero and negative values.

    Returns ``value`` unchanged when it is acceptable, otherwise raises
    ``click.BadParameter``.
    """
    is_invalid = value <= 0
    if is_invalid:
        raise click.BadParameter("Should be a positive integer")

    return value
@server.command()
@click.option(
    "--time-without-agent",
    default=5,
    help=(
        "Time without seeing an agent after which a warning will be issued, in minutes. "
        "Default is 5 minutes."
    ),
    callback=validate_time_without_agent,
)
@click.option(
    "--config",
    default="argos-config.yaml",
    help=(
        "Path of the configuration file. "
        "If ARGOS_YAML_FILE environment variable is set, its value will be used instead."
    ),
    envvar="ARGOS_YAML_FILE",
    callback=validate_config_access,
)
@coroutine
async def watch_agents(time_without_agent, config):
    """Watch agents (to run routinely)

    Issues a warning if no agent has been seen by the server for a given time.
    """
    # The environment variable has to be set before the server modules are
    # imported below.
    os.environ["ARGOS_YAML_FILE"] = config

    # Imported here rather than at module level, otherwise the agent would
    # need the server configuration files.
    from argos.server import queries

    session = await get_db()
    seen_agents = await queries.get_recent_agents_count(session, time_without_agent)
    if seen_agents != 0:
        return

    # No agent showed up in the requested window: report it and exit non-zero.
    click.echo(f"No agent has been seen in the last {time_without_agent} minutes.")
    sysexit(1)
@server.command(short_help="Load or reload tasks configuration")
@click.option(
"--config",
@ -537,6 +496,7 @@ async def test_mail(config, domain, severity):
check="body-contains",
expected="foo",
frequency=1,
ip_version=4,
selected_by="test",
selected_at=now,
)
@ -601,6 +561,7 @@ async def test_gotify(config, domain, severity):
check="body-contains",
expected="foo",
frequency=1,
ip_version=4,
selected_by="test",
selected_at=now,
)
@ -668,6 +629,7 @@ async def test_apprise(config, domain, severity, apprise_group):
check="body-contains",
expected="foo",
frequency=1,
ip_version=4,
selected_by="test",
selected_at=now,
)

View file

@ -102,6 +102,10 @@ general:
- local
unknown:
- local
# This alert is triggered when no Argos agent has been seen in a while
# See recurring_tasks.time_without_agent below
no_agent:
- local
# Mail configuration is quite straight-forward
# mail:
# mailfrom: no-reply@example.org
@ -145,15 +149,19 @@ ssl:
- "1d": critical
- "5d": warning
# Argos will do some cleaning in the background for you
# Argos will execute some tasks in the background for you
# every 2 minutes and needs some configuration for that
cleaning:
recurring_tasks:
# Max number of results per tasks you want to keep
# Minimum value is 1, default is 100
max_results: 100
# Max number of seconds a task can be locked
# Minimum value is 61, default is 100
max_lock_seconds: 100
# Max number of minutes without seeing an agent
# before sending an alert
# Minimum value is 1, default is 5
time_without_agent: 5
# It's also possible to define the checks in another file
# with the include syntax:

View file

@ -48,9 +48,10 @@ class SSL(BaseModel):
thresholds: List[Annotated[Tuple[int, Severity], BeforeValidator(parse_threshold)]]
class Cleaning(BaseModel):
class RecurringTasks(BaseModel):
max_results: int
max_lock_seconds: int
time_without_agent: int
@field_validator("max_results", mode="before")
def parse_max_results(cls, value):
@ -68,6 +69,14 @@ class Cleaning(BaseModel):
return 100
@field_validator("time_without_agent", mode="before")
def parse_time_without_agent(cls, value):
    """Keep time_without_agent when it is at least 1 (minute), else use 5"""
    return value if value >= 1 else 5
class WebsiteCheck(BaseModel):
key: str
@ -211,6 +220,7 @@ class Alert(BaseModel):
warning: List[str]
critical: List[str]
unknown: List[str]
no_agent: List[str]
class GotifyUrl(BaseModel):
@ -285,5 +295,5 @@ class Config(BaseModel):
general: General
service: Service
ssl: SSL
cleaning: Cleaning
recurring_tasks: RecurringTasks
websites: List[Website]

View file

@ -74,6 +74,91 @@ def get_icon_from_severity(severity: str) -> str:
return icon
def send_mail(mail: EmailMessage, config: Mail):
    """Send message by mail

    Opens an SMTP connection (implicit SSL when ``config.ssl`` is set, a plain
    connection otherwise, upgraded with STARTTLS when ``config.starttls`` is
    set), logs in when ``config.auth`` is provided, then sends ``mail`` once
    per address of ``config.addresses``.

    The connection is closed when the function returns or raises.
    """
    if config.ssl:
        logger.debug("Mail notification: SSL")
        context = ssl.create_default_context()
        smtp = smtplib.SMTP_SSL(host=config.host, port=config.port, context=context)
    else:
        smtp = smtplib.SMTP(
            host=config.host,  # type: ignore
            port=config.port,
        )

    # smtplib clients are context managers: leaving the block issues QUIT and
    # closes the socket, so the connection is no longer leaked, even when
    # STARTTLS, login or sending fails.
    with smtp:
        # NOTE(review): STARTTLS is applied to the plain connection only;
        # confirm this matches the nesting of the original code.
        if not config.ssl and config.starttls:
            logger.debug("Mail notification: STARTTLS")
            context = ssl.create_default_context()
            smtp.starttls(context=context)

        if config.auth is not None:
            logger.debug("Mail notification: authentification")
            smtp.login(config.auth.login, config.auth.password)

        for address in config.addresses:
            logger.debug("Sending mail to %s", address)
            logger.debug(mail.get_body())
            smtp.send_message(mail, to_addrs=address)
def send_gotify_msg(config, payload):
    """Send message with gotify

    Posts ``payload`` as JSON to the ``message`` endpoint of every gotify
    server in ``config``, once per token of that server. A failure is logged
    and the remaining tokens and servers are still tried.
    """
    headers = {"accept": "application/json", "content-type": "application/json"}

    for url in config:
        logger.debug("Sending gotify message(s) to %s", url.url)
        for token in url.tokens:
            try:
                res = httpx.post(
                    f"{url.url}message",
                    params={"token": token},
                    headers=headers,
                    json=payload,
                )
                res.raise_for_status()
            # httpx.HTTPError is the common base of RequestError (transport
            # failures) and HTTPStatusError (raised by raise_for_status() on
            # 4xx/5xx responses). Catching only RequestError let error
            # responses propagate and abort the remaining notifications.
            except httpx.HTTPError as err:
                logger.error(
                    "An error occurred while sending a message to %s with token %s",
                    err.request.url,
                    token,
                )
def no_agent_alert(config: Config):
    """Alert that no agent has been seen recently

    The alert is dispatched through every channel listed in
    ``config.general.alerts.no_agent``: local log, mail, gotify and
    apprise groups (entries prefixed with ``apprise:``).
    """
    general = config.general
    channels = general.alerts.no_agent

    body = "You should check whats going on with your Argos agents."
    minutes = config.recurring_tasks.time_without_agent
    subject = (
        f"No agent has been seen within the last {minutes} minutes"
        if minutes > 1
        else "No agent has been seen within the last minute"
    )

    if "local" in channels:
        logger.error(subject)

    if general.mail is not None and "mail" in channels:
        message = EmailMessage()
        message["Subject"] = f"[Argos] {subject}"
        message["From"] = general.mail.mailfrom
        message.set_content(body)
        send_mail(message, general.mail)

    if general.gotify is not None and "gotify" in channels:
        send_gotify_msg(
            general.gotify,
            {"title": subject, "message": body, "priority": 9},
        )

    if general.apprise is None:
        return

    for channel in channels:
        if not channel.startswith("apprise:"):
            continue

        # Everything after the "apprise:" prefix is the group name.
        group = channel[8:]
        notifier = apprise.Apprise()
        for target in general.apprise[group]:
            notifier.add(target)
        notifier.notify(title=subject, body=body)
def handle_alert(config: Config, result, task, severity, old_severity, request): # pylint: disable-msg=too-many-positional-arguments
"""Dispatch alert through configured alert channels"""
@ -163,36 +248,13 @@ See results of task on {request.url_for('get_task_results_view', task_id=task.id
] = f"[Argos] {icon} {urlparse(task.url).netloc} (IPv{task.ip_version}): status {severity}"
mail["From"] = config.mailfrom
mail.set_content(msg)
if config.ssl:
logger.debug("Mail notification: SSL")
context = ssl.create_default_context()
smtp = smtplib.SMTP_SSL(host=config.host, port=config.port, context=context)
else:
smtp = smtplib.SMTP(
host=config.host, # type: ignore
port=config.port,
)
if config.starttls:
logger.debug("Mail notification: STARTTLS")
context = ssl.create_default_context()
smtp.starttls(context=context)
if config.auth is not None:
logger.debug("Mail notification: authentification")
smtp.login(config.auth.login, config.auth.password)
for address in config.addresses:
logger.debug("Sending mail to %s", address)
logger.debug(msg)
smtp.send_message(mail, to_addrs=address)
send_mail(mail, config)
def notify_with_gotify( # pylint: disable-msg=too-many-positional-arguments
result, task, severity: str, old_severity: str, config: List[GotifyUrl], request
) -> None:
logger.debug("Will send gotify notification")
headers = {"accept": "application/json", "content-type": "application/json"}
icon = get_icon_from_severity(severity)
priority = 9
@ -228,20 +290,4 @@ See results of task on <{request.url_for('get_task_results_view', task_id=task.i
payload = {"title": subject, "message": msg, "priority": priority, "extras": extras}
for url in config:
logger.debug("Sending gotify message(s) to %s", url.url)
for token in url.tokens:
try:
res = httpx.post(
f"{url.url}message",
params={"token": token},
headers=headers,
json=payload,
)
res.raise_for_status()
except httpx.RequestError as err:
logger.error(
"An error occurred while sending a message to %s with token %s",
err.request.url,
token,
)
send_gotify_msg(config, payload)

View file

@ -13,6 +13,7 @@ from sqlalchemy.orm import sessionmaker
from argos.logging import logger, set_log_level
from argos.server import models, routes, queries
from argos.server.alerting import no_agent_alert
from argos.server.exceptions import NotAuthenticatedException, auth_exception_handler
from argos.server.settings import read_yaml_config
@ -128,20 +129,24 @@ def create_manager(cookie_secret: str) -> LoginManager:
@repeat_every(seconds=120, logger=logger)
async def cleanup() -> None:
async def recurring_tasks() -> None:
"""Recurring DB cleanup and watch-agents tasks"""
set_log_level("info", quiet=True)
logger.info("Start DB cleanup tasks.")
logger.info("Start background recurring tasks")
with app.state.SessionLocal() as db:
removed = await queries.remove_old_results(
db, app.state.config.cleaning.max_results
)
updated = await queries.release_old_locks(
db, app.state.config.cleaning.max_lock_seconds
)
config = app.state.config.recurring_tasks
removed = await queries.remove_old_results(db, config.max_results)
logger.info("%i results removed", removed)
updated = await queries.release_old_locks(db, config.max_lock_seconds)
logger.info("%i locks released", updated)
agents = await queries.get_recent_agents_count(db, config.time_without_agent)
if agents == 0:
no_agent_alert(app.state.config)
logger.info("Background recurring tasks ended")
@asynccontextmanager
async def lifespan(appli: FastAPI):
@ -159,7 +164,7 @@ async def lifespan(appli: FastAPI):
"There is no tasks in the database. "
'Please launch the command "argos server reload-config"'
)
await cleanup()
await recurring_tasks()
yield

View file

@ -94,7 +94,6 @@ Commands:
test-gotify Send a test gotify notification
test-mail Send a test email
user User management
watch-agents Watch agents (to run routinely)
```
<!--[[[end]]]
@ -151,33 +150,6 @@ Options:
-->
### Server watch-agents
<!--
.. [[[cog
help(["server", "watch-agents", "--help"])
.. ]]] -->
```man
Usage: argos server watch-agents [OPTIONS]
Watch agents (to run routinely)
Issues a warning if no agent has been seen by the server for a given time.
Options:
--time-without-agent INTEGER Time without seeing an agent after which a
warning will be issued, in minutes. Default is 5
minutes.
--config TEXT Path of the configuration file. If
ARGOS_YAML_FILE environment variable is set, its
value will be used instead.
--help Show this message and exit.
```
<!--[[[end]]]
-->
### Server reload-config
<!--

View file

@ -14,7 +14,9 @@ description: Many thanks to their developers!
- [Alembic](https://alembic.sqlalchemy.org) is used for DB migrations;
- [Tenacity](https://github.com/jd/tenacity) is a small utility to retry a function in case an error occurred;
- [Uvicorn](https://www.uvicorn.org/) is the tool used to run our server;
- [Gunicorn](https://gunicorn.org/) is the recommended WSGI HTTP server for production.
- [Gunicorn](https://gunicorn.org/) is the recommended WSGI HTTP server for production;
- [Apprise](https://github.com/caronc/apprise/wiki) allows Argos to send notifications through a lot of channels;
- [FastAPI Utilities](https://fastapiutils.github.io/fastapi-utils/) is in charge of recurring tasks.
## CSS framework

View file

@ -26,6 +26,8 @@ general:
- local
unknown:
- local
no_agent:
- local
service:
secrets:
# Secrets can be generated using `argos server generate-token`.
@ -38,15 +40,19 @@ ssl:
- "1d": critical
- "5d": warning
# Argos will do some cleaning in the background for you
# Argos will execute some tasks in the background for you
# every 2 minutes and needs some configuration for that
cleaning:
recurring_tasks:
# Max number of results per tasks you want to keep
# Minimum value is 1, default is 100
max_results: 100
# Max number of seconds a task can be locked
# Minimum value is 61, default is 100
max_lock_seconds: 100
# Max number of minutes without seeing an agent
# before sending an alert
# Minimum value is 1, default is 5
time_without_agent: 300
# It's also possible to define the checks in another file
# with the include syntax:

View file

@ -235,6 +235,7 @@ def empty_config():
warning=["", ""],
critical=["", ""],
unknown=["", ""],
no_agent=["", ""],
),
),
service=schemas.config.Service(
@ -243,9 +244,10 @@ def empty_config():
]
),
ssl=schemas.config.SSL(thresholds=[]),
cleaning=schemas.config.Cleaning(
recurring_tasks=schemas.config.RecurringTasks(
max_results=100,
max_lock_seconds=120,
time_without_agent=300,
),
websites=[],
)