mirror of
https://framagit.org/framasoft/framaspace/argos.git
synced 2025-04-28 09:52:38 +02:00
✨ — No need cron tasks for agents watching (fix #76)
This commit is contained in:
parent
fd0c68cd4c
commit
8d82f7f9d6
10 changed files with 142 additions and 128 deletions
|
@ -5,6 +5,7 @@
|
|||
- ✨ — Allow to customize agent User-Agent header (#78)
|
||||
- 📝 — Document how to add data to requests (#77)
|
||||
- ✨ — No need cron tasks for DB cleaning anymore (#74 and #75)
|
||||
- ✨ — No need cron tasks for agents watching (#76)
|
||||
|
||||
## 0.7.4
|
||||
|
||||
|
|
|
@ -140,47 +140,6 @@ def start(host, port, config, reload):
|
|||
uvicorn.run("argos.server:app", host=host, port=port, reload=reload)
|
||||
|
||||
|
||||
def validate_time_without_agent(ctx, param, value):
|
||||
if value <= 0:
|
||||
raise click.BadParameter("Should be a positive integer")
|
||||
return value
|
||||
|
||||
|
||||
@server.command()
|
||||
@click.option(
|
||||
"--time-without-agent",
|
||||
default=5,
|
||||
help="Time without seeing an agent after which a warning will be issued, in minutes. "
|
||||
"Default is 5 minutes.",
|
||||
callback=validate_time_without_agent,
|
||||
)
|
||||
@click.option(
|
||||
"--config",
|
||||
default="argos-config.yaml",
|
||||
help="Path of the configuration file. "
|
||||
"If ARGOS_YAML_FILE environment variable is set, its value will be used instead.",
|
||||
envvar="ARGOS_YAML_FILE",
|
||||
callback=validate_config_access,
|
||||
)
|
||||
@coroutine
|
||||
async def watch_agents(time_without_agent, config):
|
||||
"""Watch agents (to run routinely)
|
||||
|
||||
Issues a warning if no agent has been seen by the server for a given time.
|
||||
"""
|
||||
# It’s mandatory to do it before the imports
|
||||
os.environ["ARGOS_YAML_FILE"] = config
|
||||
|
||||
# The imports are made here otherwise the agent will need server configuration files.
|
||||
from argos.server import queries
|
||||
|
||||
db = await get_db()
|
||||
agents = await queries.get_recent_agents_count(db, time_without_agent)
|
||||
if agents == 0:
|
||||
click.echo(f"No agent has been seen in the last {time_without_agent} minutes.")
|
||||
sysexit(1)
|
||||
|
||||
|
||||
@server.command(short_help="Load or reload tasks’ configuration")
|
||||
@click.option(
|
||||
"--config",
|
||||
|
@ -537,6 +496,7 @@ async def test_mail(config, domain, severity):
|
|||
check="body-contains",
|
||||
expected="foo",
|
||||
frequency=1,
|
||||
ip_version=4,
|
||||
selected_by="test",
|
||||
selected_at=now,
|
||||
)
|
||||
|
@ -601,6 +561,7 @@ async def test_gotify(config, domain, severity):
|
|||
check="body-contains",
|
||||
expected="foo",
|
||||
frequency=1,
|
||||
ip_version=4,
|
||||
selected_by="test",
|
||||
selected_at=now,
|
||||
)
|
||||
|
@ -668,6 +629,7 @@ async def test_apprise(config, domain, severity, apprise_group):
|
|||
check="body-contains",
|
||||
expected="foo",
|
||||
frequency=1,
|
||||
ip_version=4,
|
||||
selected_by="test",
|
||||
selected_at=now,
|
||||
)
|
||||
|
|
|
@ -102,6 +102,10 @@ general:
|
|||
- local
|
||||
unknown:
|
||||
- local
|
||||
# This alert is triggered when no Argos agent has been seen in a while
|
||||
# See recurring_tasks.time_without_agent below
|
||||
no_agent:
|
||||
- local
|
||||
# Mail configuration is quite straight-forward
|
||||
# mail:
|
||||
# mailfrom: no-reply@example.org
|
||||
|
@ -145,15 +149,19 @@ ssl:
|
|||
- "1d": critical
|
||||
- "5d": warning
|
||||
|
||||
# Argos will do some cleaning in the background for you
|
||||
# Argos will execute some tasks in the background for you
|
||||
# every 2 minutes and needs some configuration for that
|
||||
cleaning:
|
||||
recurring_tasks:
|
||||
# Max number of results per tasks you want to keep
|
||||
# Minimum value is 1, default is 100
|
||||
max_results: 100
|
||||
# Max number of seconds a task can be locked
|
||||
# Minimum value is 61, default is 100
|
||||
max_lock_seconds: 100
|
||||
# Max number of minutes without seeing an agent
|
||||
# before sending an alert
|
||||
# Minimum value is 1, default is 5
|
||||
time_without_agent: 5
|
||||
|
||||
# It's also possible to define the checks in another file
|
||||
# with the include syntax:
|
||||
|
|
|
@ -48,9 +48,10 @@ class SSL(BaseModel):
|
|||
thresholds: List[Annotated[Tuple[int, Severity], BeforeValidator(parse_threshold)]]
|
||||
|
||||
|
||||
class Cleaning(BaseModel):
|
||||
class RecurringTasks(BaseModel):
|
||||
max_results: int
|
||||
max_lock_seconds: int
|
||||
time_without_agent: int
|
||||
|
||||
@field_validator("max_results", mode="before")
|
||||
def parse_max_results(cls, value):
|
||||
|
@ -68,6 +69,14 @@ class Cleaning(BaseModel):
|
|||
|
||||
return 100
|
||||
|
||||
@field_validator("time_without_agent", mode="before")
def parse_time_without_agent(cls, value):
    """Fall back to 5 minutes when time_without_agent is below one minute"""
    # Values of at least 1 are kept untouched; anything lower is replaced
    # by the default of 5.
    return value if value >= 1 else 5
|
||||
|
||||
|
||||
class WebsiteCheck(BaseModel):
|
||||
key: str
|
||||
|
@ -211,6 +220,7 @@ class Alert(BaseModel):
|
|||
warning: List[str]
|
||||
critical: List[str]
|
||||
unknown: List[str]
|
||||
no_agent: List[str]
|
||||
|
||||
|
||||
class GotifyUrl(BaseModel):
|
||||
|
@ -285,5 +295,5 @@ class Config(BaseModel):
|
|||
general: General
|
||||
service: Service
|
||||
ssl: SSL
|
||||
cleaning: Cleaning
|
||||
recurring_tasks: RecurringTasks
|
||||
websites: List[Website]
|
||||
|
|
|
@ -74,6 +74,91 @@ def get_icon_from_severity(severity: str) -> str:
|
|||
return icon
|
||||
|
||||
|
||||
def send_mail(mail: EmailMessage, config: Mail):
    """Send message by mail

    Opens a single SMTP session described by ``config``: an implicit-TLS
    connection when ``config.ssl`` is set, otherwise a plain connection
    that is upgraded with STARTTLS when ``config.starttls`` is set.
    Logs in when ``config.auth`` is provided, then sends ``mail`` once per
    address of ``config.addresses``.

    The session is always terminated, even if the login or one of the
    deliveries raises, so no connection is left open on the mail server.
    """

    if config.ssl:
        logger.debug("Mail notification: SSL")
        context = ssl.create_default_context()
        smtp = smtplib.SMTP_SSL(host=config.host, port=config.port, context=context)
    else:
        smtp = smtplib.SMTP(
            host=config.host,  # type: ignore
            port=config.port,
        )

    # smtplib clients are context managers: leaving the block sends QUIT
    # and closes the socket, whatever happens inside.
    with smtp:
        # STARTTLS only makes sense on the plain (non implicit-TLS) connection
        if not config.ssl and config.starttls:
            logger.debug("Mail notification: STARTTLS")
            context = ssl.create_default_context()
            smtp.starttls(context=context)

        if config.auth is not None:
            logger.debug("Mail notification: authentification")
            smtp.login(config.auth.login, config.auth.password)

        for address in config.addresses:
            logger.debug("Sending mail to %s", address)
            logger.debug(mail.get_body())
            smtp.send_message(mail, to_addrs=address)
|
||||
|
||||
|
||||
def send_gotify_msg(config, payload):
    """Send message with gotify

    ``config`` is an iterable of Gotify servers, each exposing a ``url``
    and a list of ``tokens``. ``payload`` is posted as JSON to
    ``<url>message`` once per token.

    A failed delivery (transport problem or error status returned by the
    server) is logged and does not prevent the remaining tokens and
    servers from being tried.
    """
    headers = {"accept": "application/json", "content-type": "application/json"}

    for url in config:
        logger.debug("Sending gotify message(s) to %s", url.url)
        for token in url.tokens:
            try:
                res = httpx.post(
                    f"{url.url}message",
                    params={"token": token},
                    headers=headers,
                    json=payload,
                )
                res.raise_for_status()
            # httpx.HTTPError is the common base of RequestError (transport
            # failures) and HTTPStatusError (raised by raise_for_status() on
            # 4xx/5xx answers); catching only RequestError let the latter
            # escape and abort the remaining deliveries.
            except httpx.HTTPError as err:
                # NOTE(review): the token ends up in the logs (both as argument
                # and in the request URL) - confirm this is acceptable.
                logger.error(
                    "An error occurred while sending a message to %s with token %s",
                    err.request.url,
                    token,
                )
|
||||
|
||||
|
||||
def no_agent_alert(config: Config):
    """Alert that no agent has been seen recently

    The delay mentioned in the alert is
    ``config.recurring_tasks.time_without_agent`` (in minutes). The alert
    goes through every channel listed in ``config.general.alerts.no_agent``:

    - ``local``: the subject is written to the error log;
    - ``mail``: sent with ``send_mail`` if ``config.general.mail`` is set;
    - ``gotify``: sent with ``send_gotify_msg`` (priority 9) if
      ``config.general.gotify`` is set;
    - ``apprise:<group>``: sent to the channels of ``<group>`` if
      ``config.general.apprise`` is set.
    """
    msg = "You should check what’s going on with your Argos agents."
    twa = config.recurring_tasks.time_without_agent
    if twa > 1:
        subject = f"No agent has been seen within the last {twa} minutes"
    else:
        subject = "No agent has been seen within the last minute"

    if "local" in config.general.alerts.no_agent:
        logger.error(subject)

    if config.general.mail is not None and "mail" in config.general.alerts.no_agent:
        mail = EmailMessage()
        mail["Subject"] = f"[Argos] {subject}"
        mail["From"] = config.general.mail.mailfrom
        mail.set_content(msg)
        send_mail(mail, config.general.mail)

    if config.general.gotify is not None and "gotify" in config.general.alerts.no_agent:
        priority = 9
        payload = {"title": subject, "message": msg, "priority": priority}
        send_gotify_msg(config.general.gotify, payload)

    if config.general.apprise is not None:
        for notif_way in config.general.alerts.no_agent:
            if notif_way.startswith("apprise:"):
                group = notif_way[len("apprise:") :]
                # presumably general.apprise maps group names to channel
                # lists; a group missing from it must not abort the other
                # apprise notifications.
                try:
                    channels = config.general.apprise[group]
                except KeyError:
                    logger.error(
                        "Unknown apprise group %s in no_agent alerts", group
                    )
                    continue

                apobj = apprise.Apprise()
                for channel in channels:
                    apobj.add(channel)

                apobj.notify(title=subject, body=msg)
|
||||
|
||||
|
||||
def handle_alert(config: Config, result, task, severity, old_severity, request): # pylint: disable-msg=too-many-positional-arguments
|
||||
"""Dispatch alert through configured alert channels"""
|
||||
|
||||
|
@ -163,36 +248,13 @@ See results of task on {request.url_for('get_task_results_view', task_id=task.id
|
|||
] = f"[Argos] {icon} {urlparse(task.url).netloc} (IPv{task.ip_version}): status {severity}"
|
||||
mail["From"] = config.mailfrom
|
||||
mail.set_content(msg)
|
||||
|
||||
if config.ssl:
|
||||
logger.debug("Mail notification: SSL")
|
||||
context = ssl.create_default_context()
|
||||
smtp = smtplib.SMTP_SSL(host=config.host, port=config.port, context=context)
|
||||
else:
|
||||
smtp = smtplib.SMTP(
|
||||
host=config.host, # type: ignore
|
||||
port=config.port,
|
||||
)
|
||||
if config.starttls:
|
||||
logger.debug("Mail notification: STARTTLS")
|
||||
context = ssl.create_default_context()
|
||||
smtp.starttls(context=context)
|
||||
|
||||
if config.auth is not None:
|
||||
logger.debug("Mail notification: authentification")
|
||||
smtp.login(config.auth.login, config.auth.password)
|
||||
|
||||
for address in config.addresses:
|
||||
logger.debug("Sending mail to %s", address)
|
||||
logger.debug(msg)
|
||||
smtp.send_message(mail, to_addrs=address)
|
||||
send_mail(mail, config)
|
||||
|
||||
|
||||
def notify_with_gotify( # pylint: disable-msg=too-many-positional-arguments
|
||||
result, task, severity: str, old_severity: str, config: List[GotifyUrl], request
|
||||
) -> None:
|
||||
logger.debug("Will send gotify notification")
|
||||
headers = {"accept": "application/json", "content-type": "application/json"}
|
||||
|
||||
icon = get_icon_from_severity(severity)
|
||||
priority = 9
|
||||
|
@ -228,20 +290,4 @@ See results of task on <{request.url_for('get_task_results_view', task_id=task.i
|
|||
|
||||
payload = {"title": subject, "message": msg, "priority": priority, "extras": extras}
|
||||
|
||||
for url in config:
|
||||
logger.debug("Sending gotify message(s) to %s", url.url)
|
||||
for token in url.tokens:
|
||||
try:
|
||||
res = httpx.post(
|
||||
f"{url.url}message",
|
||||
params={"token": token},
|
||||
headers=headers,
|
||||
json=payload,
|
||||
)
|
||||
res.raise_for_status()
|
||||
except httpx.RequestError as err:
|
||||
logger.error(
|
||||
"An error occurred while sending a message to %s with token %s",
|
||||
err.request.url,
|
||||
token,
|
||||
)
|
||||
send_gotify_msg(config, payload)
|
||||
|
|
|
@ -13,6 +13,7 @@ from sqlalchemy.orm import sessionmaker
|
|||
|
||||
from argos.logging import logger, set_log_level
|
||||
from argos.server import models, routes, queries
|
||||
from argos.server.alerting import no_agent_alert
|
||||
from argos.server.exceptions import NotAuthenticatedException, auth_exception_handler
|
||||
from argos.server.settings import read_yaml_config
|
||||
|
||||
|
@ -128,20 +129,24 @@ def create_manager(cookie_secret: str) -> LoginManager:
|
|||
|
||||
|
||||
@repeat_every(seconds=120, logger=logger)
|
||||
async def cleanup() -> None:
|
||||
async def recurring_tasks() -> None:
|
||||
"""Recurring DB cleanup and watch-agents tasks"""
|
||||
set_log_level("info", quiet=True)
|
||||
logger.info("Start DB cleanup tasks.")
|
||||
logger.info("Start background recurring tasks")
|
||||
with app.state.SessionLocal() as db:
|
||||
removed = await queries.remove_old_results(
|
||||
db, app.state.config.cleaning.max_results
|
||||
)
|
||||
updated = await queries.release_old_locks(
|
||||
db, app.state.config.cleaning.max_lock_seconds
|
||||
)
|
||||
|
||||
config = app.state.config.recurring_tasks
|
||||
removed = await queries.remove_old_results(db, config.max_results)
|
||||
logger.info("%i results removed", removed)
|
||||
|
||||
updated = await queries.release_old_locks(db, config.max_lock_seconds)
|
||||
logger.info("%i locks released", updated)
|
||||
|
||||
agents = await queries.get_recent_agents_count(db, config.time_without_agent)
|
||||
if agents == 0:
|
||||
no_agent_alert(app.state.config)
|
||||
|
||||
logger.info("Background recurring tasks ended")
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(appli: FastAPI):
|
||||
|
@ -159,7 +164,7 @@ async def lifespan(appli: FastAPI):
|
|||
"There is no tasks in the database. "
|
||||
'Please launch the command "argos server reload-config"'
|
||||
)
|
||||
await cleanup()
|
||||
await recurring_tasks()
|
||||
|
||||
yield
|
||||
|
||||
|
|
28
docs/cli.md
28
docs/cli.md
|
@ -94,7 +94,6 @@ Commands:
|
|||
test-gotify Send a test gotify notification
|
||||
test-mail Send a test email
|
||||
user User management
|
||||
watch-agents Watch agents (to run routinely)
|
||||
```
|
||||
|
||||
<!--[[[end]]]
|
||||
|
@ -151,33 +150,6 @@ Options:
|
|||
-->
|
||||
|
||||
|
||||
### Server watch-agents
|
||||
|
||||
<!--
|
||||
.. [[[cog
|
||||
help(["server", "watch-agents", "--help"])
|
||||
.. ]]] -->
|
||||
|
||||
```man
|
||||
Usage: argos server watch-agents [OPTIONS]
|
||||
|
||||
Watch agents (to run routinely)
|
||||
|
||||
Issues a warning if no agent has been seen by the server for a given time.
|
||||
|
||||
Options:
|
||||
--time-without-agent INTEGER Time without seeing an agent after which a
|
||||
warning will be issued, in minutes. Default is 5
|
||||
minutes.
|
||||
--config TEXT Path of the configuration file. If
|
||||
ARGOS_YAML_FILE environment variable is set, its
|
||||
value will be used instead.
|
||||
--help Show this message and exit.
|
||||
```
|
||||
|
||||
<!--[[[end]]]
|
||||
-->
|
||||
|
||||
### Server reload-config
|
||||
|
||||
<!--
|
||||
|
|
|
@ -14,7 +14,9 @@ description: Many thanks to their developers!
|
|||
- [Alembic](https://alembic.sqlalchemy.org) is used for DB migrations;
|
||||
- [Tenacity](https://github.com/jd/tenacity) a small utility to retry a function in case an error occurred;
|
||||
- [Uvicorn](https://www.uvicorn.org/) is the tool used to run our server;
|
||||
- [Gunicorn](https://gunicorn.org/) is the recommended WSGI HTTP server for production.
|
||||
- [Gunicorn](https://gunicorn.org/) is the recommended WSGI HTTP server for production;
|
||||
- [Apprise](https://github.com/caronc/apprise/wiki) allows Argos to send notifications through a lot of channels;
|
||||
- [FastAPI Utilities](https://fastapiutils.github.io/fastapi-utils/) is in charge of recurring tasks.
|
||||
|
||||
## CSS framework
|
||||
|
||||
|
|
|
@ -26,6 +26,8 @@ general:
|
|||
- local
|
||||
unknown:
|
||||
- local
|
||||
no_agent:
|
||||
- local
|
||||
service:
|
||||
secrets:
|
||||
# Secrets can be generated using `argos server generate-token`.
|
||||
|
@ -38,15 +40,19 @@ ssl:
|
|||
- "1d": critical
|
||||
- "5d": warning
|
||||
|
||||
# Argos will do some cleaning in the background for you
|
||||
# Argos will execute some tasks in the background for you
|
||||
# every 2 minutes and needs some configuration for that
|
||||
cleaning:
|
||||
recurring_tasks:
|
||||
# Max number of results per tasks you want to keep
|
||||
# Minimum value is 1, default is 100
|
||||
max_results: 100
|
||||
# Max number of seconds a task can be locked
|
||||
# Minimum value is 61, default is 100
|
||||
max_lock_seconds: 100
|
||||
# Max number of minutes without seeing an agent
|
||||
# before sending an alert
|
||||
# Minimum value is 1, default is 5
|
||||
time_without_agent: 300
|
||||
|
||||
# It's also possible to define the checks in another file
|
||||
# with the include syntax:
|
||||
|
|
|
@ -235,6 +235,7 @@ def empty_config():
|
|||
warning=["", ""],
|
||||
critical=["", ""],
|
||||
unknown=["", ""],
|
||||
no_agent=["", ""],
|
||||
),
|
||||
),
|
||||
service=schemas.config.Service(
|
||||
|
@ -243,9 +244,10 @@ def empty_config():
|
|||
]
|
||||
),
|
||||
ssl=schemas.config.SSL(thresholds=[]),
|
||||
cleaning=schemas.config.Cleaning(
|
||||
recurring_tasks=schemas.config.RecurringTasks(
|
||||
max_results=100,
|
||||
max_lock_seconds=120,
|
||||
time_without_agent=300,
|
||||
),
|
||||
websites=[],
|
||||
)
|
||||
|
|
Loading…
Reference in a new issue