mirror of
https://framagit.org/framasoft/framaspace/argos.git
synced 2025-04-28 18:02:41 +02:00
Compare commits
24 commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
9389e3a005 | ||
![]() |
159a6e2427 | ||
![]() |
211ac32028 | ||
![]() |
32f2518294 | ||
![]() |
38cc06e972 | ||
![]() |
4b78919937 | ||
![]() |
d8f30ebccd | ||
![]() |
09674f73ef | ||
![]() |
c63093bb2f | ||
![]() |
657624ed35 | ||
![]() |
471c1eae91 | ||
![]() |
c3708af32a | ||
![]() |
23fea9fffa | ||
![]() |
a48c7b74e6 | ||
![]() |
8d82f7f9d6 | ||
![]() |
fd0c68cd4c | ||
![]() |
c98cd9c017 | ||
![]() |
73e7a8f414 | ||
![]() |
db54dd2cdd | ||
![]() |
1b484da27a | ||
![]() |
07f87a0f7d | ||
![]() |
60f3079140 | ||
![]() |
ca709dca62 | ||
![]() |
0f099b9df4 |
27 changed files with 591 additions and 316 deletions
53
CHANGELOG.md
53
CHANGELOG.md
|
@ -2,11 +2,62 @@
|
|||
|
||||
## [Unreleased]
|
||||
|
||||
## 0.9.0
|
||||
|
||||
Date: 2025-02-18
|
||||
|
||||
- 🐛 — Fix worker timeout for old results cleaning in recurring tasks (#84)
|
||||
|
||||
💥 Old results are now removed by their age, not based on their number.
|
||||
|
||||
💥 Warning: `max_results` setting has been replaced by `max_results_age`, which is a duration.
|
||||
Use `argos server generate-config > /etc/argos/config.yaml-dist` to generate
|
||||
a new example configuration file.
|
||||
|
||||
## 0.8.2
|
||||
|
||||
Date: 2025-02-18
|
||||
|
||||
- 🐛 — Fix recurring tasks with gunicorn
|
||||
|
||||
## 0.8.1
|
||||
|
||||
Date: 2025-02-18
|
||||
|
||||
- 🐛 — Fix todo enum in jobs table
|
||||
|
||||
## 0.8.0
|
||||
|
||||
Date: 2025-02-18
|
||||
|
||||
- ✨ — Allow to customize agent User-Agent header (#78)
|
||||
- 📝 — Document how to add data to requests (#77)
|
||||
- ✨ — No need cron tasks for DB cleaning anymore (#74 and #75)
|
||||
- ✨ — No need cron tasks for agents watching (#76)
|
||||
- ✨ — Reload configuration asynchronously (#79)
|
||||
- 🐛 — Automatically reconnect to LDAP if unreachable (#81)
|
||||
- 🐛 — Better httpx.RequestError handling (#83)
|
||||
|
||||
💥 Warning: there is new settings to add to your configuration file.
|
||||
Use `argos server generate-config > /etc/argos/config.yaml-dist` to generate
|
||||
a new example configuration file.
|
||||
|
||||
💥 You don’t need cron tasks anymore!
|
||||
Remove your old cron tasks as they will now do nothing but generating errors.
|
||||
|
||||
NB: You may want to add `--enqueue` to `reload-config` command in your systemd file.
|
||||
|
||||
## 0.7.4
|
||||
|
||||
Date: 2025-02-12
|
||||
|
||||
- 🐛 — Fix method enum in tasks table (thx to Dryusdan)
|
||||
|
||||
## 0.7.3
|
||||
|
||||
Date: 2025-01-26
|
||||
|
||||
🐛 — Fix bug in retry_before_notification logic when success
|
||||
- 🐛 — Fix bug in retry_before_notification logic when success
|
||||
|
||||
## 0.7.2
|
||||
|
||||
|
|
|
@ -1 +1 @@
|
|||
VERSION = "0.7.3"
|
||||
VERSION = "0.9.0"
|
||||
|
|
|
@ -37,11 +37,17 @@ def log_failure(retry_state):
|
|||
class ArgosAgent: # pylint: disable-msg=too-many-instance-attributes
|
||||
"""The Argos agent is responsible for running the checks and reporting the results."""
|
||||
|
||||
def __init__(self, server: str, auth: str, max_tasks: int, wait_time: int):
|
||||
def __init__( # pylint: disable-msg=too-many-positional-arguments
|
||||
self, server: str, auth: str, max_tasks: int, wait_time: int, user_agent: str
|
||||
):
|
||||
self.server = server
|
||||
self.max_tasks = max_tasks
|
||||
self.wait_time = wait_time
|
||||
self.auth = auth
|
||||
if user_agent == "":
|
||||
self.ua = user_agent
|
||||
else:
|
||||
self.ua = f" - {user_agent}"
|
||||
self._http_client: httpx.AsyncClient | None = None
|
||||
self._http_client_v4: httpx.AsyncClient | None = None
|
||||
self._http_client_v6: httpx.AsyncClient | None = None
|
||||
|
@ -53,13 +59,13 @@ class ArgosAgent: # pylint: disable-msg=too-many-instance-attributes
|
|||
async def run(self):
|
||||
auth_header = {
|
||||
"Authorization": f"Bearer {self.auth}",
|
||||
"User-Agent": f"Argos Panoptes agent {VERSION}",
|
||||
"User-Agent": f"Argos Panoptes agent {VERSION}{self.ua}",
|
||||
}
|
||||
self._http_client = httpx.AsyncClient(headers=auth_header)
|
||||
|
||||
ua_header = {
|
||||
"User-Agent": f"Argos Panoptes {VERSION} "
|
||||
"(about: https://argos-monitoring.framasoft.org/)",
|
||||
f"(about: https://argos-monitoring.framasoft.org/){self.ua}",
|
||||
}
|
||||
self._http_client_v4 = httpx.AsyncClient(
|
||||
headers=ua_header,
|
||||
|
@ -78,6 +84,7 @@ class ArgosAgent: # pylint: disable-msg=too-many-instance-attributes
|
|||
await asyncio.sleep(self.wait_time)
|
||||
|
||||
async def _do_request(self, group: str, details: dict):
|
||||
logger.debug("_do_request for group %s", group)
|
||||
headers = {}
|
||||
if details["request_data"] is not None:
|
||||
request_data = json.loads(details["request_data"])
|
||||
|
@ -114,6 +121,7 @@ class ArgosAgent: # pylint: disable-msg=too-many-instance-attributes
|
|||
)
|
||||
except httpx.ReadError:
|
||||
sleep(1)
|
||||
logger.warning("httpx.ReadError for group %s, re-emit request", group)
|
||||
if details["request_data"] is None or request_data["data"] is None:
|
||||
response = await http_client.request( # type: ignore[union-attr]
|
||||
method=details["method"], url=details["url"], timeout=60
|
||||
|
@ -132,6 +140,9 @@ class ArgosAgent: # pylint: disable-msg=too-many-instance-attributes
|
|||
data=request_data["data"],
|
||||
timeout=60,
|
||||
)
|
||||
except httpx.RequestError as err:
|
||||
logger.warning("httpx.RequestError for group %s", group)
|
||||
response = err
|
||||
|
||||
self._res_cache[group] = response
|
||||
|
||||
|
@ -141,15 +152,21 @@ class ArgosAgent: # pylint: disable-msg=too-many-instance-attributes
|
|||
|
||||
check_class = get_registered_check(task.check)
|
||||
check = check_class(task)
|
||||
result = await check.run(self._res_cache[task.task_group])
|
||||
status = result.status
|
||||
context = result.context
|
||||
|
||||
response = self._res_cache[task.task_group]
|
||||
if isinstance(response, httpx.Response):
|
||||
result = await check.run(response)
|
||||
status = result.status
|
||||
context = result.context
|
||||
else:
|
||||
status = "failure"
|
||||
context = SerializableException.from_exception(response)
|
||||
except Exception as err: # pylint: disable=broad-except
|
||||
status = "error"
|
||||
context = SerializableException.from_exception(err)
|
||||
msg = f"An exception occured when running {_task}. {err.__class__.__name__} : {err}"
|
||||
logger.error(msg)
|
||||
|
||||
return AgentResult(task_id=task.id, status=status, context=context)
|
||||
|
||||
async def _get_and_complete_tasks(self):
|
||||
|
|
|
@ -92,7 +92,12 @@ def version():
|
|||
default="INFO",
|
||||
type=click.Choice(logging.LOG_LEVELS, case_sensitive=False),
|
||||
)
|
||||
def agent(server_url, auth, max_tasks, wait_time, log_level):
|
||||
@click.option(
|
||||
"--user-agent",
|
||||
default="",
|
||||
help="A custom string to append to the User-Agent header",
|
||||
)
|
||||
def agent(server_url, auth, max_tasks, wait_time, log_level, user_agent): # pylint: disable-msg=too-many-positional-arguments
|
||||
"""Get and run tasks for the provided server. Will wait for new tasks.
|
||||
|
||||
Usage: argos agent https://argos.example.org "auth-token-here"
|
||||
|
@ -108,7 +113,7 @@ def agent(server_url, auth, max_tasks, wait_time, log_level):
|
|||
from argos.logging import logger
|
||||
|
||||
logger.setLevel(log_level)
|
||||
agent_ = ArgosAgent(server_url, auth, max_tasks, wait_time)
|
||||
agent_ = ArgosAgent(server_url, auth, max_tasks, wait_time, user_agent)
|
||||
asyncio.run(agent_.run())
|
||||
|
||||
|
||||
|
@ -135,101 +140,6 @@ def start(host, port, config, reload):
|
|||
uvicorn.run("argos.server:app", host=host, port=port, reload=reload)
|
||||
|
||||
|
||||
def validate_max_lock_seconds(ctx, param, value):
|
||||
if value <= 60:
|
||||
raise click.BadParameter("Should be strictly higher than 60")
|
||||
return value
|
||||
|
||||
|
||||
def validate_max_results(ctx, param, value):
|
||||
if value <= 0:
|
||||
raise click.BadParameter("Should be a positive integer")
|
||||
return value
|
||||
|
||||
|
||||
@server.command()
|
||||
@click.option(
|
||||
"--max-results",
|
||||
default=100,
|
||||
help="Number of results per task to keep",
|
||||
callback=validate_max_results,
|
||||
)
|
||||
@click.option(
|
||||
"--max-lock-seconds",
|
||||
default=100,
|
||||
help=(
|
||||
"The number of seconds after which a lock is "
|
||||
"considered stale, must be higher than 60 "
|
||||
"(the checks have a timeout value of 60 seconds)"
|
||||
),
|
||||
callback=validate_max_lock_seconds,
|
||||
)
|
||||
@click.option(
|
||||
"--config",
|
||||
default="argos-config.yaml",
|
||||
help="Path of the configuration file. "
|
||||
"If ARGOS_YAML_FILE environment variable is set, its value will be used instead. "
|
||||
"Default value: argos-config.yaml and /etc/argos/config.yaml as fallback.",
|
||||
envvar="ARGOS_YAML_FILE",
|
||||
callback=validate_config_access,
|
||||
)
|
||||
@coroutine
|
||||
async def cleandb(max_results, max_lock_seconds, config):
|
||||
"""Clean the database (to run routinely)
|
||||
|
||||
\b
|
||||
- Removes old results from the database.
|
||||
- Removes locks from tasks that have been locked for too long.
|
||||
"""
|
||||
# It’s mandatory to do it before the imports
|
||||
os.environ["ARGOS_YAML_FILE"] = config
|
||||
|
||||
# The imports are made here otherwise the agent will need server configuration files.
|
||||
from argos.server import queries
|
||||
|
||||
db = await get_db()
|
||||
removed = await queries.remove_old_results(db, max_results)
|
||||
updated = await queries.release_old_locks(db, max_lock_seconds)
|
||||
|
||||
click.echo(f"{removed} results removed")
|
||||
click.echo(f"{updated} locks released")
|
||||
|
||||
|
||||
@server.command()
|
||||
@click.option(
|
||||
"--time-without-agent",
|
||||
default=5,
|
||||
help="Time without seeing an agent after which a warning will be issued, in minutes. "
|
||||
"Default is 5 minutes.",
|
||||
callback=validate_max_results,
|
||||
)
|
||||
@click.option(
|
||||
"--config",
|
||||
default="argos-config.yaml",
|
||||
help="Path of the configuration file. "
|
||||
"If ARGOS_YAML_FILE environment variable is set, its value will be used instead.",
|
||||
envvar="ARGOS_YAML_FILE",
|
||||
callback=validate_config_access,
|
||||
)
|
||||
@coroutine
|
||||
async def watch_agents(time_without_agent, config):
|
||||
"""Watch agents (to run routinely)
|
||||
|
||||
Issues a warning if no agent has been seen by the server for a given time.
|
||||
"""
|
||||
# It’s mandatory to do it before the imports
|
||||
os.environ["ARGOS_YAML_FILE"] = config
|
||||
|
||||
# The imports are made here otherwise the agent will need server configuration files.
|
||||
from argos.server import queries
|
||||
|
||||
db = await get_db()
|
||||
agents = await queries.get_recent_agents_count(db, time_without_agent)
|
||||
if agents == 0:
|
||||
click.echo(f"No agent has been seen in the last {time_without_agent} minutes.")
|
||||
sysexit(1)
|
||||
|
||||
|
||||
@server.command(short_help="Load or reload tasks’ configuration")
|
||||
@click.option(
|
||||
"--config",
|
||||
|
@ -240,23 +150,40 @@ async def watch_agents(time_without_agent, config):
|
|||
envvar="ARGOS_YAML_FILE",
|
||||
callback=validate_config_access,
|
||||
)
|
||||
@click.option(
|
||||
"--enqueue/--no-enqueue",
|
||||
default=False,
|
||||
help="Let Argos main recurring tasks handle configuration’s loading. "
|
||||
"It may delay the application of the new configuration up to 2 minutes. "
|
||||
"Default is --no-enqueue",
|
||||
)
|
||||
@coroutine
|
||||
async def reload_config(config):
|
||||
async def reload_config(config, enqueue):
|
||||
"""Read tasks’ configuration and add/delete tasks in database if needed"""
|
||||
# It’s mandatory to do it before the imports
|
||||
os.environ["ARGOS_YAML_FILE"] = config
|
||||
|
||||
# The imports are made here otherwise the agent will need server configuration files.
|
||||
from argos.server import queries
|
||||
from argos.server.main import read_config
|
||||
from argos.server.settings import read_config
|
||||
|
||||
_config = read_config(config)
|
||||
|
||||
db = await get_db()
|
||||
changed = await queries.update_from_config(db, _config)
|
||||
|
||||
click.echo(f"{changed['added']} tasks added")
|
||||
click.echo(f"{changed['vanished']} tasks deleted")
|
||||
config_changed = await queries.has_config_changed(db, _config)
|
||||
if not config_changed:
|
||||
click.echo("Config has not change")
|
||||
else:
|
||||
if enqueue:
|
||||
msg = await queries.update_from_config_later(db, config_file=config)
|
||||
|
||||
click.echo(msg)
|
||||
else:
|
||||
changed = await queries.update_from_config(db, _config)
|
||||
|
||||
click.echo(f"{changed['added']} task(s) added")
|
||||
click.echo(f"{changed['vanished']} task(s) deleted")
|
||||
|
||||
|
||||
@server.command()
|
||||
|
@ -570,8 +497,8 @@ async def test_mail(config, domain, severity):
|
|||
|
||||
from argos.logging import set_log_level
|
||||
from argos.server.alerting import notify_by_mail
|
||||
from argos.server.main import read_config
|
||||
from argos.server.models import Result, Task
|
||||
from argos.server.settings import read_config
|
||||
|
||||
conf = read_config(config)
|
||||
|
||||
|
@ -586,6 +513,7 @@ async def test_mail(config, domain, severity):
|
|||
check="body-contains",
|
||||
expected="foo",
|
||||
frequency=1,
|
||||
ip_version=4,
|
||||
selected_by="test",
|
||||
selected_at=now,
|
||||
)
|
||||
|
@ -634,8 +562,8 @@ async def test_gotify(config, domain, severity):
|
|||
|
||||
from argos.logging import set_log_level
|
||||
from argos.server.alerting import notify_with_gotify
|
||||
from argos.server.main import read_config
|
||||
from argos.server.models import Result, Task
|
||||
from argos.server.settings import read_config
|
||||
|
||||
conf = read_config(config)
|
||||
|
||||
|
@ -650,6 +578,7 @@ async def test_gotify(config, domain, severity):
|
|||
check="body-contains",
|
||||
expected="foo",
|
||||
frequency=1,
|
||||
ip_version=4,
|
||||
selected_by="test",
|
||||
selected_at=now,
|
||||
)
|
||||
|
@ -701,8 +630,8 @@ async def test_apprise(config, domain, severity, apprise_group):
|
|||
|
||||
from argos.logging import set_log_level
|
||||
from argos.server.alerting import notify_with_apprise
|
||||
from argos.server.main import read_config
|
||||
from argos.server.models import Result, Task
|
||||
from argos.server.settings import read_config
|
||||
|
||||
conf = read_config(config)
|
||||
|
||||
|
@ -717,6 +646,7 @@ async def test_apprise(config, domain, severity, apprise_group):
|
|||
check="body-contains",
|
||||
expected="foo",
|
||||
frequency=1,
|
||||
ip_version=4,
|
||||
selected_by="test",
|
||||
selected_at=now,
|
||||
)
|
||||
|
|
|
@ -81,6 +81,12 @@ general:
|
|||
# To disable the IPv6 check of domains:
|
||||
# ipv6: false
|
||||
|
||||
# Argos root path
|
||||
# If not present, default value is ""
|
||||
# Set it to /foo if you want to use argos at /foo/ instead of /
|
||||
# on your web server
|
||||
# root_path: "/foo"
|
||||
|
||||
# Which way do you want to be warned when a check goes to that severity?
|
||||
# "local" emits a message in the server log
|
||||
# You’ll need to configure mail, gotify or apprise below to be able to use
|
||||
|
@ -96,11 +102,10 @@ general:
|
|||
- local
|
||||
unknown:
|
||||
- local
|
||||
# Argos root path
|
||||
# If not present, default value is ""
|
||||
# Set it to /foo if you want to use argos at /foo/ instead of /
|
||||
# on your web server
|
||||
# root_path: "/foo"
|
||||
# This alert is triggered when no Argos agent has been seen in a while
|
||||
# See recurring_tasks.time_without_agent below
|
||||
no_agent:
|
||||
- local
|
||||
# Mail configuration is quite straight-forward
|
||||
# mail:
|
||||
# mailfrom: no-reply@example.org
|
||||
|
@ -144,6 +149,22 @@ ssl:
|
|||
- "1d": critical
|
||||
- "5d": warning
|
||||
|
||||
# Argos will execute some tasks in the background for you
|
||||
# every 2 minutes and needs some configuration for that
|
||||
recurring_tasks:
|
||||
# Maximum age of results
|
||||
# Use m for minutes, h for hours, d for days
|
||||
# w for weeks, M for months, y for years
|
||||
# See https://github.com/timwedde/durations_nlp#scales-reference for details
|
||||
max_results_age: "1d"
|
||||
# Max number of seconds a task can be locked
|
||||
# Minimum value is 61, default is 100
|
||||
max_lock_seconds: 100
|
||||
# Max number of minutes without seing an agent
|
||||
# before sending an alert
|
||||
# Minimum value is 1, default is 5
|
||||
time_without_agent: 5
|
||||
|
||||
# It's also possible to define the checks in another file
|
||||
# with the include syntax:
|
||||
#
|
||||
|
|
|
@ -14,9 +14,10 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
|
||||
# XXX Does not work ?
|
||||
def set_log_level(log_level):
|
||||
def set_log_level(log_level: str, quiet: bool = False):
|
||||
level = getattr(logging, log_level.upper(), None)
|
||||
if not isinstance(level, int):
|
||||
raise ValueError(f"Invalid log level: {log_level}")
|
||||
logger.setLevel(level=level)
|
||||
logger.info("Log level set to %s", log_level)
|
||||
if not quiet:
|
||||
logger.info("Log level set to %s", log_level)
|
||||
|
|
|
@ -48,6 +48,33 @@ class SSL(BaseModel):
|
|||
thresholds: List[Annotated[Tuple[int, Severity], BeforeValidator(parse_threshold)]]
|
||||
|
||||
|
||||
class RecurringTasks(BaseModel):
|
||||
max_results_age: float
|
||||
max_lock_seconds: int
|
||||
time_without_agent: int
|
||||
|
||||
@field_validator("max_results_age", mode="before")
|
||||
def parse_max_results_age(cls, value):
|
||||
"""Convert the configured maximum results age to seconds"""
|
||||
return Duration(value).to_seconds()
|
||||
|
||||
@field_validator("max_lock_seconds", mode="before")
|
||||
def parse_max_lock_seconds(cls, value):
|
||||
"""Ensure that max_lock_seconds is higher or equal to agent’s requests timeout (60)"""
|
||||
if value > 60:
|
||||
return value
|
||||
|
||||
return 100
|
||||
|
||||
@field_validator("time_without_agent", mode="before")
|
||||
def parse_time_without_agent(cls, value):
|
||||
"""Ensure that time_without_agent is at least one minute"""
|
||||
if value >= 1:
|
||||
return value
|
||||
|
||||
return 5
|
||||
|
||||
|
||||
class WebsiteCheck(BaseModel):
|
||||
key: str
|
||||
value: str | List[str] | Dict[str, str]
|
||||
|
@ -190,6 +217,7 @@ class Alert(BaseModel):
|
|||
warning: List[str]
|
||||
critical: List[str]
|
||||
unknown: List[str]
|
||||
no_agent: List[str]
|
||||
|
||||
|
||||
class GotifyUrl(BaseModel):
|
||||
|
@ -264,4 +292,5 @@ class Config(BaseModel):
|
|||
general: General
|
||||
service: Service
|
||||
ssl: SSL
|
||||
recurring_tasks: RecurringTasks
|
||||
websites: List[Website]
|
||||
|
|
|
@ -8,11 +8,25 @@ from typing import Literal
|
|||
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
|
||||
from argos.schemas.utils import IPVersion, Method
|
||||
from argos.schemas.utils import IPVersion, Method, Todo
|
||||
|
||||
# XXX Refactor using SQLModel to avoid duplication of model data
|
||||
|
||||
|
||||
class Job(BaseModel):
|
||||
"""Tasks needing to be executed in recurring tasks processing.
|
||||
It’s quite like a job queue."""
|
||||
|
||||
id: int
|
||||
todo: Todo
|
||||
args: str
|
||||
current: bool
|
||||
added_at: datetime
|
||||
|
||||
def __str__(self):
|
||||
return f"Job ({self.id}): {self.todo}"
|
||||
|
||||
|
||||
class Task(BaseModel):
|
||||
"""A task corresponds to a check to execute"""
|
||||
|
||||
|
|
|
@ -6,3 +6,5 @@ IPVersion = Literal["4", "6"]
|
|||
Method = Literal[
|
||||
"GET", "HEAD", "POST", "OPTIONS", "CONNECT", "TRACE", "PUT", "PATCH", "DELETE"
|
||||
]
|
||||
|
||||
Todo = Literal["RELOAD_CONFIG"]
|
||||
|
|
|
@ -74,6 +74,91 @@ def get_icon_from_severity(severity: str) -> str:
|
|||
return icon
|
||||
|
||||
|
||||
def send_mail(mail: EmailMessage, config: Mail):
|
||||
"""Send message by mail"""
|
||||
|
||||
if config.ssl:
|
||||
logger.debug("Mail notification: SSL")
|
||||
context = ssl.create_default_context()
|
||||
smtp = smtplib.SMTP_SSL(host=config.host, port=config.port, context=context)
|
||||
else:
|
||||
smtp = smtplib.SMTP(
|
||||
host=config.host, # type: ignore
|
||||
port=config.port,
|
||||
)
|
||||
if config.starttls:
|
||||
logger.debug("Mail notification: STARTTLS")
|
||||
context = ssl.create_default_context()
|
||||
smtp.starttls(context=context)
|
||||
|
||||
if config.auth is not None:
|
||||
logger.debug("Mail notification: authentification")
|
||||
smtp.login(config.auth.login, config.auth.password)
|
||||
|
||||
for address in config.addresses:
|
||||
logger.debug("Sending mail to %s", address)
|
||||
logger.debug(mail.get_body())
|
||||
smtp.send_message(mail, to_addrs=address)
|
||||
|
||||
|
||||
def send_gotify_msg(config, payload):
|
||||
"""Send message with gotify"""
|
||||
headers = {"accept": "application/json", "content-type": "application/json"}
|
||||
|
||||
for url in config:
|
||||
logger.debug("Sending gotify message(s) to %s", url.url)
|
||||
for token in url.tokens:
|
||||
try:
|
||||
res = httpx.post(
|
||||
f"{url.url}message",
|
||||
params={"token": token},
|
||||
headers=headers,
|
||||
json=payload,
|
||||
)
|
||||
res.raise_for_status()
|
||||
except httpx.RequestError as err:
|
||||
logger.error(
|
||||
"An error occurred while sending a message to %s with token %s",
|
||||
err.request.url,
|
||||
token,
|
||||
)
|
||||
|
||||
|
||||
def no_agent_alert(config: Config):
|
||||
"""Alert"""
|
||||
msg = "You should check what’s going on with your Argos agents."
|
||||
twa = config.recurring_tasks.time_without_agent
|
||||
if twa > 1:
|
||||
subject = f"No agent has been seen within the last {twa} minutes"
|
||||
else:
|
||||
subject = "No agent has been seen within the last minute"
|
||||
|
||||
if "local" in config.general.alerts.no_agent:
|
||||
logger.error(subject)
|
||||
|
||||
if config.general.mail is not None and "mail" in config.general.alerts.no_agent:
|
||||
mail = EmailMessage()
|
||||
mail["Subject"] = f"[Argos] {subject}"
|
||||
mail["From"] = config.general.mail.mailfrom
|
||||
mail.set_content(msg)
|
||||
send_mail(mail, config.general.mail)
|
||||
|
||||
if config.general.gotify is not None and "gotify" in config.general.alerts.no_agent:
|
||||
priority = 9
|
||||
payload = {"title": subject, "message": msg, "priority": priority}
|
||||
send_gotify_msg(config.general.gotify, payload)
|
||||
|
||||
if config.general.apprise is not None:
|
||||
for notif_way in config.general.alerts.no_agent:
|
||||
if notif_way.startswith("apprise:"):
|
||||
group = notif_way[8:]
|
||||
apobj = apprise.Apprise()
|
||||
for channel in config.general.apprise[group]:
|
||||
apobj.add(channel)
|
||||
|
||||
apobj.notify(title=subject, body=msg)
|
||||
|
||||
|
||||
def handle_alert(config: Config, result, task, severity, old_severity, request): # pylint: disable-msg=too-many-positional-arguments
|
||||
"""Dispatch alert through configured alert channels"""
|
||||
|
||||
|
@ -163,36 +248,13 @@ See results of task on {request.url_for('get_task_results_view', task_id=task.id
|
|||
] = f"[Argos] {icon} {urlparse(task.url).netloc} (IPv{task.ip_version}): status {severity}"
|
||||
mail["From"] = config.mailfrom
|
||||
mail.set_content(msg)
|
||||
|
||||
if config.ssl:
|
||||
logger.debug("Mail notification: SSL")
|
||||
context = ssl.create_default_context()
|
||||
smtp = smtplib.SMTP_SSL(host=config.host, port=config.port, context=context)
|
||||
else:
|
||||
smtp = smtplib.SMTP(
|
||||
host=config.host, # type: ignore
|
||||
port=config.port,
|
||||
)
|
||||
if config.starttls:
|
||||
logger.debug("Mail notification: STARTTLS")
|
||||
context = ssl.create_default_context()
|
||||
smtp.starttls(context=context)
|
||||
|
||||
if config.auth is not None:
|
||||
logger.debug("Mail notification: authentification")
|
||||
smtp.login(config.auth.login, config.auth.password)
|
||||
|
||||
for address in config.addresses:
|
||||
logger.debug("Sending mail to %s", address)
|
||||
logger.debug(msg)
|
||||
smtp.send_message(mail, to_addrs=address)
|
||||
send_mail(mail, config)
|
||||
|
||||
|
||||
def notify_with_gotify( # pylint: disable-msg=too-many-positional-arguments
|
||||
result, task, severity: str, old_severity: str, config: List[GotifyUrl], request
|
||||
) -> None:
|
||||
logger.debug("Will send gotify notification")
|
||||
headers = {"accept": "application/json", "content-type": "application/json"}
|
||||
|
||||
icon = get_icon_from_severity(severity)
|
||||
priority = 9
|
||||
|
@ -228,20 +290,4 @@ See results of task on <{request.url_for('get_task_results_view', task_id=task.i
|
|||
|
||||
payload = {"title": subject, "message": msg, "priority": priority, "extras": extras}
|
||||
|
||||
for url in config:
|
||||
logger.debug("Sending gotify message(s) to %s", url.url)
|
||||
for token in url.tokens:
|
||||
try:
|
||||
res = httpx.post(
|
||||
f"{url.url}message",
|
||||
params={"token": token},
|
||||
headers=headers,
|
||||
json=payload,
|
||||
)
|
||||
res.raise_for_status()
|
||||
except httpx.RequestError as err:
|
||||
logger.error(
|
||||
"An error occurred while sending a message to %s with token %s",
|
||||
err.request.url,
|
||||
token,
|
||||
)
|
||||
send_gotify_msg(config, payload)
|
||||
|
|
|
@ -1,19 +1,20 @@
|
|||
import os
|
||||
import sys
|
||||
from contextlib import asynccontextmanager
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi_login import LoginManager
|
||||
from pydantic import ValidationError
|
||||
from fastapi_utils.tasks import repeat_every
|
||||
from psutil import Process
|
||||
from sqlalchemy import create_engine, event
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
from argos.logging import logger
|
||||
from argos.logging import logger, set_log_level
|
||||
from argos.server import models, routes, queries
|
||||
from argos.server.alerting import no_agent_alert
|
||||
from argos.server.exceptions import NotAuthenticatedException, auth_exception_handler
|
||||
from argos.server.settings import read_yaml_config
|
||||
from argos.server.settings import read_config
|
||||
|
||||
|
||||
def get_application() -> FastAPI:
|
||||
|
@ -39,9 +40,7 @@ def get_application() -> FastAPI:
|
|||
if config.general.ldap is not None:
|
||||
import ldap
|
||||
|
||||
l = ldap.initialize(config.general.ldap.uri)
|
||||
l.simple_bind_s(config.general.ldap.bind_dn, config.general.ldap.bind_pwd)
|
||||
appli.state.ldap = l
|
||||
appli.state.ldap = ldap.initialize(config.general.ldap.uri)
|
||||
|
||||
@appli.state.manager.user_loader()
|
||||
async def query_user(user: str) -> None | str | models.User:
|
||||
|
@ -71,17 +70,6 @@ async def connect_to_db(appli):
|
|||
return appli.state.db
|
||||
|
||||
|
||||
def read_config(yaml_file):
|
||||
try:
|
||||
config = read_yaml_config(yaml_file)
|
||||
return config
|
||||
except ValidationError as err:
|
||||
logger.error("Errors where found while reading configuration:")
|
||||
for error in err.errors():
|
||||
logger.error("%s is %s", error["loc"], error["type"])
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def setup_database(appli):
|
||||
config = appli.state.config
|
||||
db_url = str(config.general.db.url)
|
||||
|
@ -126,8 +114,47 @@ def create_manager(cookie_secret: str) -> LoginManager:
|
|||
)
|
||||
|
||||
|
||||
@repeat_every(seconds=120, logger=logger)
|
||||
async def recurring_tasks() -> None:
|
||||
"""Recurring DB cleanup and watch-agents tasks"""
|
||||
# If we are using gunicorn
|
||||
if not hasattr(app.state, "SessionLocal"):
|
||||
parent_process = Process(os.getppid())
|
||||
children = parent_process.children(recursive=True)
|
||||
# Start the task only once, not for every worker
|
||||
if children[0].pid == os.getpid():
|
||||
# and we need to setup database engine
|
||||
setup_database(app)
|
||||
else:
|
||||
return None
|
||||
|
||||
set_log_level("info", quiet=True)
|
||||
logger.info("Start background recurring tasks")
|
||||
|
||||
with app.state.SessionLocal() as db:
|
||||
config = app.state.config.recurring_tasks
|
||||
|
||||
agents = await queries.get_recent_agents_count(db, config.time_without_agent)
|
||||
if agents == 0:
|
||||
no_agent_alert(app.state.config)
|
||||
logger.info("Agent presence checked")
|
||||
|
||||
removed = await queries.remove_old_results(db, config.max_results_age)
|
||||
logger.info("%i result(s) removed", removed)
|
||||
|
||||
updated = await queries.release_old_locks(db, config.max_lock_seconds)
|
||||
logger.info("%i lock(s) released", updated)
|
||||
|
||||
processed_jobs = await queries.process_jobs(db)
|
||||
logger.info("%i job(s) processed", processed_jobs)
|
||||
|
||||
logger.info("Background recurring tasks ended")
|
||||
|
||||
return None
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(appli):
|
||||
async def lifespan(appli: FastAPI):
|
||||
"""Server start and stop actions
|
||||
|
||||
Setup database connection then close it at shutdown.
|
||||
|
@ -142,6 +169,7 @@ async def lifespan(appli):
|
|||
"There is no tasks in the database. "
|
||||
'Please launch the command "argos server reload-config"'
|
||||
)
|
||||
await recurring_tasks()
|
||||
|
||||
yield
|
||||
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
"""Add job queue
|
||||
|
||||
Revision ID: 5f6cb30db996
|
||||
Revises: bd4b4962696a
|
||||
Create Date: 2025-02-17 16:56:36.673511
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = "5f6cb30db996"
|
||||
down_revision: Union[str, None] = "bd4b4962696a"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.create_table(
|
||||
"jobs",
|
||||
sa.Column("id", sa.Integer(), nullable=False),
|
||||
sa.Column("todo", sa.Enum("RELOAD_CONFIG", name="todo_enum"), nullable=False),
|
||||
sa.Column("args", sa.String(), nullable=False),
|
||||
sa.Column(
|
||||
"current", sa.Boolean(), server_default=sa.sql.false(), nullable=False
|
||||
),
|
||||
sa.Column("added_at", sa.DateTime(), nullable=False),
|
||||
sa.PrimaryKeyConstraint("id"),
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_table("jobs")
|
|
@ -5,6 +5,7 @@ Revises: c780864dc407
|
|||
Create Date: 2024-11-26 14:40:27.510587
|
||||
|
||||
"""
|
||||
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
|
@ -19,22 +20,25 @@ depends_on: Union[str, Sequence[str], None] = None
|
|||
|
||||
|
||||
def upgrade() -> None:
|
||||
enum = sa.Enum(
|
||||
"GET",
|
||||
"HEAD",
|
||||
"POST",
|
||||
"OPTIONS",
|
||||
"CONNECT",
|
||||
"TRACE",
|
||||
"PUT",
|
||||
"PATCH",
|
||||
"DELETE",
|
||||
name="method",
|
||||
create_type=False,
|
||||
)
|
||||
enum.create(op.get_bind(), checkfirst=True)
|
||||
with op.batch_alter_table("tasks", schema=None) as batch_op:
|
||||
batch_op.add_column(
|
||||
sa.Column(
|
||||
"method",
|
||||
sa.Enum(
|
||||
"GET",
|
||||
"HEAD",
|
||||
"POST",
|
||||
"OPTIONS",
|
||||
"CONNECT",
|
||||
"TRACE",
|
||||
"PUT",
|
||||
"PATCH",
|
||||
"DELETE",
|
||||
name="method",
|
||||
),
|
||||
enum,
|
||||
nullable=False,
|
||||
server_default="GET",
|
||||
)
|
||||
|
@ -44,3 +48,4 @@ def upgrade() -> None:
|
|||
def downgrade() -> None:
|
||||
with op.batch_alter_table("tasks", schema=None) as batch_op:
|
||||
batch_op.drop_column("method")
|
||||
sa.Enum(name="method").drop(op.get_bind(), checkfirst=True)
|
||||
|
|
|
@ -14,7 +14,7 @@ from sqlalchemy.schema import Index
|
|||
|
||||
from argos.checks import BaseCheck, get_registered_check
|
||||
from argos.schemas import WebsiteCheck
|
||||
from argos.schemas.utils import IPVersion, Method
|
||||
from argos.schemas.utils import IPVersion, Method, Todo
|
||||
|
||||
|
||||
def compute_task_group(context) -> str:
|
||||
|
@ -33,6 +33,19 @@ class Base(DeclarativeBase):
|
|||
type_annotation_map = {List[WebsiteCheck]: JSON, dict: JSON}
|
||||
|
||||
|
||||
class Job(Base):
|
||||
"""
|
||||
Job queue emulation
|
||||
"""
|
||||
|
||||
__tablename__ = "jobs"
|
||||
id: Mapped[int] = mapped_column(primary_key=True)
|
||||
todo: Mapped[Todo] = mapped_column(Enum("RELOAD_CONFIG", name="todo_enum"))
|
||||
args: Mapped[str] = mapped_column()
|
||||
current: Mapped[bool] = mapped_column(insert_default=False)
|
||||
added_at: Mapped[datetime] = mapped_column()
|
||||
|
||||
|
||||
class Task(Base):
|
||||
"""
|
||||
There is one task per check.
|
||||
|
|
|
@ -4,12 +4,13 @@ from hashlib import sha256
|
|||
from typing import List
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from sqlalchemy import asc, desc, func, Select
|
||||
from sqlalchemy import asc, func, Select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from argos import schemas
|
||||
from argos.logging import logger
|
||||
from argos.server.models import Result, Task, ConfigCache, User
|
||||
from argos.server.models import ConfigCache, Job, Result, Task, User
|
||||
from argos.server.settings import read_config
|
||||
|
||||
|
||||
async def list_tasks(db: Session, agent_id: str, limit: int = 100):
|
||||
|
@ -219,12 +220,50 @@ async def has_config_changed(db: Session, config: schemas.Config) -> bool: # py
|
|||
return True
|
||||
|
||||
|
||||
async def update_from_config_later(db: Session, config_file):
|
||||
"""Ask Argos to reload configuration in a recurring task"""
|
||||
jobs = (
|
||||
db.query(Job)
|
||||
.filter(
|
||||
Job.todo == "RELOAD_CONFIG",
|
||||
Job.args == config_file,
|
||||
Job.current == False,
|
||||
)
|
||||
.all()
|
||||
)
|
||||
if jobs:
|
||||
return "There is already a config reloading job in the job queue, for the same file"
|
||||
|
||||
job = Job(todo="RELOAD_CONFIG", args=config_file, added_at=datetime.now())
|
||||
db.add(job)
|
||||
db.commit()
|
||||
|
||||
return "Config reloading has been added in the job queue"
|
||||
|
||||
|
||||
async def process_jobs(db: Session) -> int:
|
||||
"""Process job queue"""
|
||||
jobs = db.query(Job).filter(Job.current == False).all()
|
||||
if jobs:
|
||||
for job in jobs:
|
||||
job.current = True
|
||||
db.commit()
|
||||
if job.todo == "RELOAD_CONFIG":
|
||||
logger.info("Processing job %i: %s %s", job.id, job.todo, job.args)
|
||||
_config = read_config(job.args)
|
||||
changed = await update_from_config(db, _config)
|
||||
logger.info("%i task(s) added", changed["added"])
|
||||
logger.info("%i task(s) deleted", changed["vanished"])
|
||||
db.delete(job)
|
||||
|
||||
db.commit()
|
||||
return len(jobs)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
async def update_from_config(db: Session, config: schemas.Config): # pylint: disable-msg=too-many-branches
|
||||
"""Update tasks from config file"""
|
||||
config_changed = await has_config_changed(db, config)
|
||||
if not config_changed:
|
||||
return {"added": 0, "vanished": 0}
|
||||
|
||||
max_task_id = (
|
||||
db.query(func.max(Task.id).label("max_id")).all() # pylint: disable-msg=not-callable
|
||||
)[0].max_id
|
||||
|
@ -339,7 +378,8 @@ async def update_from_config(db: Session, config: schemas.Config): # pylint: di
|
|||
)
|
||||
db.commit()
|
||||
logger.info(
|
||||
"%i tasks has been removed since not in config file anymore", vanished_tasks
|
||||
"%i task(s) has been removed since not in config file anymore",
|
||||
vanished_tasks,
|
||||
)
|
||||
return {"added": len(tasks), "vanished": vanished_tasks}
|
||||
|
||||
|
@ -369,28 +409,13 @@ async def reschedule_all(db: Session):
|
|||
db.commit()
|
||||
|
||||
|
||||
async def remove_old_results(db: Session, max_results: int):
|
||||
tasks = db.query(Task).all()
|
||||
deleted = 0
|
||||
for task in tasks:
|
||||
# Get the id of the oldest result to keep
|
||||
subquery = (
|
||||
db.query(Result.id)
|
||||
.filter(Result.task_id == task.id)
|
||||
.order_by(desc(Result.id))
|
||||
.limit(max_results)
|
||||
.subquery()
|
||||
)
|
||||
min_id = db.query(func.min(subquery.c.id)).scalar() # pylint: disable-msg=not-callable
|
||||
|
||||
# Delete all the results older than min_id
|
||||
if min_id:
|
||||
deleted += (
|
||||
db.query(Result)
|
||||
.where(Result.id < min_id, Result.task_id == task.id)
|
||||
.delete()
|
||||
)
|
||||
db.commit()
|
||||
async def remove_old_results(db: Session, max_results_age: float):
|
||||
"""Remove old results, base on age"""
|
||||
max_acceptable_time = datetime.now() - timedelta(seconds=max_results_age)
|
||||
deleted = (
|
||||
db.query(Result).filter(Result.submitted_at < max_acceptable_time).delete()
|
||||
)
|
||||
db.commit()
|
||||
|
||||
return deleted
|
||||
|
||||
|
|
|
@ -2,6 +2,8 @@ from fastapi import Depends, HTTPException, Request
|
|||
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
||||
from fastapi_login import LoginManager
|
||||
|
||||
from argos.logging import logger
|
||||
|
||||
auth_scheme = HTTPBearer()
|
||||
|
||||
|
||||
|
@ -33,12 +35,19 @@ async def verify_token(
|
|||
return token
|
||||
|
||||
|
||||
async def find_ldap_user(config, ldap, user: str) -> str | None:
|
||||
async def find_ldap_user(config, ldapobj, user: str) -> str | None:
|
||||
"""Do a LDAP search for user and return its dn"""
|
||||
import ldap
|
||||
import ldap.filter as ldap_filter
|
||||
from ldapurl import LDAP_SCOPE_SUBTREE
|
||||
|
||||
result = ldap.search_s(
|
||||
try:
|
||||
ldapobj.simple_bind_s(config.general.ldap.bind_dn, config.general.ldap.bind_pwd)
|
||||
except ldap.LDAPError as err: # pylint: disable-msg=no-member
|
||||
logger.error("LDAP error: %s", err)
|
||||
return None
|
||||
|
||||
result = ldapobj.search_s(
|
||||
config.general.ldap.user_tree,
|
||||
LDAP_SCOPE_SUBTREE,
|
||||
filterstr=ldap_filter.filter_format(
|
||||
|
|
|
@ -90,6 +90,15 @@ async def post_login(
|
|||
from ldap import INVALID_CREDENTIALS # pylint: disable-msg=no-name-in-module
|
||||
from argos.server.routes.dependencies import find_ldap_user
|
||||
|
||||
invalid_credentials = templates.TemplateResponse(
|
||||
"login.html",
|
||||
{
|
||||
"request": request,
|
||||
"msg": "Sorry, invalid username or bad password. "
|
||||
"Or the LDAP server is unreachable (see logs to verify).",
|
||||
},
|
||||
)
|
||||
|
||||
ldap_dn = await find_ldap_user(config, request.app.state.ldap, username)
|
||||
if ldap_dn is None:
|
||||
return invalid_credentials
|
||||
|
|
|
@ -1,12 +1,26 @@
|
|||
"""Pydantic schemas for server"""
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from yamlinclude import YamlIncludeConstructor
|
||||
from pydantic import ValidationError
|
||||
|
||||
from argos.logging import logger
|
||||
from argos.schemas.config import Config
|
||||
|
||||
|
||||
def read_config(yaml_file):
|
||||
try:
|
||||
config = read_yaml_config(yaml_file)
|
||||
return config
|
||||
except ValidationError as err:
|
||||
logger.error("Errors where found while reading configuration:")
|
||||
for error in err.errors():
|
||||
logger.error("%s is %s", error["loc"], error["type"])
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def read_yaml_config(filename: str) -> Config:
|
||||
parsed = _load_yaml(filename)
|
||||
return Config(**parsed)
|
||||
|
|
|
@ -82,6 +82,48 @@ caption: argos-config.yaml
|
|||
- json-is: '{"foo": "bar", "baz": 42}'
|
||||
```
|
||||
|
||||
## Add data to requests
|
||||
|
||||
If you want to specify query parameters, just put them in the path:
|
||||
|
||||
```{code-block} yaml
|
||||
websites:
|
||||
- domain: "https://contact.example.org"
|
||||
paths:
|
||||
- path: "/index.php?action=show_messages"
|
||||
method: "GET"
|
||||
```
|
||||
|
||||
If you want, for example, to test a form and send some data to it:
|
||||
|
||||
```{code-block} yaml
|
||||
websites:
|
||||
- domain: "https://contact.example.org"
|
||||
paths:
|
||||
- path: "/"
|
||||
method: "POST"
|
||||
request_data:
|
||||
# These are the data sent to the server: title and msg
|
||||
data:
|
||||
title: "Hello my friend"
|
||||
msg: "How are you today?"
|
||||
# To send data as JSON (optional, default is false):
|
||||
is_json: true
|
||||
```
|
||||
|
||||
If you need to send some headers in the request:
|
||||
|
||||
```{code-block} yaml
|
||||
websites:
|
||||
- domain: "https://contact.example.org"
|
||||
paths:
|
||||
- path: "/api/mail"
|
||||
method: "PUT"
|
||||
request_data:
|
||||
headers:
|
||||
Authorization: "Bearer foo-bar-baz"
|
||||
```
|
||||
|
||||
## SSL certificate expiration
|
||||
|
||||
Checks that the SSL certificate will not expire soon. You need to define the thresholds in the configuration, and set the `on-check` option to enable the check.
|
||||
|
|
78
docs/cli.md
78
docs/cli.md
|
@ -60,7 +60,9 @@ Options:
|
|||
--max-tasks INTEGER Number of concurrent tasks this agent can run
|
||||
--wait-time INTEGER Waiting time between two polls on the server
|
||||
(seconds)
|
||||
--log-level [DEBUG|INFO|WARNING|ERROR|CRITICAL]
|
||||
--log-level [debug|info|warning|error|critical]
|
||||
--user-agent TEXT A custom string to append to the User-Agent
|
||||
header
|
||||
--help Show this message and exit.
|
||||
```
|
||||
|
||||
|
@ -82,7 +84,6 @@ Options:
|
|||
--help Show this message and exit.
|
||||
|
||||
Commands:
|
||||
cleandb Clean the database (to run routinely)
|
||||
generate-config Output a self-documented example config file.
|
||||
generate-token Generate a token for agents
|
||||
migrate Run database migrations
|
||||
|
@ -93,7 +94,6 @@ Commands:
|
|||
test-gotify Send a test gotify notification
|
||||
test-mail Send a test email
|
||||
user User management
|
||||
watch-agents Watch agents (to run routinely)
|
||||
```
|
||||
|
||||
<!--[[[end]]]
|
||||
|
@ -150,65 +150,6 @@ Options:
|
|||
-->
|
||||
|
||||
|
||||
### Server cleandb
|
||||
<!--
|
||||
.. [[[cog
|
||||
help(["server", "cleandb", "--help"])
|
||||
.. ]]] -->
|
||||
|
||||
```man
|
||||
Usage: argos server cleandb [OPTIONS]
|
||||
|
||||
Clean the database (to run routinely)
|
||||
|
||||
- Removes old results from the database.
|
||||
- Removes locks from tasks that have been locked for too long.
|
||||
|
||||
Options:
|
||||
--max-results INTEGER Number of results per task to keep
|
||||
--max-lock-seconds INTEGER The number of seconds after which a lock is
|
||||
considered stale, must be higher than 60 (the
|
||||
checks have a timeout value of 60 seconds)
|
||||
--config TEXT Path of the configuration file. If ARGOS_YAML_FILE
|
||||
environment variable is set, its value will be
|
||||
used instead. Default value: argos-config.yaml and
|
||||
/etc/argos/config.yaml as fallback.
|
||||
--help Show this message and exit.
|
||||
```
|
||||
|
||||
<!--[[[end]]]
|
||||
-->
|
||||
|
||||
### Server watch-agents
|
||||
|
||||
<!--
|
||||
.. [[[cog
|
||||
help(["server", "cleandb", "--help"])
|
||||
.. ]]] -->
|
||||
|
||||
```man
|
||||
Usage: argos server cleandb [OPTIONS]
|
||||
|
||||
Clean the database (to run routinely)
|
||||
|
||||
- Removes old results from the database.
|
||||
- Removes locks from tasks that have been locked for too long.
|
||||
|
||||
Options:
|
||||
--max-results INTEGER Number of results per task to keep
|
||||
--max-lock-seconds INTEGER The number of seconds after which a lock is
|
||||
considered stale, must be higher than 60 (the
|
||||
checks have a timeout value of 60 seconds)
|
||||
--config TEXT Path of the configuration file. If ARGOS_YAML_FILE
|
||||
environment variable is set, its value will be
|
||||
used instead. Default value: argos-config.yaml and
|
||||
/etc/argos/config.yaml as fallback.
|
||||
--help Show this message and exit.
|
||||
```
|
||||
|
||||
<!--[[[end]]]
|
||||
-->
|
||||
|
||||
### Server reload-config
|
||||
|
||||
<!--
|
||||
|
@ -222,10 +163,15 @@ Usage: argos server reload-config [OPTIONS]
|
|||
Read tasks’ configuration and add/delete tasks in database if needed
|
||||
|
||||
Options:
|
||||
--config TEXT Path of the configuration file. If ARGOS_YAML_FILE environment
|
||||
variable is set, its value will be used instead. Default value:
|
||||
argos-config.yaml and /etc/argos/config.yaml as fallback.
|
||||
--help Show this message and exit.
|
||||
--config TEXT Path of the configuration file. If ARGOS_YAML_FILE
|
||||
environment variable is set, its value will be used
|
||||
instead. Default value: argos-config.yaml and
|
||||
/etc/argos/config.yaml as fallback.
|
||||
--enqueue / --no-enqueue Let Argos main recurring tasks handle
|
||||
configuration’s loading. It may delay the
|
||||
application of the new configuration up to 2
|
||||
minutes. Default is --no-enqueue
|
||||
--help Show this message and exit.
|
||||
```
|
||||
|
||||
<!--[[[end]]]
|
||||
|
|
|
@ -14,7 +14,9 @@ description: Many thanks to their developers!
|
|||
- [Alembic](https://alembic.sqlalchemy.org) is used for DB migrations;
|
||||
- [Tenacity](https://github.com/jd/tenacity) a small utility to retry a function in case an error occurred;
|
||||
- [Uvicorn](https://www.uvicorn.org/) is the tool used to run our server;
|
||||
- [Gunicorn](https://gunicorn.org/) is the recommended WSGI HTTP server for production.
|
||||
- [Gunicorn](https://gunicorn.org/) is the recommended WSGI HTTP server for production;
|
||||
- [Apprise](https://github.com/caronc/apprise/wiki) allows Argos to send notifications through a lot of channels;
|
||||
- [FastAPI Utilities](https://fastapiutils.github.io/fastapi-utils/) is in charge of recurring tasks.
|
||||
|
||||
## CSS framework
|
||||
|
||||
|
|
|
@ -15,3 +15,7 @@ venv/bin/alembic -c argos/server/migrations/alembic.ini revision \
|
|||
```
|
||||
|
||||
Edit the created file to remove comments and adapt it to make sure the migration is complete (Alembic is not powerful enough to cover all the corner cases).
|
||||
|
||||
In case you want to add an `Enum` type and use it in an existing table, please have a look at [`argos/server/migrations/versions/dcf73fa19fce_specify_check_method.py`](https://framagit.org/framasoft/framaspace/argos/-/blob/main/argos/server/migrations/versions/dcf73fa19fce_specify_check_method.py).
|
||||
|
||||
If you want to add an `Enum` type in a new table, you can do like in [`argos/server/migrations/versions/7d480e6f1112_initial_migrations.py`](https://framagit.org/framasoft/framaspace/argos/-/blob/main/argos/server/migrations/versions/7d480e6f1112_initial_migrations.py)
|
||||
|
|
|
@ -191,18 +191,6 @@ The only requirement is that the agent can reach the server through HTTP or HTTP
|
|||
argos agent http://localhost:8000 "auth-token"
|
||||
```
|
||||
|
||||
## Cleaning the database
|
||||
|
||||
You have to run cleaning task periodically. `argos server cleandb --help` will give you more information on how to do that.
|
||||
|
||||
Here is a crontab example, which will clean the db each hour:
|
||||
|
||||
```bash
|
||||
# Run the cleaning tasks every hour (at minute 7)
|
||||
# Keeps 10 results per task, and remove tasks’ locks older than 1 hour
|
||||
7 * * * * argos server cleandb --max-results 10 --max-lock-seconds 3600
|
||||
```
|
||||
|
||||
## Watch the agents
|
||||
|
||||
In order to be sure that agents are up and communicate with the server, you can periodically run the `argos server watch-agents` command.
|
||||
|
|
|
@ -90,13 +90,13 @@ User=argos
|
|||
WorkingDirectory=/opt/argos/
|
||||
EnvironmentFile=/etc/default/argos-server
|
||||
ExecStartPre=/opt/argos/venv/bin/argos server migrate
|
||||
ExecStartPre=/opt/argos/venv/bin/argos server reload-config
|
||||
ExecStartPre=/opt/argos/venv/bin/argos server reload-config --enqueue
|
||||
ExecStart=/opt/argos/venv/bin/gunicorn "argos.server.main:get_application()" \\
|
||||
--workers \$ARGOS_SERVER_WORKERS \\
|
||||
--worker-class uvicorn.workers.UvicornWorker \\
|
||||
--bind \$ARGOS_SERVER_SOCKET \\
|
||||
--forwarded-allow-ips \$ARGOS_SERVER_FORWARDED_ALLOW_IPS
|
||||
ExecReload=/opt/argos/venv/bin/argos server reload-config
|
||||
ExecReload=/opt/argos/venv/bin/argos server reload-config --enqueue
|
||||
SyslogIdentifier=argos-server
|
||||
|
||||
[Install]
|
||||
|
@ -153,8 +153,7 @@ If all works well, you have to put some cron tasks in `argos` crontab:
|
|||
|
||||
```bash
|
||||
cat <<EOF | crontab -u argos -
|
||||
*/10 * * * * /opt/argos/venv/bin/argos server cleandb --max-lock-seconds 120 --max-results 1200
|
||||
*/10 * * * * /opt/argos/venv/bin/argos server watch-agents --time-without-agent 10
|
||||
*/10 * * * * /opt/argos/venv/bin/argos server watch-agents --time-without-agent 10
|
||||
EOF
|
||||
```
|
||||
|
||||
|
|
|
@ -28,10 +28,12 @@ dependencies = [
|
|||
"durations-nlp>=1.0.1,<2",
|
||||
"fastapi>=0.103,<0.104",
|
||||
"fastapi-login>=1.10.0,<2",
|
||||
"fastapi-utils>=0.8.0,<0.9",
|
||||
"httpx>=0.27.2,<0.28.0",
|
||||
"Jinja2>=3.0,<4",
|
||||
"jsonpointer>=3.0,<4",
|
||||
"passlib>=1.7.4,<2",
|
||||
"psutil>=5.9.8,<6",
|
||||
"psycopg2-binary>=2.9,<3",
|
||||
"pydantic[email]>=2.4,<3",
|
||||
"pydantic-settings>=2.0,<3",
|
||||
|
@ -41,6 +43,7 @@ dependencies = [
|
|||
"sqlalchemy[asyncio]>=2.0,<3",
|
||||
"sqlalchemy-utils>=0.41,<1",
|
||||
"tenacity>=8.2,<9",
|
||||
"typing_inspect>=0.9.0,<1",
|
||||
"uvicorn>=0.23,<1",
|
||||
]
|
||||
|
||||
|
|
|
@ -1,11 +1,21 @@
|
|||
---
|
||||
general:
|
||||
# Except for frequency and recheck_delay settings, changes in general
|
||||
# section of the configuration will need a restart of argos server.
|
||||
db:
|
||||
# The database URL, as defined in SQLAlchemy docs:
|
||||
# https://docs.sqlalchemy.org/en/20/core/engines.html#database-urls
|
||||
url: "sqlite:////tmp/test-argos.db"
|
||||
# Can be "production", "dev", "test".
|
||||
# If not present, default value is "production"
|
||||
env: test
|
||||
# To get a good string for cookie_secret, run:
|
||||
# openssl rand -hex 32
|
||||
cookie_secret: "foo-bar-baz"
|
||||
|
||||
# Default delay for checks.
|
||||
# Can be superseded in domain configuration.
|
||||
# For ex., to run checks every 5 minutes:
|
||||
frequency: "1m"
|
||||
alerts:
|
||||
ok:
|
||||
|
@ -16,12 +26,37 @@ general:
|
|||
- local
|
||||
unknown:
|
||||
- local
|
||||
no_agent:
|
||||
- local
|
||||
service:
|
||||
secrets:
|
||||
# Secrets can be generated using `argos server generate-token`.
|
||||
# You need at least one. Write them as a list, like:
|
||||
# - secret_token
|
||||
- "O4kt8Max9/k0EmHaEJ0CGGYbBNFmK8kOZNIoUk3Kjwc"
|
||||
- "x1T1VZR51pxrv5pQUyzooMG4pMUvHNMhA5y/3cUsYVs="
|
||||
ssl:
|
||||
thresholds:
|
||||
- "1d": critical
|
||||
"5d": warning
|
||||
- "5d": warning
|
||||
|
||||
# Argos will execute some tasks in the background for you
|
||||
# every 2 minutes and needs some configuration for that
|
||||
recurring_tasks:
|
||||
# Maximum age of results
|
||||
# Use m for minutes, h for hours, d for days
|
||||
# w for weeks, M for months, y for years
|
||||
# See https://github.com/timwedde/durations_nlp#scales-reference for details
|
||||
max_results_age: "1d"
|
||||
# Max number of seconds a task can be locked
|
||||
# Minimum value is 61, default is 100
|
||||
max_lock_seconds: 100
|
||||
# Max number of seconds without seeing an agent
|
||||
# before sending an alert
|
||||
# Minimum value is 61, default is 300
|
||||
time_without_agent: 300
|
||||
|
||||
# It's also possible to define the checks in another file
|
||||
# with the include syntax:
|
||||
#
|
||||
websites: !include websites.yaml
|
||||
|
|
|
@ -10,9 +10,9 @@ from argos.server.models import Result, Task, User
|
|||
@pytest.mark.asyncio
|
||||
async def test_remove_old_results(db, ten_tasks): # pylint: disable-msg=redefined-outer-name
|
||||
for _task in ten_tasks:
|
||||
for _ in range(5):
|
||||
for iterator in range(5):
|
||||
result = Result(
|
||||
submitted_at=datetime.now(),
|
||||
submitted_at=datetime.now() - timedelta(seconds=iterator * 2),
|
||||
status="success",
|
||||
context={"foo": "bar"},
|
||||
task=_task,
|
||||
|
@ -24,12 +24,12 @@ async def test_remove_old_results(db, ten_tasks): # pylint: disable-msg=redefi
|
|||
|
||||
# So we have 5 results per tasks
|
||||
assert db.query(Result).count() == 50
|
||||
# Keep only 2
|
||||
deleted = await queries.remove_old_results(db, 2)
|
||||
assert deleted == 30
|
||||
assert db.query(Result).count() == 20
|
||||
# Keep only those newer than 1 second ago
|
||||
deleted = await queries.remove_old_results(db, 6)
|
||||
assert deleted == 20
|
||||
assert db.query(Result).count() == 30
|
||||
for _task in ten_tasks:
|
||||
assert db.query(Result).filter(Result.task == _task).count() == 2
|
||||
assert db.query(Result).filter(Result.task == _task).count() == 3
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
@ -235,6 +235,7 @@ def empty_config():
|
|||
warning=["", ""],
|
||||
critical=["", ""],
|
||||
unknown=["", ""],
|
||||
no_agent=["", ""],
|
||||
),
|
||||
),
|
||||
service=schemas.config.Service(
|
||||
|
@ -243,6 +244,11 @@ def empty_config():
|
|||
]
|
||||
),
|
||||
ssl=schemas.config.SSL(thresholds=[]),
|
||||
recurring_tasks=schemas.config.RecurringTasks(
|
||||
max_results_age="6s",
|
||||
max_lock_seconds=120,
|
||||
time_without_agent=300,
|
||||
),
|
||||
websites=[],
|
||||
)
|
||||
|
||||
|
|
Loading…
Reference in a new issue