🔀 Merge remote-tracking branch 'origin/develop'

This commit is contained in:
Luc Didry 2025-02-18 13:48:47 +01:00
commit c63093bb2f
No known key found for this signature in database
GPG key ID: EA868E12D0257E3C
25 changed files with 505 additions and 270 deletions

View file

@ -2,6 +2,21 @@
## [Unreleased]
- ✨ — Allow customizing the agent User-Agent header (#78)
- 📝 — Document how to add data to requests (#77)
- ✨ — Cron tasks are no longer needed for DB cleaning (#74 and #75)
- ✨ — Cron tasks are no longer needed for agent watching (#76)
- ✨ — Reload configuration asynchronously (#79)
- 🐛 — Automatically reconnect to LDAP if unreachable (#81)
- 🐛 — Better httpx.RequestError handling (#83)
💥 Warning: there are now new settings to add to your configuration file.
Use `argos server generate-config > /etc/argos/config.yaml-dist` to generate
a new example configuration file.
💥 You don't need cron tasks anymore!
Remove your old cron tasks, as they will now do nothing but generate errors.
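After upgrading, you can quickly verify that your configuration file (including the new settings) still validates. A minimal sketch, assuming Argos is installed and using the `read_config` helper that this commit moves to `argos.server.settings`; the path is a placeholder:

```python
# Minimal sketch: validate an upgraded configuration file.
# read_config() logs the validation errors and exits with status 1 on failure.
from argos.server.settings import read_config

config = read_config("/etc/argos/config.yaml")  # placeholder path, adjust to your setup
print(config.recurring_tasks)  # the new section introduced by this release
```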
## 0.7.4
Date: 2025-02-12

View file

@ -37,11 +37,17 @@ def log_failure(retry_state):
class ArgosAgent: # pylint: disable-msg=too-many-instance-attributes
"""The Argos agent is responsible for running the checks and reporting the results."""
def __init__(self, server: str, auth: str, max_tasks: int, wait_time: int):
def __init__( # pylint: disable-msg=too-many-positional-arguments
self, server: str, auth: str, max_tasks: int, wait_time: int, user_agent: str
):
self.server = server
self.max_tasks = max_tasks
self.wait_time = wait_time
self.auth = auth
if user_agent == "":
self.ua = user_agent
else:
self.ua = f" - {user_agent}"
self._http_client: httpx.AsyncClient | None = None
self._http_client_v4: httpx.AsyncClient | None = None
self._http_client_v6: httpx.AsyncClient | None = None
@ -53,13 +59,13 @@ class ArgosAgent: # pylint: disable-msg=too-many-instance-attributes
async def run(self):
auth_header = {
"Authorization": f"Bearer {self.auth}",
"User-Agent": f"Argos Panoptes agent {VERSION}",
"User-Agent": f"Argos Panoptes agent {VERSION}{self.ua}",
}
self._http_client = httpx.AsyncClient(headers=auth_header)
ua_header = {
"User-Agent": f"Argos Panoptes {VERSION} "
"(about: https://argos-monitoring.framasoft.org/)",
f"(about: https://argos-monitoring.framasoft.org/){self.ua}",
}
self._http_client_v4 = httpx.AsyncClient(
headers=ua_header,
@ -78,6 +84,7 @@ class ArgosAgent: # pylint: disable-msg=too-many-instance-attributes
await asyncio.sleep(self.wait_time)
async def _do_request(self, group: str, details: dict):
logger.debug("_do_request for group %s", group)
headers = {}
if details["request_data"] is not None:
request_data = json.loads(details["request_data"])
@ -114,6 +121,7 @@ class ArgosAgent: # pylint: disable-msg=too-many-instance-attributes
)
except httpx.ReadError:
sleep(1)
logger.warning("httpx.ReadError for group %s, re-emit request", group)
if details["request_data"] is None or request_data["data"] is None:
response = await http_client.request( # type: ignore[union-attr]
method=details["method"], url=details["url"], timeout=60
@ -132,6 +140,9 @@ class ArgosAgent: # pylint: disable-msg=too-many-instance-attributes
data=request_data["data"],
timeout=60,
)
except httpx.RequestError as err:
logger.warning("httpx.RequestError for group %s", group)
response = err
self._res_cache[group] = response
@ -141,15 +152,21 @@ class ArgosAgent: # pylint: disable-msg=too-many-instance-attributes
check_class = get_registered_check(task.check)
check = check_class(task)
result = await check.run(self._res_cache[task.task_group])
status = result.status
context = result.context
response = self._res_cache[task.task_group]
if isinstance(response, httpx.Response):
result = await check.run(response)
status = result.status
context = result.context
else:
status = "failure"
context = SerializableException.from_exception(response)
except Exception as err: # pylint: disable=broad-except
status = "error"
context = SerializableException.from_exception(err)
msg = f"An exception occured when running {_task}. {err.__class__.__name__} : {err}"
logger.error(msg)
return AgentResult(task_id=task.id, status=status, context=context)
async def _get_and_complete_tasks(self):
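As an aside, the new `user_agent` option only changes the suffix of the User-Agent header built above. A small sketch of the resulting strings, with the version and the surrounding class simplified; only the f-string logic comes from this diff:

```python
# Sketch of the User-Agent composition with the new user_agent option.
VERSION = "x.y.z"  # placeholder, not the real Argos version

def build_user_agent(user_agent: str = "") -> str:
    ua = "" if user_agent == "" else f" - {user_agent}"
    return f"Argos Panoptes agent {VERSION}{ua}"

print(build_user_agent())                 # Argos Panoptes agent x.y.z
print(build_user_agent("probe-paris-1"))  # Argos Panoptes agent x.y.z - probe-paris-1
```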

View file

@ -92,7 +92,12 @@ def version():
default="INFO",
type=click.Choice(logging.LOG_LEVELS, case_sensitive=False),
)
def agent(server_url, auth, max_tasks, wait_time, log_level):
@click.option(
"--user-agent",
default="",
help="A custom string to append to the User-Agent header",
)
def agent(server_url, auth, max_tasks, wait_time, log_level, user_agent): # pylint: disable-msg=too-many-positional-arguments
"""Get and run tasks for the provided server. Will wait for new tasks.
Usage: argos agent https://argos.example.org "auth-token-here"
@ -108,7 +113,7 @@ def agent(server_url, auth, max_tasks, wait_time, log_level):
from argos.logging import logger
logger.setLevel(log_level)
agent_ = ArgosAgent(server_url, auth, max_tasks, wait_time)
agent_ = ArgosAgent(server_url, auth, max_tasks, wait_time, user_agent)
asyncio.run(agent_.run())
@ -135,101 +140,6 @@ def start(host, port, config, reload):
uvicorn.run("argos.server:app", host=host, port=port, reload=reload)
def validate_max_lock_seconds(ctx, param, value):
if value <= 60:
raise click.BadParameter("Should be strictly higher than 60")
return value
def validate_max_results(ctx, param, value):
if value <= 0:
raise click.BadParameter("Should be a positive integer")
return value
@server.command()
@click.option(
"--max-results",
default=100,
help="Number of results per task to keep",
callback=validate_max_results,
)
@click.option(
"--max-lock-seconds",
default=100,
help=(
"The number of seconds after which a lock is "
"considered stale, must be higher than 60 "
"(the checks have a timeout value of 60 seconds)"
),
callback=validate_max_lock_seconds,
)
@click.option(
"--config",
default="argos-config.yaml",
help="Path of the configuration file. "
"If ARGOS_YAML_FILE environment variable is set, its value will be used instead. "
"Default value: argos-config.yaml and /etc/argos/config.yaml as fallback.",
envvar="ARGOS_YAML_FILE",
callback=validate_config_access,
)
@coroutine
async def cleandb(max_results, max_lock_seconds, config):
"""Clean the database (to run routinely)
\b
- Removes old results from the database.
- Removes locks from tasks that have been locked for too long.
"""
# It's mandatory to do it before the imports
os.environ["ARGOS_YAML_FILE"] = config
# The imports are made here otherwise the agent will need server configuration files.
from argos.server import queries
db = await get_db()
removed = await queries.remove_old_results(db, max_results)
updated = await queries.release_old_locks(db, max_lock_seconds)
click.echo(f"{removed} results removed")
click.echo(f"{updated} locks released")
@server.command()
@click.option(
"--time-without-agent",
default=5,
help="Time without seeing an agent after which a warning will be issued, in minutes. "
"Default is 5 minutes.",
callback=validate_max_results,
)
@click.option(
"--config",
default="argos-config.yaml",
help="Path of the configuration file. "
"If ARGOS_YAML_FILE environment variable is set, its value will be used instead.",
envvar="ARGOS_YAML_FILE",
callback=validate_config_access,
)
@coroutine
async def watch_agents(time_without_agent, config):
"""Watch agents (to run routinely)
Issues a warning if no agent has been seen by the server for a given time.
"""
# It's mandatory to do it before the imports
os.environ["ARGOS_YAML_FILE"] = config
# The imports are made here otherwise the agent will need server configuration files.
from argos.server import queries
db = await get_db()
agents = await queries.get_recent_agents_count(db, time_without_agent)
if agents == 0:
click.echo(f"No agent has been seen in the last {time_without_agent} minutes.")
sysexit(1)
@server.command(short_help="Load or reload tasks configuration")
@click.option(
"--config",
@ -240,23 +150,40 @@ async def watch_agents(time_without_agent, config):
envvar="ARGOS_YAML_FILE",
callback=validate_config_access,
)
@click.option(
"--enqueue/--no-enqueue",
default=False,
help="Let Argos main recurring tasks handle configurations loading. "
"It may delay the application of the new configuration up to 2 minutes. "
"Default is --no-enqueue",
)
@coroutine
async def reload_config(config):
async def reload_config(config, enqueue):
"""Read tasks configuration and add/delete tasks in database if needed"""
# It's mandatory to do it before the imports
os.environ["ARGOS_YAML_FILE"] = config
# The imports are made here otherwise the agent will need server configuration files.
from argos.server import queries
from argos.server.main import read_config
from argos.server.settings import read_config
_config = read_config(config)
db = await get_db()
changed = await queries.update_from_config(db, _config)
click.echo(f"{changed['added']} tasks added")
click.echo(f"{changed['vanished']} tasks deleted")
config_changed = await queries.has_config_changed(db, _config)
if not config_changed:
click.echo("Config has not change")
else:
if enqueue:
msg = await queries.update_from_config_later(db, config_file=config)
click.echo(msg)
else:
changed = await queries.update_from_config(db, _config)
click.echo(f"{changed['added']} task(s) added")
click.echo(f"{changed['vanished']} task(s) deleted")
@server.command()
@ -570,8 +497,8 @@ async def test_mail(config, domain, severity):
from argos.logging import set_log_level
from argos.server.alerting import notify_by_mail
from argos.server.main import read_config
from argos.server.models import Result, Task
from argos.server.settings import read_config
conf = read_config(config)
@ -586,6 +513,7 @@ async def test_mail(config, domain, severity):
check="body-contains",
expected="foo",
frequency=1,
ip_version=4,
selected_by="test",
selected_at=now,
)
@ -634,8 +562,8 @@ async def test_gotify(config, domain, severity):
from argos.logging import set_log_level
from argos.server.alerting import notify_with_gotify
from argos.server.main import read_config
from argos.server.models import Result, Task
from argos.server.settings import read_config
conf = read_config(config)
@ -650,6 +578,7 @@ async def test_gotify(config, domain, severity):
check="body-contains",
expected="foo",
frequency=1,
ip_version=4,
selected_by="test",
selected_at=now,
)
@ -701,8 +630,8 @@ async def test_apprise(config, domain, severity, apprise_group):
from argos.logging import set_log_level
from argos.server.alerting import notify_with_apprise
from argos.server.main import read_config
from argos.server.models import Result, Task
from argos.server.settings import read_config
conf = read_config(config)
@ -717,6 +646,7 @@ async def test_apprise(config, domain, severity, apprise_group):
check="body-contains",
expected="foo",
frequency=1,
ip_version=4,
selected_by="test",
selected_at=now,
)
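For reference, running `argos agent https://argos.example.org "auth-token-here" --user-agent "probe-paris-1"` is roughly equivalent to the sketch below; the `argos.agent` import path and the numeric values are assumptions for illustration:

```python
# Sketch: what the agent command wires up with the new user_agent parameter.
import asyncio

from argos.agent import ArgosAgent  # assumed module path

agent_ = ArgosAgent(
    server="https://argos.example.org",
    auth="auth-token-here",
    max_tasks=10,   # placeholder values
    wait_time=10,
    user_agent="probe-paris-1",  # appended to the User-Agent header as " - probe-paris-1"
)
# asyncio.run(agent_.run())  # uncomment to start polling the server
```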

View file

@ -81,6 +81,12 @@ general:
# To disable the IPv6 check of domains:
# ipv6: false
# Argos root path
# If not present, default value is ""
# Set it to /foo if you want to use argos at /foo/ instead of /
# on your web server
# root_path: "/foo"
# Which way do you want to be warned when a check goes to that severity?
# "local" emits a message in the server log
# You'll need to configure mail, gotify or apprise below to be able to use
@ -96,11 +102,10 @@ general:
- local
unknown:
- local
# Argos root path
# If not present, default value is ""
# Set it to /foo if you want to use argos at /foo/ instead of /
# on your web server
# root_path: "/foo"
# This alert is triggered when no Argos agent has been seen in a while
# See recurring_tasks.time_without_agent below
no_agent:
- local
# Mail configuration is quite straight-forward
# mail:
# mailfrom: no-reply@example.org
@ -144,6 +149,20 @@ ssl:
- "1d": critical
- "5d": warning
# Argos will execute some tasks in the background for you
# every 2 minutes and needs some configuration for that
recurring_tasks:
# Max number of results per task you want to keep
# Minimum value is 1, default is 100
max_results: 100
# Max number of seconds a task can be locked
# Minimum value is 61, default is 100
max_lock_seconds: 100
# Max number of minutes without seeing an agent
# before sending an alert
# Minimum value is 1, default is 5
time_without_agent: 5
# It's also possible to define the checks in another file
# with the include syntax:
#

View file

@ -14,9 +14,10 @@ logger = logging.getLogger(__name__)
# XXX Does not work ?
def set_log_level(log_level):
def set_log_level(log_level: str, quiet: bool = False):
level = getattr(logging, log_level.upper(), None)
if not isinstance(level, int):
raise ValueError(f"Invalid log level: {log_level}")
logger.setLevel(level=level)
logger.info("Log level set to %s", log_level)
if not quiet:
logger.info("Log level set to %s", log_level)

View file

@ -48,6 +48,36 @@ class SSL(BaseModel):
thresholds: List[Annotated[Tuple[int, Severity], BeforeValidator(parse_threshold)]]
class RecurringTasks(BaseModel):
max_results: int
max_lock_seconds: int
time_without_agent: int
@field_validator("max_results", mode="before")
def parse_max_results(cls, value):
"""Ensure that max_results is higher than 0"""
if value >= 1:
return value
return 100
@field_validator("max_lock_seconds", mode="before")
def parse_max_lock_seconds(cls, value):
"""Ensure that max_lock_seconds is higher or equal to agents requests timeout (60)"""
if value > 60:
return value
return 100
@field_validator("time_without_agent", mode="before")
def parse_time_without_agent(cls, value):
"""Ensure that time_without_agent is at least one minute"""
if value >= 1:
return value
return 5
class WebsiteCheck(BaseModel):
key: str
value: str | List[str] | Dict[str, str]
@ -190,6 +220,7 @@ class Alert(BaseModel):
warning: List[str]
critical: List[str]
unknown: List[str]
no_agent: List[str]
class GotifyUrl(BaseModel):
@ -264,4 +295,5 @@ class Config(BaseModel):
general: General
service: Service
ssl: SSL
recurring_tasks: RecurringTasks
websites: List[Website]
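Note that these validators do not reject out-of-range values: they silently fall back to the defaults. A short sketch of that behaviour:

```python
from argos.schemas.config import RecurringTasks

# Out-of-range values are replaced by the defaults instead of raising an error.
rt = RecurringTasks(max_results=0, max_lock_seconds=30, time_without_agent=0)
print(rt.max_results)         # 100
print(rt.max_lock_seconds)    # 100
print(rt.time_without_agent)  # 5
```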

View file

@ -8,11 +8,25 @@ from typing import Literal
from pydantic import BaseModel, ConfigDict
from argos.schemas.utils import IPVersion, Method
from argos.schemas.utils import IPVersion, Method, Todo
# XXX Refactor using SQLModel to avoid duplication of model data
class Job(BaseModel):
"""Tasks needing to be executed in recurring tasks processing.
Its quite like a job queue."""
id: int
todo: Todo
args: str
current: bool
added_at: datetime
def __str__(self):
return f"Job ({self.id}): {self.todo}"
class Task(BaseModel):
"""A task corresponds to a check to execute"""

View file

@ -6,3 +6,5 @@ IPVersion = Literal["4", "6"]
Method = Literal[
"GET", "HEAD", "POST", "OPTIONS", "CONNECT", "TRACE", "PUT", "PATCH", "DELETE"
]
Todo = Literal["RELOAD_CONFIG"]

View file

@ -74,6 +74,91 @@ def get_icon_from_severity(severity: str) -> str:
return icon
def send_mail(mail: EmailMessage, config: Mail):
"""Send message by mail"""
if config.ssl:
logger.debug("Mail notification: SSL")
context = ssl.create_default_context()
smtp = smtplib.SMTP_SSL(host=config.host, port=config.port, context=context)
else:
smtp = smtplib.SMTP(
host=config.host, # type: ignore
port=config.port,
)
if config.starttls:
logger.debug("Mail notification: STARTTLS")
context = ssl.create_default_context()
smtp.starttls(context=context)
if config.auth is not None:
logger.debug("Mail notification: authentification")
smtp.login(config.auth.login, config.auth.password)
for address in config.addresses:
logger.debug("Sending mail to %s", address)
logger.debug(mail.get_body())
smtp.send_message(mail, to_addrs=address)
def send_gotify_msg(config, payload):
"""Send message with gotify"""
headers = {"accept": "application/json", "content-type": "application/json"}
for url in config:
logger.debug("Sending gotify message(s) to %s", url.url)
for token in url.tokens:
try:
res = httpx.post(
f"{url.url}message",
params={"token": token},
headers=headers,
json=payload,
)
res.raise_for_status()
except httpx.RequestError as err:
logger.error(
"An error occurred while sending a message to %s with token %s",
err.request.url,
token,
)
def no_agent_alert(config: Config):
"""Alert"""
msg = "You should check whats going on with your Argos agents."
twa = config.recurring_tasks.time_without_agent
if twa > 1:
subject = f"No agent has been seen within the last {twa} minutes"
else:
subject = "No agent has been seen within the last minute"
if "local" in config.general.alerts.no_agent:
logger.error(subject)
if config.general.mail is not None and "mail" in config.general.alerts.no_agent:
mail = EmailMessage()
mail["Subject"] = f"[Argos] {subject}"
mail["From"] = config.general.mail.mailfrom
mail.set_content(msg)
send_mail(mail, config.general.mail)
if config.general.gotify is not None and "gotify" in config.general.alerts.no_agent:
priority = 9
payload = {"title": subject, "message": msg, "priority": priority}
send_gotify_msg(config.general.gotify, payload)
if config.general.apprise is not None:
for notif_way in config.general.alerts.no_agent:
if notif_way.startswith("apprise:"):
group = notif_way[8:]
apobj = apprise.Apprise()
for channel in config.general.apprise[group]:
apobj.add(channel)
apobj.notify(title=subject, body=msg)
def handle_alert(config: Config, result, task, severity, old_severity, request): # pylint: disable-msg=too-many-positional-arguments
"""Dispatch alert through configured alert channels"""
@ -163,36 +248,13 @@ See results of task on {request.url_for('get_task_results_view', task_id=task.id
] = f"[Argos] {icon} {urlparse(task.url).netloc} (IPv{task.ip_version}): status {severity}"
mail["From"] = config.mailfrom
mail.set_content(msg)
if config.ssl:
logger.debug("Mail notification: SSL")
context = ssl.create_default_context()
smtp = smtplib.SMTP_SSL(host=config.host, port=config.port, context=context)
else:
smtp = smtplib.SMTP(
host=config.host, # type: ignore
port=config.port,
)
if config.starttls:
logger.debug("Mail notification: STARTTLS")
context = ssl.create_default_context()
smtp.starttls(context=context)
if config.auth is not None:
logger.debug("Mail notification: authentification")
smtp.login(config.auth.login, config.auth.password)
for address in config.addresses:
logger.debug("Sending mail to %s", address)
logger.debug(msg)
smtp.send_message(mail, to_addrs=address)
send_mail(mail, config)
def notify_with_gotify( # pylint: disable-msg=too-many-positional-arguments
result, task, severity: str, old_severity: str, config: List[GotifyUrl], request
) -> None:
logger.debug("Will send gotify notification")
headers = {"accept": "application/json", "content-type": "application/json"}
icon = get_icon_from_severity(severity)
priority = 9
@ -228,20 +290,4 @@ See results of task on <{request.url_for('get_task_results_view', task_id=task.i
payload = {"title": subject, "message": msg, "priority": priority, "extras": extras}
for url in config:
logger.debug("Sending gotify message(s) to %s", url.url)
for token in url.tokens:
try:
res = httpx.post(
f"{url.url}message",
params={"token": token},
headers=headers,
json=payload,
)
res.raise_for_status()
except httpx.RequestError as err:
logger.error(
"An error occurred while sending a message to %s with token %s",
err.request.url,
token,
)
send_gotify_msg(config, payload)
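If you want to test your `no_agent` alert channels without waiting for agents to disappear, here is a minimal sketch; the config path is a placeholder, and both `no_agent_alert` and `read_config` come from this diff:

```python
# Sketch: trigger the "no agent seen" alert manually to test the channels
# configured under general.alerts.no_agent.
from argos.server.alerting import no_agent_alert
from argos.server.settings import read_config

config = read_config("argos-config.yaml")  # placeholder path
no_agent_alert(config)
```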

View file

@ -1,19 +1,19 @@
import os
import sys
from contextlib import asynccontextmanager
from pathlib import Path
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from fastapi_login import LoginManager
from pydantic import ValidationError
from fastapi_utils.tasks import repeat_every
from sqlalchemy import create_engine, event
from sqlalchemy.orm import sessionmaker
from argos.logging import logger
from argos.logging import logger, set_log_level
from argos.server import models, routes, queries
from argos.server.alerting import no_agent_alert
from argos.server.exceptions import NotAuthenticatedException, auth_exception_handler
from argos.server.settings import read_yaml_config
from argos.server.settings import read_config
def get_application() -> FastAPI:
@ -39,9 +39,7 @@ def get_application() -> FastAPI:
if config.general.ldap is not None:
import ldap
l = ldap.initialize(config.general.ldap.uri)
l.simple_bind_s(config.general.ldap.bind_dn, config.general.ldap.bind_pwd)
appli.state.ldap = l
appli.state.ldap = ldap.initialize(config.general.ldap.uri)
@appli.state.manager.user_loader()
async def query_user(user: str) -> None | str | models.User:
@ -71,17 +69,6 @@ async def connect_to_db(appli):
return appli.state.db
def read_config(yaml_file):
try:
config = read_yaml_config(yaml_file)
return config
except ValidationError as err:
logger.error("Errors where found while reading configuration:")
for error in err.errors():
logger.error("%s is %s", error["loc"], error["type"])
sys.exit(1)
def setup_database(appli):
config = appli.state.config
db_url = str(config.general.db.url)
@ -126,8 +113,31 @@ def create_manager(cookie_secret: str) -> LoginManager:
)
@repeat_every(seconds=120, logger=logger)
async def recurring_tasks() -> None:
"""Recurring DB cleanup and watch-agents tasks"""
set_log_level("info", quiet=True)
logger.info("Start background recurring tasks")
with app.state.SessionLocal() as db:
config = app.state.config.recurring_tasks
removed = await queries.remove_old_results(db, config.max_results)
logger.info("%i result(s) removed", removed)
updated = await queries.release_old_locks(db, config.max_lock_seconds)
logger.info("%i lock(s) released", updated)
agents = await queries.get_recent_agents_count(db, config.time_without_agent)
if agents == 0:
no_agent_alert(app.state.config)
processed_jobs = await queries.process_jobs(db)
logger.info("%i job(s) processed", processed_jobs)
logger.info("Background recurring tasks ended")
@asynccontextmanager
async def lifespan(appli):
async def lifespan(appli: FastAPI):
"""Server start and stop actions
Set up the database connection, then close it at shutdown.
@ -142,6 +152,7 @@ async def lifespan(appli):
"There is no tasks in the database. "
'Please launch the command "argos server reload-config"'
)
await recurring_tasks()
yield

View file

@ -0,0 +1,43 @@
"""Add job queue
Revision ID: 5f6cb30db996
Revises: bd4b4962696a
Create Date: 2025-02-17 16:56:36.673511
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = "5f6cb30db996"
down_revision: Union[str, None] = "bd4b4962696a"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
enum = sa.Enum(
"RELOAD_CONFIG",
name="todo_enum",
create_type=False,
)
enum.create(op.get_bind(), checkfirst=True)
op.create_table(
"jobs",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("todo", enum, nullable=False),
sa.Column("args", sa.String(), nullable=False),
sa.Column(
"current", sa.Boolean(), server_default=sa.sql.false(), nullable=False
),
sa.Column("added_at", sa.DateTime(), nullable=False),
sa.PrimaryKeyConstraint("id"),
)
def downgrade() -> None:
op.drop_table("jobs")
sa.Enum(name="todo_enum").drop(op.get_bind(), checkfirst=True)

View file

@ -14,7 +14,7 @@ from sqlalchemy.schema import Index
from argos.checks import BaseCheck, get_registered_check
from argos.schemas import WebsiteCheck
from argos.schemas.utils import IPVersion, Method
from argos.schemas.utils import IPVersion, Method, Todo
def compute_task_group(context) -> str:
@ -33,6 +33,19 @@ class Base(DeclarativeBase):
type_annotation_map = {List[WebsiteCheck]: JSON, dict: JSON}
class Job(Base):
"""
Job queue emulation
"""
__tablename__ = "jobs"
id: Mapped[int] = mapped_column(primary_key=True)
todo: Mapped[Todo] = mapped_column(Enum("RELOAD_CONFIG", name="todo_enum"))
args: Mapped[str] = mapped_column()
current: Mapped[bool] = mapped_column(insert_default=False)
added_at: Mapped[datetime] = mapped_column()
class Task(Base):
"""
There is one task per check.

View file

@ -9,7 +9,8 @@ from sqlalchemy.orm import Session
from argos import schemas
from argos.logging import logger
from argos.server.models import Result, Task, ConfigCache, User
from argos.server.models import ConfigCache, Job, Result, Task, User
from argos.server.settings import read_config
async def list_tasks(db: Session, agent_id: str, limit: int = 100):
@ -219,12 +220,50 @@ async def has_config_changed(db: Session, config: schemas.Config) -> bool: # py
return True
async def update_from_config_later(db: Session, config_file):
"""Ask Argos to reload configuration in a recurring task"""
jobs = (
db.query(Job)
.filter(
Job.todo == "RELOAD_CONFIG",
Job.args == config_file,
Job.current == False,
)
.all()
)
if jobs:
return "There is already a config reloading job in the job queue, for the same file"
job = Job(todo="RELOAD_CONFIG", args=config_file, added_at=datetime.now())
db.add(job)
db.commit()
return "Config reloading has been added in the job queue"
async def process_jobs(db: Session) -> int:
"""Process job queue"""
jobs = db.query(Job).filter(Job.current == False).all()
if jobs:
for job in jobs:
job.current = True
db.commit()
if job.todo == "RELOAD_CONFIG":
logger.info("Processing job %i: %s %s", job.id, job.todo, job.args)
_config = read_config(job.args)
changed = await update_from_config(db, _config)
logger.info("%i task(s) added", changed["added"])
logger.info("%i task(s) deleted", changed["vanished"])
db.delete(job)
db.commit()
return len(jobs)
return 0
async def update_from_config(db: Session, config: schemas.Config): # pylint: disable-msg=too-many-branches
"""Update tasks from config file"""
config_changed = await has_config_changed(db, config)
if not config_changed:
return {"added": 0, "vanished": 0}
max_task_id = (
db.query(func.max(Task.id).label("max_id")).all() # pylint: disable-msg=not-callable
)[0].max_id
@ -339,7 +378,8 @@ async def update_from_config(db: Session, config: schemas.Config): # pylint: di
)
db.commit()
logger.info(
"%i tasks has been removed since not in config file anymore", vanished_tasks
"%i task(s) has been removed since not in config file anymore",
vanished_tasks,
)
return {"added": len(tasks), "vanished": vanished_tasks}

View file

@ -2,6 +2,8 @@ from fastapi import Depends, HTTPException, Request
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from fastapi_login import LoginManager
from argos.logging import logger
auth_scheme = HTTPBearer()
@ -33,12 +35,19 @@ async def verify_token(
return token
async def find_ldap_user(config, ldap, user: str) -> str | None:
async def find_ldap_user(config, ldapobj, user: str) -> str | None:
"""Do a LDAP search for user and return its dn"""
import ldap
import ldap.filter as ldap_filter
from ldapurl import LDAP_SCOPE_SUBTREE
result = ldap.search_s(
try:
ldapobj.simple_bind_s(config.general.ldap.bind_dn, config.general.ldap.bind_pwd)
except ldap.LDAPError as err: # pylint: disable-msg=no-member
logger.error("LDAP error: %s", err)
return None
result = ldapobj.search_s(
config.general.ldap.user_tree,
LDAP_SCOPE_SUBTREE,
filterstr=ldap_filter.filter_format(

View file

@ -90,6 +90,15 @@ async def post_login(
from ldap import INVALID_CREDENTIALS # pylint: disable-msg=no-name-in-module
from argos.server.routes.dependencies import find_ldap_user
invalid_credentials = templates.TemplateResponse(
"login.html",
{
"request": request,
"msg": "Sorry, invalid username or bad password. "
"Or the LDAP server is unreachable (see logs to verify).",
},
)
ldap_dn = await find_ldap_user(config, request.app.state.ldap, username)
if ldap_dn is None:
return invalid_credentials

View file

@ -1,12 +1,26 @@
"""Pydantic schemas for server"""
import sys
from pathlib import Path
import yaml
from yamlinclude import YamlIncludeConstructor
from pydantic import ValidationError
from argos.logging import logger
from argos.schemas.config import Config
def read_config(yaml_file):
try:
config = read_yaml_config(yaml_file)
return config
except ValidationError as err:
logger.error("Errors where found while reading configuration:")
for error in err.errors():
logger.error("%s is %s", error["loc"], error["type"])
sys.exit(1)
def read_yaml_config(filename: str) -> Config:
parsed = _load_yaml(filename)
return Config(**parsed)

View file

@ -82,6 +82,48 @@ caption: argos-config.yaml
- json-is: '{"foo": "bar", "baz": 42}'
```
## Add data to requests
If you want to specify query parameters, just put them in the path:
```{code-block} yaml
websites:
- domain: "https://contact.example.org"
paths:
- path: "/index.php?action=show_messages"
method: "GET"
```
If you want, for example, to test a form and send some data to it:
```{code-block} yaml
websites:
- domain: "https://contact.example.org"
paths:
- path: "/"
method: "POST"
request_data:
# These are the data sent to the server: title and msg
data:
title: "Hello my friend"
msg: "How are you today?"
# To send data as JSON (optional, default is false):
is_json: true
```
If you need to send some headers in the request:
```{code-block} yaml
websites:
- domain: "https://contact.example.org"
paths:
- path: "/api/mail"
method: "PUT"
request_data:
headers:
Authorization: "Bearer foo-bar-baz"
```
## SSL certificate expiration
Checks that the SSL certificate will not expire soon. You need to define the thresholds in the configuration, and set the `on-check` option to enable the check.

View file

@ -61,6 +61,8 @@ Options:
--wait-time INTEGER Waiting time between two polls on the server
(seconds)
--log-level [DEBUG|INFO|WARNING|ERROR|CRITICAL]
--user-agent TEXT A custom string to append to the User-Agent
header
--help Show this message and exit.
```
@ -82,7 +84,6 @@ Options:
--help Show this message and exit.
Commands:
cleandb Clean the database (to run routinely)
generate-config Output a self-documented example config file.
generate-token Generate a token for agents
migrate Run database migrations
@ -93,7 +94,6 @@ Commands:
test-gotify Send a test gotify notification
test-mail Send a test email
user User management
watch-agents Watch agents (to run routinely)
```
<!--[[[end]]]
@ -150,65 +150,6 @@ Options:
-->
### Server cleandb
<!--
.. [[[cog
help(["server", "cleandb", "--help"])
.. ]]] -->
```man
Usage: argos server cleandb [OPTIONS]
Clean the database (to run routinely)
- Removes old results from the database.
- Removes locks from tasks that have been locked for too long.
Options:
--max-results INTEGER Number of results per task to keep
--max-lock-seconds INTEGER The number of seconds after which a lock is
considered stale, must be higher than 60 (the
checks have a timeout value of 60 seconds)
--config TEXT Path of the configuration file. If ARGOS_YAML_FILE
environment variable is set, its value will be
used instead. Default value: argos-config.yaml and
/etc/argos/config.yaml as fallback.
--help Show this message and exit.
```
<!--[[[end]]]
-->
### Server watch-agents
<!--
.. [[[cog
help(["server", "cleandb", "--help"])
.. ]]] -->
```man
Usage: argos server cleandb [OPTIONS]
Clean the database (to run routinely)
- Removes old results from the database.
- Removes locks from tasks that have been locked for too long.
Options:
--max-results INTEGER Number of results per task to keep
--max-lock-seconds INTEGER The number of seconds after which a lock is
considered stale, must be higher than 60 (the
checks have a timeout value of 60 seconds)
--config TEXT Path of the configuration file. If ARGOS_YAML_FILE
environment variable is set, its value will be
used instead. Default value: argos-config.yaml and
/etc/argos/config.yaml as fallback.
--help Show this message and exit.
```
<!--[[[end]]]
-->
### Server reload-config
<!--
@ -222,10 +163,15 @@ Usage: argos server reload-config [OPTIONS]
Read tasks configuration and add/delete tasks in database if needed
Options:
--config TEXT Path of the configuration file. If ARGOS_YAML_FILE environment
variable is set, its value will be used instead. Default value:
argos-config.yaml and /etc/argos/config.yaml as fallback.
--help Show this message and exit.
--config TEXT Path of the configuration file. If ARGOS_YAML_FILE
environment variable is set, its value will be used
instead. Default value: argos-config.yaml and
/etc/argos/config.yaml as fallback.
--enqueue / --no-enqueue  Let Argos' main recurring tasks handle
configuration loading. It may delay the
application of the new configuration up to 2
minutes. Default is --no-enqueue
--help Show this message and exit.
```
<!--[[[end]]]

View file

@ -14,7 +14,9 @@ description: Many thanks to their developers!
- [Alembic](https://alembic.sqlalchemy.org) is used for DB migrations;
- [Tenacity](https://github.com/jd/tenacity), a small utility to retry a function in case an error occurred;
- [Uvicorn](https://www.uvicorn.org/) is the tool used to run our server;
- [Gunicorn](https://gunicorn.org/) is the recommended WSGI HTTP server for production.
- [Gunicorn](https://gunicorn.org/) is the recommended WSGI HTTP server for production;
- [Apprise](https://github.com/caronc/apprise/wiki) allows Argos to send notifications through a lot of channels;
- [FastAPI Utilities](https://fastapiutils.github.io/fastapi-utils/) is in charge of recurring tasks.
## CSS framework

View file

@ -15,3 +15,5 @@ venv/bin/alembic -c argos/server/migrations/alembic.ini revision \
```
Edit the created file to remove comments and adapt it to make sure the migration is complete (Alembic is not powerful enough to cover all the corner cases).
In case you want to add an `Enum` type, please have a look at [argos/server/migrations/versions/dcf73fa19fce_specify_check_method.py](https://framagit.org/framasoft/framaspace/argos/-/blob/main/argos/server/migrations/versions/dcf73fa19fce_specify_check_method.py).

View file

@ -191,18 +191,6 @@ The only requirement is that the agent can reach the server through HTTP or HTTP
argos agent http://localhost:8000 "auth-token"
```
## Cleaning the database
You have to run cleaning task periodically. `argos server cleandb --help` will give you more information on how to do that.
Here is a crontab example, which will clean the db each hour:
```bash
# Run the cleaning tasks every hour (at minute 7)
# Keeps 10 results per task, and remove tasks locks older than 1 hour
7 * * * * argos server cleandb --max-results 10 --max-lock-seconds 3600
```
## Watch the agents
In order to be sure that agents are up and communicate with the server, you can periodically run the `argos server watch-agents` command.

View file

@ -96,7 +96,7 @@ ExecStart=/opt/argos/venv/bin/gunicorn "argos.server.main:get_application()" \\
--worker-class uvicorn.workers.UvicornWorker \\
--bind \$ARGOS_SERVER_SOCKET \\
--forwarded-allow-ips \$ARGOS_SERVER_FORWARDED_ALLOW_IPS
ExecReload=/opt/argos/venv/bin/argos server reload-config
ExecReload=/opt/argos/venv/bin/argos server reload-config --enqueue
SyslogIdentifier=argos-server
[Install]
@ -153,8 +153,7 @@ If all works well, you have to put some cron tasks in `argos` crontab:
```bash
cat <<EOF | crontab -u argos -
*/10 * * * * /opt/argos/venv/bin/argos server cleandb --max-lock-seconds 120 --max-results 1200
*/10 * * * * /opt/argos/venv/bin/argos server watch-agents --time-without-agent 10
*/10 * * * * /opt/argos/venv/bin/argos server watch-agents --time-without-agent 10
EOF
```

View file

@ -28,6 +28,7 @@ dependencies = [
"durations-nlp>=1.0.1,<2",
"fastapi>=0.103,<0.104",
"fastapi-login>=1.10.0,<2",
"fastapi-utils>=0.8.0,<0.9",
"httpx>=0.27.2,<0.28.0",
"Jinja2>=3.0,<4",
"jsonpointer>=3.0,<4",
@ -41,6 +42,7 @@ dependencies = [
"sqlalchemy[asyncio]>=2.0,<3",
"sqlalchemy-utils>=0.41,<1",
"tenacity>=8.2,<9",
"typing_inspect>=0.9.0,<1",
"uvicorn>=0.23,<1",
]

View file

@ -1,11 +1,21 @@
---
general:
# Except for frequency and recheck_delay settings, changes in the general
# section of the configuration require restarting the Argos server.
db:
# The database URL, as defined in SQLAlchemy docs:
# https://docs.sqlalchemy.org/en/20/core/engines.html#database-urls
url: "sqlite:////tmp/test-argos.db"
# Can be "production", "dev", "test".
# If not present, default value is "production"
env: test
# To get a good string for cookie_secret, run:
# openssl rand -hex 32
cookie_secret: "foo-bar-baz"
# Default delay for checks.
# Can be superseded in domain configuration.
# For ex., to run checks every 5 minutes:
frequency: "1m"
alerts:
ok:
@ -16,12 +26,35 @@ general:
- local
unknown:
- local
no_agent:
- local
service:
secrets:
# Secrets can be generated using `argos server generate-token`.
# You need at least one. Write them as a list, like:
# - secret_token
- "O4kt8Max9/k0EmHaEJ0CGGYbBNFmK8kOZNIoUk3Kjwc"
- "x1T1VZR51pxrv5pQUyzooMG4pMUvHNMhA5y/3cUsYVs="
ssl:
thresholds:
- "1d": critical
"5d": warning
- "5d": warning
# Argos will execute some tasks in the background for you
# every 2 minutes and needs some configuration for that
recurring_tasks:
# Max number of results per task you want to keep
# Minimum value is 1, default is 100
max_results: 100
# Max number of seconds a task can be locked
# Minimum value is 61, default is 100
max_lock_seconds: 100
# Max number of minutes without seeing an agent
# before sending an alert
# Minimum value is 1, default is 5
time_without_agent: 300
# It's also possible to define the checks in another file
# with the include syntax:
#
websites: !include websites.yaml

View file

@ -235,6 +235,7 @@ def empty_config():
warning=["", ""],
critical=["", ""],
unknown=["", ""],
no_agent=["", ""],
),
),
service=schemas.config.Service(
@ -243,6 +244,11 @@ def empty_config():
]
),
ssl=schemas.config.SSL(thresholds=[]),
recurring_tasks=schemas.config.RecurringTasks(
max_results=100,
max_lock_seconds=120,
time_without_agent=300,
),
websites=[],
)