Speed up SQL querying by using a DISTINCT statement

This commit is contained in:
Alexis Métaireau 2023-10-18 16:31:47 +02:00
parent 6338d7d9e6
commit d6c048fb5b
9 changed files with 35 additions and 18 deletions

3
.gitignore vendored
View file

@ -1,4 +1,5 @@
__pycache__
*.egg-info
.vscode
venv
venv
.env

View file

@ -6,6 +6,7 @@ Todo:
- [x] Use Postgresql as a database
- [x] Expose a simple read-only website.
- [ ] Agents should wait and retry on timeout
- [ ] Last seen agents
- [ ] Use background tasks for alerting
- [ ] Add a command to generate new authentication tokens

View file

@ -71,9 +71,7 @@ def setup_database(app, settings):
logger.debug(f"Using database URL {settings.database_url}")
if settings.database_url.startswith("sqlite:////tmp"):
logger.warning("Using sqlite in /tmp is not recommended for production")
engine = create_engine(
settings.database_url,
)
engine = create_engine(settings.database_url)
app.state.SessionLocal = sessionmaker(
autocommit=False, autoflush=False, bind=engine
)

View file

@ -55,7 +55,7 @@ async def create_results(
return {"result_ids": [r.id for r in db_results]}
@route.get("/stats", dependencies=[Depends(verify_token)])
@route.get("/stats")
async def get_stats(db: Session = Depends(get_db)):
return {
"upcoming_tasks_count": await queries.count_tasks(db, selected=False),
@ -64,7 +64,7 @@ async def get_stats(db: Session = Depends(get_db)):
}
@route.get("/severities", dependencies=[Depends(verify_token)])
@route.get("/severities")
async def get_severity_counts(db: Session = Depends(get_db)):
"""Returns the number of results per severity"""
counts = await queries.get_severity_counts(db)

View file

@ -3,6 +3,7 @@ from urllib.parse import urlparse
from fastapi import APIRouter, Depends, Request
from fastapi.templating import Jinja2Templates
from sqlalchemy import desc
from sqlalchemy.orm import Session
from argos.schemas import Config
@ -18,24 +19,38 @@ templates = Jinja2Templates(directory="argos/server/templates")
async def read_tasks(request: Request, db: Session = Depends(get_db)):
tasks = db.query(Task).order_by(Task.domain).all()
results = (
db.query(Task, Result)
.join(Result)
.distinct(Task.id)
.order_by(Task.id, desc(Result.submitted_at))
.all()
)
domains_severities = defaultdict(list)
domains_last_checks = defaultdict(list)
for task in tasks:
severity = task.severity or "to-process"
for task, result in results:
severity = result.severity or "to-process"
domain = urlparse(task.url).netloc
domains_severities[domain].append(severity)
domains_last_checks[domain].append(task.last_result.submitted_at)
domains_last_checks[domain].append(result.submitted_at)
def _max_severity(severities):
severity_level = {"ok": 1, "warning": 2, "critical": 3, "to-process": "4"}
severity_level = {"ok": 1, "warning": 2, "critical": 3, "to-process": 4}
return max(severities, key=severity_level.get)
domains = {key: _max_severity(value) for key, value in domains_severities.items()}
domains = [(key, _max_severity(value)) for key, value in domains_severities.items()]
last_checks = {key: max(value) for key, value in domains_last_checks.items()}
domains.sort(key=lambda x: x[1])
return templates.TemplateResponse(
"index.html",
{"request": request, "domains": domains, "last_checks": last_checks},
{
"request": request,
"domains": domains,
"last_checks": last_checks,
"total_task_count": len(tasks),
},
)

View file

@ -2,5 +2,6 @@
code {
white-space: pre-wrap;
}

View file

@ -2,7 +2,7 @@
{% block title %}<h2>{{domain}}</h2>{% endblock %}
{% block content %}
<div id="domains" class="frame">
<table id="domains-list">
<table id="domains-list" role="grid">
<thead>
<tr>
<th>URL</th>
@ -15,11 +15,11 @@
<tbody id="domains-body">
{% for task in tasks %}
<tr title="Completed at {{ task.completed_at }}">
<tr>
<td>{{ task.url }}</td>
<td>{{ task.check }}</td>
<td>{{ task.expected }}</td>
<td class="status highlight"><a href="/result/{{ task.last_result.id }}">{{ task.status }}</a></td>
<td ><a data-tooltip="Completed at {{ task.completed_at }}" href="/result/{{ task.last_result.id }}">{{ task.status }}</a></td>
<td><a href="/task/{{task.id}}/results">view all</a></td>
</tr>
{% endfor %}

View file

@ -1,8 +1,9 @@
{% extends "base.html" %}
{% block content %}
<div id="domains" class="frame">
<p>{{domains | length}} domains, {{ total_task_count }} tasks</p>
<table id="domains-list">
<table id="domains-list" role="grid">
<thead>
<tr>
<th>Domain</th>
@ -11,7 +12,7 @@
</thead>
<tbody id="domains-body">
{% for domain, status in domains.items() %}
{% for (domain, status) in domains %}
<tr>
<td>
<a href="/domain/{{domain}}">

View file

@ -2,7 +2,7 @@
{% block title %}<h2>{{ task }}</h2>{% endblock %}
{% block content %}
<code>{{ description }}</code>
<table>
<table role="grid">
<thead>
<tr>
<th>Submitted at</th>