Speed up SQL querying by using a DISTINCT statement

This commit is contained in:
Alexis Métaireau 2023-10-18 16:31:47 +02:00
parent 6338d7d9e6
commit d6c048fb5b
9 changed files with 35 additions and 18 deletions

1
.gitignore vendored
View file

@ -2,3 +2,4 @@ __pycache__
*.egg-info *.egg-info
.vscode .vscode
venv venv
.env

View file

@ -6,6 +6,7 @@ Todo:
- [x] Use Postgresql as a database - [x] Use Postgresql as a database
- [x] Expose a simple read-only website. - [x] Expose a simple read-only website.
- [ ] Agents should wait and retry on timeout
- [ ] Last seen agents - [ ] Last seen agents
- [ ] Use background tasks for alerting - [ ] Use background tasks for alerting
- [ ] Add a command to generate new authentication tokens - [ ] Add a command to generate new authentication tokens

View file

@ -71,9 +71,7 @@ def setup_database(app, settings):
logger.debug(f"Using database URL {settings.database_url}") logger.debug(f"Using database URL {settings.database_url}")
if settings.database_url.startswith("sqlite:////tmp"): if settings.database_url.startswith("sqlite:////tmp"):
logger.warning("Using sqlite in /tmp is not recommended for production") logger.warning("Using sqlite in /tmp is not recommended for production")
engine = create_engine( engine = create_engine(settings.database_url)
settings.database_url,
)
app.state.SessionLocal = sessionmaker( app.state.SessionLocal = sessionmaker(
autocommit=False, autoflush=False, bind=engine autocommit=False, autoflush=False, bind=engine
) )

View file

@ -55,7 +55,7 @@ async def create_results(
return {"result_ids": [r.id for r in db_results]} return {"result_ids": [r.id for r in db_results]}
@route.get("/stats", dependencies=[Depends(verify_token)]) @route.get("/stats")
async def get_stats(db: Session = Depends(get_db)): async def get_stats(db: Session = Depends(get_db)):
return { return {
"upcoming_tasks_count": await queries.count_tasks(db, selected=False), "upcoming_tasks_count": await queries.count_tasks(db, selected=False),
@ -64,7 +64,7 @@ async def get_stats(db: Session = Depends(get_db)):
} }
@route.get("/severities", dependencies=[Depends(verify_token)]) @route.get("/severities")
async def get_severity_counts(db: Session = Depends(get_db)): async def get_severity_counts(db: Session = Depends(get_db)):
"""Returns the number of results per severity""" """Returns the number of results per severity"""
counts = await queries.get_severity_counts(db) counts = await queries.get_severity_counts(db)

View file

@ -3,6 +3,7 @@ from urllib.parse import urlparse
from fastapi import APIRouter, Depends, Request from fastapi import APIRouter, Depends, Request
from fastapi.templating import Jinja2Templates from fastapi.templating import Jinja2Templates
from sqlalchemy import desc
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from argos.schemas import Config from argos.schemas import Config
@ -18,24 +19,38 @@ templates = Jinja2Templates(directory="argos/server/templates")
async def read_tasks(request: Request, db: Session = Depends(get_db)): async def read_tasks(request: Request, db: Session = Depends(get_db)):
tasks = db.query(Task).order_by(Task.domain).all() tasks = db.query(Task).order_by(Task.domain).all()
results = (
db.query(Task, Result)
.join(Result)
.distinct(Task.id)
.order_by(Task.id, desc(Result.submitted_at))
.all()
)
domains_severities = defaultdict(list) domains_severities = defaultdict(list)
domains_last_checks = defaultdict(list) domains_last_checks = defaultdict(list)
for task in tasks: for task, result in results:
severity = task.severity or "to-process" severity = result.severity or "to-process"
domain = urlparse(task.url).netloc domain = urlparse(task.url).netloc
domains_severities[domain].append(severity) domains_severities[domain].append(severity)
domains_last_checks[domain].append(task.last_result.submitted_at) domains_last_checks[domain].append(result.submitted_at)
def _max_severity(severities): def _max_severity(severities):
severity_level = {"ok": 1, "warning": 2, "critical": 3, "to-process": "4"} severity_level = {"ok": 1, "warning": 2, "critical": 3, "to-process": 4}
return max(severities, key=severity_level.get) return max(severities, key=severity_level.get)
domains = {key: _max_severity(value) for key, value in domains_severities.items()} domains = [(key, _max_severity(value)) for key, value in domains_severities.items()]
last_checks = {key: max(value) for key, value in domains_last_checks.items()} last_checks = {key: max(value) for key, value in domains_last_checks.items()}
domains.sort(key=lambda x: x[1])
return templates.TemplateResponse( return templates.TemplateResponse(
"index.html", "index.html",
{"request": request, "domains": domains, "last_checks": last_checks}, {
"request": request,
"domains": domains,
"last_checks": last_checks,
"total_task_count": len(tasks),
},
) )

View file

@ -2,5 +2,6 @@
code { code {
white-space: pre-wrap; white-space: pre-wrap;
} }

View file

@ -2,7 +2,7 @@
{% block title %}<h2>{{domain}}</h2>{% endblock %} {% block title %}<h2>{{domain}}</h2>{% endblock %}
{% block content %} {% block content %}
<div id="domains" class="frame"> <div id="domains" class="frame">
<table id="domains-list"> <table id="domains-list" role="grid">
<thead> <thead>
<tr> <tr>
<th>URL</th> <th>URL</th>
@ -15,11 +15,11 @@
<tbody id="domains-body"> <tbody id="domains-body">
{% for task in tasks %} {% for task in tasks %}
<tr title="Completed at {{ task.completed_at }}"> <tr>
<td>{{ task.url }}</td> <td>{{ task.url }}</td>
<td>{{ task.check }}</td> <td>{{ task.check }}</td>
<td>{{ task.expected }}</td> <td>{{ task.expected }}</td>
<td class="status highlight"><a href="/result/{{ task.last_result.id }}">{{ task.status }}</a></td> <td ><a data-tooltip="Completed at {{ task.completed_at }}" href="/result/{{ task.last_result.id }}">{{ task.status }}</a></td>
<td><a href="/task/{{task.id}}/results">view all</a></td> <td><a href="/task/{{task.id}}/results">view all</a></td>
</tr> </tr>
{% endfor %} {% endfor %}

View file

@ -1,8 +1,9 @@
{% extends "base.html" %} {% extends "base.html" %}
{% block content %} {% block content %}
<div id="domains" class="frame"> <div id="domains" class="frame">
<p>{{domains | length}} domains, {{ total_task_count }} tasks</p>
<table id="domains-list"> <table id="domains-list" role="grid">
<thead> <thead>
<tr> <tr>
<th>Domain</th> <th>Domain</th>
@ -11,7 +12,7 @@
</thead> </thead>
<tbody id="domains-body"> <tbody id="domains-body">
{% for domain, status in domains.items() %} {% for (domain, status) in domains %}
<tr> <tr>
<td> <td>
<a href="/domain/{{domain}}"> <a href="/domain/{{domain}}">

View file

@ -2,7 +2,7 @@
{% block title %}<h2>{{ task }}</h2>{% endblock %} {% block title %}<h2>{{ task }}</h2>{% endblock %}
{% block content %} {% block content %}
<code>{{ description }}</code> <code>{{ description }}</code>
<table> <table role="grid">
<thead> <thead>
<tr> <tr>
<th>Submitted at</th> <th>Submitted at</th>