blog.notmyidea.org/plugins/simplereader.py
2025-01-15 15:56:09 +01:00

204 lines
6.9 KiB
Python

import locale
import os.path
import re
from collections import defaultdict
from datetime import datetime
from pathlib import Path
from markdown.preprocessors import Preprocessor
from markdown_include.include import MarkdownInclude
from pelican import signals
from pelican.readers import Markdown, MarkdownReader, pelican_open
from pelican.utils import get_date, slugify
def set_locale(dest):
    """Switch the process-wide ``LC_TIME`` locale to *dest*.

    The UTF-8 variant (``"<dest>.UTF8"``) is tried first; when the system
    does not provide it, the bare locale name is used instead.

    Args:
        dest: Locale name without an encoding suffix, e.g. ``"fr_FR"``.

    Raises:
        locale.Error: If neither variant is available on this system.
    """
    try:
        locale.setlocale(locale.LC_TIME, f"{dest}.UTF8")
    except locale.Error:
        # setlocale() reports an unavailable locale with locale.Error; any
        # other exception is a real bug and is left to propagate instead of
        # being hidden behind the fallback.
        locale.setlocale(locale.LC_TIME, f"{dest}")
# The worklog headings are parsed and rendered with strptime/strftime
# ("%B", "%A"), which follow LC_TIME; switch it to French when the module is
# imported.
set_locale("fr_FR")
class WorklogPreprocessor(Preprocessor):
    """Markdown preprocessor that collects statistics from worklog headings.

    Every line starting with ``##`` is expected to be a day title such as
    ``## Lundi 13 janvier 2025 (8h, 2h bénévoles, fun 4/5)``. The hours and
    the happiness rating are recorded on the instance, and the heading is
    rewritten so that it only shows the date.

    Month and day names are parsed and formatted with ``strptime`` /
    ``strftime``, so ``LC_TIME`` must match the language used in the titles
    before ``run`` is called.
    """

    # Number of hours counted for each day of ``total_days`` in compute_data.
    HOURS_PER_DAY = 7

    pattern = re.compile(
        r"""
        (?:(\w+)\s+)? # Day name
        (\d{1,2})\s+ # Day number
        ([\wéû]+)\s+ # Month name
        (\d{4})\s+ # Year
        \(
        (\d{1,2})h # Hours (mandatory)
        (?:\s+facturées)? # Optionally 'facturées', if not present, assume hours are 'facturées'
        (?:,\s*(\d{1,2})h\s*bénévoles)? # Optionally 'volunteer hours 'bénévoles'
        ,? # An optional comma
        \s* # Optional whitespace
        (?:fun\s+)? # Optionally 'fun' (text) followed by whitespace
        (\d)/5 # Happiness rating (mandatory, always present)
        \) # Closing parenthesis
        """,
        re.VERBOSE | re.UNICODE,
    )

    def __init__(self, *args, **kwargs):
        """Create the preprocessor with empty statistics.

        All arguments are forwarded unchanged to ``Preprocessor.__init__``.
        """
        # "YYYY-MM-DD" -> {"payed_hours", "volunteer_hours", "happiness"}
        self.data = {}
        # "YYYY/MM" -> {"payed": int, "volunteered": int}
        self.monthly_hours = defaultdict(lambda: defaultdict(int))
        super().__init__(*args, **kwargs)

    def run(self, lines):
        """Record the statistics of each ``##`` heading and simplify it.

        Args:
            lines: The document, as a list of lines.

        Returns:
            A new list of lines in which every ``##`` heading has been
            replaced by ``## 🗓️ <weekday> <day> <month> <year>``; all other
            lines are passed through unchanged.

        Raises:
            ValueError: If a line starting with ``##`` does not match
                ``pattern``, or if its date cannot be parsed.
        """
        new_lines = []
        for line in lines:
            if not line.startswith("##"):
                new_lines.append(line)
                continue

            match = self.pattern.search(line)
            if not match:
                raise ValueError("Unable to parse worklog title", line)

            (
                _day_of_week,  # Unused: the weekday is recomputed from the date.
                day,
                month,
                year,
                payed_hours,
                volunteer_hours,
                happiness,
            ) = match.groups()

            # The volunteer-hours group is optional and is None when absent.
            volunteer_hours = int(volunteer_hours) if volunteer_hours else 0
            payed_hours = int(payed_hours)
            happiness = int(happiness)

            # "%B" is locale dependent: the month name of the title is
            # interpreted according to the current LC_TIME setting.
            date = datetime.strptime(f"{day} {month} {year}", "%d %B %Y")

            # NOTE(review): a second heading for the same day replaces the
            # entry below while monthly_hours keeps adding up, so the totals
            # of compute_data and monthly_hours can diverge in that case.
            self.data[date.strftime("%Y-%m-%d")] = {
                "payed_hours": payed_hours,
                "volunteer_hours": volunteer_hours,
                "happiness": happiness,
            }

            current_month = date.strftime("%Y/%m")
            self.monthly_hours[current_month]["payed"] += payed_hours
            self.monthly_hours[current_month]["volunteered"] += volunteer_hours

            # Replace the line with just the date.
            displayed_date = date.strftime("%A %d %B %Y")
            new_lines.append(f"## 🗓️ {displayed_date}")
        return new_lines

    def compute_data(self, metadata):
        """Aggregate everything collected by ``run``.

        This is meant to be called once, after the document has been parsed.

        Args:
            metadata: Mapping of the document metadata. When it contains
                ``"total_days"``, the result also holds ``total_hours``
                (``total_days * HOURS_PER_DAY``) and ``percentage``, the
                rounded share of ``total_hours`` covered by the payed hours.

        Returns:
            A dict with the keys ``data``, ``payed_hours``,
            ``volunteer_hours``, ``monthly_hours`` and ``template`` (always
            ``"worklog"``), plus ``total_hours`` and ``percentage`` when
            ``total_days`` is known.
        """
        entries = self.data.values()
        payed_hours = sum(item["payed_hours"] for item in entries)
        volunteer_hours = sum(item["volunteer_hours"] for item in entries)

        data = dict(
            data=self.data,
            payed_hours=payed_hours,
            volunteer_hours=volunteer_hours,
            monthly_hours=self.monthly_hours,
            template="worklog",
        )

        if "total_days" in metadata:
            total_hours = int(metadata["total_days"]) * self.HOURS_PER_DAY
            # A budget of zero days would otherwise divide by zero.
            if total_hours:
                percentage = round(payed_hours / total_hours * 100)
            else:
                percentage = 0
            data.update(total_hours=total_hours, percentage=percentage)
        return data
# Markdown reader for ".md" files (see file_extensions). On top of
# MarkdownReader it enables the TOC and markdown-include extensions, runs the
# worklog preprocessor for sources under "pages/worklog", and fills in the
# title, date, category, slug and lang metadata from the document and its path.
class SimpleReader(MarkdownReader):
enabled = True
file_extensions = ["md"]
# Extend the Markdown settings with the TOC extension (limited to depth 3) and
# with markdown-include, whose base path is the PATH setting.
# NOTE(review): the entries are appended to self.settings["MARKDOWN"] without
# checking whether they are already present; if several readers are created
# from the same settings dict the extensions get listed more than once.
def __init__(self, *args, **kwargs):
super(SimpleReader, self).__init__(*args, **kwargs)
self.settings["MARKDOWN"]["extensions"].append("markdown.extensions.toc")
self.settings["MARKDOWN"]["extensions"].append(
MarkdownInclude({"base_path": self.settings["PATH"]})
)
self.settings["MARKDOWN"]["extension_configs"].update(
{"markdown.extensions.toc": {"toc_depth": 3}}
)
# Convert the file at source_path and return a (content, metadata) tuple.
def read(self, source_path):
self._source_path = source_path
# A fresh Markdown instance is built for every file that is read.
self._md = Markdown(**self.settings["MARKDOWN"])
# Sources whose parent directory matches "pages/worklog" additionally go
# through WorklogPreprocessor, registered under the name "worklog" with
# priority 20.
is_worklog = Path(source_path).parent.match("pages/worklog")
if is_worklog:
worklog = WorklogPreprocessor(self._md)
self._md.preprocessors.register(worklog, "worklog", 20)
with pelican_open(source_path) as text:
content = self._md.convert(text)
# "Meta" is presumably set by the Markdown meta extension — TODO confirm.
# Without it the metadata starts out empty.
if hasattr(self._md, "Meta"):
metadata = self._parse_metadata(self._md.Meta)
else:
metadata = {}
# Add the worklog info to the metadata
if is_worklog:
metadata["worklog"] = worklog.compute_data(metadata)
# Add the TOC to the metadata.
# Only done when self._md.toc is longer than 300 characters.
if len(self._md.toc) > 300:
metadata["table_of_contents"] = self._md.toc
# Get the title from the first title
# The matching <h1> is then removed from the rendered content; the replace()
# only has an effect when the rendered heading is exactly
# '<h1 id="...">...</h1>'.
if "title" not in metadata and len(self._md.toc_tokens):
first_title = self._md.toc_tokens[0]
metadata["title"] = first_title["name"]
content = content.replace(
'<h1 id="{id}">{name}</h1>'.format(**first_title), ""
)
# Get the date from the filename, if possible.
# The file name (without extension) is split on "-"; when there are more than
# three parts, the first three are joined again and parsed as the date.
parts = os.path.splitext(os.path.basename(source_path))[0].split("-")
if len(parts) > 3:
metadata["date"] = get_date("-".join(parts[:3]))
# The category is the name of the directory containing the source file.
category = os.path.basename(
os.path.abspath(os.path.join(source_path, os.pardir))
)
# The recipe directories are all folded into the single "recettes" category.
if category in ("Desserts", "Lactofermentation", "recettes"):
category = "recettes"
# Sources without a date fall back to a fixed date, and the title is taken
# from the file name.
# NOTE(review): confirm how the next three lines are meant to nest — whether
# the default date and the file-name title apply to recipes only or to every
# source without a date.
if not metadata.get("date"):
metadata["date"] = get_date("2024-05-02")
metadata["title"] = Path(source_path).stem
metadata["category"] = self.process_metadata("category", category)
# NOTE(review): metadata["title"] is read unconditionally below; verify that a
# title is always set by this point (e.g. for a document without any heading).
if "slug" not in metadata:
metadata["slug"] = slugify(
metadata["title"], self.settings.get("SLUG_REGEX_SUBSTITUTIONS", [])
)
# The language is the fourth item of the CATEGORIES_DESCRIPTION entry of the
# category; any failure to look it up falls back to "en".
try:
lang = self.settings["CATEGORIES_DESCRIPTION"].get(category)[3]
except Exception:
lang = "en"
metadata["lang"] = lang
return content, metadata
def add_reader(readers):
    """Make ``SimpleReader`` the reader of every extension it declares.

    Handler for Pelican's ``readers_init`` signal.

    Args:
        readers: Object whose ``reader_classes`` mapping (file extension ->
            reader class) is updated in place.
    """
    # Use the class attribute rather than a second hard-coded "md", so the
    # list of handled extensions is defined in a single place.
    for extension in SimpleReader.file_extensions:
        readers.reader_classes[extension] = SimpleReader
def register():
    """Plugin entry point: connect ``add_reader`` to ``readers_init``.

    Pelican calls the ``register()`` function of each plugin it loads.
    """
    readers_ready = signals.readers_init
    readers_ready.connect(add_reader)