# blog.notmyidea.org/plugins/simplereader.py

import locale
import os.path
import re
from collections import defaultdict
from datetime import datetime
from pathlib import Path

from markdown.preprocessors import Preprocessor
from pelican import signals
from pelican.readers import Markdown, MarkdownReader, pelican_open
from pelican.utils import get_date, slugify

# The worklog titles are parsed and re-rendered with strptime/strftime
# ("%B", "%A"), which follow LC_TIME, so a French locale is selected here.
try:
    locale.setlocale(locale.LC_TIME, "fr_FR.UTF8")
except locale.Error:
    # The UTF-8 variant is not available under that name: retry with the
    # bare locale name. A failure here is deliberately left to propagate.
    locale.setlocale(locale.LC_TIME, "fr_FR")


class WorklogPreprocessor(Preprocessor):
    """Collect worklog statistics from the titles of a Markdown page.

    Every line starting with ``##`` is expected to carry a date followed by
    a parenthesised summary, for instance::

        ## Mardi 23 janvier 2024 (8h facturées, 2h bénévoles, fun 4/5)

    While the document is converted, ``run`` records the figures of each
    title and rewrites the title so that only the date is displayed. Once
    the conversion is done, ``compute_data`` aggregates what was recorded.
    """

    # Number of hours ``compute_data`` counts for each day of ``total_days``.
    HOURS_PER_DAY = 7

    pattern = re.compile(
        r"""
        (?:(\w+)\s+)?                 # Day name
        (\d{1,2})\s+                  # Day number
        ([\wéû]+)\s+                  # Month name
        (\d{4})\s+                    # Year
        \(
        (\d{1,2})h                    # Hours (mandatory)
        (?:\s+facturées)?             # Optionally 'facturées', if not present, assume hours are 'facturées'
        (?:,\s*(\d{1,2})h\s*bénévoles)? # Optionally 'volunteer hours 'bénévoles'
        ,?                            # An optional comma
        \s*                           # Optional whitespace
        (?:fun\s+)?                   # Optionally 'fun' (text) followed by whitespace
        (\d)/5                        # Happiness rating (mandatory, always present)
        \)                            # Closing parenthesis
        """,
        re.VERBOSE | re.UNICODE,
    )

    def __init__(self, *args, **kwargs):
        # Per-day figures, keyed by the date formatted as "YYYY-MM-DD".
        self.data = {}
        # Paid hours summed per month, keyed by "YYYY/MM".
        self.payed_monthly = defaultdict(int)
        super().__init__(*args, **kwargs)

    def run(self, lines):
        """Record the figures of each worklog title and simplify the title.

        Args:
            lines: the lines of the Markdown document.

        Returns:
            A new list of lines in which every line starting with ``##`` is
            replaced by ``## 🗓️ <date>``; other lines are passed through.

        Raises:
            ValueError: if a line starting with ``##`` does not match
                ``pattern``. ``datetime.strptime`` may also raise it when
                the matched day/month/year is not a valid date.
        """
        new_lines = []
        for line in lines:
            # NOTE(review): this prefix test also catches deeper headings
            # such as "###"; they are required to match the pattern too.
            if not line.startswith("##"):
                new_lines.append(line)
                continue

            match = self.pattern.search(line)
            if not match:
                raise ValueError("Unable to parse worklog title", line)
            (
                day_of_week,
                day,
                month,
                year,
                payed_hours,
                volunteer_hours,
                happiness,
            ) = match.groups()

            # The volunteer hours group is optional in the pattern.
            volunteer_hours = int(volunteer_hours) if volunteer_hours else 0
            payed_hours = int(payed_hours)
            happiness = int(happiness)
            # "%B" parses the month name according to the LC_TIME locale.
            date = datetime.strptime(f"{day} {month} {year}", "%d %B %Y")
            self.data[date.strftime("%Y-%m-%d")] = {
                "payed_hours": payed_hours,
                "volunteer_hours": volunteer_hours,
                "happiness": happiness,
            }
            self.payed_monthly[date.strftime("%Y/%m")] += payed_hours
            displayed_date = date.strftime("%A %d %B %Y")

            # Replace the line with just the date
            new_lines.append(f"## 🗓️ {displayed_date}")
        return new_lines

    def compute_data(self, metadata):
        """Aggregate the figures recorded by ``run``.

        This is run once, after everything has been parsed.

        Args:
            metadata: mapping of the page metadata. When it has a
                ``total_days`` entry, that value is converted with ``int``
                and multiplied by ``HOURS_PER_DAY`` to obtain
                ``total_hours``.

        Returns:
            A dict with the keys ``data`` (per-day figures),
            ``payed_hours`` and ``volunteer_hours`` (sums over all days),
            ``payed_monthly`` and ``template`` (always ``"worklog"``).
            When ``total_days`` is present it also holds ``total_hours``
            and ``percentage``, the rounded share of ``total_hours`` that
            the paid hours represent (0 when ``total_hours`` is 0).
        """
        days = self.data.values()
        payed_hours = sum(item["payed_hours"] for item in days)
        volunteer_hours = sum(item["volunteer_hours"] for item in days)

        data = {
            "data": self.data,
            "payed_hours": payed_hours,
            "volunteer_hours": volunteer_hours,
            "payed_monthly": self.payed_monthly,
            "template": "worklog",
        }
        if "total_days" in metadata:
            total_hours = int(metadata["total_days"]) * self.HOURS_PER_DAY
            # A total of zero would otherwise raise ZeroDivisionError.
            if total_hours:
                percentage = round(payed_hours / total_hours * 100)
            else:
                percentage = 0
            data["total_hours"] = total_hours
            data["percentage"] = percentage

        return data


class SimpleReader(MarkdownReader):
    """Markdown reader that derives most metadata from the file itself.

    On top of the metadata parsed from the document, ``read`` fills in the
    title (first heading), the date (filename prefix), the slug (from the
    title) and the category (name of the parent directory). Files whose
    parent directory matches ``pages/worklog`` are additionally run through
    ``WorklogPreprocessor``.
    """

    enabled = True

    file_extensions = ["md"]

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        markdown_settings = self.settings["MARKDOWN"]
        # The extensions list is mutated in place: if the same settings
        # object is handed to several reader instances, an unconditional
        # append would register the toc extension once per instance.
        if "markdown.extensions.toc" not in markdown_settings["extensions"]:
            markdown_settings["extensions"].append("markdown.extensions.toc")
        markdown_settings["extension_configs"].update(
            {"markdown.extensions.toc": {"toc_depth": 3}}
        )

    def read(self, source_path):
        """Convert a Markdown file and build its metadata.

        Args:
            source_path: path of the Markdown file to read.

        Returns:
            A ``(content, metadata)`` tuple: the converted content and the
            metadata dict.

        Raises:
            KeyError: if no ``slug`` is given and no ``title`` could be
                found in the metadata or in the headings.
        """
        self._source_path = source_path
        self._md = Markdown(**self.settings["MARKDOWN"])

        is_worklog = Path(source_path).parent.match("pages/worklog")

        if is_worklog:
            worklog = WorklogPreprocessor(self._md)
            self._md.preprocessors.register(worklog, "worklog", 20)

        with pelican_open(source_path) as text:
            content = self._md.convert(text)

        if hasattr(self._md, "Meta"):
            metadata = self._parse_metadata(self._md.Meta)
        else:
            metadata = {}

        # Add the worklog info to the metadata
        if is_worklog:
            metadata["worklog"] = worklog.compute_data(metadata)

        # Add the TOC to the metadata, but only when it is longer than
        # 300 (as measured by len()).
        if len(self._md.toc) > 300:
            metadata["table_of_contents"] = self._md.toc

        # Get the title from the first title, and drop the matching <h1>
        # from the content.
        if "title" not in metadata and self._md.toc_tokens:
            first_title = self._md.toc_tokens[0]
            metadata["title"] = first_title["name"]
            content = content.replace(
                '<h1 id="{id}">{name}</h1>'.format(**first_title), ""
            )

        # Get the date from the filename, if possible: the first three
        # dash-separated parts of the basename are handed to get_date, and
        # the result replaces any date already present in the metadata.
        # NOTE(review): every basename with more than three parts is treated
        # as date-prefixed; presumably get_date fails on a non-date prefix,
        # to be confirmed.
        parts = os.path.splitext(os.path.basename(source_path))[0].split("-")
        if len(parts) > 3:
            metadata["date"] = get_date("-".join(parts[:3]))

        if "slug" not in metadata:
            metadata["slug"] = slugify(
                metadata["title"], self.settings.get("SLUG_REGEX_SUBSTITUTIONS", [])
            )

        # The category is the name of the directory containing the file.
        category = os.path.basename(
            os.path.abspath(os.path.join(source_path, os.pardir))
        )
        metadata["category"] = self.process_metadata("category", category)
        return content, metadata


def add_reader(readers):
    """Make ``SimpleReader`` the reader class used for the ``md`` extension."""
    registry = readers.reader_classes
    registry["md"] = SimpleReader


# This is how pelican works.
def register():
    """Connect ``add_reader`` to the ``readers_init`` signal."""
    readers_init = signals.readers_init
    readers_init.connect(add_reader)