"""Pelican Markdown reader plugin.

Registers ``SimpleReader`` for ``.md`` files. On top of Pelican's
``MarkdownReader`` it enables the ``toc`` and ``markdown-include``
extensions, derives title/date/category/slug/lang metadata from the
document and its path, and collects worked-hours statistics for files
located in a ``pages/worklog`` directory.
"""

import locale
import os.path
import re
from collections import defaultdict
from datetime import datetime
from pathlib import Path

from markdown.preprocessors import Preprocessor
from markdown_include.include import MarkdownInclude
from pelican import signals
from pelican.readers import Markdown, MarkdownReader, pelican_open
from pelican.utils import get_date, slugify

# Name under which the table-of-contents extension is registered.
_TOC_EXTENSION = "markdown.extensions.toc"
# The rendered TOC is only exposed when its HTML is longer than this.
_TOC_MIN_LENGTH = 300
# Parent directory names whose files are all filed under "recettes".
_RECIPE_DIRECTORIES = ("Desserts", "Lactofermentation", "recettes")
# Date given to recipe files that do not carry one.
_DEFAULT_RECIPE_DATE = "2024-05-02"
# Language used when no per-category language can be looked up.
_DEFAULT_LANG = "en"


def set_locale(dest):
    """Set ``LC_TIME`` to *dest*, preferring its UTF-8 variant.

    ``"<dest>.UTF8"`` is tried first; if that locale is unavailable the
    plain ``"<dest>"`` name is used instead. A ``locale.Error`` raised by
    the fallback propagates to the caller.
    """
    try:
        locale.setlocale(locale.LC_TIME, f"{dest}.UTF8")
    except locale.Error:
        locale.setlocale(locale.LC_TIME, f"{dest}")


# Month and day names in worklog titles are parsed and rendered with
# strptime/strftime, which follow LC_TIME.
set_locale("fr_FR")


class WorklogPreprocessor(Preprocessor):
    """Parse worklog headings and accumulate the hours they declare.

    Every line starting with ``##`` must contain a title such as
    ``Lundi 4 mars 2024 (6h, 2h bénévoles, fun 4/5)``. The heading is
    replaced by a line showing only the date, and the hours/happiness
    figures are stored in ``self.data`` (keyed by ``YYYY-MM-DD``) and
    ``self.monthly_hours`` (keyed by ``YYYY/MM``).
    """

    pattern = re.compile(
        r"""
        (?:(\w+)\s+)?                     # Day name
        (\d{1,2})\s+                      # Day number
        ([\wéû]+)\s+                      # Month name
        (\d{4})\s+                        # Year
        \(
        (\d{1,2})h                        # Hours (mandatory)
        (?:\s+facturées)?                 # Optionally 'facturées', if not present, assume hours are 'facturées'
        (?:,\s*(\d{1,2})h\s*bénévoles)?   # Optionally 'volunteer hours 'bénévoles'
        ,?                                # An optional comma
        \s*                               # Optional whitespace
        (?:fun\s+)?                       # Optionally 'fun' (text) followed by whitespace
        (\d)/5                            # Happiness rating (mandatory, always present)
        \)                                # Closing parenthesis
        """,
        re.VERBOSE | re.UNICODE,
    )

    def __init__(self, *args, **kwargs):
        # Per-day figures, keyed by "YYYY-MM-DD".
        self.data = {}
        # Per-month totals: {"YYYY/MM": {"payed": int, "volunteered": int}}.
        self.monthly_hours = defaultdict(lambda: defaultdict(int))
        super().__init__(*args, **kwargs)

    def run(self, lines):
        """Rewrite worklog headings in *lines* and record their figures.

        Returns a new list of lines in which each ``##`` heading has been
        replaced by ``## 🗓️ <weekday day month year>``; other lines are
        passed through unchanged.

        Raises:
            ValueError: if a line starting with ``##`` does not match
                ``pattern``, or if its date cannot be parsed.
        """
        new_lines = []
        for line in lines:
            if not line.startswith("##"):
                new_lines.append(line)
                continue

            match = self.pattern.search(line)
            if not match:
                raise ValueError("Unable to parse worklog title", line)

            (
                _day_of_week,
                day,
                month,
                year,
                payed_hours,
                volunteer_hours,
                happiness,
            ) = match.groups()

            # The volunteer-hours group is optional and is None when absent.
            volunteer_hours = int(volunteer_hours) if volunteer_hours else 0
            payed_hours = int(payed_hours)
            happiness = int(happiness)

            # %B reads the month name according to the current LC_TIME.
            date = datetime.strptime(f"{day} {month} {year}", "%d %B %Y")
            self.data[date.strftime("%Y-%m-%d")] = {
                "payed_hours": payed_hours,
                "volunteer_hours": volunteer_hours,
                "happiness": happiness,
            }

            current_date = date.strftime("%Y/%m")
            self.monthly_hours[current_date]["payed"] += payed_hours
            self.monthly_hours[current_date]["volunteered"] += volunteer_hours

            # Replace the line with just the date.
            displayed_date = date.strftime("%A %d %B %Y")
            new_lines.append(f"## 🗓️ {displayed_date}")
        return new_lines

    def compute_data(self, metadata):
        """Summarise the figures collected by ``run``.

        Meant to be called once, after the whole document has been parsed.

        Returns a dict with the per-day ``data``, the summed
        ``payed_hours`` and ``volunteer_hours``, the ``monthly_hours``
        totals and ``template="worklog"``. When *metadata* has a
        ``total_days`` entry, ``total_hours`` (7 hours per day) and the
        rounded ``percentage`` of payed hours are added as well.
        """
        entries = self.data.values()
        payed_hours = sum(item["payed_hours"] for item in entries)
        volunteer_hours = sum(item["volunteer_hours"] for item in entries)

        data = {
            "data": self.data,
            "payed_hours": payed_hours,
            "volunteer_hours": volunteer_hours,
            "monthly_hours": self.monthly_hours,
            "template": "worklog",
        }

        if "total_days" in metadata:
            total_hours = int(metadata["total_days"]) * 7
            # A budget of zero days would otherwise divide by zero.
            if total_hours:
                percentage = round(payed_hours / total_hours * 100)
            else:
                percentage = 0
            data.update({"total_hours": total_hours, "percentage": percentage})

        return data


class SimpleReader(MarkdownReader):
    """Markdown reader that fills in metadata from the document and its path."""

    enabled = True
    file_extensions = ["md"]

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        markdown_settings = self.settings["MARKDOWN"]
        extensions = markdown_settings["extensions"]

        # The MARKDOWN settings are modified in place, so only add each
        # extension when it is not already there; otherwise every new
        # reader built from the same settings would append duplicates.
        if _TOC_EXTENSION not in extensions:
            extensions.append(_TOC_EXTENSION)
        if not any(isinstance(ext, MarkdownInclude) for ext in extensions):
            extensions.append(MarkdownInclude({"base_path": self.settings["PATH"]}))

        markdown_settings["extension_configs"].update(
            {_TOC_EXTENSION: {"toc_depth": 3}}
        )

    def read(self, source_path):
        """Convert the Markdown file at *source_path*.

        Returns a ``(content, metadata)`` tuple where *content* is the
        rendered HTML and *metadata* is the parsed metadata completed
        with:

        * ``worklog``: hours statistics, for files whose parent directory
          matches ``pages/worklog``;
        * ``table_of_contents``: the rendered TOC, when long enough;
        * ``title``: the first heading (which is then removed from the
          content) when no title is declared;
        * ``date``: taken from a ``YYYY-MM-DD-...`` file name;
        * ``category``: the parent directory name (recipe directories are
          merged into ``recettes``);
        * ``slug`` and ``lang``.
        """
        self._source_path = source_path
        self._md = Markdown(**self.settings["MARKDOWN"])

        is_worklog = Path(source_path).parent.match("pages/worklog")
        if is_worklog:
            worklog = WorklogPreprocessor(self._md)
            self._md.preprocessors.register(worklog, "worklog", 20)

        with pelican_open(source_path) as text:
            content = self._md.convert(text)

        if hasattr(self._md, "Meta"):
            metadata = self._parse_metadata(self._md.Meta)
        else:
            metadata = {}

        # Add the worklog info to the metadata.
        if is_worklog:
            metadata["worklog"] = worklog.compute_data(metadata)

        # Add the TOC to the metadata, unless it is too short to be useful.
        if len(self._md.toc) > _TOC_MIN_LENGTH:
            metadata["table_of_contents"] = self._md.toc

        # Get the title from the first heading and drop that heading from
        # the rendered content.
        if "title" not in metadata and self._md.toc_tokens:
            first_title = self._md.toc_tokens[0]
            metadata["title"] = first_title["name"]
            # NOTE(review): the markup inside this literal was garbled in
            # the reviewed copy of the file; it is reconstructed here as the
            # heading emitted by the toc extension -- confirm against the
            # original.
            content = content.replace(
                '<h1 id="{id}">{name}</h1>'.format(**first_title), ""
            )

        # Get the date from the filename, if possible.
        parts = os.path.splitext(os.path.basename(source_path))[0].split("-")
        if len(parts) > 3:
            try:
                metadata["date"] = get_date("-".join(parts[:3]))
            except ValueError:
                # The name merely contains dashes; it has no date prefix.
                pass

        category = os.path.basename(
            os.path.abspath(os.path.join(source_path, os.pardir))
        )
        # NOTE(review): the nesting of the date/title fallback below was
        # not recoverable from the reviewed copy (indentation was lost);
        # it is reconstructed as applying to recipe files only -- confirm.
        if category in _RECIPE_DIRECTORIES:
            category = "recettes"
            if not metadata.get("date"):
                metadata["date"] = get_date(_DEFAULT_RECIPE_DATE)
            metadata["title"] = Path(source_path).stem

        metadata["category"] = self.process_metadata("category", category)

        if "slug" not in metadata:
            metadata["slug"] = slugify(
                metadata["title"], self.settings.get("SLUG_REGEX_SUBSTITUTIONS", [])
            )

        # The language is read from index 3 of the category's entry in
        # CATEGORIES_DESCRIPTION; fall back when the setting, the entry or
        # that index is missing.
        try:
            lang = self.settings["CATEGORIES_DESCRIPTION"].get(category)[3]
        except (KeyError, TypeError, IndexError, AttributeError):
            lang = _DEFAULT_LANG
        metadata["lang"] = lang

        return content, metadata


def add_reader(readers):
    """Make ``SimpleReader`` the reader used for ``md`` files."""
    readers.reader_classes["md"] = SimpleReader


# This is how pelican works.
def register():
    """Plugin entry point: hook ``add_reader`` to ``readers_init``."""
    signals.readers_init.connect(add_reader)