mirror of
https://github.com/almet/notmyidea.git
synced 2025-04-28 19:42:37 +02:00
115 lines
3 KiB
Python
115 lines
3 KiB
Python
import json
|
|
import os
|
|
import sqlite3
|
|
import subprocess
|
|
|
|
import click
|
|
|
|
# Path of the SQLite file used by this script: it is handed to
# recreate_database() and passed to the `llm` CLI through its `-d` option.
DATABASE = "quotes.db"
|
|
|
|
|
|
def recreate_database(db_path):
    """Start over with an empty quotes database at *db_path*.

    Any file already present at that path is deleted, then a new SQLite
    database holding a single ``quotes(id, content)`` table is created.

    Args:
        db_path: Filesystem path of the SQLite database file.

    Returns:
        The open ``sqlite3.Connection``; closing it is up to the caller.
    """
    stale_file_present = os.path.exists(db_path)
    if stale_file_present:
        os.remove(db_path)

    connection = sqlite3.connect(db_path)
    schema_cursor = connection.cursor()
    schema_cursor.execute(
        "CREATE TABLE quotes (id INTEGER PRIMARY KEY, content TEXT)"
    )
    connection.commit()
    return connection
|
|
|
|
|
|
def extract_quotes(input_file):
    """Collect the blockquotes found in a Markdown file.

    A quote is a run of consecutive lines that start with ``>``; any other
    line (including a blank one) ends the current run. A run that is still
    open when the file ends is kept as well.

    Args:
        input_file: Path of the Markdown file to read. It is decoded as
            UTF-8 regardless of the platform's default encoding.

    Returns:
        A list of quote strings, in the order they appear in the file.
    """
    quotes = []
    quote_lines = []

    def flush():
        # Each collected line still carries its own trailing newline, so
        # joining with "\n" leaves a blank line between the lines of a
        # quote. That formatting is kept as-is so stored content does not
        # change; only surrounding whitespace is stripped.
        if quote_lines:
            quotes.append("\n".join(quote_lines).strip())
            quote_lines.clear()

    with open(input_file, "r", encoding="utf-8") as file:
        for line in file:
            if line.startswith(">"):
                quote_lines.append(line)
            else:
                flush()

    # The file may end while a quote is still being collected; without this
    # final flush the last quote would be silently dropped.
    flush()
    return quotes
|
|
|
|
|
|
def insert_quotes_to_db(conn, quotes):
    """Store each quote as one row of the ``quotes`` table, then commit.

    Args:
        conn: Open ``sqlite3.Connection`` whose database has a ``quotes``
            table with a ``content`` column.
        quotes: Iterable of quote strings, inserted in iteration order.
    """
    rows = ((text,) for text in quotes)
    writer = conn.cursor()
    writer.executemany("INSERT INTO quotes (content) VALUES (?)", rows)
    conn.commit()
|
|
|
|
|
|
def _run_llm(*args):
    """Run the ``llm`` command-line tool with *args*, capturing its output as text."""
    return subprocess.run(["llm", *args], capture_output=True, text=True)


def _abort_on_failure(step, result):
    """Print the stderr of a failed ``llm`` step and exit with status 1."""
    if result.returncode != 0:
        click.echo(f"{step} failed:", err=True)
        click.echo(result.stderr, err=True)
        raise SystemExit(1)


# Command-line entry point.
#
# Arguments (supplied by click):
#   input_markdown_file -- path of an existing Markdown file to read quotes from.
#   clusters            -- number of clusters requested from `llm cluster`.
#
# Steps: rebuild the quotes database, embed every non-empty quote with
# `llm embed-multi`, cluster them with `llm cluster --summary`, then print
# one "## <summary>" section per cluster followed by its quotes.
#
# Exits with status 1 if the embedding or clustering step fails, or if the
# clustering output is not valid JSON. The function docstring is left as a
# single line on purpose: click shows it verbatim as the --help text.
@click.command()
@click.argument("input_markdown_file", type=click.Path(exists=True))
@click.argument("clusters", type=int)
def main(input_markdown_file, clusters):
    """Process Markdown files and generate output with clustered quotes."""
    conn = recreate_database(DATABASE)
    # try/finally guarantees the connection is closed on every exit path,
    # including failures raised part-way through.
    try:
        quotes = extract_quotes(input_markdown_file)
        insert_quotes_to_db(conn, quotes)

        embed_result = _run_llm(
            "embed-multi",
            "quotes",
            "-d",
            DATABASE,
            "--sql",
            "SELECT id, content FROM quotes WHERE content <> ''",
            "--store",
        )
        # Clustering relies on the embeddings written by this step, so stop
        # here with the real error instead of failing later.
        _abort_on_failure("Embedding", embed_result)

        cluster_result = _run_llm(
            "cluster",
            "quotes",
            str(clusters),
            "-d",
            DATABASE,
            "--summary",
            "--prompt",
            "Titre pour l'ensemble de ces citations (court)",
        )
        _abort_on_failure("Clustering", cluster_result)

        try:
            cluster_data = json.loads(cluster_result.stdout)
        except json.JSONDecodeError as exc:
            click.echo(f"Clustering returned invalid JSON: {exc}", err=True)
            raise SystemExit(1) from exc

        # One cursor is enough for all lookups below.
        cur = conn.cursor()
        for group in cluster_data:
            summary = group["summary"]
            item_ids = [item["id"] for item in group["items"]]

            print(f"## {summary}\n\n")

            for item_id in item_ids:
                try:
                    cur.execute("SELECT content FROM quotes WHERE id=?", (item_id,))
                    quote = cur.fetchone()
                except sqlite3.Error as e:
                    # Best effort: report the problem and keep going with
                    # the remaining quotes.
                    click.echo(
                        f"> Error fetching quote with ID {item_id}: {e}\n\n", err=True
                    )
                    continue

                if quote:
                    print(f"{quote[0]}\n\n")
                else:
                    click.echo(f"> Quote with ID {item_id} not found.\n\n", err=True)
    finally:
        conn.close()
|
|
|
|
|
|
# Invoke the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|