handle markdown to LaTeX differently

2024-02-13 20:00:16 +01:00 · 2024-02-13 20:00:16 +01:00 · ef741249d3
parent 4b6d0d1634
commit ef741249d3
2 changed files with 198 additions and 194 deletions
--- a/rendercv/data_models.py
+++ b/rendercv/data_models.py
@ -23,7 +23,6 @@ import json
 import re
 import ssl
 import pathlib
-import copy

 import pydantic
 import pydantic_extra_types.phone_numbers as pydantic_phone_numbers
@ -1071,193 +1070,9 @@ class RenderCVDataModel(RenderCVBaseModel):
            return theme_data_model


-def escape_latex_characters(string: str) -> str:
-    """Escape $\\LaTeX$ characters in a string.
-
-    This function is called during the reading of the input file. Before the validation
-    process, each input field's special $\\LaTeX$ characters are escaped.
-
-    Example:
-        ```python
-        escape_latex_characters("This is a # string.")
-        ```
-        will return:
-        `#!python "This is a \\# string."`
-    """
-
-    # Dictionary of escape characters:
-    escape_characters = {
-        "#": "\\#",
-        # "$": "\\$", # Don't escape $ as it is used for math mode
-        "%": "\\%",
-        "&": "\\&",
-        "~": "\\textasciitilde{}",
-        # "_": "\\_", # Don't escape _ as it is used for math mode
-        # "^": "\\textasciicircum{}", # Don't escape ^ as it is used for math mode
-    }
-
-    # Don't escape links as hyperref package will do it automatically:
-
-    # Find all the links in the sentence:
-    links = re.findall(r"\[.*?\]\(.*?\)", string)
-
-    # Replace the links with a placeholder:
-    for link in links:
-        string = string.replace(link, "!!-link-!!")
-
-    # Loop through the letters of the sentence and if you find an escape character,
-    # replace it with its LaTeX equivalent:
-    copy_of_the_string = list(string)
-    for i, character in enumerate(copy_of_the_string):
-        if character in escape_characters:
-            new_character = escape_characters[character]
-            copy_of_the_string[i] = new_character
-
-    string = "".join(copy_of_the_string)
-    # Replace the links with the original links:
-    for link in links:
-        string = string.replace("!!-link-!!", link)
-
-    return string
-
-
-def markdown_to_latex(markdown_string: str) -> str:
-    """Convert a markdown string to LaTeX.
-
-    This function is called during the reading of the input file. Before the validation
-    process, each input field is converted from markdown to LaTeX.
-
-    Example:
-        ```python
-        markdown_to_latex("This is a **bold** text with an [*italic link*](https://google.com).")
-        ```
-
-        will return:
-
-        `#!pytjon "This is a \\textbf{bold} text with a \\href{https://google.com}{\\textit{link}}."`
-
-    Args:
-        markdown_string (str): The markdown string to convert.
-
-    Returns:
-        str: The LaTeX string.
-    """
-    # convert links
-    links = re.findall(r"\[([^\]\[]*)\]\((.*?)\)", markdown_string)
-    if links is not None:
-        for link in links:
-            link_text = link[0]
-            link_url = link[1]
-
-            old_link_string = f"[{link_text}]({link_url})"
-            new_link_string = "\\href{" + link_url + "}{" + link_text + "}"
-
-            markdown_string = markdown_string.replace(old_link_string, new_link_string)
-
-    # convert bold
-    bolds = re.findall(r"\*\*([^\*]*)\*\*", markdown_string)
-    if bolds is not None:
-        for bold_text in bolds:
-            old_bold_text = f"**{bold_text}**"
-            new_bold_text = "\\textbf{" + bold_text + "}"
-
-            markdown_string = markdown_string.replace(old_bold_text, new_bold_text)
-
-    # convert italic
-    italics = re.findall(r"\*([^\*]*)\*", markdown_string)
-    if italics is not None:
-        for italic_text in italics:
-            old_italic_text = f"*{italic_text}*"
-            new_italic_text = "\\textit{" + italic_text + "}"
-
-            markdown_string = markdown_string.replace(old_italic_text, new_italic_text)
-
-    # convert code
-    codes = re.findall(r"`([^`]*)`", markdown_string)
-    if codes is not None:
-        for code_text in codes:
-            old_code_text = f"`{code_text}`"
-            new_code_text = "\\texttt{" + code_text + "}"
-
-            markdown_string = markdown_string.replace(old_code_text, new_code_text)
-
-    latex_string = markdown_string
-
-    return latex_string
-
-
-def convert_a_markdown_dictionary_to_a_latex_dictionary(
-    dictionary: dict[str, Any],
-) -> dict[str, Any]:
-    """
-    Recursively loop through a dictionary and convert all the markdown strings (keys and
-    values) to LaTeX. Also, escape special LaTeX characters in the keys and values.
-
-    Example:
-        ```python
-        convert_a_markdown_dictionary_to_a_latex_dictionary(
-            {
-                "key1": "This is a **bold** text with an [*italic link*](https://google.com).",
-                "key2": "This is a **bold** text with an [*italic link*](https://google.com).",
-                "**key3**": {
-                    "key4": "This is a **bold** text with an [*italic link*](https://google.com).",
-                    "key5": "This is a **bold** text with an [*italic link*](https://google.com).",
-                },
-            }
-        )
-        ```
-
-        will return:
-
-        ```python
-        {
-            "key1": "This is a \\textbf{bold} text with a \\href{https://google.com}{\\textit{link}}.",
-            "key2": "This is a \\textbf{bold} text with a \\href{https://google.com}{\\textit{link}}.",
-            "\\textbf{key3}": {
-                "key4": "This is a \\textbf{bold} text with a \\href{https://google.com}{\\textit{link}}.",
-                "key5": "This is a \\textbf{bold} text with a \\href{https://google.com}{\\textit{link}}.",
-            },
-        }
-        ```
-
-    Args:
-        dictionary (dict): The dictionary to convert.
-    Returns:
-        dict: The LaTeX dictionary.
-    """
-    for key, value in dictionary.copy().items():
-        if isinstance(value, str):
-            # if the value is a string, then apply markdown_to_latex and
-            # escape_latex_characters to it:
-            result = escape_latex_characters(value)
-            dictionary[key] = markdown_to_latex(result)
-        elif isinstance(value, list):
-            # if the value is a list, then loop through the list and apply
-            # markdown_to_latex and escape_latex_characters to each item:
-            for index, item in enumerate(value):
-                if isinstance(item, str):
-                    result = escape_latex_characters(item)
-                    dictionary[key][index] = markdown_to_latex(result)
-                elif isinstance(item, dict):
-                    # if the item is a dictionary, then call loop_through_dictionary
-                    # again:
-                    dictionary[key][index] = (
-                        convert_a_markdown_dictionary_to_a_latex_dictionary(item)
-                    )
-        elif isinstance(value, dict):
-            # if the value is a dictionary, then call loop_through_dictionary again:
-            dictionary[key] = convert_a_markdown_dictionary_to_a_latex_dictionary(value)
-
-        # do the same for the key:
-        result = escape_latex_characters(key)
-        dictionary[markdown_to_latex(result)] = dictionary.pop(key)
-
-    return dictionary
-
-
 def read_input_file(
    file_path: pathlib.Path,
-) -> tuple[RenderCVDataModel, RenderCVDataModel]:
+) -> RenderCVDataModel:
    """Read the input file and return two instances of RenderCVDataModel. The first
    instance is the data model with LaTeX strings and the second instance is the data
    model with markdown strings.
@ -1282,16 +1097,12 @@ def read_input_file(
        )

    file_content = file_path.read_text(encoding="utf-8")
-    original_dictionary: dict[str, Any] = ruamel.yaml.YAML().load(file_content)
-    parsed_dictionary = convert_a_markdown_dictionary_to_a_latex_dictionary(
-        copy.deepcopy(original_dictionary)
-    )
+    input_as_dictionary: dict[str, Any] = ruamel.yaml.YAML().load(file_content)

    # validate the parsed dictionary by creating an instance of RenderCVDataModel:
-    data_model_markdown = RenderCVDataModel(**original_dictionary)
-    data_model_latex = RenderCVDataModel(**parsed_dictionary)
+    rendercv_data_model = RenderCVDataModel(**input_as_dictionary)

-    return data_model_latex, data_model_markdown
+    return rendercv_data_model


 def get_a_sample_data_model(name: str) -> RenderCVDataModel:
--- a/rendercv/renderer.py
+++ b/rendercv/renderer.py
@ -15,12 +15,12 @@ import pathlib
 import importlib.resources
 import shutil
 import sys
+import copy
 from datetime import date as Date
 from typing import Optional, Literal, Any

 import jinja2
 import markdown
-import fpdf

 from . import data_models as dm

@ -134,6 +134,16 @@ class LaTeXFile(TemplatedFile):
    data model and Jinja2 templates. It inherits from the TemplatedFile class.
    """

+    def __init__(
+        self,
+        data_model: dm.RenderCVDataModel,
+        environment: jinja2.Environment,
+    ):
+        """Initialize the file with a LaTeX version of the given data model.
+
+        The data model is deep-copied before it is transformed, so the instance
+        passed in by the caller keeps its markdown strings.
+
+        Args:
+            data_model (RenderCVDataModel): The data model with markdown strings.
+            environment (jinja2.Environment): Handed over unchanged to the parent
+                class initializer.
+        """
+        markdown_copy = copy.deepcopy(data_model)
+        latex_data_model = transform_markdown_data_model_to_latex_data_model(
+            markdown_copy
+        )
+        super().__init__(latex_data_model, environment)
+
    def render_templates(self):
        """Render and return all the templates for the $\\LaTeX$ file.

@ -312,6 +322,189 @@ class MarkdownFile(TemplatedFile):
        file_path.write_text(self.get_markdown_code(), encoding="utf-8")


+def escape_latex_characters(string: str) -> str:
+    """Escape $\\LaTeX$ characters in a string.
+
+    This function is called by `transform_markdown_data_model_to_latex_data_model`
+    for each string it converts. Markdown links (`[text](url)`) are left untouched
+    because the hyperref package escapes them automatically.
+
+    Example:
+        ```python
+        escape_latex_characters("This is a # string.")
+        ```
+        will return:
+        `#!python "This is a \\# string."`
+
+    Args:
+        string (str): The string to escape.
+
+    Returns:
+        str: The string with `#`, `%`, `&`, and `~` escaped outside of links.
+    """
+    # `$`, `_`, and `^` are deliberately not escaped as they are used for math mode.
+    escape_table = str.maketrans(
+        {
+            "#": "\\#",
+            "%": "\\%",
+            "&": "\\&",
+            "~": "\\textasciitilde{}",
+        }
+    )
+
+    # Split on markdown links and keep them (capturing group): the pieces at even
+    # indices are plain text, the pieces at odd indices are links. Escaping only
+    # the plain-text pieces keeps every link at its own position, even when the
+    # string contains several different links.
+    pieces = re.split(r"(\[.*?\]\(.*?\))", string)
+
+    return "".join(
+        piece if index % 2 else piece.translate(escape_table)
+        for index, piece in enumerate(pieces)
+    )
+
+
+def markdown_to_latex(markdown_string: str) -> str:
+    """Convert a markdown string to LaTeX.
+
+    Links, bold, italic, and inline code are converted, in that order. This
+    function is called by `transform_markdown_data_model_to_latex_data_model` for
+    each string it converts.
+
+    Example:
+        ```python
+        markdown_to_latex("This is a **bold** text with an [*italic link*](https://google.com).")
+        ```
+
+        will return:
+
+        `#!python "This is a \\textbf{bold} text with an \\href{https://google.com}{\\textit{italic link}}."`
+
+    Args:
+        markdown_string (str): The markdown string to convert.
+
+    Returns:
+        str: The LaTeX string.
+    """
+    latex_string = markdown_string
+
+    # Links come first so that any markdown inside the link text is handled by the
+    # later steps.
+    for link_text, link_url in re.findall(r"\[([^\]\[]*)\]\((.*?)\)", latex_string):
+        latex_string = latex_string.replace(
+            f"[{link_text}]({link_url})",
+            f"\\href{{{link_url}}}{{{link_text}}}",
+        )
+
+    # (markdown delimiter, pattern, LaTeX command); bold must be handled before
+    # italic because `**` would otherwise be read as two italic markers.
+    inline_styles = (
+        ("**", r"\*\*([^\*]*)\*\*", "textbf"),
+        ("*", r"\*([^\*]*)\*", "textit"),
+        ("`", r"`([^`]*)`", "texttt"),
+    )
+    for delimiter, pattern, command in inline_styles:
+        # The matches are collected from the string as it is after the previous
+        # steps:
+        for inner_text in re.findall(pattern, latex_string):
+            latex_string = latex_string.replace(
+                f"{delimiter}{inner_text}{delimiter}",
+                f"\\{command}{{{inner_text}}}",
+            )
+
+    return latex_string
+
+
+def transform_markdown_data_model_to_latex_data_model(
+    data_model: dm.RenderCVDataModel,
+) -> dm.RenderCVDataModel:
+    """
+    Recursively loop through a `RenderCVDataModel` and convert all the markdown strings
+    (user input is in markdown format) to LaTeX strings. Also, escape special LaTeX
+    characters.
+
+    The data model is modified in place and then returned, so pass a copy if the
+    original markdown strings are still needed.
+
+    Args:
+        data_model (RenderCVDataModel): The data model to transform.
+    Returns:
+        RenderCVDataModel: The same data model object, now with LaTeX strings.
+    """
+    # The dumped dictionary is only used to find out what kind of value each field
+    # holds (a dumped sub data model shows up as a dictionary). The updates
+    # themselves are made on the data model object.
+    for key, value in data_model.model_dump().items():
+        if isinstance(value, str):
+            # if the value is a string, then apply markdown_to_latex and
+            # escape_latex_characters to it:
+            result = markdown_to_latex(escape_latex_characters(value))
+            setattr(data_model, key, result)
+        elif isinstance(value, list):
+            # if the value is a list, then loop through the list and transform
+            # each item according to its type:
+            original_items = getattr(data_model, key)
+            transformed_list = []
+            for index, item in enumerate(value):
+                if isinstance(item, str):
+                    result = markdown_to_latex(escape_latex_characters(item))
+                    transformed_list.append(result)
+                elif isinstance(item, dict):
+                    # if the item is a dictionary, then it means it's a sub data model.
+                    # So, call transform_markdown_data_model_to_latex_data_model again:
+                    transformed_list.append(
+                        transform_markdown_data_model_to_latex_data_model(
+                            original_items[index]
+                        )
+                    )
+                else:
+                    # anything else (numbers, None, ...) has nothing to convert, but
+                    # it must stay in the list, otherwise it would be lost:
+                    transformed_list.append(original_items[index])
+
+            # update data_model object's attribute with the new value:
+            setattr(data_model, key, transformed_list)
+        elif isinstance(value, dict):
+            if key == "sections_input":
+                # Then it means it's the `sections` field:
+                sections = getattr(data_model, key)
+                for section_title, entries in sections.items():
+                    transformed_entries = []
+                    for entry in entries:
+                        if isinstance(entry, str):
+                            # a plain string entry (if a section holds any) is
+                            # converted directly:
+                            transformed_entry = markdown_to_latex(
+                                escape_latex_characters(entry)
+                            )
+                        else:
+                            transformed_entry = (
+                                transform_markdown_data_model_to_latex_data_model(entry)
+                            )
+                        transformed_entries.append(transformed_entry)
+                    # store the transformed entries; replacing the value of an
+                    # existing key is safe while iterating over the dictionary:
+                    sections[section_title] = transformed_entries
+                setattr(data_model, key, sections)
+            else:
+                # Then it means it's a sub data model.
+                # So, call transform_markdown_data_model_to_latex_data_model again:
+                sub_data_model = getattr(data_model, key)
+                transformed_sub_data_model = (
+                    transform_markdown_data_model_to_latex_data_model(sub_data_model)
+                )
+
+                # update data_model object's attribute with the new value:
+                setattr(data_model, key, transformed_sub_data_model)
+
+    return data_model
+
+
 def make_matched_part_something(
    value: str, something: str, match_str: Optional[str] = None
 ) -> str: