handle markdown to LaTeX differently

2024-02-13 20:00:16 +01:00 · 2024-02-13 20:00:16 +01:00 · ef741249d3
parent 4b6d0d1634
commit ef741249d3
2 changed files with 198 additions and 194 deletions
--- a/rendercv/data_models.py
+++ b/rendercv/data_models.py
@ -23,7 +23,6 @@ import json
 import re
 import ssl
 import pathlib
 import copy
 import pydantic
 import pydantic_extra_types.phone_numbers as pydantic_phone_numbers
@ -1071,193 +1070,9 @@ class RenderCVDataModel(RenderCVBaseModel):
            return theme_data_model
 def escape_latex_characters(string: str) -> str:
    """Escape $\\LaTeX$ characters in a string.
    This function is called during the reading of the input file. Before the validation
    process, each input field's special $\\LaTeX$ characters are escaped.
    Example:
        ```python
        escape_latex_characters("This is a # string.")
        ```
        will return:
        `#!python "This is a \\# string."`
    """
    # Dictionary of escape characters:
    escape_characters = {
        "#": "\\#",
        # "$": "\\$", # Don't escape $ as it is used for math mode
        "%": "\\%",
        "&": "\\&",
        "~": "\\textasciitilde{}",
        # "_": "\\_", # Don't escape _ as it is used for math mode
        # "^": "\\textasciicircum{}", # Don't escape ^ as it is used for math mode
    }
    # Don't escape links as hyperref package will do it automatically:
    # Find all the links in the sentence:
    links = re.findall(r"\[.*?\]\(.*?\)", string)
    # Replace the links with a placeholder:
    for link in links:
        string = string.replace(link, "!!-link-!!")
    # Loop through the letters of the sentence and if you find an escape character,
    # replace it with its LaTeX equivalent:
    copy_of_the_string = list(string)
    for i, character in enumerate(copy_of_the_string):
        if character in escape_characters:
            new_character = escape_characters[character]
            copy_of_the_string[i] = new_character
    string = "".join(copy_of_the_string)
    # Replace the links with the original links:
    for link in links:
        string = string.replace("!!-link-!!", link)
    return string
 def markdown_to_latex(markdown_string: str) -> str:
    """Convert a markdown string to LaTeX.
    This function is called during the reading of the input file. Before the validation
    process, each input field is converted from markdown to LaTeX.
    Example:
        ```python
        markdown_to_latex("This is a **bold** text with an [*italic link*](https://google.com).")
        ```
        will return:
        `#!pytjon "This is a \\textbf{bold} text with a \\href{https://google.com}{\\textit{link}}."`
    Args:
        markdown_string (str): The markdown string to convert.
    Returns:
        str: The LaTeX string.
    """
    # convert links
    links = re.findall(r"\[([^\]\[]*)\]\((.*?)\)", markdown_string)
    if links is not None:
        for link in links:
            link_text = link[0]
            link_url = link[1]
            old_link_string = f"[{link_text}]({link_url})"
            new_link_string = "\\href{" + link_url + "}{" + link_text + "}"
            markdown_string = markdown_string.replace(old_link_string, new_link_string)
    # convert bold
    bolds = re.findall(r"\*\*([^\*]*)\*\*", markdown_string)
    if bolds is not None:
        for bold_text in bolds:
            old_bold_text = f"**{bold_text}**"
            new_bold_text = "\\textbf{" + bold_text + "}"
            markdown_string = markdown_string.replace(old_bold_text, new_bold_text)
    # convert italic
    italics = re.findall(r"\*([^\*]*)\*", markdown_string)
    if italics is not None:
        for italic_text in italics:
            old_italic_text = f"*{italic_text}*"
            new_italic_text = "\\textit{" + italic_text + "}"
            markdown_string = markdown_string.replace(old_italic_text, new_italic_text)
    # convert code
    codes = re.findall(r"`([^`]*)`", markdown_string)
    if codes is not None:
        for code_text in codes:
            old_code_text = f"`{code_text}`"
            new_code_text = "\\texttt{" + code_text + "}"
            markdown_string = markdown_string.replace(old_code_text, new_code_text)
    latex_string = markdown_string
    return latex_string
 def convert_a_markdown_dictionary_to_a_latex_dictionary(
    dictionary: dict[str, Any],
 ) -> dict[str, Any]:
    """
    Recursively loop through a dictionary and convert all the markdown strings (keys and
    values) to LaTeX. Also, escape special LaTeX characters in the keys and values.
    Example:
        ```python
        convert_a_markdown_dictionary_to_a_latex_dictionary(
            {
                "key1": "This is a **bold** text with an [*italic link*](https://google.com).",
                "key2": "This is a **bold** text with an [*italic link*](https://google.com).",
                "**key3**": {
                    "key4": "This is a **bold** text with an [*italic link*](https://google.com).",
                    "key5": "This is a **bold** text with an [*italic link*](https://google.com).",
                },
            }
        )
        ```
        will return:
        ```python
        {
            "key1": "This is a \\textbf{bold} text with a \\href{https://google.com}{\\textit{link}}.",
            "key2": "This is a \\textbf{bold} text with a \\href{https://google.com}{\\textit{link}}.",
            "\\textbf{key3}": {
                "key4": "This is a \\textbf{bold} text with a \\href{https://google.com}{\\textit{link}}.",
                "key5": "This is a \\textbf{bold} text with a \\href{https://google.com}{\\textit{link}}.",
            },
        }
        ```
    Args:
        dictionary (dict): The dictionary to convert.
    Returns:
        dict: The LaTeX dictionary.
    """
    for key, value in dictionary.copy().items():
        if isinstance(value, str):
            # if the value is a string, then apply markdown_to_latex and
            # escape_latex_characters to it:
            result = escape_latex_characters(value)
            dictionary[key] = markdown_to_latex(result)
        elif isinstance(value, list):
            # if the value is a list, then loop through the list and apply
            # markdown_to_latex and escape_latex_characters to each item:
            for index, item in enumerate(value):
                if isinstance(item, str):
                    result = escape_latex_characters(item)
                    dictionary[key][index] = markdown_to_latex(result)
                elif isinstance(item, dict):
                    # if the item is a dictionary, then call loop_through_dictionary
                    # again:
                    dictionary[key][index] = (
                        convert_a_markdown_dictionary_to_a_latex_dictionary(item)
                    )
        elif isinstance(value, dict):
            # if the value is a dictionary, then call loop_through_dictionary again:
            dictionary[key] = convert_a_markdown_dictionary_to_a_latex_dictionary(value)
        # do the same for the key:
        result = escape_latex_characters(key)
        dictionary[markdown_to_latex(result)] = dictionary.pop(key)
    return dictionary
 def read_input_file(
    file_path: pathlib.Path,
-) -> tuple[RenderCVDataModel, RenderCVDataModel]:
+) -> RenderCVDataModel:
    """Read the input file and return two instances of RenderCVDataModel. The first
    instance is the data model with LaTeX strings and the second instance is the data
    model with markdown strings.
@ -1282,16 +1097,12 @@ def read_input_file(
        )
    file_content = file_path.read_text(encoding="utf-8")
-    original_dictionary: dict[str, Any] = ruamel.yaml.YAML().load(file_content)
+    input_as_dictionary: dict[str, Any] = ruamel.yaml.YAML().load(file_content)
    parsed_dictionary = convert_a_markdown_dictionary_to_a_latex_dictionary(
        copy.deepcopy(original_dictionary)
    )
    # validate the parsed dictionary by creating an instance of RenderCVDataModel:
-    data_model_markdown = RenderCVDataModel(**original_dictionary)
+    rendercv_data_model = RenderCVDataModel(**input_as_dictionary)
    data_model_latex = RenderCVDataModel(**parsed_dictionary)
-    return data_model_latex, data_model_markdown
+    return rendercv_data_model
 def get_a_sample_data_model(name: str) -> RenderCVDataModel:
--- a/rendercv/renderer.py
+++ b/rendercv/renderer.py
@ -15,12 +15,12 @@ import pathlib
 import importlib.resources
 import shutil
 import sys
 import copy
 from datetime import date as Date
 from typing import Optional, Literal, Any
 import jinja2
 import markdown
 import fpdf
 from . import data_models as dm
@ -134,6 +134,16 @@ class LaTeXFile(TemplatedFile):
    data model and Jinja2 templates. It inherits from the TemplatedFile class.
    """
    def __init__(
        self,
        data_model: dm.RenderCVDataModel,
        environment: jinja2.Environment,
    ):
        data_model = transform_markdown_data_model_to_latex_data_model(
            copy.deepcopy(data_model)
        )
        super().__init__(data_model, environment)
    def render_templates(self):
        """Render and return all the templates for the $\\LaTeX$ file.
@ -312,6 +322,189 @@ class MarkdownFile(TemplatedFile):
        file_path.write_text(self.get_markdown_code(), encoding="utf-8")
 def escape_latex_characters(string: str) -> str:
    """Escape $\\LaTeX$ characters in a string.
    This function is called during the reading of the input file. Before the validation
    process, each input field's special $\\LaTeX$ characters are escaped.
    Example:
        ```python
        escape_latex_characters("This is a # string.")
        ```
        will return:
        `#!python "This is a \\# string."`
    """
    # Dictionary of escape characters:
    escape_characters = {
        "#": "\\#",
        # "$": "\\$", # Don't escape $ as it is used for math mode
        "%": "\\%",
        "&": "\\&",
        "~": "\\textasciitilde{}",
        # "_": "\\_", # Don't escape _ as it is used for math mode
        # "^": "\\textasciicircum{}", # Don't escape ^ as it is used for math mode
    }
    # Don't escape links as hyperref package will do it automatically:
    # Find all the links in the sentence:
    links = re.findall(r"\[.*?\]\(.*?\)", string)
    # Replace the links with a placeholder:
    for link in links:
        string = string.replace(link, "!!-link-!!")
    # Loop through the letters of the sentence and if you find an escape character,
    # replace it with its LaTeX equivalent:
    copy_of_the_string = list(string)
    for i, character in enumerate(copy_of_the_string):
        if character in escape_characters:
            new_character = escape_characters[character]
            copy_of_the_string[i] = new_character
    string = "".join(copy_of_the_string)
    # Replace the links with the original links:
    for link in links:
        string = string.replace("!!-link-!!", link)
    return string
 def markdown_to_latex(markdown_string: str) -> str:
    """Convert a markdown string to LaTeX.
    This function is called during the reading of the input file. Before the validation
    process, each input field is converted from markdown to LaTeX.
    Example:
        ```python
        markdown_to_latex("This is a **bold** text with an [*italic link*](https://google.com).")
        ```
        will return:
        `#!pytjon "This is a \\textbf{bold} text with a \\href{https://google.com}{\\textit{link}}."`
    Args:
        markdown_string (str): The markdown string to convert.
    Returns:
        str: The LaTeX string.
    """
    # convert links
    links = re.findall(r"\[([^\]\[]*)\]\((.*?)\)", markdown_string)
    if links is not None:
        for link in links:
            link_text = link[0]
            link_url = link[1]
            old_link_string = f"[{link_text}]({link_url})"
            new_link_string = "\\href{" + link_url + "}{" + link_text + "}"
            markdown_string = markdown_string.replace(old_link_string, new_link_string)
    # convert bold
    bolds = re.findall(r"\*\*([^\*]*)\*\*", markdown_string)
    if bolds is not None:
        for bold_text in bolds:
            old_bold_text = f"**{bold_text}**"
            new_bold_text = "\\textbf{" + bold_text + "}"
            markdown_string = markdown_string.replace(old_bold_text, new_bold_text)
    # convert italic
    italics = re.findall(r"\*([^\*]*)\*", markdown_string)
    if italics is not None:
        for italic_text in italics:
            old_italic_text = f"*{italic_text}*"
            new_italic_text = "\\textit{" + italic_text + "}"
            markdown_string = markdown_string.replace(old_italic_text, new_italic_text)
    # convert code
    codes = re.findall(r"`([^`]*)`", markdown_string)
    if codes is not None:
        for code_text in codes:
            old_code_text = f"`{code_text}`"
            new_code_text = "\\texttt{" + code_text + "}"
            markdown_string = markdown_string.replace(old_code_text, new_code_text)
    latex_string = markdown_string
    return latex_string
 def transform_markdown_data_model_to_latex_data_model(
    data_model: dm.RenderCVDataModel,
 ) -> dm.RenderCVDataModel:
    """
    Recursively loop through a `RenderCVDataModel` and convert all the markdown strings
    (user input is in markdown format) to LaTeX strings. Also, escape special LaTeX
    characters.
    Args:
        data_model (RenderCVDataModel): The data model to transform.
    Returns:
        dict: The data model with LaTeX strings.
    """
    data_model_as_dict = data_model.model_dump()
    for key, value in data_model_as_dict.items():
        if isinstance(value, str):
            # if the value is a string, then apply markdown_to_latex and
            # escape_latex_characters to it:
            result = markdown_to_latex(escape_latex_characters(value))
            # update data_model object's attribute with the new value:
            setattr(data_model, key, result)
        elif isinstance(value, list):
            # if the value is a list, then loop through the list and apply
            # markdown_to_latex and escape_latex_characters to each item:
            transformed_list = []
            for index, item in enumerate(value):
                if isinstance(item, str):
                    result = markdown_to_latex(escape_latex_characters(item))
                    transformed_list.append(result)
                elif isinstance(item, dict):
                    # if the item is a dictionary, then it means it's a sub data model.
                    # So, call transform_markdown_data_model_to_latex_data_model again:
                    sub_data_model = getattr(data_model, key)[index]
                    transformed_sub_data_model = (
                        transform_markdown_data_model_to_latex_data_model(
                            sub_data_model
                        )
                    )
                    transformed_list.append(transformed_sub_data_model)
            # update data_model object's attribute with the new value:
            setattr(data_model, key, transformed_list)
        elif isinstance(value, dict):
            if key == "sections_input":
                # Then it means it's the `sections` field:
                sections = getattr(data_model, key)
                for section_title, entries in sections.items():
                    transformed_entries = []
                    for entry in entries:
                        transformed_entry = (
                            transform_markdown_data_model_to_latex_data_model(entry)
                        )
                        transformed_entries.append(transformed_entry)
                setattr(data_model, key, sections)
            else:
                # Then it means it's a sub data model.
                # So, call transform_markdown_data_model_to_latex_data_model again:
                sub_data_model = getattr(data_model, key)
                transformed_sub_data_model = (
                    transform_markdown_data_model_to_latex_data_model(sub_data_model)
                )
                # update data_model object's attribute with the new value:
                setattr(data_model, key, transformed_sub_data_model)
    return data_model
 def make_matched_part_something(
    value: str, something: str, match_str: Optional[str] = None
 ) -> str: