mirror of https://github.com/eyhc1/rendercv.git
finalize data_models.py
This commit is contained in:
parent
605697c089
commit
cbac4b9432
|
@ -1,8 +1,9 @@
|
||||||
"""
|
"""
|
||||||
This module contains all the necessary classes to store CV data. The YAML input file is
|
This module contains all the necessary classes to store CV data. These classes are called
|
||||||
transformed into instances of these classes (i.e., the input file is read) with the
|
data models. The YAML input file is transformed into instances of these classes (i.e.,
|
||||||
[`read_input_file`](utilities.md#read_input_file) function. RenderCV utilizes these
|
the input file is read) with the [`read_input_file`](#read_input_file) function.
|
||||||
instances to generate a CV. These classes are called data models.
|
RenderCV utilizes these instances to generate a LaTeX file which is then rendered into a
|
||||||
|
PDF file.
|
||||||
|
|
||||||
The data models are initialized with data validation to prevent unexpected bugs. During
|
The data models are initialized with data validation to prevent unexpected bugs. During
|
||||||
the initialization, we ensure that everything is in the correct place and that the user
|
the initialization, we ensure that everything is in the correct place and that the user
|
||||||
|
@ -20,20 +21,21 @@ import json
|
||||||
import re
|
import re
|
||||||
import ssl
|
import ssl
|
||||||
import time
|
import time
|
||||||
|
import pathlib
|
||||||
|
|
||||||
import pydantic
|
import pydantic
|
||||||
import pydantic_extra_types.phone_numbers as pydantic_phone_numbers
|
import pydantic_extra_types.phone_numbers as pydantic_phone_numbers
|
||||||
import pydantic.functional_validators as pydantic_functional_validators
|
|
||||||
import strictyaml
|
import strictyaml
|
||||||
|
|
||||||
from . import utilities
|
from . import utilities
|
||||||
from .terminal_reporter import warning
|
from .terminal_reporter import warning
|
||||||
from .themes.classic import ClassicThemeOptions
|
from .themes.classic import ClassicThemeOptions
|
||||||
from .terminal_reporter import warning, error, information
|
from .terminal_reporter import information, time_the_event_below
|
||||||
|
|
||||||
|
|
||||||
# Create a custom type called PastDate that accepts a string in YYYY-MM-DD format and
|
# Create a custom type called RenderCVDate that accepts only strings in YYYY-MM-DD or
|
||||||
# returns a Date object. It also checks if the date is in the past.
|
# YYYY-MM format:
|
||||||
|
# This type is used to validate the date fields in the data.
|
||||||
# See https://docs.pydantic.dev/2.5/concepts/types/#custom-types for more information
|
# See https://docs.pydantic.dev/2.5/concepts/types/#custom-types for more information
|
||||||
# about custom types.
|
# about custom types.
|
||||||
RenderCVDate = Annotated[
|
RenderCVDate = Annotated[
|
||||||
|
@ -44,7 +46,8 @@ RenderCVDate = Annotated[
|
||||||
|
|
||||||
def get_date_object(date: str | int) -> Date:
|
def get_date_object(date: str | int) -> Date:
|
||||||
"""Parse a date string in YYYY-MM-DD, YYYY-MM, or YYYY format and return a
|
"""Parse a date string in YYYY-MM-DD, YYYY-MM, or YYYY format and return a
|
||||||
datetime.date object.
|
datetime.date object. This function is used throughout the validation process of the
|
||||||
|
data models.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
date_string (str): The date string to parse.
|
date (str | int): The date to parse (a date string or an integer year).
|
||||||
|
@ -71,10 +74,9 @@ def get_date_object(date: str | int) -> Date:
|
||||||
|
|
||||||
|
|
||||||
class RenderCVBaseModel(pydantic.BaseModel):
|
class RenderCVBaseModel(pydantic.BaseModel):
|
||||||
"""
|
"""This class is the parent class of all the data models in RenderCV. It has only
|
||||||
This class is the parent class of all the data models in RenderCV. It has only one
|
one difference from the default `pydantic.BaseModel`: It raises an error if an
|
||||||
difference from the default `pydantic.BaseModel`: It raises an error if an unknown
|
unknown key is provided in the input file.
|
||||||
key is provided in the input file.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
model_config = pydantic.ConfigDict(extra="forbid")
|
model_config = pydantic.ConfigDict(extra="forbid")
|
||||||
|
@ -136,8 +138,7 @@ class EntryBase(RenderCVBaseModel):
|
||||||
@classmethod
|
@classmethod
|
||||||
def check_dates(cls, model):
|
def check_dates(cls, model):
|
||||||
"""
|
"""
|
||||||
Check if the dates are provided correctly and convert them to `Date` objects if
|
Check if the dates are provided correctly and do the necessary adjustments.
|
||||||
they are provided in YYYY-MM-DD format.
|
|
||||||
"""
|
"""
|
||||||
date_is_provided = False
|
date_is_provided = False
|
||||||
start_date_is_provided = False
|
start_date_is_provided = False
|
||||||
|
@ -215,8 +216,7 @@ class EntryBase(RenderCVBaseModel):
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
```python
|
```python
|
||||||
entry = dm.EntryBase(start_date=2020-10-11, end_date=2021-04-04)
|
entry = dm.EntryBase(start_date="2020-10-11", end_date="2021-04-04").date_string
|
||||||
entry.date_string
|
|
||||||
```
|
```
|
||||||
will return:
|
will return:
|
||||||
`#!python "2020-10-11 to 2021-04-04"`
|
`#!python "2020-10-11 to 2021-04-04"`
|
||||||
|
@ -226,26 +226,33 @@ class EntryBase(RenderCVBaseModel):
|
||||||
date_object = get_date_object(self.date)
|
date_object = get_date_object(self.date)
|
||||||
date_string = utilities.format_date(date_object)
|
date_string = utilities.format_date(date_object)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
|
# Then it is a custom date string (e.g., "My Custom Date")
|
||||||
date_string = str(self.date)
|
date_string = str(self.date)
|
||||||
|
|
||||||
elif self.start_date is not None and self.end_date is not None:
|
elif self.start_date is not None and self.end_date is not None:
|
||||||
if isinstance(self.start_date, int):
|
if isinstance(self.start_date, int):
|
||||||
|
# Then it means only the year is provided
|
||||||
start_date = str(self.start_date)
|
start_date = str(self.start_date)
|
||||||
else:
|
else:
|
||||||
|
# Then it means start_date is either in YYYY-MM-DD or YYYY-MM format
|
||||||
date_object = get_date_object(self.start_date)
|
date_object = get_date_object(self.start_date)
|
||||||
start_date = utilities.format_date(date_object)
|
start_date = utilities.format_date(date_object)
|
||||||
|
|
||||||
if self.end_date == "present":
|
if self.end_date == "present":
|
||||||
end_date = "present"
|
end_date = "present"
|
||||||
elif isinstance(self.end_date, int):
|
elif isinstance(self.end_date, int):
|
||||||
|
# Then it means only the year is provided
|
||||||
end_date = str(self.end_date)
|
end_date = str(self.end_date)
|
||||||
else:
|
else:
|
||||||
|
# Then it means end_date is either in YYYY-MM-DD or YYYY-MM format
|
||||||
date_object = get_date_object(self.end_date)
|
date_object = get_date_object(self.end_date)
|
||||||
end_date = utilities.format_date(date_object)
|
end_date = utilities.format_date(date_object)
|
||||||
|
|
||||||
date_string = f"{start_date} to {end_date}"
|
date_string = f"{start_date} to {end_date}"
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
# Neither date, start_date, nor end_date is provided, so return an empty
|
||||||
|
# string:
|
||||||
date_string = ""
|
date_string = ""
|
||||||
|
|
||||||
return date_string
|
return date_string
|
||||||
|
@ -259,8 +266,7 @@ class EntryBase(RenderCVBaseModel):
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
```python
|
```python
|
||||||
entry = dm.EntryBase(start_date=2020-01-01, end_date=2020-04-20)
|
entry = dm.EntryBase(start_date="2020-01-01", end_date="2020-04-20").time_span
|
||||||
entry.time_span
|
|
||||||
```
|
```
|
||||||
will return:
|
will return:
|
||||||
`#!python "4 months"`
|
`#!python "4 months"`
|
||||||
|
@ -270,6 +276,8 @@ class EntryBase(RenderCVBaseModel):
|
||||||
date = self.date
|
date = self.date
|
||||||
|
|
||||||
if date is not None or (start_date is None and end_date is None):
|
if date is not None or (start_date is None and end_date is None):
|
||||||
|
# If only the date is provided, the time span is irrelevant. So, return an
|
||||||
|
# empty string.
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
elif isinstance(start_date, int) or isinstance(end_date, int):
|
elif isinstance(start_date, int) or isinstance(end_date, int):
|
||||||
|
@ -288,6 +296,8 @@ class EntryBase(RenderCVBaseModel):
|
||||||
return time_span_string
|
return time_span_string
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
# Then it means both start_date and end_date are in YYYY-MM-DD or YYYY-MM
|
||||||
|
# format.
|
||||||
end_date = get_date_object(end_date) # type: ignore
|
end_date = get_date_object(end_date) # type: ignore
|
||||||
start_date = get_date_object(start_date) # type: ignore
|
start_date = get_date_object(start_date) # type: ignore
|
||||||
|
|
||||||
|
@ -326,6 +336,7 @@ class EntryBase(RenderCVBaseModel):
|
||||||
"""
|
"""
|
||||||
url_text = None
|
url_text = None
|
||||||
if self.url_text_input is not None:
|
if self.url_text_input is not None:
|
||||||
|
# If the user provides a custom URL text, then use it.
|
||||||
url_text = self.url_text_input
|
url_text = self.url_text_input
|
||||||
elif self.url is not None:
|
elif self.url is not None:
|
||||||
url_text_dictionary = {
|
url_text_dictionary = {
|
||||||
|
@ -399,7 +410,7 @@ class EducationEntry(EntryBase):
|
||||||
|
|
||||||
|
|
||||||
class PublicationEntry(RenderCVBaseModel):
|
class PublicationEntry(RenderCVBaseModel):
|
||||||
"""THis class is the data model of `PublicationEntry`."""
|
"""This class is the data model of `PublicationEntry`."""
|
||||||
|
|
||||||
title: str = pydantic.Field(
|
title: str = pydantic.Field(
|
||||||
title="Title of the Publication",
|
title="Title of the Publication",
|
||||||
|
@ -442,7 +453,8 @@ class PublicationEntry(RenderCVBaseModel):
|
||||||
@classmethod
|
@classmethod
|
||||||
def check_doi(cls, doi: str) -> str:
|
def check_doi(cls, doi: str) -> str:
|
||||||
"""Check if the DOI exists in the DOI System."""
|
"""Check if the DOI exists in the DOI System."""
|
||||||
# see https://stackoverflow.com/a/60671292/18840665
|
# see https://stackoverflow.com/a/60671292/18840665 for the explanation of the
|
||||||
|
# next line:
|
||||||
ssl._create_default_https_context = ssl._create_unverified_context
|
ssl._create_default_https_context = ssl._create_unverified_context
|
||||||
|
|
||||||
doi_url = f"http://doi.org/{doi}"
|
doi_url = f"http://doi.org/{doi}"
|
||||||
|
@ -458,11 +470,13 @@ class PublicationEntry(RenderCVBaseModel):
|
||||||
@pydantic.computed_field
|
@pydantic.computed_field
|
||||||
@cached_property
|
@cached_property
|
||||||
def doi_url(self) -> str:
|
def doi_url(self) -> str:
|
||||||
|
"""Return the URL of the DOI."""
|
||||||
return f"https://doi.org/{self.doi}"
|
return f"https://doi.org/{self.doi}"
|
||||||
|
|
||||||
@pydantic.computed_field
|
@pydantic.computed_field
|
||||||
@cached_property
|
@cached_property
|
||||||
def date_string(self) -> str:
|
def date_string(self) -> str:
|
||||||
|
"""Return the date string of the publication."""
|
||||||
if isinstance(self.date, int):
|
if isinstance(self.date, int):
|
||||||
date_string = str(self.date)
|
date_string = str(self.date)
|
||||||
elif isinstance(self.date, str):
|
elif isinstance(self.date, str):
|
||||||
|
@ -477,7 +491,9 @@ class PublicationEntry(RenderCVBaseModel):
|
||||||
# ======================================================================================
|
# ======================================================================================
|
||||||
# Section models: ======================================================================
|
# Section models: ======================================================================
|
||||||
# ======================================================================================
|
# ======================================================================================
|
||||||
|
# Each section data model has a field called `entry_type` and a field called `entries`.
|
||||||
|
# Since the same pydantic.Field object is used in all of the section models, it is
|
||||||
|
# defined as a separate variable and used in all of the section models:
|
||||||
entry_type_field_of_section_model = pydantic.Field(
|
entry_type_field_of_section_model = pydantic.Field(
|
||||||
title="Entry Type",
|
title="Entry Type",
|
||||||
description="The type of the entries in the section.",
|
description="The type of the entries in the section.",
|
||||||
|
@ -493,7 +509,7 @@ class SectionBase(RenderCVBaseModel):
|
||||||
because all of the section types have a common field called `title`.
|
because all of the section types have a common field called `title`.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# title is excluded from the JSON schema because this will be written by RenderCV
|
# Title is excluded from the JSON schema because this will be written by RenderCV
|
||||||
# depending on the key in the input file.
|
# depending on the key in the input file.
|
||||||
title: Optional[str] = pydantic.Field(default=None, exclude=True)
|
title: Optional[str] = pydantic.Field(default=None, exclude=True)
|
||||||
|
|
||||||
|
@ -540,8 +556,9 @@ class SectionWithTextEntries(SectionBase):
|
||||||
entries: list[str] = entries_field_of_section_model
|
entries: list[str] = entries_field_of_section_model
|
||||||
|
|
||||||
|
|
||||||
# A custom type Section. It is a union of all the section types and the correct section
|
# Create a custom type called Section:
|
||||||
# type is determined by the entry_type field.
|
# It is a union of all the section types and the correct section type is determined by
|
||||||
|
# the entry_type field, thanks to Pydantic's discriminator feature.
|
||||||
# See https://docs.pydantic.dev/2.5/concepts/fields/#discriminator for more information
|
# See https://docs.pydantic.dev/2.5/concepts/fields/#discriminator for more information
|
||||||
# about discriminators.
|
# about discriminators.
|
||||||
Section = Annotated[
|
Section = Annotated[
|
||||||
|
@ -562,9 +579,13 @@ Section = Annotated[
|
||||||
|
|
||||||
# RenderCV requires users to specify the entry type for each section in their CV in
|
# RenderCV requires users to specify the entry type for each section in their CV in
|
||||||
# order to render the correct thing in the CV. However, for certain sections, specifying
|
# order to render the correct thing in the CV. However, for certain sections, specifying
|
||||||
# the entry type can be redundant. To simplify this process for users, default entry
|
# the entry type can be redundant (for example, for the "Education" section, the entry
|
||||||
|
# type is probably "EducationEntry"). To simplify this process for users, default entry
|
||||||
# types are stored in a dictionary for certain section titles so that users do not have
|
# types are stored in a dictionary for certain section titles so that users do not have
|
||||||
# to specify them.
|
# to specify them.
|
||||||
|
|
||||||
|
# If you have new section titles that you would like to add to this dictionary, please
|
||||||
|
# open an issue or pull request on GitHub.
|
||||||
default_entry_types_for_a_given_title: dict[
|
default_entry_types_for_a_given_title: dict[
|
||||||
str,
|
str,
|
||||||
tuple[type[EducationEntry], type[SectionWithEducationEntries]]
|
tuple[type[EducationEntry], type[SectionWithEducationEntries]]
|
||||||
|
@ -613,6 +634,7 @@ class SocialNetwork(RenderCVBaseModel):
|
||||||
@pydantic.model_validator(mode="after")
|
@pydantic.model_validator(mode="after")
|
||||||
@classmethod
|
@classmethod
|
||||||
def check_networks(cls, model):
|
def check_networks(cls, model):
|
||||||
|
"""Check if the `SocialNetwork` is provided correctly."""
|
||||||
if model.network == "Mastodon":
|
if model.network == "Mastodon":
|
||||||
if not model.username.startswith("@"):
|
if not model.username.startswith("@"):
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
|
@ -782,6 +804,11 @@ class CurriculumVitae(RenderCVBaseModel):
|
||||||
# ======================================================================================
|
# ======================================================================================
|
||||||
# ======================================================================================
|
# ======================================================================================
|
||||||
|
|
||||||
|
# Create a custom type called Design:
|
||||||
|
# It is a union of all the design options and the correct design option is determined by
|
||||||
|
# the theme field, thanks to Pydantic's discriminator feature.
|
||||||
|
# See https://docs.pydantic.dev/2.5/concepts/fields/#discriminator for more information
|
||||||
|
# about discriminators.
|
||||||
Design = ClassicThemeOptions
|
Design = ClassicThemeOptions
|
||||||
|
|
||||||
|
|
||||||
|
@ -799,11 +826,200 @@ class RenderCVDataModel(RenderCVBaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def generate_json_schema(output_directory: str) -> str:
|
def escape_latex_characters(sentence: str) -> str:
|
||||||
"""Generate the JSON schema of the data model and save it to a file.
|
"""Escape $\\LaTeX$ characters in a string.
|
||||||
|
|
||||||
|
This function is called during the reading of the input file. Before the validation
|
||||||
|
process, each input field's special $\\LaTeX$ characters are escaped.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
```python
|
||||||
|
escape_latex_characters("This is a # string.")
|
||||||
|
```
|
||||||
|
will return:
|
||||||
|
`#!python "This is a \\# string."`
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Dictionary of escape characters:
|
||||||
|
escape_characters = {
|
||||||
|
"#": r"\#",
|
||||||
|
# "$": r"\$", # Don't escape $ as it is used for math mode
|
||||||
|
"%": r"\%",
|
||||||
|
"&": r"\&",
|
||||||
|
"~": r"\textasciitilde{}",
|
||||||
|
# "_": r"\_", # Don't escape _ as it is used for math mode
|
||||||
|
# "^": r"\textasciicircum{}", # Don't escape ^ as it is used for math mode
|
||||||
|
}
|
||||||
|
|
||||||
|
# Don't escape links as hyperref package will do it automatically:
|
||||||
|
|
||||||
|
# Find all the links in the sentence:
|
||||||
|
links = re.findall(r"\[.*?\]\(.*?\)", sentence)
|
||||||
|
|
||||||
|
# Replace the links with a placeholder:
|
||||||
|
for link in links:
|
||||||
|
sentence = sentence.replace(link, "!!-link-!!")
|
||||||
|
|
||||||
|
# Loop through the letters of the sentence and if you find an escape character,
|
||||||
|
# replace it with its LaTeX equivalent:
|
||||||
|
copy_of_the_sentence = sentence
|
||||||
|
for character in copy_of_the_sentence:
|
||||||
|
if character in escape_characters:
|
||||||
|
sentence = sentence.replace(character, escape_characters[character])
|
||||||
|
|
||||||
|
# Replace the links with the original links:
|
||||||
|
for link in links:
|
||||||
|
sentence = sentence.replace("!!-link-!!", link)
|
||||||
|
|
||||||
|
return sentence
|
||||||
|
|
||||||
|
|
||||||
|
def markdown_to_latex(markdown_string: str) -> str:
|
||||||
|
"""Convert a markdown string to LaTeX.
|
||||||
|
|
||||||
|
This function is called during the reading of the input file. Before the validation
|
||||||
|
process, each input field is converted from markdown to LaTeX.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
```python
|
||||||
|
markdown_to_latex("This is a **bold** text with an [*italic link*](https://google.com).")
|
||||||
|
```
|
||||||
|
|
||||||
|
will return:
|
||||||
|
|
||||||
|
`#!python "This is a \\textbf{bold} text with an \\href{https://google.com}{\\textit{italic link}}."`
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
output_directory (str): The output directory to save the schema.
|
markdown_string (str): The markdown string to convert.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: The LaTeX string.
|
||||||
|
"""
|
||||||
|
# convert links
|
||||||
|
links = re.findall(r"\[([^\]\[]*)\]\((.*?)\)", markdown_string)
|
||||||
|
if links is not None:
|
||||||
|
for link in links:
|
||||||
|
link_text = link[0]
|
||||||
|
link_url = link[1]
|
||||||
|
|
||||||
|
old_link_string = f"[{link_text}]({link_url})"
|
||||||
|
new_link_string = "\\href{" + link_url + "}{" + link_text + "}"
|
||||||
|
|
||||||
|
markdown_string = markdown_string.replace(old_link_string, new_link_string)
|
||||||
|
|
||||||
|
# convert bold
|
||||||
|
bolds = re.findall(r"\*\*([^\*]*)\*\*", markdown_string)
|
||||||
|
if bolds is not None:
|
||||||
|
for bold_text in bolds:
|
||||||
|
old_bold_text = f"**{bold_text}**"
|
||||||
|
new_bold_text = "\\textbf{" + bold_text + "}"
|
||||||
|
|
||||||
|
markdown_string = markdown_string.replace(old_bold_text, new_bold_text)
|
||||||
|
|
||||||
|
# convert italic
|
||||||
|
italics = re.findall(r"\*([^\*]*)\*", markdown_string)
|
||||||
|
if italics is not None:
|
||||||
|
for italic_text in italics:
|
||||||
|
old_italic_text = f"*{italic_text}*"
|
||||||
|
new_italic_text = "\\textit{" + italic_text + "}"
|
||||||
|
|
||||||
|
markdown_string = markdown_string.replace(old_italic_text, new_italic_text)
|
||||||
|
|
||||||
|
# convert code
|
||||||
|
codes = re.findall(r"`([^`]*)`", markdown_string)
|
||||||
|
if codes is not None:
|
||||||
|
for code_text in codes:
|
||||||
|
old_code_text = f"`{code_text}`"
|
||||||
|
new_code_text = "\\texttt{" + code_text + "}"
|
||||||
|
|
||||||
|
markdown_string = markdown_string.replace(old_code_text, new_code_text)
|
||||||
|
|
||||||
|
latex_string = markdown_string
|
||||||
|
|
||||||
|
return latex_string
|
||||||
|
|
||||||
|
|
||||||
|
@time_the_event_below("Reading and validating the input file")
|
||||||
|
def read_input_file(file_path: pathlib.Path) -> RenderCVDataModel:
|
||||||
|
"""Read the input file and return an instance of RenderCVDataModel.
|
||||||
|
|
||||||
|
This function reads the input file, converts the markdown strings to $\\LaTeX$, and
|
||||||
|
validates the input file with the data models.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
file_path (pathlib.Path): The path to the input file.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
RenderCVDataModel: The validated data model created from the input file.
|
||||||
|
"""
|
||||||
|
# check if the file exists:
|
||||||
|
if not os.path.exists(file_path):
|
||||||
|
raise FileNotFoundError(f"The input file {file_path} doesn't exist.")
|
||||||
|
|
||||||
|
# check the file extension:
|
||||||
|
accepted_extensions = [".yaml", ".yml", ".json", ".json5"]
|
||||||
|
if file_path.suffix not in accepted_extensions:
|
||||||
|
raise ValueError(
|
||||||
|
"The input file should have one of the following extensions:"
|
||||||
|
f" {accepted_extensions}. The input file is {file_path}."
|
||||||
|
)
|
||||||
|
|
||||||
|
with open(file_path) as file:
|
||||||
|
file_content = file.read()
|
||||||
|
parsed_dictionary: dict[str, Any] = strictyaml.load(file_content).data # type: ignore
|
||||||
|
|
||||||
|
def loop_through_dictionary(dictionary: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
"""Recursively loop through a dictionary and apply `markdown_to_latex` and
|
||||||
|
`escape_latex_characters` to all the fields.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
dictionary (dict[str, Any]): The dictionary to loop through.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict[str, Any]: The dictionary with markdown_to_latex and
|
||||||
|
escape_latex_characters applied to all the fields.
|
||||||
|
"""
|
||||||
|
for key, value in dictionary.items():
|
||||||
|
if isinstance(value, str):
|
||||||
|
# if the value is a string, then apply markdown_to_latex and
|
||||||
|
# escape_latex_characters to it:
|
||||||
|
result = escape_latex_characters(value)
|
||||||
|
dictionary[key] = markdown_to_latex(result)
|
||||||
|
elif isinstance(value, list):
|
||||||
|
# if the value is a list, then loop through the list and apply
|
||||||
|
# markdown_to_latex and escape_latex_characters to each item:
|
||||||
|
for index, item in enumerate(value):
|
||||||
|
if isinstance(item, str):
|
||||||
|
result = escape_latex_characters(item)
|
||||||
|
dictionary[key][index] = markdown_to_latex(result)
|
||||||
|
elif isinstance(item, dict):
|
||||||
|
# if the item is a dictionary, then call loop_through_dictionary
|
||||||
|
# again:
|
||||||
|
dictionary[key][index] = loop_through_dictionary(item)
|
||||||
|
elif isinstance(value, dict):
|
||||||
|
# if the value is a dictionary, then call loop_through_dictionary again:
|
||||||
|
dictionary[key] = loop_through_dictionary(value)
|
||||||
|
|
||||||
|
return dictionary
|
||||||
|
|
||||||
|
parsed_dictionary = loop_through_dictionary(parsed_dictionary)
|
||||||
|
|
||||||
|
# validate the parsed dictionary by creating an instance of RenderCVDataModel:
|
||||||
|
data = RenderCVDataModel(**parsed_dictionary) ## type: ignore
|
||||||
|
|
||||||
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
def generate_json_schema(json_schema_path: pathlib.Path):
|
||||||
|
"""Generate the JSON schema of the data model and save it to a file.
|
||||||
|
|
||||||
|
JSON schema is generated for the users to make it easier for them to write the input
|
||||||
|
file. The JSON Schema of RenderCV is saved in the `docs` directory of the repository
|
||||||
|
and distributed to the users with the
|
||||||
|
[JSON Schema Store](https://www.schemastore.org/).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
json_schema_path (pathlib.Path): The path to save the JSON schema.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
class RenderCVSchemaGenerator(pydantic.json_schema.GenerateJsonSchema):
|
class RenderCVSchemaGenerator(pydantic.json_schema.GenerateJsonSchema):
|
||||||
|
@ -870,186 +1086,5 @@ def generate_json_schema(output_directory: str) -> str:
|
||||||
# Change all anyOf to oneOf
|
# Change all anyOf to oneOf
|
||||||
schema = schema.replace('"anyOf"', '"oneOf"')
|
schema = schema.replace('"anyOf"', '"oneOf"')
|
||||||
|
|
||||||
path_to_schema = os.path.join(output_directory, "schema.json")
|
with open(json_schema_path, "w") as f:
|
||||||
with open(path_to_schema, "w") as f:
|
|
||||||
f.write(schema)
|
f.write(schema)
|
||||||
|
|
||||||
return path_to_schema
|
|
||||||
|
|
||||||
|
|
||||||
def escape_latex_characters(sentence: str) -> str:
|
|
||||||
"""Escape LaTeX characters in a string.
|
|
||||||
|
|
||||||
Example:
|
|
||||||
```python
|
|
||||||
escape_latex_characters("This is a # string.")
|
|
||||||
```
|
|
||||||
will return:
|
|
||||||
`#!python "This is a \\# string."`
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Dictionary of escape characters:
|
|
||||||
escape_characters = {
|
|
||||||
"#": r"\#",
|
|
||||||
# "$": r"\$", # Don't escape $ as it is used for math mode
|
|
||||||
"%": r"\%",
|
|
||||||
"&": r"\&",
|
|
||||||
"~": r"\textasciitilde{}",
|
|
||||||
# "_": r"\_", # Don't escape _ as it is used for math mode
|
|
||||||
# "^": r"\textasciicircum{}", # Don't escape ^ as it is used for math mode
|
|
||||||
}
|
|
||||||
|
|
||||||
# Don't escape links as hyperref package will do it automatically:
|
|
||||||
|
|
||||||
# Find all the links in the sentence:
|
|
||||||
links = re.findall(r"\[.*?\]\(.*?\)", sentence)
|
|
||||||
|
|
||||||
# Replace the links with a placeholder:
|
|
||||||
for link in links:
|
|
||||||
sentence = sentence.replace(link, "!!-link-!!")
|
|
||||||
|
|
||||||
# Loop through the letters of the sentence and if you find an escape character,
|
|
||||||
# replace it with its LaTeX equivalent:
|
|
||||||
copy_of_the_sentence = sentence
|
|
||||||
for character in copy_of_the_sentence:
|
|
||||||
if character in escape_characters:
|
|
||||||
sentence = sentence.replace(character, escape_characters[character])
|
|
||||||
|
|
||||||
# Replace the links with the original links:
|
|
||||||
for link in links:
|
|
||||||
sentence = sentence.replace("!!-link-!!", link)
|
|
||||||
|
|
||||||
return sentence
|
|
||||||
|
|
||||||
|
|
||||||
def markdown_to_latex(markdown_string: str) -> str:
|
|
||||||
"""Convert a markdown string to LaTeX.
|
|
||||||
|
|
||||||
This function is used as a Jinja2 filter.
|
|
||||||
|
|
||||||
Example:
|
|
||||||
```python
|
|
||||||
markdown_to_latex("This is a **bold** text with an [*italic link*](https://google.com).")
|
|
||||||
```
|
|
||||||
|
|
||||||
will return:
|
|
||||||
|
|
||||||
`#!pytjon "This is a \\textbf{bold} text with a \\href{https://google.com}{\\textit{link}}."`
|
|
||||||
|
|
||||||
Args:
|
|
||||||
markdown_string (str): The markdown string to convert.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str: The LaTeX string.
|
|
||||||
"""
|
|
||||||
# convert links
|
|
||||||
links = re.findall(r"\[([^\]\[]*)\]\((.*?)\)", markdown_string)
|
|
||||||
if links is not None:
|
|
||||||
for link in links:
|
|
||||||
link_text = link[0]
|
|
||||||
link_url = link[1]
|
|
||||||
|
|
||||||
old_link_string = f"[{link_text}]({link_url})"
|
|
||||||
new_link_string = "\\href{" + link_url + "}{" + link_text + "}"
|
|
||||||
|
|
||||||
markdown_string = markdown_string.replace(old_link_string, new_link_string)
|
|
||||||
|
|
||||||
# convert bold
|
|
||||||
bolds = re.findall(r"\*\*([^\*]*)\*\*", markdown_string)
|
|
||||||
if bolds is not None:
|
|
||||||
for bold_text in bolds:
|
|
||||||
old_bold_text = f"**{bold_text}**"
|
|
||||||
new_bold_text = "\\textbf{" + bold_text + "}"
|
|
||||||
|
|
||||||
markdown_string = markdown_string.replace(old_bold_text, new_bold_text)
|
|
||||||
|
|
||||||
# convert italic
|
|
||||||
italics = re.findall(r"\*([^\*]*)\*", markdown_string)
|
|
||||||
if italics is not None:
|
|
||||||
for italic_text in italics:
|
|
||||||
old_italic_text = f"*{italic_text}*"
|
|
||||||
new_italic_text = "\\textit{" + italic_text + "}"
|
|
||||||
|
|
||||||
markdown_string = markdown_string.replace(old_italic_text, new_italic_text)
|
|
||||||
|
|
||||||
# convert code
|
|
||||||
codes = re.findall(r"`([^`]*)`", markdown_string)
|
|
||||||
if codes is not None:
|
|
||||||
for code_text in codes:
|
|
||||||
old_code_text = f"`{code_text}`"
|
|
||||||
new_code_text = "\\texttt{" + code_text + "}"
|
|
||||||
|
|
||||||
markdown_string = markdown_string.replace(old_code_text, new_code_text)
|
|
||||||
|
|
||||||
latex_string = markdown_string
|
|
||||||
|
|
||||||
return latex_string
|
|
||||||
|
|
||||||
|
|
||||||
def read_input_file(file_path: str) -> RenderCVDataModel:
|
|
||||||
"""Read the input file and return an instance of RenderCVDataModel.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
file_path (str): The path to the input file.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str: The input file as a string.
|
|
||||||
"""
|
|
||||||
start_time = time.time()
|
|
||||||
|
|
||||||
information(f"Reading and validating the input file {file_path} has started.")
|
|
||||||
|
|
||||||
# check if the file exists:
|
|
||||||
if not os.path.exists(file_path):
|
|
||||||
raise FileNotFoundError(f"The input file {file_path} doesn't exist.")
|
|
||||||
|
|
||||||
# check the file extension:
|
|
||||||
accepted_extensions = [".yaml", ".yml", ".json", ".json5"]
|
|
||||||
if not any(file_path.endswith(extension) for extension in accepted_extensions):
|
|
||||||
raise ValueError(
|
|
||||||
f"The file {file_path} doesn't have an accepted extension!"
|
|
||||||
f" Accepted extensions are: {accepted_extensions}"
|
|
||||||
)
|
|
||||||
|
|
||||||
with open(file_path) as file:
|
|
||||||
file_content = file.read()
|
|
||||||
parsed_dictionary: dict[str, Any] = strictyaml.load(file_content).data # type: ignore
|
|
||||||
|
|
||||||
def loop_through_dictionary(dictionary: dict[str, Any]) -> dict[str, Any]:
|
|
||||||
"""Recursively loop through a dictionary and apply markdown_to_latex and
|
|
||||||
escape_latex_characters to all the fields.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
dictionary (dict[str, Any]): The dictionary to loop through.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
dict[str, Any]: The dictionary with markdown_to_latex and
|
|
||||||
escape_latex_characters applied to all the fields.
|
|
||||||
"""
|
|
||||||
for key, value in dictionary.items():
|
|
||||||
if isinstance(value, str):
|
|
||||||
result = escape_latex_characters(value)
|
|
||||||
dictionary[key] = markdown_to_latex(result)
|
|
||||||
elif isinstance(value, list):
|
|
||||||
for index, item in enumerate(value):
|
|
||||||
if isinstance(item, str):
|
|
||||||
result = escape_latex_characters(item)
|
|
||||||
dictionary[key][index] = markdown_to_latex(result)
|
|
||||||
elif isinstance(item, dict):
|
|
||||||
dictionary[key][index] = loop_through_dictionary(item)
|
|
||||||
elif isinstance(value, dict):
|
|
||||||
dictionary[key] = loop_through_dictionary(value)
|
|
||||||
|
|
||||||
return dictionary
|
|
||||||
|
|
||||||
parsed_dictionary = loop_through_dictionary(parsed_dictionary)
|
|
||||||
|
|
||||||
data = RenderCVDataModel(**parsed_dictionary) ## type: ignore
|
|
||||||
|
|
||||||
end_time = time.time()
|
|
||||||
time_taken = end_time - start_time
|
|
||||||
information(
|
|
||||||
f"Reading and validating the input file {file_path} has finished in"
|
|
||||||
f" {time_taken:.2f} s."
|
|
||||||
)
|
|
||||||
return data
|
|
||||||
|
|
Loading…
Reference in New Issue