rendercv/rendercv/data_model.py

1551 lines
53 KiB
Python

"""
This module contains classes and functions to parse and validate YAML or JSON input
files. It uses [Pydantic](https://github.com/pydantic/pydantic) to achieve this goal.
All the data classes have `BaseModel` from Pydantic as a base class, and some data
fields have advanced types like `HttpUrl`, `EmailStr`, or `PastDate` from the Pydantic
library for validation.
"""
from datetime import date as Date
from typing import Literal
from typing_extensions import Annotated, Optional
import re
import logging
from functools import cached_property
import urllib.request
import os
from importlib.resources import files
import json
import time
from pydantic import (
BaseModel,
HttpUrl,
Field,
field_validator,
model_validator,
computed_field,
EmailStr,
)
from pydantic.json_schema import GenerateJsonSchema
from pydantic.functional_validators import AfterValidator
from pydantic_extra_types.phone_numbers import PhoneNumber
from pydantic_extra_types.color import Color
from ruamel.yaml import YAML
logger = logging.getLogger(__name__)
def escape_latex_characters(sentence: str) -> str:
"""Escape LaTeX characters in a sentence.
Example:
```python
escape_latex_characters("This is a # sentence.")
```
will return:
`#!python "This is a \\# sentence."`
"""
# Dictionary of escape characters:
escape_characters = {
"#": r"\#",
# "$": r"\$", # Don't escape $ as it is used for math mode
"%": r"\%",
"&": r"\&",
"~": r"\textasciitilde{}",
# "_": r"\_", # Don't escape _ as it is used for math mode
# "^": r"\textasciicircum{}", # Don't escape ^ as it is used for math mode
}
# Don't escape links as hyperref will do it automatically:
# Find all the links in the sentence:
links = re.findall(r"\[.*?\]\(.*?\)", sentence)
# Replace the links with a placeholder:
for link in links:
sentence = sentence.replace(link, "!!-link-!!")
# Handle backslash and curly braces separately because the other characters are
# escaped with backslash and curly braces:
sentence = sentence.replace("{", ">>{")
sentence = sentence.replace("}", ">>}")
# don't escape backslash as it is used heavily in LaTeX:
# sentence = sentence.replace("\\", "\\textbackslash{}")
sentence = sentence.replace(">>{", "\\{")
sentence = sentence.replace(">>}", "\\}")
# Loop through the letters of the sentence and if you find an escape character,
# replace it with its LaTeX equivalent:
copy_of_the_sentence = sentence
for character in copy_of_the_sentence:
if character in escape_characters:
sentence = sentence.replace(character, escape_characters[character])
# Replace the links with the original links:
for link in links:
sentence = sentence.replace("!!-link-!!", link)
return sentence
def parse_date_string(date_string: str) -> Date | int:
"""Parse a date string in YYYY-MM-DD, YYYY-MM, or YYYY format and return a
datetime.date object.
Args:
date_string (str): The date string to parse.
Returns:
datetime.date: The parsed date.
"""
if re.match(r"\d{4}-\d{2}-\d{2}", date_string):
# Then it is in YYYY-MM-DD format
date = Date.fromisoformat(date_string)
elif re.match(r"\d{4}-\d{2}", date_string):
# Then it is in YYYY-MM format
# Assign a random day since days are not rendered in the CV
date = Date.fromisoformat(f"{date_string}-01")
elif re.match(r"\d{4}", date_string):
# Then it is in YYYY format
# Then keep it as an integer
date = int(date_string)
else:
raise ValueError(
f'The date string "{date_string}" is not in YYYY-MM-DD, YYYY-MM, or YYYY'
" format."
)
if isinstance(date, Date):
# Then it means the date is a Date object, so check if it is a past date:
if date > Date.today():
raise ValueError(
f'The date "{date_string}" is in the future. Please check the dates.'
)
return date
def compute_time_span_string(start_date: Date | int, end_date: Date | int) -> str:
"""Compute the time span between two dates and return a string that represents it.
Example:
```python
compute_time_span_string(Date(2022,9,24), Date(2025,2,12))
```
will return:
`#!python "2 years 5 months"`
Args:
start_date (Date | int): The start date.
end_date (Date | int): The end date.
Returns:
str: The time span string.
"""
# check if the types of start_date and end_date are correct:
if not isinstance(start_date, (Date, int)):
raise TypeError("start_date is not a Date object or an integer!")
if not isinstance(end_date, (Date, int)):
raise TypeError("end_date is not a Date object or an integer!")
# calculate the number of days between start_date and end_date:
if isinstance(start_date, Date) and isinstance(end_date, Date):
timespan_in_days = (end_date - start_date).days
elif isinstance(start_date, Date) and isinstance(end_date, int):
timespan_in_days = (Date(end_date, 1, 1) - start_date).days
elif isinstance(start_date, int) and isinstance(end_date, Date):
timespan_in_days = (end_date - Date(start_date, 1, 1)).days
elif isinstance(start_date, int) and isinstance(end_date, int):
timespan_in_days = (end_date - start_date) * 365
if timespan_in_days < 0:
raise ValueError(
'"start_date" can not be after "end_date". Please check the dates.'
)
# calculate the number of years between start_date and end_date:
how_many_years = timespan_in_days // 365
if how_many_years == 0:
how_many_years_string = None
elif how_many_years == 1:
how_many_years_string = "1 year"
else:
how_many_years_string = f"{how_many_years} years"
# calculate the number of months between start_date and end_date:
how_many_months = round((timespan_in_days % 365) / 30)
if how_many_months <= 1:
how_many_months_string = "1 month"
else:
how_many_months_string = f"{how_many_months} months"
# combine howManyYearsString and howManyMonthsString:
if how_many_years_string is None:
timespan_string = how_many_months_string
else:
timespan_string = f"{how_many_years_string} {how_many_months_string}"
return timespan_string
def format_date(date: Date) -> str:
"""Formats a date to a string in the following format: "Jan. 2021".
It uses month abbreviations, taken from
[Yale University Library](https://web.library.yale.edu/cataloging/months).
Example:
```python
format_date(Date(2024,5,1))
```
will return
`#!python "May 2024"`
Args:
date (Date): The date to format.
Returns:
str: The formatted date.
"""
if not isinstance(date, (Date, int)):
raise TypeError("date is not a Date object or an integer!")
if isinstance(date, int):
# Then it means the user only provided the year, so just return the year
return str(date)
# Month abbreviations,
# taken from: https://web.library.yale.edu/cataloging/months
abbreviations_of_months = [
"Jan.",
"Feb.",
"Mar.",
"Apr.",
"May",
"June",
"July",
"Aug.",
"Sept.",
"Oct.",
"Nov.",
"Dec.",
]
month = int(date.strftime("%m"))
monthAbbreviation = abbreviations_of_months[month - 1]
year = date.strftime("%Y")
date_string = f"{monthAbbreviation} {year}"
return date_string
def generate_json_schema(output_directory: str) -> str:
"""Generate the JSON schema of the data model and save it to a file.
Args:
output_directory (str): The output directory to save the schema.
"""
class RenderCVSchemaGenerator(GenerateJsonSchema):
def generate(self, schema, mode="validation"):
json_schema = super().generate(schema, mode=mode)
json_schema["title"] = "RenderCV Input"
# remove the description of the class (RenderCVDataModel)
del json_schema["description"]
# add $id
json_schema[
"$id"
] = "https://raw.githubusercontent.com/sinaatalay/rendercv/main/schema.json"
# add $schema
json_schema["$schema"] = "http://json-schema.org/draft-07/schema#"
# Loop through $defs and remove docstring descriptions and fix optional
# fields
for key, value in json_schema["$defs"].items():
# Don't allow additional properties
value["additionalProperties"] = False
if "This class" in value["description"]:
del value["description"]
null_type_dict = {}
null_type_dict["type"] = "null"
for field in value["properties"].values():
if "anyOf" in field:
if (
len(field["anyOf"]) == 2
and null_type_dict in field["anyOf"]
):
field["allOf"] = [field["anyOf"][0]]
del field["anyOf"]
return json_schema
schema = RenderCVDataModel.model_json_schema(
schema_generator=RenderCVSchemaGenerator
)
schema = json.dumps(schema, indent=2)
# Change all anyOf to oneOf
schema = schema.replace('"anyOf"', '"oneOf"')
path_to_schema = os.path.join(output_directory, "schema.json")
with open(path_to_schema, "w") as f:
f.write(schema)
return path_to_schema
# ======================================================================================
# DESIGN MODELS ========================================================================
# ======================================================================================
# To understand how to create custom data types, see:
# https://docs.pydantic.dev/latest/usage/types/custom/
LaTeXDimension = Annotated[
str,
Field(
pattern=r"\d+\.?\d* *(cm|in|pt|mm|ex|em)",
),
]
class ClassicThemePageMargins(BaseModel):
"""This class stores the margins of pages for the classic theme."""
top: LaTeXDimension = Field(
default="1.35 cm",
title="Top Margin",
description="The top margin of the page with units.",
)
bottom: LaTeXDimension = Field(
default="1.35 cm",
title="Bottom Margin",
description="The bottom margin of the page with units.",
)
left: LaTeXDimension = Field(
default="1.35 cm",
title="Left Margin",
description="The left margin of the page with units.",
)
right: LaTeXDimension = Field(
default="1.35 cm",
title="Right Margin",
description="The right margin of the page with units.",
)
class ClassicThemeSectionTitleMargins(BaseModel):
"""This class stores the margins of section titles for the classic theme."""
top: LaTeXDimension = Field(
default="0.13 cm",
title="Top Margin",
description="The top margin of section titles.",
)
bottom: LaTeXDimension = Field(
default="0.13 cm",
title="Bottom Margin",
description="The bottom margin of section titles.",
)
class ClassicThemeEntryAreaMargins(BaseModel):
"""This class stores the margins of entry areas for the classic theme.
For the classic theme, entry areas are [OneLineEntry](../user_guide.md#onelineentry),
[NormalEntry](../user_guide.md#normalentry), and
[ExperienceEntry](../user_guide.md#experienceentry).
"""
left: LaTeXDimension = Field(
default="0.2 cm",
title="Left Margin",
description="The left margin of entry areas.",
)
right: LaTeXDimension = Field(
default="0.2 cm",
title="Right Margin",
description="The right margin of entry areas.",
)
vertical_between: LaTeXDimension = Field(
default="0.12 cm",
title="Vertical Margin Between Entry Areas",
description="The vertical margin between entry areas.",
)
class ClassicThemeHighlightsAreaMargins(BaseModel):
"""This class stores the margins of highlights areas for the classic theme."""
top: LaTeXDimension = Field(
default="0.12 cm",
title="Top Margin",
description="The top margin of highlights areas.",
)
left: LaTeXDimension = Field(
default="0.6 cm",
title="Left Margin",
description="The left margin of highlights areas.",
)
vertical_between_bullet_points: LaTeXDimension = Field(
default="0.07 cm",
title="Vertical Margin Between Bullet Points",
description="The vertical margin between bullet points.",
)
class ClassicThemeMargins(BaseModel):
"""This class stores the margins for the classic theme."""
page: ClassicThemePageMargins = Field(
default=ClassicThemePageMargins(),
title="Page Margins",
description="Page margins for the classic theme.",
)
section_title: ClassicThemeSectionTitleMargins = Field(
default=ClassicThemeSectionTitleMargins(),
title="Section Title Margins",
description="Section title margins for the classic theme.",
)
entry_area: ClassicThemeEntryAreaMargins = Field(
default=ClassicThemeEntryAreaMargins(),
title="Entry Area Margins",
description="Entry area margins for the classic theme.",
)
highlights_area: ClassicThemeHighlightsAreaMargins = Field(
default=ClassicThemeHighlightsAreaMargins(),
title="Highlights Area Margins",
description="Highlights area margins for the classic theme.",
)
class ClassicThemeOptions(BaseModel):
"""This class stores the options for the classic theme.
In RenderCV, each theme has its own Pydantic class so that new themes
can be implemented easily in future.
"""
primary_color: Color = Field(
default="rgb(0,79,144)",
validate_default=True,
title="Primary Color",
description=(
"The primary color of Classic Theme. It is used for the section titles,"
" heading, and the links.\nThe color can be specified either with their"
" [name](https://www.w3.org/TR/SVG11/types.html#ColorKeywords), hexadecimal"
" value, RGB value, or HSL value."
),
examples=["Black", "7fffd4", "rgb(0,79,144)", "hsl(270, 60%, 70%)"],
)
date_and_location_width: LaTeXDimension = Field(
default="3.6 cm",
title="Date and Location Column Width",
description="The width of the date and location column.",
)
show_timespan_in: list[str] = Field(
default=[],
title="Show Time Span in These Sections",
description=(
"The time span will be shown in the date and location column in these"
" sections. The input should be a list of strings."
),
)
show_last_updated_date: bool = Field(
default=True,
title="Show Last Updated Date",
description=(
"If this option is set to true, then the last updated date will be shown"
" in the header."
),
)
margins: ClassicThemeMargins = Field(
default=ClassicThemeMargins(),
title="Margins",
description="Page, section title, entry field, and highlights field margins.",
)
class Design(BaseModel):
"""This class stores the theme name of the CV and the theme's options."""
theme: Literal["classic"] = Field(
default="classic",
title="Theme name",
description='The only option is "Classic" for now.',
)
font: Literal["SourceSans3", "Roboto", "EBGaramond"] = Field(
default="SourceSans3",
title="Font",
description="The font of the CV.",
)
font_size: Literal["10pt", "11pt", "12pt"] = Field(
default="10pt",
title="Font Size",
description="The font size of the CV. It can be 10pt, 11pt, or 12pt.",
)
page_size: Literal["a4paper", "letterpaper"] = Field(
default="a4paper",
title="Page Size",
description="The page size of the CV. It can be a4paper or letterpaper.",
)
options: Optional[ClassicThemeOptions] = Field(
default=None,
title="Theme Options",
description="The options of the theme.",
)
@model_validator(mode="after")
@classmethod
def check_theme_options(cls, model):
"""Check if the correct options are provided for the theme. If the theme
options are not provided, then set the default options for the theme.
"""
if model.options is None:
if model.theme == "classic":
model.options = ClassicThemeOptions()
else:
raise RuntimeError(f'The theme "{model.theme}" does not exist.')
else:
if model.theme == "classic":
if not isinstance(model.options, ClassicThemeOptions):
raise ValueError(
"Theme is classic but options is not classic theme options."
)
else:
raise RuntimeError(f'The theme "{model.theme}"" does not exist.')
return model
@field_validator("font")
@classmethod
def check_font(cls, font: str) -> str:
"""Go to the fonts directory and check if the font exists. If it exists, then
check if all the required files are there.
"""
fonts_directory = str(files("rendercv").joinpath("templates", "fonts"))
if font not in os.listdir(fonts_directory):
raise ValueError(
f'The font "{font}" is not found in the "fonts" directory.'
)
else:
font_directory = os.path.join(fonts_directory, font)
required_files = [
f"{font}-Bold.ttf",
f"{font}-BoldItalic.ttf",
f"{font}-Italic.ttf",
f"{font}-Regular.ttf",
]
for file in required_files:
if file not in os.listdir(font_directory):
raise ValueError(f"{file} is not found in the {font} directory.")
return font
@field_validator("theme")
@classmethod
def check_if_theme_exists(cls, theme: str) -> str:
"""Check if the theme exists in the templates directory."""
template_directory = str(files("rendercv").joinpath("templates", theme))
if f"{theme}.tex.j2" not in os.listdir(template_directory):
raise ValueError(
f'The theme "{theme}" is not found in the "templates" directory.'
)
return theme
# ======================================================================================
# ======================================================================================
# ======================================================================================
# ======================================================================================
# CONTENT MODELS =======================================================================
# ======================================================================================
LaTeXString = Annotated[str, AfterValidator(escape_latex_characters)]
PastDate = Annotated[
str,
Field(pattern=r"\d{4}-?(\d{2})?-?(\d{2})?"),
AfterValidator(parse_date_string),
]
class Event(BaseModel):
"""This class is the parent class for classes like `#!python EducationEntry`,
`#!python ExperienceEntry`, `#!python NormalEntry`, and `#!python OneLineEntry`.
It stores the common fields between these classes like dates, location, highlights,
and URL.
"""
start_date: Optional[PastDate] = Field(
default=None,
title="Start Date",
description="The start date of the event in YYYY-MM-DD format.",
examples=["2020-09-24"],
)
end_date: Optional[Literal["present"] | PastDate] = Field(
default=None,
title="End Date",
description=(
"The end date of the event in YYYY-MM-DD format. If the event is still"
' ongoing, then the value should be "present".'
),
examples=["2020-09-24", "present"],
)
date: Optional[PastDate | LaTeXString] = Field(
default=None,
title="Date",
description=(
"If the event is a one-day event, then this field should be filled in"
" YYYY-MM-DD format. If the event is a multi-day event, then the start date"
" and end date should be provided instead. All of them can't be provided at"
" the same time."
),
examples=["2020-09-24", "My Custom Date"],
)
highlights: Optional[list[LaTeXString]] = Field(
default=[],
title="Highlights",
description=(
"The highlights of the event. It will be rendered as bullet points."
),
examples=["Did this.", "Did that."],
)
location: Optional[LaTeXString] = Field(
default=None,
title="Location",
description=(
"The location of the event. It will be shown with the date in the"
" same column."
),
examples=["Istanbul, Turkey"],
)
url: Optional[HttpUrl] = None
@field_validator("date")
@classmethod
def check_date(cls, date: PastDate | LaTeXString) -> PastDate | LaTeXString:
"""Check if the date is a string or a Date object and return accordingly."""
if isinstance(date, str):
try:
# If this runs, it means the date is an ISO format string, and it can be
# parsed
date = parse_date_string(date)
except ValueError:
# Then it means it is a custom string like "Fall 2023"
date = date
return date
@model_validator(mode="after")
@classmethod
def check_dates(cls, model):
"""Make sure that either `#!python start_date` and `#!python end_date` or only
`#!python date` is provided.
"""
date_is_provided = False
start_date_is_provided = False
end_date_is_provided = False
if model.date is not None:
date_is_provided = True
if model.start_date is not None:
start_date_is_provided = True
if model.end_date is not None:
end_date_is_provided = True
if date_is_provided and start_date_is_provided and end_date_is_provided:
logger.warning(
'"start_date", "end_date" and "date" are all provided in of the'
" entries. Therefore, date will be ignored."
)
model.date = None
elif date_is_provided and start_date_is_provided and not end_date_is_provided:
logger.warning(
'Both "date" and "start_date" is provided in of the entries.'
' "start_date" will be ignored.'
)
model.start_date = None
model.end_date = None
elif date_is_provided and end_date_is_provided and not start_date_is_provided:
logger.warning(
'Both "date" and "end_date" is provided in of the entries. "end_date"'
" will be ignored."
)
model.start_date = None
model.end_date = None
elif start_date_is_provided and not end_date_is_provided:
logger.warning(
'"start_date" is provided in of the entries, but "end_date" is not.'
' "end_date" will be set to "present".'
)
model.end_date = "present"
if model.start_date is not None and model.end_date is not None:
if model.end_date == "present":
end_date = Date.today()
elif isinstance(model.end_date, int):
# Then it means user only provided the year, so convert it to a Date
# object with the first day of the year (just for the date comparison)
end_date = Date(model.end_date, 1, 1)
elif isinstance(model.end_date, Date):
# Then it means user provided either YYYY-MM-DD or YYYY-MM
end_date = model.end_date
else:
raise RuntimeError("end_date is neither an integer nor a Date object.")
if isinstance(model.start_date, int):
# Then it means user only provided the year, so convert it to a Date
# object with the first day of the year (just for the date comparison)
start_date = Date(model.start_date, 1, 1)
elif isinstance(model.start_date, Date):
# Then it means user provided either YYYY-MM-DD or YYYY-MM
start_date = model.start_date
else:
raise RuntimeError(
"start_date is neither an integer nor a Date object."
)
if start_date > end_date:
raise ValueError(
'"start_date" can not be after "end_date". Please check the dates.'
)
return model
@computed_field
@cached_property
def date_and_location_strings_with_timespan(self) -> list[LaTeXString]:
date_and_location_strings = []
if self.location is not None:
date_and_location_strings.append(self.location)
if self.date is not None:
if isinstance(self.date, str):
date_and_location_strings.append(self.date)
elif isinstance(self.date, Date):
date_and_location_strings.append(format_date(self.date))
else:
raise RuntimeError("Date is neither a string nor a Date object.")
elif self.start_date is not None and self.end_date is not None:
start_date = format_date(self.start_date)
if self.end_date == "present":
end_date = "present"
time_span_string = compute_time_span_string(
self.start_date, Date.today()
)
else:
end_date = format_date(self.end_date)
time_span_string = compute_time_span_string(
self.start_date, self.end_date
)
date_and_location_strings.append(f"{start_date} to {end_date}")
date_and_location_strings.append(f"{time_span_string}")
return date_and_location_strings
@computed_field
@cached_property
def date_and_location_strings_without_timespan(self) -> list[LaTeXString]:
# use copy() to avoid modifying the original list
date_and_location_strings = self.date_and_location_strings_with_timespan.copy()
for string in date_and_location_strings:
if (
"years" in string
or "months" in string
or "year" in string
or "month" in string
):
date_and_location_strings.remove(string)
return date_and_location_strings
@computed_field
@cached_property
def highlight_strings(self) -> list[LaTeXString]:
highlight_strings = []
if self.highlights is not None:
highlight_strings.extend(self.highlights)
return highlight_strings
@computed_field
@cached_property
def markdown_url(self) -> Optional[str]:
if self.url is None:
return None
else:
url = str(self.url)
if "github" in url:
link_text = "view on GitHub"
elif "linkedin" in url:
link_text = "view on LinkedIn"
elif "instagram" in url:
link_text = "view on Instagram"
elif "youtube" in url:
link_text = "view on YouTube"
else:
link_text = "view on my website"
markdown_url = f"[{link_text}]({url})"
return markdown_url
@computed_field
@cached_property
def month_and_year(self) -> Optional[LaTeXString]:
if self.date is not None:
# Then it means start_date and end_date are not provided.
try:
# If this runs, it means the date is an ISO format string, and it can be
# parsed
month_and_year = format_date(self.date) # type: ignore
except TypeError:
month_and_year = str(self.date)
else:
# Then it means start_date and end_date are provided and month_and_year
# doesn't make sense.
month_and_year = None
return month_and_year
class OneLineEntry(Event):
"""This class stores [OneLineEntry](../user_guide.md#onelineentry) information."""
name: LaTeXString = Field(
title="Name",
description="The name of the entry. It will be shown as bold text.",
)
details: LaTeXString = Field(
title="Details",
description="The details of the entry. It will be shown as normal text.",
)
class NormalEntry(Event):
"""This class stores [NormalEntry](../user_guide.md#normalentry) information."""
name: LaTeXString = Field(
title="Name",
description="The name of the entry. It will be shown as bold text.",
)
class ExperienceEntry(Event):
"""This class stores [ExperienceEntry](../user_guide.md#experienceentry) information."""
company: LaTeXString = Field(
title="Company",
description="The company name. It will be shown as bold text.",
)
position: LaTeXString = Field(
title="Position",
description="The position. It will be shown as normal text.",
)
class EducationEntry(Event):
"""This class stores [EducationEntry](../user_guide.md#educationentry) information."""
institution: LaTeXString = Field(
title="Institution",
description="The institution name. It will be shown as bold text.",
examples=["Bogazici University"],
)
area: LaTeXString = Field(
title="Area",
description="The area of study. It will be shown as normal text.",
)
study_type: Optional[LaTeXString] = Field(
default=None,
title="Study Type",
description="The type of the degree.",
examples=["BS", "BA", "PhD", "MS"],
)
gpa: Optional[LaTeXString | float] = Field(
default=None,
title="GPA",
description="The GPA of the degree.",
)
transcript_url: Optional[HttpUrl] = Field(
default=None,
title="Transcript URL",
description=(
"The URL of the transcript. It will be shown as a link next to the GPA."
),
examples=["https://example.com/transcript.pdf"],
)
@computed_field
@cached_property
def highlight_strings(self) -> list[LaTeXString]:
highlight_strings = []
if self.gpa is not None:
gpaString = f"GPA: {self.gpa}"
if self.transcript_url is not None:
gpaString += f" ([Transcript]({self.transcript_url}))"
highlight_strings.append(gpaString)
if self.highlights is not None:
highlight_strings.extend(self.highlights)
return highlight_strings
class PublicationEntry(Event):
"""This class stores [PublicationEntry](../user_guide.md#publicationentry) information."""
title: LaTeXString = Field(
title="Title of the Publication",
description="The title of the publication. It will be shown as bold text.",
)
authors: list[LaTeXString] = Field(
title="Authors",
description="The authors of the publication in order as a list of strings.",
)
doi: str = Field(
title="DOI",
description="The DOI of the publication.",
examples=["10.48550/arXiv.2310.03138"],
)
date: LaTeXString = Field(
title="Publication Date",
description="The date of the publication.",
examples=["2021-10-31"],
)
cited_by: Optional[int] = Field(
default=None,
title="Cited By",
description="The number of citations of the publication.",
)
journal: Optional[LaTeXString] = Field(
default=None,
title="Journal",
description="The journal or the conference name.",
)
@field_validator("doi")
@classmethod
def check_doi(cls, doi: str) -> str:
"""Check if the DOI exists in the DOI System."""
doi_url = f"https://doi.org/{doi}"
try:
urllib.request.urlopen(doi_url)
except urllib.request.HTTPError as err:
if err.code == 404:
raise ValueError(f"{doi} cannot be found in the DOI System.")
return doi
@computed_field
@cached_property
def doi_url(self) -> str:
return f"https://doi.org/{self.doi}"
class SocialNetwork(BaseModel):
"""This class stores a social network information.
Currently, only LinkedIn, Github, and Instagram are supported.
"""
network: Literal["LinkedIn", "GitHub", "Instagram", "Orcid", "Mastodon"] = Field(
title="Social Network",
description="The social network name.",
)
username: str = Field(
title="Username",
description="The username of the social network. The link will be generated.",
)
class Connection(BaseModel):
"""This class stores a connection/communication information.
Warning:
This class isn't designed for users to use, but it is used by RenderCV to make
the $\\LaTeX$ templating easier.
"""
name: Literal[
"LinkedIn",
"GitHub",
"Instagram",
"Orcid",
"Mastodon",
"phone",
"email",
"website",
"location",
]
value: str
@staticmethod
def MastodonUname2Url(id: str) -> Optional[HttpUrl]:
"""From a Mastodon id "user@domain.example" returns profile url."""
# The closest thing to a formal spec of Mastodon usernames
# where these regular expressions from a (reference?)
# implementation
#
# https://github.com/mastodon/mastodon/blob/852123867768e23410af5bd07ac0327bead0d9b2/app/models/account.rb#L68
#
# USERNAME_RE = /[a-z0-9_]+([a-z0-9_.-]+[a-z0-9_]+)?/i
# SERNAME_RE = /[a-z0-9_]+([a-z0-9_.-]+[a-z0-9_]+)?/i
# MENTION_RE = %r{(?<![=/[:word:]])@((#{USERNAME_RE})(?:@[[:word:].-]+[[:word:]]+)?)}i
# URL_PREFIX_RE = %r{\Ahttp(s?)://[^/]+}
pattern = re.compile(r"""
^\s* # ignore leading spaces
@? # Optional @ prefix
(?P<uname>[a-z0-9_]+([a-z0-9_.-]+[a-z0-9_]+)?) # username part
@ # separator
(?P<domain>[a-z0-9_]+([a-z0-9_.-]+[a-z0-9_]+)?) # domain part
\s*$ # ignore trailing whitespace
""", re.VERBOSE | re.IGNORECASE)
m = pattern.match(id)
if m is None:
raise ValueError("Invalid mastodon address")
uname = m.group("uname")
domain = m.group("domain")
url = HttpUrl(f'https://{domain}/@{uname}')
return url
@computed_field
@cached_property
def url(self) -> Optional[HttpUrl | str]:
if self.name == "LinkedIn":
url = f"https://www.linkedin.com/in/{self.value}"
elif self.name == "GitHub":
url = f"https://www.github.com/{self.value}"
elif self.name == "Instagram":
url = f"https://www.instagram.com/{self.value}"
elif self.name == "Orcid":
url = f"https://orcid.org/{self.value}"
elif self.name == "Mastodon":
url = self.MastodonUname2Url(self.value)
elif self.name == "email":
url = f"mailto:{self.value}"
elif self.name == "website":
url = self.value
elif self.name == "phone":
url = self.value
elif self.name == "location":
url = None
else:
raise RuntimeError(f'"{self.name}" is not a valid connection.')
return url
class SectionBase(BaseModel):
"""This class stores a section information.
It is the parent class of all the section classes like
`#!python SectionWithEducationEntries`, `#!python SectionWithExperienceEntries`,
`#!python SectionWithNormalEntries`, `#!python SectionWithOneLineEntries`, and
`#!python SectionWithPublicationEntries`.
"""
title: LaTeXString = Field(
title="Section Title",
description="The title of the section.",
examples=["My Custom Section"],
)
link_text: Optional[LaTeXString] = Field(
default=None,
title="Link Text",
description=(
"If the section has a link, then what should be the text of the link? If"
" this field is not provided, then the link text will be generated"
" automatically based on the URL."
),
examples=["view on GitHub", "view on LinkedIn"],
)
@field_validator("title")
@classmethod
def make_first_letters_uppercase(cls, title: LaTeXString) -> LaTeXString:
"""Capitalize the first letters of the words in the title."""
return title.title()
entry_type_field = Field(
title="Entry Type",
description="The type of the entries in the section.",
)
entries_field = Field(
title="Entries",
description="The entries of the section. The format depends on the entry type.",
)
class SectionWithEducationEntries(SectionBase):
"""This class stores a section with
[EducationEntry](../user_guide.md#educationentry)s.
"""
entry_type: Literal["EducationEntry"] = entry_type_field
entries: list[EducationEntry] = entries_field
class SectionWithExperienceEntries(SectionBase):
"""This class stores a section with
[ExperienceEntry](../user_guide.md#experienceentry)s.
"""
entry_type: Literal["ExperienceEntry"] = entry_type_field
entries: list[ExperienceEntry] = entries_field
class SectionWithNormalEntries(SectionBase):
"""This class stores a section with
[NormalEntry](../user_guide.md#normalentry)s.
"""
entry_type: Literal["NormalEntry"] = entry_type_field
entries: list[NormalEntry] = entries_field
class SectionWithOneLineEntries(SectionBase):
"""This class stores a section with
[OneLineEntry](../user_guide.md#onelineentry)s.
"""
entry_type: Literal["OneLineEntry"] = entry_type_field
entries: list[OneLineEntry] = entries_field
class SectionWithPublicationEntries(SectionBase):
"""This class stores a section with
[PublicationEntry](../user_guide.md#publicationentry)s.
"""
entry_type: Literal["PublicationEntry"] = entry_type_field
entries: list[PublicationEntry] = entries_field
Section = Annotated[
SectionWithEducationEntries
| SectionWithExperienceEntries
| SectionWithNormalEntries
| SectionWithOneLineEntries
| SectionWithPublicationEntries,
Field(
discriminator="entry_type",
),
]
class CurriculumVitae(BaseModel):
"""This class bindes all the information of a CV together."""
name: LaTeXString = Field(
title="Name",
description="The name of the person.",
)
label: Optional[LaTeXString] = Field(
default=None,
title="Label",
description="The label of the person.",
)
location: Optional[LaTeXString] = Field(
default=None,
title="Location",
description="The location of the person. This is not rendered currently.",
)
email: Optional[EmailStr] = Field(
default=None,
title="Email",
description="The email of the person. It will be rendered in the heading.",
)
phone: Optional[PhoneNumber] = None
website: Optional[HttpUrl] = None
social_networks: Optional[list[SocialNetwork]] = Field(
default=None,
title="Social Networks",
description=(
"The social networks of the person. They will be rendered in the heading."
),
)
summary: Optional[LaTeXString] = Field(
default=None,
title="Summary",
description="The summary of the person.",
)
section_order: Optional[list[str]] = Field(
default=None,
title="Section Order",
description=(
"The order of sections in the CV. The section title should be used."
),
)
education: Optional[list[EducationEntry]] = Field(
default=None,
title="Education",
description="The education entries of the person.",
)
experience: Optional[list[ExperienceEntry]] = Field(
default=None,
title="Experience",
description="The experience entries of the person.",
)
work_experience: Optional[list[ExperienceEntry]] = Field(
default=None,
title="Work Experience",
description="The work experience entries of the person.",
)
projects: Optional[list[NormalEntry]] = Field(
default=None,
title="Projects",
description="The project entries of the person.",
)
academic_projects: Optional[list[NormalEntry]] = Field(
default=None,
title="Academic Projects",
description="The academic project entries of the person.",
)
personal_projects: Optional[list[NormalEntry]] = Field(
default=None,
title="Personal Projects",
description="The personal project entries of the person.",
)
publications: Optional[list[PublicationEntry]] = Field(
default=None,
title="Publications",
description="The publication entries of the person.",
)
certificates: Optional[list[NormalEntry]] = Field(
default=None,
title="Certificates",
description="The certificate entries of the person.",
)
extracurricular_activities: Optional[list[ExperienceEntry]] = Field(
default=None,
title="Extracurricular Activities",
description="The extracurricular activity entries of the person.",
)
test_scores: Optional[list[OneLineEntry]] = Field(
default=None,
title="Test Scores",
description="The test score entries of the person.",
)
programming_skills: Optional[list[OneLineEntry]] = Field(
default=None,
title="Programming Skills",
description="The programming skill entries of the person.",
)
skills: Optional[list[OneLineEntry]] = Field(
default=None,
title="Skills",
description="The skill entries of the person.",
)
awards: Optional[list[OneLineEntry]] = Field(
default=None,
title="Awards",
description="The award entries of the person.",
)
interests: Optional[list[OneLineEntry]] = Field(
default=None,
title="Interests",
description="The interest entries of the person.",
)
custom_sections: Optional[list[Section]] = Field(
default=None,
title="Custom Sections",
description=(
"Custom sections with custom section titles can be rendered as well."
),
)
@model_validator(mode="after")
@classmethod
def check_if_the_section_names_are_unique(cls, model):
"""Check if the section names are unique."""
pre_defined_section_names = [
"Education",
"Work Experience",
"Academic Projects",
"Personal Projects",
"Certificates",
"Extracurricular Activities",
"Test Scores",
"Skills",
"Publications",
]
if model.custom_sections is not None:
custom_section_names = []
for custom_section in model.custom_sections:
custom_section_names.append(custom_section.title)
section_names = pre_defined_section_names + custom_section_names
else:
section_names = pre_defined_section_names
seen = set()
duplicates = {val for val in section_names if (val in seen or seen.add(val))}
if len(duplicates) > 0:
raise ValueError(
"The section names should be unique. The following section names are"
f" duplicated: {duplicates}"
)
return model
@computed_field
@cached_property
def connections(self) -> list[Connection]:
connections = []
if self.location is not None:
connections.append(Connection(name="location", value=self.location))
if self.phone is not None:
connections.append(Connection(name="phone", value=self.phone))
if self.email is not None:
connections.append(Connection(name="email", value=self.email))
if self.website is not None:
connections.append(Connection(name="website", value=str(self.website)))
if self.social_networks is not None:
for social_network in self.social_networks:
connections.append(
Connection(
name=social_network.network, value=social_network.username
)
)
return connections
@computed_field
@cached_property
def sections(self) -> list[SectionBase]:
sections = []
# Pre-defined sections (i.e. sections that are not custom)):
pre_defined_sections = {
"Education": self.education,
"Experience": self.experience,
"Work Experience": self.work_experience,
"Projects": self.projects,
"Academic Projects": self.academic_projects,
"Personal Projects": self.personal_projects,
"Certificates": self.certificates,
"Extracurricular Activities": self.extracurricular_activities,
"Test Scores": self.test_scores,
"Skills": self.skills,
"Awards": self.awards,
"Interests": self.interests,
"Programming Skills": self.programming_skills,
"Publications": self.publications,
}
if self.section_order is None:
# If the user didn't specify the section order, then use the default order:
self.section_order = [
"Education",
"Experience",
"Work Experience",
"Projects",
"Academic Projects",
"Personal Projects",
"Skills",
"Awards",
"Interests",
"Programming Skills",
"Test Scores",
"Certificates",
"Extracurricular Activities",
"Publications",
]
if self.custom_sections is not None:
# If the user specified custom sections, then add them to the end of the
# section order with the same order as they are in the input file:
self.section_order.extend(
[section.title for section in self.custom_sections]
)
link_text = None
entry_type = None
entries = None
for section_name in self.section_order:
# Create a section for each section name in the section order:
if section_name in pre_defined_sections:
if pre_defined_sections[section_name] is None:
continue
entry_type = pre_defined_sections[section_name][0].__class__.__name__
entries = pre_defined_sections[section_name]
if section_name == "Test Scores":
link_text = "Score Report"
elif section_name == "Certificates":
link_text = "Certificate"
else:
link_text = None
else:
# If the section is not pre-defined, then it is a custom section.
# Find the corresponding custom section and get its entries:
for custom_section in self.custom_sections: # type: ignore
if custom_section.title == section_name:
entry_type = custom_section.entries[0].__class__.__name__
link_text = custom_section.link_text
entries = custom_section.entries
break
else:
entry_type = None
link_text = None
entries = None
if entry_type is None or entries is None:
raise ValueError(
f'"{section_name}" is not a valid section name. Please create a'
" custom section with this name or delete it from the section"
" order."
)
object_map = {
"EducationEntry": SectionWithEducationEntries,
"ExperienceEntry": SectionWithExperienceEntries,
"NormalEntry": SectionWithNormalEntries,
"OneLineEntry": SectionWithOneLineEntries,
"PublicationEntry": SectionWithPublicationEntries,
}
section = object_map[entry_type](
title=section_name,
entry_type=entry_type, # type: ignore
entries=entries,
link_text=link_text,
)
sections.append(section)
# Check if any of the pre-defined sections are missing from the section order:
for section_name in pre_defined_sections:
if pre_defined_sections[section_name] is not None:
if section_name not in self.section_order:
logger.warning(
f'The section "{section_name}" is not found in the section'
" order! It will not be rendered."
)
# Check if any of the custom sections are missing from the section order:
if self.custom_sections is not None:
for custom_section in self.custom_sections:
if custom_section.title not in self.section_order:
logger.warning(
f'The custom section "{custom_section.title}" is not found in'
" the section order! It will not be rendered."
)
return sections
# ======================================================================================
# ======================================================================================
# ======================================================================================
class RenderCVDataModel(BaseModel):
"""This class binds both the CV and the design information together."""
design: Design = Field(
default=Design(),
title="Design",
description="The design of the CV.",
)
cv: CurriculumVitae = Field(
default=CurriculumVitae(name="John Doe"),
title="Curriculum Vitae",
description="The data of the CV.",
)
@model_validator(mode="after")
@classmethod
def check_classical_theme_show_timespan_in(cls, model):
"""Check if the sections that are specified in the "show_timespan_in" option
exist in the CV.
"""
if model.design.theme == "classic":
design: Design = model.design
cv: CurriculumVitae = model.cv
section_titles = [section.title for section in cv.sections]
for title in design.options.show_timespan_in: # type: ignore
if title not in section_titles:
not_used_section_titles = list(
set(section_titles) - set(design.options.show_timespan_in)
)
not_used_section_titles = ", ".join(not_used_section_titles)
raise ValueError(
f'The section "{title}" that is specified in the'
' "show_timespan_in" option is not found in the CV. You'
" might have wanted to use one of these:"
f" {not_used_section_titles}."
)
return model
def read_input_file(file_path: str) -> RenderCVDataModel:
"""Read the input file.
Args:
file_path (str): The path to the input file.
Returns:
str: The input file as a string.
"""
start_time = time.time()
logger.info(f"Reading and validating the input file {file_path} has started.")
# check if the file exists:
if not os.path.exists(file_path):
raise FileNotFoundError(f"The file {file_path} doesn't exist.")
# check the file extension:
accepted_extensions = [".yaml", ".yml", ".json", ".json5"]
if not any(file_path.endswith(extension) for extension in accepted_extensions):
raise ValueError(
f"The file {file_path} doesn't have an accepted extension!"
f" Accepted extensions are: {accepted_extensions}"
)
with open(file_path) as file:
yaml = YAML()
raw_json = yaml.load(file)
data = RenderCVDataModel(**raw_json)
end_time = time.time()
time_taken = end_time - start_time
logger.info(
f"Reading and validating the input file {file_path} has finished in"
f" {time_taken:.2f} s."
)
return data