data_models: enhance data models so that new entry types can be added easily

2024-03-10 18:12:39 +01:00 · 2024-03-10 18:12:39 +01:00 · ecca51b2c8
parent 96cdae32f2
commit ecca51b2c8
1 changed files with 128 additions and 170 deletions
--- a/rendercv/data_models.py
+++ b/rendercv/data_models.py
@ -2,17 +2,19 @@
 This module contains all the necessary classes to store CV data. These classes are called
 data models. The YAML input file is transformed into instances of these classes (i.e.,
 the input file is read) with the [`read_input_file`](#read_input_file) function.
-RenderCV utilizes these instances to generate a $\\LaTeX$ file which is then rendered into a
-PDF file.
+RenderCV utilizes these instances to generate a $\\LaTeX$ file which is then rendered
+into a PDF file.

 The data models are initialized with data validation to prevent unexpected bugs. During
 the initialization, we ensure that everything is in the correct place and that the user
 has provided a valid RenderCV input. This is achieved through the use of
-[Pydantic](https://pypi.org/project/pydantic/).
+[Pydantic](https://pypi.org/project/pydantic/). Each class method decorated with
+`pydantic.model_validator` or `pydantic.field_validator` is executed automatically
+during the data classes' initialization.
 """

 from datetime import date as Date
-from typing import Literal, Any, Type, Annotated, Optional, get_args
+from typing import Literal, Any, Type, Annotated, Optional, get_args, Union
 import importlib
 import importlib.util
 import importlib.machinery
@ -158,8 +160,8 @@ class EntryBase(RenderCVBaseModel):
        title="End Date",
        description=(
            "The end date of the event in YYYY-MM-DD, YYYY-MM, or YYYY format. If the"
-            ' event is still ongoing, then type "present" or provide only the start'
-            " date."
+            ' event is still ongoing, then type "present" or provide only the'
+            " start_date."
        ),
        examples=["2020-09-24", "present"],
        json_schema_extra={"default": "2020-01-01"},
@ -168,10 +170,9 @@ class EntryBase(RenderCVBaseModel):
        default=None,
        title="Date",
        description=(
-            "If the event is a one-day event, then this field should be filled in"
-            " YYYY-MM-DD format. If the event is a multi-day event, then the start date"
-            " and end date should be provided instead. All of them can't be provided at"
-            " the same time."
+            "If the event is a one-day event, then this field can be filled in"
+            " YYYY-MM-DD format. Also, this field can be used if you would like to use"
+            ' a custom date string (like "Fall 2020").'
        ),
        examples=["2020-09-24", "My Custom Date"],
        json_schema_extra={"default": "Custom Date or 2020-01-01"},
@ -197,15 +198,9 @@ class EntryBase(RenderCVBaseModel):
        """
        Check if the dates are provided correctly and do the necessary adjustments.
        """
-        date_is_provided = False
-        start_date_is_provided = False
-        end_date_is_provided = False
-        if model.date is not None:
-            date_is_provided = True
-        if model.start_date is not None:
-            start_date_is_provided = True
-        if model.end_date is not None:
-            end_date_is_provided = True
+        date_is_provided = model.date is not None
+        start_date_is_provided = model.start_date is not None
+        end_date_is_provided = model.end_date is not None

        if date_is_provided:
            model.start_date = None
@ -213,6 +208,7 @@ class EntryBase(RenderCVBaseModel):

            if re.fullmatch(date_pattern_for_json_schema, model.date):
                # Then it is in YYYY-MM-DD, YYYY-MM, or YYYY format
+                # Check if it is a valid date:
                try:
                    get_date_object(model.date)
                except ValueError as e:
@ -227,6 +223,8 @@ class EntryBase(RenderCVBaseModel):
            )
        elif start_date_is_provided:
            if not end_date_is_provided:
+                # Then it means only the start_date is provided, so it is an ongoing
+                # event:
                model.end_date = "present"

            # Check if start_date and end_date are provided correctly:
@ -359,20 +357,24 @@ class EntryBase(RenderCVBaseModel):
            will return:
            `#!python "4 months"`
        """
-        start_date = self.start_date
-        end_date = self.end_date
-        date = self.date
+        date_is_provided = self.date is not None
+        start_date_is_provided = self.start_date is not None
+        end_date_is_provided = self.end_date is not None

-        if date is not None or (start_date is None and end_date is None):
+        if date_is_provided:
            # If only the date is provided, the time span is irrelevant. So, return an
            # empty string.
            return ""

-        elif isinstance(start_date, int) or isinstance(end_date, int):
+        elif not start_date_is_provided and not end_date_is_provided:
+            # If neither start_date nor end_date is provided, return an empty string.
+            return ""
+
+        elif isinstance(self.start_date, int) or isinstance(self.end_date, int):
            # Then it means one of the dates is year, so time span cannot be more
            # specific than years.
-            start_year = get_date_object(start_date).year  # type: ignore
-            end_year = get_date_object(end_date).year  # type: ignore
+            start_year = get_date_object(self.start_date).year  # type: ignore
+            end_year = get_date_object(self.end_date).year  # type: ignore

            time_span_in_years = end_year - start_year

@ -386,8 +388,8 @@ class EntryBase(RenderCVBaseModel):
        else:
            # Then it means both start_date and end_date are in YYYY-MM-DD or YYYY-MM
            # format.
-            end_date = get_date_object(end_date)  # type: ignore
-            start_date = get_date_object(start_date)  # type: ignore
+            end_date = get_date_object(self.end_date)  # type: ignore
+            start_date = get_date_object(self.start_date)  # type: ignore

            # calculate the number of days between start_date and end_date:
            timespan_in_days = (end_date - start_date).days  # type: ignore
@ -420,13 +422,13 @@ class EntryBase(RenderCVBaseModel):
 class OneLineEntry(RenderCVBaseModel):
    """This class is the data model of `OneLineEntry`."""

-    name: str = pydantic.Field(
+    label: str = pydantic.Field(
        title="Name",
-        description="The name of the entry. It will be shown as bold text.",
+        description="The label of the OneLineEntry.",
    )
    details: str = pydantic.Field(
        title="Details",
-        description="The details of the entry. It will be shown as normal text.",
+        description="The details of the OneLineEntry.",
    )


@ -435,7 +437,7 @@ class NormalEntry(EntryBase):

    name: str = pydantic.Field(
        title="Name",
-        description="The name of the entry. It will be shown as bold text.",
+        description="The name of the NormalEntry.",
    )


@ -444,11 +446,11 @@ class ExperienceEntry(EntryBase):

    company: str = pydantic.Field(
        title="Company",
-        description="The company name. It will be shown as bold text.",
+        description="The company name.",
    )
    position: str = pydantic.Field(
        title="Position",
-        description="The position. It will be shown as normal text.",
+        description="The position.",
    )


@ -457,11 +459,11 @@ class EducationEntry(EntryBase):

    institution: str = pydantic.Field(
        title="Institution",
-        description="The institution name. It will be shown as bold text.",
+        description="The institution name.",
    )
    area: str = pydantic.Field(
        title="Area",
-        description="The area of study. It will be shown as normal text.",
+        description="The area of study.",
    )
    degree: Optional[str] = pydantic.Field(
        default=None,
@ -477,7 +479,7 @@ class PublicationEntry(RenderCVBaseModel):

    title: str = pydantic.Field(
        title="Title of the Publication",
-        description="The title of the publication. It will be shown as bold text.",
+        description="The title of the publication.",
    )
    authors: list[str] = pydantic.Field(
        title="Authors",
@ -554,6 +556,25 @@ class PublicationEntry(RenderCVBaseModel):
        return date_string


+# Create custom types called Entry and ListOfEntries:
+Entry = (
+    OneLineEntry
+    | NormalEntry
+    | ExperienceEntry
+    | EducationEntry
+    | PublicationEntry
+    | str
+)
+ListOfEntries = (
+    list[OneLineEntry]
+    | list[NormalEntry]
+    | list[ExperienceEntry]
+    | list[EducationEntry]
+    | list[PublicationEntry]
+    | list[str]
+)
+entry_types = Entry.__args__[:-1]  # a tuple of all the entry types except str
+
 # ======================================================================================
 # Section models: ======================================================================
 # ======================================================================================
@ -578,144 +599,88 @@ class SectionBase(RenderCVBaseModel):
    # Title is excluded from the JSON schema because this will be written by RenderCV
    # depending on the key in the input file.
    title: Optional[str] = pydantic.Field(default=None, exclude=True)
+    entry_type: str
+    entries: list[Entry]


-class SectionWithEducationEntries(SectionBase):
-    """This class is the data model of the section with `EducationEntry`s."""
+def create_a_section_model(entry_type: Type[Entry]) -> Type[SectionBase]:
+    """Create a section model based on the entry type. See [Pydantic's documentation
+    about dynamic model
+    creation](https://docs.pydantic.dev/latest/concepts/models/#dynamic-model-creation)
+    for more information.

-    entry_type: Literal["EducationEntry"] = entry_type_field_of_section_model
-    entries: list[EducationEntry] = entries_field_of_section_model
+    Args:
+        entry_type (Type[Entry]): The entry type to create the section model.
+    Returns:
+        Type[SectionBase]: The section model.
+    """
+    if entry_type == str:
+        model_name = "SectionWithTextEntries"
+        entry_type_name = "TextEntry"
+    else:
+        model_name = "SectionWith" + entry_type.__name__.replace("Entry", "Entries")
+        entry_type_name = entry_type.__name__

+    SectionModel = pydantic.create_model(
+        model_name,
+        entry_type=(Literal[entry_type_name], ...),  # type: ignore
+        entries=(list[entry_type], ...),
+        __base__=SectionBase,
+    )

-class SectionWithExperienceEntries(SectionBase):
-    """This class is the data model of the section with `ExperienceEntry`s."""
-
-    entry_type: Literal["ExperienceEntry"] = entry_type_field_of_section_model
-    entries: list[ExperienceEntry] = entries_field_of_section_model
-
-
-class SectionWithNormalEntries(SectionBase):
-    """This class is the data model of the section with `NormalEntry`s."""
-
-    entry_type: Literal["NormalEntry"] = entry_type_field_of_section_model
-    entries: list[NormalEntry] = entries_field_of_section_model
-
-
-class SectionWithOneLineEntries(SectionBase):
-    """This class is the data model of the section with `OneLineEntry`s."""
-
-    entry_type: Literal["OneLineEntry"] = entry_type_field_of_section_model
-    entries: list[OneLineEntry] = entries_field_of_section_model
-
-
-class SectionWithPublicationEntries(SectionBase):
-    """This class is the data model of the section with `PublicationEntry`s."""
-
-    entry_type: Literal["PublicationEntry"] = entry_type_field_of_section_model
-    entries: list[PublicationEntry] = entries_field_of_section_model
-
-
-class SectionWithTextEntries(SectionBase):
-    """This class is the data model of the section with `TextEntry`s."""
-
-    entry_type: Literal["TextEntry"] = entry_type_field_of_section_model
-    entries: list[str] = entries_field_of_section_model
-
-
-# Create a custom type called Section:
-# It is a union of all the section types and the correct section type is determined by
-# the entry_type field, thanks Pydantic's discriminator feature.
-# See https://docs.pydantic.dev/2.5/concepts/fields/#discriminator for more information
-# about discriminators.
-Section = Annotated[
-    SectionWithEducationEntries
-    | SectionWithExperienceEntries
-    | SectionWithNormalEntries
-    | SectionWithOneLineEntries
-    | SectionWithPublicationEntries
-    | SectionWithTextEntries,
-    pydantic.Field(
-        discriminator="entry_type",
-    ),
-]
+    return SectionModel


 def get_entry_and_section_type(
-    entry: (
-        dict[str, Any]
-        | EducationEntry
-        | ExperienceEntry
-        | PublicationEntry
-        | NormalEntry
-        | OneLineEntry
-        | str
-    ),
+    entry: dict[str, Any] | Entry,
 ) -> tuple[
    str,
-    Type[
-        SectionWithTextEntries
-        | SectionWithOneLineEntries
-        | SectionWithExperienceEntries
-        | SectionWithEducationEntries
-        | SectionWithPublicationEntries
-        | SectionWithNormalEntries
-    ],
+    Type[SectionBase],
 ]:
    """Determine the entry and section type based on the entry.

    Args:
-        entry (dict[str, Any] | EducationEntry | ExperienceEntry | PublicationEntry | NormalEntry | OneLineEntry | str): The entry to determine the type.
+        entry: The entry to determine the type.
    Returns:
-        tuple[str, Type[SectionWithTextEntries | SectionWithOneLineEntries | SectionWithExperienceEntries | SectionWithEducationEntries | SectionWithPublicationEntries | SectionWithNormalEntries]]: The entry type and the section type.
+        tuple[str, Type[SectionBase]]: The entry type and the section type.
    """
+    # Get class attributes of EntryBase class:
+    common_attributes = set(EntryBase.model_fields.keys())
+
    if isinstance(entry, dict):
-        if "details" in entry:
-            entry_type = "OneLineEntry"
-            section_type = SectionWithOneLineEntries
-        elif "company" in entry or "position" in entry:
-            entry_type = "ExperienceEntry"
-            section_type = SectionWithExperienceEntries
-        elif "institution" in entry or "area" in entry or "degree" in entry:
-            entry_type = "EducationEntry"
-            section_type = SectionWithEducationEntries
-        elif "title" in entry or "authors" in entry or "doi" in entry:
-            entry_type = "PublicationEntry"
-            section_type = SectionWithPublicationEntries
-        elif "name" in entry:
-            entry_type = "NormalEntry"
-            section_type = SectionWithNormalEntries
-        else:
-            raise ValueError("The entry is not provided correctly.")
-    else:
-        if isinstance(entry, str):
-            entry_type = "TextEntry"
-            section_type = SectionWithTextEntries
-        elif isinstance(entry, OneLineEntry):
-            entry_type = "OneLineEntry"
-            section_type = SectionWithOneLineEntries
-        elif isinstance(entry, ExperienceEntry):
-            entry_type = "ExperienceEntry"
-            section_type = SectionWithExperienceEntries
-        elif isinstance(entry, EducationEntry):
-            entry_type = "EducationEntry"
-            section_type = SectionWithEducationEntries
-        elif isinstance(entry, PublicationEntry):
-            entry_type = "PublicationEntry"
-            section_type = SectionWithPublicationEntries
-        elif isinstance(entry, NormalEntry):  # type: ignore
-            entry_type = "NormalEntry"
-            section_type = SectionWithNormalEntries
-        else:
-            raise RuntimeError(
-                "This error shouldn't have been raised. Please open an issue on GitHub."
+        entry_type = None  # the entry type is not determined yet
+
+        for EntryType in entry_types:
+            characteristic_entry_attributes = (
+                set(EntryType.model_fields.keys()) - common_attributes
            )

+            # If at least one of the characteristic_entry_attributes is in the entry,
+            # then it means the entry is of this type:
+            if characteristic_entry_attributes & set(entry.keys()):
+                entry_type = EntryType.__name__
+                section_type = create_a_section_model(EntryType)
+                break
+
+        if entry_type is None:
+            raise ValueError("The entry is not provided correctly.")
+
+    elif isinstance(entry, str):
+        # Then it is a TextEntry
+        entry_type = "TextEntry"
+        section_type = create_a_section_model(str)
+
+    else:
+        # Then the entry is already initialized with a data model:
+        entry_type = entry.__class__.__name__
+        section_type = create_a_section_model(entry.__class__)
+
    return entry_type, section_type


 def validate_section_input(
-    sections_input: Section | list[Any],
-) -> Section | list[Any]:
+    sections_input: SectionBase | list[Any],
+) -> SectionBase | list[Any]:
    """Validate a SectionInput object and raise an error if it is not valid.

    Sections input is very complex. It is either a `Section` object or a list of
@ -775,14 +740,7 @@ def validate_section_input(
 # Create a custom type called SectionInput so that it can be validated with
 # `validate_section_input` function.
 SectionInput = Annotated[
-    list[
-        EducationEntry
-        | ExperienceEntry
-        | PublicationEntry
-        | NormalEntry
-        | OneLineEntry
-        | str
-    ],
+    ListOfEntries,
    pydantic.BeforeValidator(validate_section_input),
 ]

@ -864,7 +822,7 @@ class CurriculumVitae(RenderCVBaseModel):
    location: Optional[str] = pydantic.Field(
        default=None,
        title="Location",
-        description="The location of the person. This is not rendered currently.",
+        description="The location of the person.",
    )
    email: Optional[pydantic.EmailStr] = pydantic.Field(
        default=None,
@ -894,9 +852,9 @@ class CurriculumVitae(RenderCVBaseModel):
    )

    @functools.cached_property
-    def sections(self) -> list[Section]:
+    def sections(self) -> list[SectionBase]:
        """Return all the sections of the CV with their titles."""
-        sections: list[Section] = []
+        sections: list[SectionBase] = []
        if self.sections_input is not None:
            for title, section_or_entries in self.sections_input.items():
                title = title.replace("_", " ").title()
@ -939,7 +897,7 @@ class RenderCVDataModel(RenderCVBaseModel):
        title="Curriculum Vitae",
        description="The data of the CV.",
    )
-    design: RenderCVDesign | pydantic.json_schema.SkipJsonSchema[Any] = pydantic.Field(
+    design: pydantic.json_schema.SkipJsonSchema[Any] | RenderCVDesign = pydantic.Field(
        default=ClassicThemeOptions(theme="classic"),
        title="Design",
        description=(
@ -1057,8 +1015,8 @@ def read_input_file(
    file_path: pathlib.Path,
 ) -> RenderCVDataModel:
    """Read the input file and return two instances of RenderCVDataModel. The first
-    instance is the data model with $\\LaTeX$ strings and the second instance is the data
-    model with markdown strings.
+    instance is the data model with $\\LaTeX$ strings and the second instance is the
+    data model with markdown strings.

    Args:
        file_path (str): The path to the input file.
@ -1273,11 +1231,11 @@ def get_a_sample_data_model(
        ],
        "additional_experience_and_awards": [
            OneLineEntry(
-                name="Instructor (2003 - 2005)",
+                label="Instructor (2003 - 2005)",
                details="Taught two full-credit Computer Science courses.",
            ),
            OneLineEntry(
-                name="Third Prize, Senior Design Projects",
+                label="Third Prize, Senior Design Projects",
                details=(
                    "Awarded 3rd prize for a synchronized calendar project out of 100"
                    " projects."
@ -1286,11 +1244,11 @@ def get_a_sample_data_model(
        ],
        "technologies": [
            OneLineEntry(
-                name="Languages",
+                label="Languages",
                details="C++, C, Java, Objective-C, C#.NET, SQL, JavaScript",
            ),
            OneLineEntry(
-                name="Software",
+                label="Software",
                details=(
                    "Visual Studio, Microsoft SQL Server, Eclipse, XCode, Interface"
                    " Builder"