improve new data models

This commit is contained in:
Sina Atalay 2024-01-26 19:47:42 +01:00
parent f8f59068a5
commit 4f88245e5f
1 changed files with 130 additions and 102 deletions

View File

@ -5,40 +5,44 @@ in the end: document the whole code!
from datetime import date as Date from datetime import date as Date
from typing import Literal from typing import Literal
from typing_extensions import Annotated, Optional from typing_extensions import Annotated, Optional
import logging
from functools import cached_property from functools import cached_property
import urllib.request from urllib.request import urlopen, HTTPError
import os import os
import json import json
import pydantic import pydantic
import pydantic_extra_types.phone_numbers as pydantic_phone_numbers import pydantic_extra_types.phone_numbers as pydantic_phone_numbers
import pydantic.functional_validators as pydantic_functional_validators
from . import parser from . import utilities
from .terminal_reporter import warning
logger = logging.getLogger(__name__)
# To understand how to create custom data types, see: # To understand how to create custom data types, see:
# https://docs.pydantic.dev/latest/usage/types/custom/ # use links with pydantic version tags! # https://docs.pydantic.dev/latest/usage/types/custom/ # use links with pydantic version tags!
LaTeXDimension = Annotated[ # LaTeXDimension = Annotated[
str, # str,
pydantic.Field( # pydantic.Field(
pattern=r"\d+\.?\d* *(cm|in|pt|mm|ex|em)", # pattern=r"\d+\.?\d* *(cm|in|pt|mm|ex|em)",
), # ),
] # ]
LaTeXString = Annotated[ LaTeXString = Annotated[
str, pydantic.functional_validators.AfterValidator(parser.escape_latex_characters) str,
pydantic_functional_validators.AfterValidator(utilities.escape_latex_characters),
] ]
PastDate = Annotated[ PastDate = Annotated[
str, str,
pydantic.Field(pattern=r"\d{4}-?(\d{2})?-?(\d{2})?"), pydantic.Field(pattern=r"\d{4}-?(\d{2})?-?(\d{2})?"),
pydantic.functional_validators.AfterValidator(parser.parse_date_string), pydantic_functional_validators.AfterValidator(utilities.parse_date_string),
] ]
PastDateAdapter = pydantic.TypeAdapter(PastDate) PastDateAdapter = pydantic.TypeAdapter(PastDate)
# ======================================================================================
# Entry models: ========================================================================
# ======================================================================================
class EntryBase(pydantic.BaseModel): class EntryBase(pydantic.BaseModel):
"""This class is the parent class for classes like `#!python EducationEntry`, """This class is the parent class for classes like `#!python EducationEntry`,
@ -99,19 +103,12 @@ class EntryBase(pydantic.BaseModel):
cls, date: PastDate | LaTeXString cls, date: PastDate | LaTeXString
) -> Optional[PastDate | int | LaTeXString]: ) -> Optional[PastDate | int | LaTeXString]:
"""Check if the date is a string or a Date object and return accordingly.""" """Check if the date is a string or a Date object and return accordingly."""
if isinstance(date, str): if date is None:
try:
# If this runs, it means the date is an ISO format string, and it can be
# parsed
new_date = parser.parse_date_string(date)
new_date = PastDateAdapter.validate_python(new_date)
except ValueError:
# Then it means it is a custom string like "Fall 2023"
new_date = date
elif date is None:
new_date = None new_date = None
elif isinstance(date, Date):
new_date = date
else: else:
raise TypeError(f"Date ({date}) is neither a string nor a Date object.") raise TypeError(f"{date} is an invalid date.")
return new_date return new_date
@ -132,14 +129,14 @@ class EntryBase(pydantic.BaseModel):
end_date_is_provided = True end_date_is_provided = True
if date_is_provided and start_date_is_provided and end_date_is_provided: if date_is_provided and start_date_is_provided and end_date_is_provided:
logger.warning( warning(
'"start_date", "end_date" and "date" are all provided in of the' '"start_date", "end_date" and "date" are all provided in of the'
" entries. Therefore, date will be ignored." " entries. Therefore, date will be ignored."
) )
model.date = None model.date = None
elif date_is_provided and start_date_is_provided and not end_date_is_provided: elif date_is_provided and start_date_is_provided and not end_date_is_provided:
logger.warning( warning(
'Both "date" and "start_date" is provided in of the entries.' 'Both "date" and "start_date" is provided in of the entries.'
' "start_date" will be ignored.' ' "start_date" will be ignored.'
) )
@ -147,7 +144,7 @@ class EntryBase(pydantic.BaseModel):
model.end_date = None model.end_date = None
elif date_is_provided and end_date_is_provided and not start_date_is_provided: elif date_is_provided and end_date_is_provided and not start_date_is_provided:
logger.warning( warning(
'Both "date" and "end_date" is provided in of the entries. "end_date"' 'Both "date" and "end_date" is provided in of the entries. "end_date"'
" will be ignored." " will be ignored."
) )
@ -155,7 +152,7 @@ class EntryBase(pydantic.BaseModel):
model.end_date = None model.end_date = None
elif start_date_is_provided and not end_date_is_provided: elif start_date_is_provided and not end_date_is_provided:
logger.warning( warning(
'"start_date" is provided in of the entries, but "end_date" is not.' '"start_date" is provided in of the entries, but "end_date" is not.'
' "end_date" will be set to "present".' ' "end_date" will be set to "present".'
) )
@ -200,17 +197,27 @@ class EntryBase(pydantic.BaseModel):
if isinstance(self.date, str): if isinstance(self.date, str):
date_string = self.date date_string = self.date
elif isinstance(self.date, Date): elif isinstance(self.date, Date):
date_string = parser.format_date(self.date) date_string = utilities.format_date(self.date)
else: else:
raise RuntimeError("Date is neither a string nor a Date object.") raise RuntimeError("Date is neither a string nor a Date object.")
elif self.start_date is not None and self.end_date is not None: elif self.start_date is not None and self.end_date is not None:
start_date = parser.format_date(self.start_date) if isinstance(self.start_date, (int, Date)):
start_date = utilities.format_date(self.start_date)
else:
raise RuntimeError(
"This error shouldn't have been raised. Please open an issue on"
" GitHub."
)
if self.end_date == "present": if self.end_date == "present":
end_date = "present" end_date = "present"
elif isinstance(self.end_date, (int, Date)):
end_date = utilities.format_date(self.end_date)
else: else:
end_date = parser.format_date(self.end_date) raise RuntimeError(
"This error shouldn't have been raised. Please open an issue on"
" GitHub."
)
date_string = f"{start_date} to {end_date}" date_string = f"{start_date} to {end_date}"
@ -225,13 +232,20 @@ class EntryBase(pydantic.BaseModel):
if self.date is not None: if self.date is not None:
time_span = "" time_span = ""
elif self.start_date is not None and self.end_date is not None: elif self.start_date is not None and self.end_date is not None:
if self.end_date == "present": if self.end_date == "present" and isinstance(self.start_date, Date):
time_span = parser.compute_time_span_string( time_span = utilities.compute_time_span_string(
self.start_date, PastDate(Date.today()) self.start_date, Date.today()
)
elif isinstance(self.start_date, (int, Date)) and isinstance(
self.end_date, (int, Date)
):
time_span = utilities.compute_time_span_string(
self.start_date, self.end_date
) )
else: else:
time_span = parser.compute_time_span_string( raise RuntimeError(
self.start_date, self.end_date "This error shouldn't have been raised. Please open an issue on"
" GitHub."
) )
else: else:
time_span = None time_span = None
@ -269,7 +283,7 @@ class EntryBase(pydantic.BaseModel):
try: try:
# If this runs, it means the date is an ISO format string, and it can be # If this runs, it means the date is an ISO format string, and it can be
# parsed # parsed
month_and_year = parser.format_date(self.date) month_and_year = utilities.format_date(self.date) # type: ignore
except TypeError: except TypeError:
month_and_year = str(self.date) month_and_year = str(self.date)
else: else:
@ -373,8 +387,8 @@ class PublicationEntry(pydantic.BaseModel):
doi_url = f"https://doi.org/{doi}" doi_url = f"https://doi.org/{doi}"
try: try:
urllib.request.urlopen(doi_url) urlopen(doi_url)
except urllib.request.HTTPError as err: except HTTPError as err:
if err.code == 404: if err.code == 404:
raise ValueError(f"{doi} cannot be found in the DOI System.") raise ValueError(f"{doi} cannot be found in the DOI System.")
@ -386,6 +400,20 @@ class PublicationEntry(pydantic.BaseModel):
return f"https://doi.org/{self.doi}" return f"https://doi.org/{self.doi}"
# ======================================================================================
# Section models: ======================================================================
# ======================================================================================
# Module-level pydantic.Field objects carrying the title and description for
# the two attributes of a section: its entry type and its list of entries.
# NOTE(review): presumably attached to the `entry_type` / `entries` attributes
# of the SectionWith... models -- confirm against the class bodies, which are
# not visible in this view.
entry_type_field_of_section_model = pydantic.Field(
title="Entry Type",
description="The type of the entries in the section.",
)
# Companion field for the entries themselves; per its description, the
# expected format of the entries depends on the entry type.
entries_field_of_section_model = pydantic.Field(
title="Entries",
description="The entries of the section. The format depends on the entry type.",
)
class SectionBase(pydantic.BaseModel): class SectionBase(pydantic.BaseModel):
"""This class stores a section information. """This class stores a section information.
@ -408,16 +436,6 @@ class SectionBase(pydantic.BaseModel):
) )
# Module-level pydantic.Field objects carrying the title and description for
# the two attributes of a section: its entry type and its list of entries.
# NOTE(review): presumably attached to the `entry_type` / `entries` attributes
# of the SectionWith... models -- confirm against the class bodies, which are
# not visible in this view.
entry_type_field_of_section_model = pydantic.Field(
title="Entry Type",
description="The type of the entries in the section.",
)
# Companion field for the entries themselves; per its description, the
# expected format of the entries depends on the entry type.
entries_field_of_section_model = pydantic.Field(
title="Entries",
description="The entries of the section. The format depends on the entry type.",
)
class SectionWithEducationEntries(SectionBase): class SectionWithEducationEntries(SectionBase):
"""This class stores a section with """This class stores a section with
[EducationEntry](../user_guide.md#educationentry)s. [EducationEntry](../user_guide.md#educationentry)s.
@ -488,6 +506,55 @@ class SocialNetwork(pydantic.BaseModel):
) )
# Section type: a union of the six SectionWith... models, annotated with a
# pydantic.Field whose discriminator is "entry_type", so the value of that
# attribute is what tells the union members apart.
# NOTE(review): this relies on every member model declaring an `entry_type`
# attribute -- confirm against the class bodies, which are not visible here.
Section = Annotated[
SectionWithEducationEntries
| SectionWithExperienceEntries
| SectionWithNormalEntries
| SectionWithOneLineEntries
| SectionWithPublicationEntries
| SectionWithTextEntries,
pydantic.Field(
discriminator="entry_type",
),
]
# ======================================================================================
# Full RenderCV data models: ===========================================================
# ======================================================================================
# Default entry types for a given section title.
# Maps a section title (exact string key) to a 2-tuple of
# (entry type, section model that holds entries of that type).
# NOTE(review): presumably consulted when a section is supplied only as a
# title plus a list of entries, to decide which models to build -- confirm
# against the CurriculumVitae validator that reads this table.
default_entry_types_for_a_given_title: dict[
str,
tuple[type[EducationEntry], type[SectionWithEducationEntries]]
| tuple[type[ExperienceEntry], type[SectionWithExperienceEntries]]
| tuple[type[PublicationEntry], type[SectionWithPublicationEntries]]
| tuple[type[NormalEntry], type[SectionWithNormalEntries]]
| tuple[type[OneLineEntry], type[SectionWithOneLineEntries]]
| tuple[type[LaTeXString], type[SectionWithTextEntries]],
] = {
"Education": (EducationEntry, SectionWithEducationEntries),
"Experience": (ExperienceEntry, SectionWithExperienceEntries),
"Work Experience": (ExperienceEntry, SectionWithExperienceEntries),
"Research Experience": (ExperienceEntry, SectionWithExperienceEntries),
"Publications": (PublicationEntry, SectionWithPublicationEntries),
"Papers": (PublicationEntry, SectionWithPublicationEntries),
"Projects": (NormalEntry, SectionWithNormalEntries),
"Academic Projects": (NormalEntry, SectionWithNormalEntries),
"University Projects": (NormalEntry, SectionWithNormalEntries),
"Personal Projects": (NormalEntry, SectionWithNormalEntries),
"Certificates": (NormalEntry, SectionWithNormalEntries),
"Extracurricular Activities": (ExperienceEntry, SectionWithExperienceEntries),
"Test Scores": (OneLineEntry, SectionWithOneLineEntries),
"Skills": (OneLineEntry, SectionWithOneLineEntries),
"Programming Skills": (NormalEntry, SectionWithNormalEntries),
"Other Skills": (OneLineEntry, SectionWithOneLineEntries),
"Awards": (OneLineEntry, SectionWithOneLineEntries),
"Interests": (OneLineEntry, SectionWithOneLineEntries),
# The only row whose entry type is not an entry class: LaTeXString is the
# Annotated str alias defined near the top of the module.
"Summary": (LaTeXString, SectionWithTextEntries),
}
class Connection(pydantic.BaseModel): class Connection(pydantic.BaseModel):
"""This class stores a connection/communication information. """This class stores a connection/communication information.
@ -533,52 +600,6 @@ class Connection(pydantic.BaseModel):
return url return url
# Section type: a union of the six SectionWith... models, annotated with a
# pydantic.Field whose discriminator is "entry_type", so the value of that
# attribute is what tells the union members apart.
# NOTE(review): this relies on every member model declaring an `entry_type`
# attribute -- confirm against the class bodies, which are not visible here.
Section = Annotated[
SectionWithEducationEntries
| SectionWithExperienceEntries
| SectionWithNormalEntries
| SectionWithOneLineEntries
| SectionWithPublicationEntries
| SectionWithTextEntries,
pydantic.Field(
discriminator="entry_type",
),
]
# Default entry types for a given section title.
# Maps a section title (exact string key) to a 2-tuple of
# (entry type, section model that holds entries of that type).
# NOTE(review): presumably consulted when a section is supplied only as a
# title plus a list of entries, to decide which models to build -- confirm
# against the CurriculumVitae validator that reads this table.
default_entry_types_for_a_given_title: dict[
str,
tuple[type[EducationEntry], type[SectionWithEducationEntries]]
| tuple[type[ExperienceEntry], type[SectionWithExperienceEntries]]
| tuple[type[PublicationEntry], type[SectionWithPublicationEntries]]
| tuple[type[NormalEntry], type[SectionWithNormalEntries]]
| tuple[type[OneLineEntry], type[SectionWithOneLineEntries]]
| tuple[type[LaTeXString], type[SectionWithTextEntries]],
] = {
"Education": (EducationEntry, SectionWithEducationEntries),
"Experience": (ExperienceEntry, SectionWithExperienceEntries),
"Work Experience": (ExperienceEntry, SectionWithExperienceEntries),
"Research Experience": (ExperienceEntry, SectionWithExperienceEntries),
"Publications": (PublicationEntry, SectionWithPublicationEntries),
"Papers": (PublicationEntry, SectionWithPublicationEntries),
"Projects": (NormalEntry, SectionWithNormalEntries),
"Academic Projects": (NormalEntry, SectionWithNormalEntries),
"University Projects": (NormalEntry, SectionWithNormalEntries),
"Personal Projects": (NormalEntry, SectionWithNormalEntries),
"Certificates": (NormalEntry, SectionWithNormalEntries),
"Extracurricular Activities": (ExperienceEntry, SectionWithExperienceEntries),
"Test Scores": (OneLineEntry, SectionWithOneLineEntries),
"Skills": (OneLineEntry, SectionWithOneLineEntries),
"Programming Skills": (OneLineEntry, SectionWithOneLineEntries),
"Other Skills": (OneLineEntry, SectionWithOneLineEntries),
"Awards": (OneLineEntry, SectionWithOneLineEntries),
"Interests": (OneLineEntry, SectionWithOneLineEntries),
# The only row whose entry type is not an entry class: LaTeXString is the
# Annotated str alias defined near the top of the module.
"Summary": (LaTeXString, SectionWithTextEntries),
}
class CurriculumVitae(pydantic.BaseModel): class CurriculumVitae(pydantic.BaseModel):
"""This class binds all the information of a CV together.""" """This class binds all the information of a CV together."""
@ -683,12 +704,19 @@ class CurriculumVitae(pydantic.BaseModel):
# try if the entries are of the correct type by casting them # try if the entries are of the correct type by casting them
# to the entry type one by one # to the entry type one by one
for entry in section_or_entries: for entry in section_or_entries:
if not isinstance(entry, entry_type): if entry_type is LaTeXString:
raise TypeError( if not isinstance(entry, str):
f'"{entry}" is not an instance of' raise pydantic.ValidationError(
f' "{entry_type.__name__}". Please check' f'"{entry}" is not a valid string.'
" the entries." )
) else:
try:
entry = entry_type(**entry) # type: ignore
except pydantic.ValidationError as err:
raise pydantic.ValidationError(
f'"{entry}" is not a valid'
f" {entry_type.__name__}."
) from err
else: else:
raise ValueError( raise ValueError(