From 4dcea39c08a115b2d2e62ece3048e434b3e4cf31 Mon Sep 17 00:00:00 2001 From: Sina Atalay Date: Thu, 7 Sep 2023 20:50:03 +0200 Subject: [PATCH] add date parser --- rendercv/data/content.py | 265 +++++++++++++++++++++++++++++++++------ 1 file changed, 226 insertions(+), 39 deletions(-) diff --git a/rendercv/data/content.py b/rendercv/data/content.py index 630aa38..9301d67 100644 --- a/rendercv/data/content.py +++ b/rendercv/data/content.py @@ -1,7 +1,119 @@ -from pydantic import BaseModel, HttpUrl, model_validator -from pydantic_extra_types.phone_numbers import PhoneNumber -from typing import Literal, Union from datetime import date as Date +from typing import Literal, Union +from typing_extensions import Annotated +import re +import logging +from functools import cached_property + +from pydantic import BaseModel, HttpUrl, model_validator, computed_field +from pydantic.functional_validators import AfterValidator +from pydantic_extra_types.phone_numbers import PhoneNumber + +from spellchecker import SpellChecker + +spell = SpellChecker() +# don't give spelling warnings for these words: +dictionary = [ + "aerostructures", + "sportsperson", + "cern", + "calculix", + "ansys", + "nx", + "aselsan", + "hrjet", + "simularge", + "siemens", +] + + +def check_spelling(sentence: str) -> str: + """ + To be continued... + """ + modifiedSentence = sentence.lower() # convert to lower case + modifiedSentence = re.sub( + r"\-+", " ", modifiedSentence + ) # replace hyphens with spaces + modifiedSentence = re.sub( + "[^a-z\s\-']", "", modifiedSentence + ) # remove unwanted characters + words = modifiedSentence.split() # split sentence into a list of words + misspelled = spell.unknown(words) # find misspelled words + + if len(misspelled) > 0: + for word in misspelled: + if word in dictionary: + continue + logging.warning( + f'The word "{word}" might be misspelled according to the' + " pyspellchecker." + ) + + return sentence + + +SpellCheckedString = Annotated[str, AfterValidator(check_spelling)] + + +def compute_time_span_string(start_date: Date, end_date: Date) -> str: + """ + To be continued... + """ + timeSpan = (end_date - start_date).days + + howManyYears = timeSpan // 365 + if howManyYears == 0: + howManyYearsString = None + elif howManyYears == 1: + howManyYearsString = "1 year" + else: + howManyYearsString = f"{howManyYears} years" + + howManyMonths = (timeSpan % 365) // 30 + if howManyMonths == 0: + howManyYearsString = None + elif howManyMonths == 1: + howManyMonthsString = "1 month" + else: + howManyMonthsString = f"{howManyMonths} months" + + if howManyYearsString is None: + timeSpanString = howManyMonthsString + elif howManyMonthsString is None: + timeSpanString = howManyYearsString + else: + timeSpanString = f"{howManyYearsString} {howManyMonthsString}" + + return timeSpanString + + +def format_date(date: Date) -> str: + """ + To be continued... + """ + # Month abbreviations, + # taken from: https://web.library.yale.edu/cataloging/months + abbreviations_of_months = [ + "Jan.", + "Feb.", + "Mar.", + "Apr.", + "May", + "June", + "July", + "Aug.", + "Sept.", + "Oct.", + "Nov.", + "Dec.", + ] + + month = abbreviations_of_months[int(date.strftime("%m")) - 1] + year = date.strftime("%Y") + date_string = f"{month} {year}" + + return date_string class Skill(BaseModel): @@ -11,52 +123,130 @@ class Skill(BaseModel): details: str = None -class TestScore(BaseModel): +class Event(BaseModel): + start_date: Date = None + end_date: Date | Literal["present"] = None + date: str = None + location: str + # date_and_location_strings: list[str] = [] + + @model_validator(mode="after") + @classmethod + def check_dates(cls, model): + """ + To be continued... + """ + if ( + model.start_date is not None + and model.end_date is not None + and model.date is not None + ): + logging.warning( + "start_date, end_date and date are all provided. Therefore, date will" + " be ignored." + ) + model.date = None + elif model.date is not None and ( + model.start_date is not None or model.end_date is not None + ): + logging.warning( + "date is provided. Therefore, start_date and end_date will be ignored." + ) + model.start_date = None + model.end_date = None + + return model + + @computed_field + @cached_property + def date_and_location_strings(self) -> list[str]: + date_and_location_strings = [] + + date_and_location_strings.append(self.location) + + if self.date is not None: + # Then it means start_date and end_date are not provided. + date_and_location_strings.append(str(self.start_date)) + else: + # Then it means start_date and end_date are provided. + + start_date = format_date(self.start_date) + + if self.end_date == "present": + end_date = "present" + + time_span_string = compute_time_span_string( + self.start_date, Date.today() + ) + else: + end_date = format_date(self.end_date) + + time_span_string = compute_time_span_string( + self.start_date, self.end_date + ) + + date_and_location_strings.append(f"{start_date} to {end_date}") + + list_of_no_time_span_string_classes = [ + "Education", + ] + if not self.__class__.__name__ in list_of_no_time_span_string_classes: + date_and_location_strings.append(f"{time_span_string}") + + return date_and_location_strings + + +class TestScore(Event): # 1) Mandotory user inputs: name: str score: str # 2) Optional user inputs: url: HttpUrl = None - date: Date = None -class Project(BaseModel): +class Project(Event): # 1) Mandotory user inputs: name: str - location: str # 2) Optional user inputs: - start_date: Date = None - end_date: Date | Literal["present"] = None - date: str = None url: HttpUrl = None - highlights: list[str] = None + highlights: list[SpellCheckedString] = None -class Experience(BaseModel): +class Experience(Event): # 1) Mandotory user inputs: company: str position: str - location: str # 2) Optional user inputs: - start_date: Date = None - end_date: Date | Literal["present"] = None - date: str = None - highlights: list[str] = None + highlights: list[SpellCheckedString] = None -class Education(BaseModel): +class Education(Event): # 1) Mandotory user inputs: institution: str area: str - location: str # 2) Optional user inputs: - start_date: Date = None - end_date: Date | Literal["present"] = None - date: str = None study_type: str = None gpa: str = None transcript_url: HttpUrl = None - highlights: list[str] = None + highlights: list[SpellCheckedString] = None + + @computed_field + @cached_property + def highlight_strings(self) -> list[str]: + """ + To be continued... + """ + highlight_strings = [] + + if self.gpa is not None: + gpaString = f"GPA: {self.gpa}" + if self.transcript_url is not None: + gpaString += f" ([Transcript]({self.transcript_url}))" + highlight_strings.append(gpaString) + + highlight_strings.extend(self.highlights) + + return highlight_strings class SocialNetwork(BaseModel): @@ -87,25 +277,22 @@ class CurriculumVitae(BaseModel): test_scores: list[TestScore] = None skills: list[Skill] = None - # 3) Derived fields (not user inputs): - connections: list[SocialNetwork] = [] - - @model_validator(mode="after") - @classmethod - def derive_connections(cls, model): + @computed_field + @cached_property + def connections(self) -> list[str]: connections = [] - if model.email is not None: - connections.append(Connection(name="email", value=model.email)) - if model.phone is not None: - connections.append(Connection(name="phone", value=model.phone)) - if model.website is not None: - connections.append(Connection(name="website", value=model.website)) - if model.social_networks is not None: - for social_network in model.social_networks: + if self.phone is not None: + connections.append(Connection(name="phone", value=self.phone)) + if self.email is not None: + connections.append(Connection(name="email", value=self.email)) + if self.website is not None: + connections.append(Connection(name="website", value=str(self.website))) + if self.social_networks is not None: + for social_network in self.social_networks: connections.append( Connection( name=social_network.network, value=social_network.username ) ) - model.connections = connections - return model + + return connections