Module kgforge.data_models.data_models

Expand source code
import logging
from datetime import date, datetime
from typing import Any, List, Optional

from pydantic import BaseModel, Field

from kgforge.utils import TextLoader

logger = logging.getLogger(__name__)


class Prompt(BaseModel):
    """Prompt to be used in the construction of a knowledge graph (KG).

    Both fields are required; the model has no defaults.

    Attributes:
        concept (str): The concept/key that the answer to the prompt is classified as.
        question (str): The actual prompt/question to be used.
    """

    concept: str
    question: str


class PromptResponse(BaseModel):
    """Response obtained for a prompt used in the construction of a knowledge graph (KG).

    All three fields are required; the model has no defaults.

    Attributes:
        concept (str): The concept/key that the response is classified as.
        score (float): Score attached to the response.
            NOTE(review): the scale and meaning of the score are not defined
            in this module; confirm with the code that produces it.
        prompt_response (str): Response to the prompt/question used.
    """

    concept: str
    score: float
    prompt_response: str


class ArtifactID(BaseModel):
    """External identifiers of a research artifact.

    Every field is optional and defaults to ``None``.

    NOTE(review): the field names look like the ``ids`` object of an OpenAlex
    work record; confirm against the data source.
    """

    openalex: Optional[str] = None
    doi: Optional[str] = None
    mag: Optional[str] = None


class ArtifactSource(BaseModel):
    """Source that hosts an artifact location (see ``ArtifactLocation.source``).

    Every field is optional and defaults to ``None``. Two fields are declared
    with pydantic aliases: ``source_id`` uses the alias ``id`` and
    ``source_type`` uses the alias ``type``.
    """

    # Declared with alias "id".
    source_id: Optional[str] = Field(alias="id", default=None)
    display_name: Optional[str] = None
    issn_l: Optional[str] = None
    issn: Optional[List[str]] = None
    is_oa: Optional[bool] = None
    is_in_doaj: Optional[bool] = None
    host_organization: Optional[str] = None
    host_organization_name: Optional[str] = None
    host_organization_lineage: Optional[List[str]] = None
    host_organization_lineage_names: Optional[List[str]] = None
    # Declared with alias "type".
    source_type: Optional[str] = Field(alias="type", default=None)


class ArtifactLocation(BaseModel):
    """One location at which a research artifact is available.

    Used by ``ResearchArtifact`` for ``primary_location``, ``locations`` and
    ``best_oa_location``. Every field is optional and defaults to ``None``.
    """

    is_oa: Optional[bool] = None
    landing_page_url: Optional[str] = None
    # Read by ResearchArtifact._get_pdf_url() on the best open-access location.
    pdf_url: Optional[str] = None
    source: Optional[ArtifactSource] = None
    license: Optional[str] = None
    version: Optional[str] = None
    is_accepted: Optional[bool] = None
    is_published: Optional[bool] = None


class OpenAccess(BaseModel):
    """Open-access information of a research artifact.

    Every field is optional and defaults to ``None``.
    """

    # Checked by ResearchArtifact._get_pdf_url() before any URL is returned.
    is_oa: Optional[bool] = None
    oa_status: Optional[str] = None
    # Fallback URL used by ResearchArtifact._get_pdf_url() when the best
    # open-access location has no pdf_url.
    oa_url: Optional[str] = None
    any_repository_has_fulltext: Optional[bool] = None


class Author(BaseModel):
    """Author of a research artifact (see ``Authorship.author``).

    Every field is optional and defaults to ``None``.
    """

    # Declared with alias "id".
    author_id: Optional[str] = Field(alias="id", default=None)
    display_name: Optional[str] = None
    orcid: Optional[str] = None


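# TODO: define an `Institution` model; until then `Authorship.institutions`
# is typed as `Optional[List[Any]]`.
# class Institution(BaseModel):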


class Authorship(BaseModel):
    """Link between a research artifact and one of its authors.

    Every field is optional and defaults to ``None``.
    """

    author_position: Optional[str] = None
    author: Optional[Author] = None
    # Untyped entries: no institution model is defined in this module.
    institutions: Optional[List[Any]] = None
    countries: Optional[List[str]] = None
    is_corresponding: Optional[bool] = None
    raw_author_name: Optional[str] = None
    # Both a single-string and a list form of the raw affiliation are declared.
    raw_affiliation_string: Optional[str] = None
    raw_affiliation_strings: Optional[List[str]] = None


class APC(BaseModel):
    """Charge amount attached to a research artifact.

    Used by ``ResearchArtifact`` for ``apc_list`` and ``apc_paid``. Every
    field is optional and defaults to ``None``.

    NOTE(review): "APC" presumably stands for article processing charge;
    confirm against the data source.
    """

    value: Optional[int] = None
    currency: Optional[str] = None
    value_usd: Optional[int] = None
    provenance: Optional[str] = None


class Biblio(BaseModel):
    """Bibliographic position of a research artifact.

    Volume, issue and page values are all typed as optional strings and
    default to ``None``.
    """

    volume: Optional[str] = None
    issue: Optional[str] = None
    first_page: Optional[str] = None
    last_page: Optional[str] = None


class Concept(BaseModel):
    """Concept attached to a research artifact.

    Every field is optional and defaults to ``None``.
    """

    # Declared with alias "id".
    concept_id: Optional[str] = Field(alias="id", default=None)
    wikidata: Optional[str] = None
    display_name: Optional[str] = None
    level: Optional[int] = None
    score: Optional[float] = None


class Goal(BaseModel):
    """Goal entry used by ``ResearchArtifact.sustainable_development_goals``.

    Every field is optional and defaults to ``None``.
    """

    # Declared with alias "id".
    goal_id: Optional[str] = Field(alias="id", default=None)
    display_name: Optional[str] = None
    score: Optional[float] = None


class CountByYear(BaseModel):
    """Pairs a year with a cited-by count.

    Both fields are optional and default to ``None``.
    """

    year: Optional[int] = None
    cited_by_count: Optional[int] = None


class ResearchArtifact(BaseModel):
    """Metadata record of a research artifact, plus optional full text.

    Every field is optional and defaults to ``None``. ``artifact_id`` is
    declared with the alias ``id`` and ``artifact_type`` with the alias
    ``type``. ``full_text`` is filled in by :meth:`get_full_text`.

    NOTE(review): the field set looks like an OpenAlex work record; confirm
    against the data source.
    """

    artifact_id: Optional[str] = Field(alias="id", default=None)
    title: Optional[str] = None
    display_name: Optional[str] = None
    publication_year: Optional[int] = None
    publication_date: Optional[date] = None
    ids: Optional[ArtifactID] = None
    language: Optional[str] = None
    primary_location: Optional[ArtifactLocation] = None
    artifact_type: Optional[str] = Field(alias="type", default=None)
    type_crossref: Optional[str] = None
    open_access: Optional[OpenAccess] = None
    authorships: Optional[List[Authorship]] = None
    countries_distinct_count: Optional[int] = None
    institutions_distinct_count: Optional[int] = None
    corresponding_author_ids: Optional[List[str]] = None
    corresponding_institution_ids: Optional[List[str]] = None
    apc_list: Optional[APC] = None
    apc_paid: Optional[APC] = None
    has_fulltext: Optional[bool] = None
    cited_by_count: Optional[int] = None
    biblio: Optional[Biblio] = None
    is_retracted: Optional[bool] = None
    is_paratext: Optional[bool] = None
    concepts: Optional[List[Concept]] = None
    mesh: Optional[List[Any]] = None
    locations_count: Optional[int] = None
    locations: Optional[List[ArtifactLocation]] = None
    best_oa_location: Optional[ArtifactLocation] = None
    sustainable_development_goals: Optional[List[Goal]] = None
    grants: Optional[List[Any]] = None
    referenced_works_count: Optional[int] = None
    referenced_works: Optional[List[str]] = None
    related_works: Optional[List[str]] = None
    ngrams_url: Optional[str] = None
    abstract_inverted_index: Optional[dict] = None
    cited_by_api_url: Optional[str] = None
    counts_by_year: Optional[List[CountByYear]] = None
    updated_date: Optional[datetime] = None
    created_date: Optional[date] = None
    full_text: Optional[str] = None
    extracted_concepts: Optional[List[PromptResponse]] = None

    def _get_pdf_url(self) -> str | None:
        """Return the URL from which the artifact's PDF can be fetched.

        The ``pdf_url`` of ``best_oa_location`` is preferred; when it is
        missing, ``open_access.oa_url`` is used instead.

        Returns:
            str | None: The chosen URL, or ``None`` when ``open_access`` is
            not set or the artifact is not flagged as open access.
        """
        # Both nested models default to None, so guard before dereferencing.
        if self.open_access is None or not self.open_access.is_oa:
            return None
        location = self.best_oa_location
        if location is not None and location.pdf_url is not None:
            return location.pdf_url
        return self.open_access.oa_url

    def referenced_works_ids(self) -> List[str]:
        """Return the last ``/``-separated segment of each referenced work.

        Returns:
            List[str]: One entry per item of ``referenced_works``; an empty
            list when ``referenced_works`` is ``None`` or empty.
        """
        if not self.referenced_works:
            return []
        return [work.split("/")[-1] for work in self.referenced_works]

    def get_full_text(self) -> None:
        """Populate ``full_text`` from the artifact's PDF, if not already set.

        The PDF is read once through ``TextLoader.read_pdf_from_url`` from the
        URL returned by :meth:`_get_pdf_url`, and the returned pieces are
        joined with newlines. This is best-effort: any exception raised while
        resolving or reading the PDF is logged and swallowed, leaving
        ``full_text`` unchanged.
        """
        if self.full_text is not None:
            logger.info("Full text already available.")
            return
        try:
            url = self._get_pdf_url()
            if url is None:
                logger.info("PDF URL not found.")
                return
            pages = TextLoader().read_pdf_from_url(url=url)
            if pages is not None:
                # Reuse the result fetched from the resolved URL: it may be
                # the oa_url fallback, in which case best_oa_location.pdf_url
                # is None and must not be fetched.
                self.full_text = "\n".join(pages)
        except Exception as e:
            # Deliberate best-effort boundary: a failed download must not
            # abort processing of the artifact.
            logger.info("Error while pulling full text. %s", e)


class CitationEdge(BaseModel):
    """Citation link between two research artifacts.

    Both fields are required.

    NOTE(review): the direction of the edge is not evident from this module;
    confirm which field holds the citing work and which the cited one.
    """

    cited_by: ResearchArtifact
    cites: ResearchArtifact

Classes

class APC (**data: Any)

Usage docs: https://docs.pydantic.dev/2.4/concepts/models/

A base class for creating Pydantic models.

Attributes

__class_vars__
The names of classvars defined on the model.
__private_attributes__
Metadata about the private attributes of the model.
__signature__
The signature for instantiating the model.
__pydantic_complete__
Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__
The pydantic-core schema used to build the SchemaValidator and SchemaSerializer.
__pydantic_custom_init__
Whether the model has a custom __init__ function.
__pydantic_decorators__
Metadata containing the decorators defined on the model. This replaces Model.__validators__ and Model.__root_validators__ from Pydantic V1.
__pydantic_generic_metadata__
Metadata for generic models; contains data used for a similar purpose to args, origin, parameters in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__
Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__
The name of the post-init method for the model, if defined.
__pydantic_root_model__
Whether the model is a RootModel.
__pydantic_serializer__
The pydantic-core SchemaSerializer used to dump instances of the model.
__pydantic_validator__
The pydantic-core SchemaValidator used to validate instances of the model.
__pydantic_extra__
An instance attribute with the values of extra fields from validation when model_config['extra'] == 'allow'.
__pydantic_fields_set__
An instance attribute with the names of fields explicitly specified during validation.
__pydantic_private__
Instance attribute with the values of private attributes set on the model instance.

Create a new model by parsing and validating input data from keyword arguments.

Raises `pydantic_core.ValidationError` if the input data cannot be validated to form a valid model.

__init__ uses __pydantic_self__ instead of the more common self for the first arg to allow self as a field name.

Expand source code
class APC(BaseModel):
    """Charge amount attached to a research artifact.

    Used by ``ResearchArtifact`` for ``apc_list`` and ``apc_paid``. Every
    field is optional and defaults to ``None``.

    NOTE(review): "APC" presumably stands for article processing charge;
    confirm against the data source.
    """

    value: Optional[int] = None
    currency: Optional[str] = None
    value_usd: Optional[int] = None
    provenance: Optional[str] = None

Ancestors

  • pydantic.main.BaseModel

Class variables

var currency : Optional[str]
var model_config
var model_fields
var provenance : Optional[str]
var value : Optional[int]
var value_usd : Optional[int]
class ArtifactID (**data: Any)

Usage docs: https://docs.pydantic.dev/2.4/concepts/models/

A base class for creating Pydantic models.

Attributes

__class_vars__
The names of classvars defined on the model.
__private_attributes__
Metadata about the private attributes of the model.
__signature__
The signature for instantiating the model.
__pydantic_complete__
Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__
The pydantic-core schema used to build the SchemaValidator and SchemaSerializer.
__pydantic_custom_init__
Whether the model has a custom __init__ function.
__pydantic_decorators__
Metadata containing the decorators defined on the model. This replaces Model.__validators__ and Model.__root_validators__ from Pydantic V1.
__pydantic_generic_metadata__
Metadata for generic models; contains data used for a similar purpose to args, origin, parameters in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__
Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__
The name of the post-init method for the model, if defined.
__pydantic_root_model__
Whether the model is a RootModel.
__pydantic_serializer__
The pydantic-core SchemaSerializer used to dump instances of the model.
__pydantic_validator__
The pydantic-core SchemaValidator used to validate instances of the model.
__pydantic_extra__
An instance attribute with the values of extra fields from validation when model_config['extra'] == 'allow'.
__pydantic_fields_set__
An instance attribute with the names of fields explicitly specified during validation.
__pydantic_private__
Instance attribute with the values of private attributes set on the model instance.

Create a new model by parsing and validating input data from keyword arguments.

Raises `pydantic_core.ValidationError` if the input data cannot be validated to form a valid model.

__init__ uses __pydantic_self__ instead of the more common self for the first arg to allow self as a field name.

Expand source code
class ArtifactID(BaseModel):
    """External identifiers of a research artifact.

    Every field is optional and defaults to ``None``.

    NOTE(review): the field names look like the ``ids`` object of an OpenAlex
    work record; confirm against the data source.
    """

    openalex: Optional[str] = None
    doi: Optional[str] = None
    mag: Optional[str] = None

Ancestors

  • pydantic.main.BaseModel

Class variables

var doi : Optional[str]
var mag : Optional[str]
var model_config
var model_fields
var openalex : Optional[str]
class ArtifactLocation (**data: Any)

Usage docs: https://docs.pydantic.dev/2.4/concepts/models/

A base class for creating Pydantic models.

Attributes

__class_vars__
The names of classvars defined on the model.
__private_attributes__
Metadata about the private attributes of the model.
__signature__
The signature for instantiating the model.
__pydantic_complete__
Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__
The pydantic-core schema used to build the SchemaValidator and SchemaSerializer.
__pydantic_custom_init__
Whether the model has a custom __init__ function.
__pydantic_decorators__
Metadata containing the decorators defined on the model. This replaces Model.__validators__ and Model.__root_validators__ from Pydantic V1.
__pydantic_generic_metadata__
Metadata for generic models; contains data used for a similar purpose to args, origin, parameters in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__
Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__
The name of the post-init method for the model, if defined.
__pydantic_root_model__
Whether the model is a RootModel.
__pydantic_serializer__
The pydantic-core SchemaSerializer used to dump instances of the model.
__pydantic_validator__
The pydantic-core SchemaValidator used to validate instances of the model.
__pydantic_extra__
An instance attribute with the values of extra fields from validation when model_config['extra'] == 'allow'.
__pydantic_fields_set__
An instance attribute with the names of fields explicitly specified during validation.
__pydantic_private__
Instance attribute with the values of private attributes set on the model instance.

Create a new model by parsing and validating input data from keyword arguments.

Raises `pydantic_core.ValidationError` if the input data cannot be validated to form a valid model.

__init__ uses __pydantic_self__ instead of the more common self for the first arg to allow self as a field name.

Expand source code
class ArtifactLocation(BaseModel):
    """One location at which a research artifact is available.

    Used by ``ResearchArtifact`` for ``primary_location``, ``locations`` and
    ``best_oa_location``. Every field is optional and defaults to ``None``.
    """

    is_oa: Optional[bool] = None
    landing_page_url: Optional[str] = None
    # Read by ResearchArtifact._get_pdf_url() on the best open-access location.
    pdf_url: Optional[str] = None
    source: Optional[ArtifactSource] = None
    license: Optional[str] = None
    version: Optional[str] = None
    is_accepted: Optional[bool] = None
    is_published: Optional[bool] = None

Ancestors

  • pydantic.main.BaseModel

Class variables

var is_accepted : Optional[bool]
var is_oa : Optional[bool]
var is_published : Optional[bool]
var landing_page_url : Optional[str]
var license : Optional[str]
var model_config
var model_fields
var pdf_url : Optional[str]
var source : Optional[ArtifactSource]
var version : Optional[str]
class ArtifactSource (**data: Any)

Usage docs: https://docs.pydantic.dev/2.4/concepts/models/

A base class for creating Pydantic models.

Attributes

__class_vars__
The names of classvars defined on the model.
__private_attributes__
Metadata about the private attributes of the model.
__signature__
The signature for instantiating the model.
__pydantic_complete__
Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__
The pydantic-core schema used to build the SchemaValidator and SchemaSerializer.
__pydantic_custom_init__
Whether the model has a custom __init__ function.
__pydantic_decorators__
Metadata containing the decorators defined on the model. This replaces Model.__validators__ and Model.__root_validators__ from Pydantic V1.
__pydantic_generic_metadata__
Metadata for generic models; contains data used for a similar purpose to args, origin, parameters in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__
Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__
The name of the post-init method for the model, if defined.
__pydantic_root_model__
Whether the model is a RootModel.
__pydantic_serializer__
The pydantic-core SchemaSerializer used to dump instances of the model.
__pydantic_validator__
The pydantic-core SchemaValidator used to validate instances of the model.
__pydantic_extra__
An instance attribute with the values of extra fields from validation when model_config['extra'] == 'allow'.
__pydantic_fields_set__
An instance attribute with the names of fields explicitly specified during validation.
__pydantic_private__
Instance attribute with the values of private attributes set on the model instance.

Create a new model by parsing and validating input data from keyword arguments.

Raises `pydantic_core.ValidationError` if the input data cannot be validated to form a valid model.

__init__ uses __pydantic_self__ instead of the more common self for the first arg to allow self as a field name.

Expand source code
class ArtifactSource(BaseModel):
    """Source that hosts an artifact location (see ``ArtifactLocation.source``).

    Every field is optional and defaults to ``None``. Two fields are declared
    with pydantic aliases: ``source_id`` uses the alias ``id`` and
    ``source_type`` uses the alias ``type``.
    """

    # Declared with alias "id".
    source_id: Optional[str] = Field(alias="id", default=None)
    display_name: Optional[str] = None
    issn_l: Optional[str] = None
    issn: Optional[List[str]] = None
    is_oa: Optional[bool] = None
    is_in_doaj: Optional[bool] = None
    host_organization: Optional[str] = None
    host_organization_name: Optional[str] = None
    host_organization_lineage: Optional[List[str]] = None
    host_organization_lineage_names: Optional[List[str]] = None
    # Declared with alias "type".
    source_type: Optional[str] = Field(alias="type", default=None)

Ancestors

  • pydantic.main.BaseModel

Class variables

var display_name : Optional[str]
var host_organization : Optional[str]
var host_organization_lineage : Optional[List[str]]
var host_organization_lineage_names : Optional[List[str]]
var host_organization_name : Optional[str]
var is_in_doaj : Optional[bool]
var is_oa : Optional[bool]
var issn : Optional[List[str]]
var issn_l : Optional[str]
var model_config
var model_fields
var source_id : Optional[str]
var source_type : Optional[str]
class Author (**data: Any)

Usage docs: https://docs.pydantic.dev/2.4/concepts/models/

A base class for creating Pydantic models.

Attributes

__class_vars__
The names of classvars defined on the model.
__private_attributes__
Metadata about the private attributes of the model.
__signature__
The signature for instantiating the model.
__pydantic_complete__
Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__
The pydantic-core schema used to build the SchemaValidator and SchemaSerializer.
__pydantic_custom_init__
Whether the model has a custom __init__ function.
__pydantic_decorators__
Metadata containing the decorators defined on the model. This replaces Model.__validators__ and Model.__root_validators__ from Pydantic V1.
__pydantic_generic_metadata__
Metadata for generic models; contains data used for a similar purpose to args, origin, parameters in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__
Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__
The name of the post-init method for the model, if defined.
__pydantic_root_model__
Whether the model is a RootModel.
__pydantic_serializer__
The pydantic-core SchemaSerializer used to dump instances of the model.
__pydantic_validator__
The pydantic-core SchemaValidator used to validate instances of the model.
__pydantic_extra__
An instance attribute with the values of extra fields from validation when model_config['extra'] == 'allow'.
__pydantic_fields_set__
An instance attribute with the names of fields explicitly specified during validation.
__pydantic_private__
Instance attribute with the values of private attributes set on the model instance.

Create a new model by parsing and validating input data from keyword arguments.

Raises `pydantic_core.ValidationError` if the input data cannot be validated to form a valid model.

__init__ uses __pydantic_self__ instead of the more common self for the first arg to allow self as a field name.

Expand source code
class Author(BaseModel):
    """Author of a research artifact (see ``Authorship.author``).

    Every field is optional and defaults to ``None``.
    """

    # Declared with alias "id".
    author_id: Optional[str] = Field(alias="id", default=None)
    display_name: Optional[str] = None
    orcid: Optional[str] = None

Ancestors

  • pydantic.main.BaseModel

Class variables

var author_id : Optional[str]
var display_name : Optional[str]
var model_config
var model_fields
var orcid : Optional[str]
class Authorship (**data: Any)

Usage docs: https://docs.pydantic.dev/2.4/concepts/models/

A base class for creating Pydantic models.

Attributes

__class_vars__
The names of classvars defined on the model.
__private_attributes__
Metadata about the private attributes of the model.
__signature__
The signature for instantiating the model.
__pydantic_complete__
Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__
The pydantic-core schema used to build the SchemaValidator and SchemaSerializer.
__pydantic_custom_init__
Whether the model has a custom __init__ function.
__pydantic_decorators__
Metadata containing the decorators defined on the model. This replaces Model.__validators__ and Model.__root_validators__ from Pydantic V1.
__pydantic_generic_metadata__
Metadata for generic models; contains data used for a similar purpose to args, origin, parameters in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__
Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__
The name of the post-init method for the model, if defined.
__pydantic_root_model__
Whether the model is a RootModel.
__pydantic_serializer__
The pydantic-core SchemaSerializer used to dump instances of the model.
__pydantic_validator__
The pydantic-core SchemaValidator used to validate instances of the model.
__pydantic_extra__
An instance attribute with the values of extra fields from validation when model_config['extra'] == 'allow'.
__pydantic_fields_set__
An instance attribute with the names of fields explicitly specified during validation.
__pydantic_private__
Instance attribute with the values of private attributes set on the model instance.

Create a new model by parsing and validating input data from keyword arguments.

Raises `pydantic_core.ValidationError` if the input data cannot be validated to form a valid model.

__init__ uses __pydantic_self__ instead of the more common self for the first arg to allow self as a field name.

Expand source code
class Authorship(BaseModel):
    """Link between a research artifact and one of its authors.

    Every field is optional and defaults to ``None``.
    """

    author_position: Optional[str] = None
    author: Optional[Author] = None
    # Untyped entries: no institution model is defined in this module.
    institutions: Optional[List[Any]] = None
    countries: Optional[List[str]] = None
    is_corresponding: Optional[bool] = None
    raw_author_name: Optional[str] = None
    # Both a single-string and a list form of the raw affiliation are declared.
    raw_affiliation_string: Optional[str] = None
    raw_affiliation_strings: Optional[List[str]] = None

Ancestors

  • pydantic.main.BaseModel

Class variables

var author : Optional[Author]
var author_position : Optional[str]
var countries : Optional[List[str]]
var institutions : Optional[List[Any]]
var is_corresponding : Optional[bool]
var model_config
var model_fields
var raw_affiliation_string : Optional[str]
var raw_affiliation_strings : Optional[List[str]]
var raw_author_name : Optional[str]
class Biblio (**data: Any)

Usage docs: https://docs.pydantic.dev/2.4/concepts/models/

A base class for creating Pydantic models.

Attributes

__class_vars__
The names of classvars defined on the model.
__private_attributes__
Metadata about the private attributes of the model.
__signature__
The signature for instantiating the model.
__pydantic_complete__
Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__
The pydantic-core schema used to build the SchemaValidator and SchemaSerializer.
__pydantic_custom_init__
Whether the model has a custom __init__ function.
__pydantic_decorators__
Metadata containing the decorators defined on the model. This replaces Model.__validators__ and Model.__root_validators__ from Pydantic V1.
__pydantic_generic_metadata__
Metadata for generic models; contains data used for a similar purpose to args, origin, parameters in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__
Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__
The name of the post-init method for the model, if defined.
__pydantic_root_model__
Whether the model is a RootModel.
__pydantic_serializer__
The pydantic-core SchemaSerializer used to dump instances of the model.
__pydantic_validator__
The pydantic-core SchemaValidator used to validate instances of the model.
__pydantic_extra__
An instance attribute with the values of extra fields from validation when model_config['extra'] == 'allow'.
__pydantic_fields_set__
An instance attribute with the names of fields explicitly specified during validation.
__pydantic_private__
Instance attribute with the values of private attributes set on the model instance.

Create a new model by parsing and validating input data from keyword arguments.

Raises `pydantic_core.ValidationError` if the input data cannot be validated to form a valid model.

__init__ uses __pydantic_self__ instead of the more common self for the first arg to allow self as a field name.

Expand source code
class Biblio(BaseModel):
    """Bibliographic position of a research artifact.

    Volume, issue and page values are all typed as optional strings and
    default to ``None``.
    """

    volume: Optional[str] = None
    issue: Optional[str] = None
    first_page: Optional[str] = None
    last_page: Optional[str] = None

Ancestors

  • pydantic.main.BaseModel

Class variables

var first_page : Optional[str]
var issue : Optional[str]
var last_page : Optional[str]
var model_config
var model_fields
var volume : Optional[str]
class CitationEdge (**data: Any)

Usage docs: https://docs.pydantic.dev/2.4/concepts/models/

A base class for creating Pydantic models.

Attributes

__class_vars__
The names of classvars defined on the model.
__private_attributes__
Metadata about the private attributes of the model.
__signature__
The signature for instantiating the model.
__pydantic_complete__
Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__
The pydantic-core schema used to build the SchemaValidator and SchemaSerializer.
__pydantic_custom_init__
Whether the model has a custom __init__ function.
__pydantic_decorators__
Metadata containing the decorators defined on the model. This replaces Model.__validators__ and Model.__root_validators__ from Pydantic V1.
__pydantic_generic_metadata__
Metadata for generic models; contains data used for a similar purpose to args, origin, parameters in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__
Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__
The name of the post-init method for the model, if defined.
__pydantic_root_model__
Whether the model is a RootModel.
__pydantic_serializer__
The pydantic-core SchemaSerializer used to dump instances of the model.
__pydantic_validator__
The pydantic-core SchemaValidator used to validate instances of the model.
__pydantic_extra__
An instance attribute with the values of extra fields from validation when model_config['extra'] == 'allow'.
__pydantic_fields_set__
An instance attribute with the names of fields explicitly specified during validation.
__pydantic_private__
Instance attribute with the values of private attributes set on the model instance.

Create a new model by parsing and validating input data from keyword arguments.

Raises `pydantic_core.ValidationError` if the input data cannot be validated to form a valid model.

__init__ uses __pydantic_self__ instead of the more common self for the first arg to allow self as a field name.

Expand source code
class CitationEdge(BaseModel):
    """Citation link between two research artifacts.

    Both fields are required.

    NOTE(review): the direction of the edge is not evident from this module;
    confirm which field holds the citing work and which the cited one.
    """

    cited_by: ResearchArtifact
    cites: ResearchArtifact

Ancestors

  • pydantic.main.BaseModel

Class variables

var cited_by : ResearchArtifact
var cites : ResearchArtifact
var model_config
var model_fields
class Concept (**data: Any)

Usage docs: https://docs.pydantic.dev/2.4/concepts/models/

A base class for creating Pydantic models.

Attributes

__class_vars__
The names of classvars defined on the model.
__private_attributes__
Metadata about the private attributes of the model.
__signature__
The signature for instantiating the model.
__pydantic_complete__
Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__
The pydantic-core schema used to build the SchemaValidator and SchemaSerializer.
__pydantic_custom_init__
Whether the model has a custom __init__ function.
__pydantic_decorators__
Metadata containing the decorators defined on the model. This replaces Model.__validators__ and Model.__root_validators__ from Pydantic V1.
__pydantic_generic_metadata__
Metadata for generic models; contains data used for a similar purpose to args, origin, parameters in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__
Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__
The name of the post-init method for the model, if defined.
__pydantic_root_model__
Whether the model is a RootModel.
__pydantic_serializer__
The pydantic-core SchemaSerializer used to dump instances of the model.
__pydantic_validator__
The pydantic-core SchemaValidator used to validate instances of the model.
__pydantic_extra__
An instance attribute with the values of extra fields from validation when model_config['extra'] == 'allow'.
__pydantic_fields_set__
An instance attribute with the names of fields explicitly specified during validation.
__pydantic_private__
Instance attribute with the values of private attributes set on the model instance.

Create a new model by parsing and validating input data from keyword arguments.

Raises `pydantic_core.ValidationError` if the input data cannot be validated to form a valid model.

__init__ uses __pydantic_self__ instead of the more common self for the first arg to allow self as a field name.

Expand source code
class Concept(BaseModel):
    """Concept attached to a research artifact.

    Every field is optional and defaults to ``None``.
    """

    # Declared with alias "id".
    concept_id: Optional[str] = Field(alias="id", default=None)
    wikidata: Optional[str] = None
    display_name: Optional[str] = None
    level: Optional[int] = None
    score: Optional[float] = None

Ancestors

  • pydantic.main.BaseModel

Class variables

var concept_id : Optional[str]
var display_name : Optional[str]
var level : Optional[int]
var model_config
var model_fields
var score : Optional[float]
var wikidata : Optional[str]
class CountByYear (**data: Any)

Usage docs: https://docs.pydantic.dev/2.4/concepts/models/

A base class for creating Pydantic models.

Attributes

__class_vars__
The names of classvars defined on the model.
__private_attributes__
Metadata about the private attributes of the model.
__signature__
The signature for instantiating the model.
__pydantic_complete__
Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__
The pydantic-core schema used to build the SchemaValidator and SchemaSerializer.
__pydantic_custom_init__
Whether the model has a custom __init__ function.
__pydantic_decorators__
Metadata containing the decorators defined on the model. This replaces Model.__validators__ and Model.__root_validators__ from Pydantic V1.
__pydantic_generic_metadata__
Metadata for generic models; contains data used for a similar purpose to args, origin, parameters in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__
Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__
The name of the post-init method for the model, if defined.
__pydantic_root_model__
Whether the model is a RootModel.
__pydantic_serializer__
The pydantic-core SchemaSerializer used to dump instances of the model.
__pydantic_validator__
The pydantic-core SchemaValidator used to validate instances of the model.
__pydantic_extra__
An instance attribute with the values of extra fields from validation when model_config['extra'] == 'allow'.
__pydantic_fields_set__
An instance attribute with the names of fields explicitly specified during validation.
__pydantic_private__
Instance attribute with the values of private attributes set on the model instance.

Create a new model by parsing and validating input data from keyword arguments.

Raises `pydantic_core.ValidationError` if the input data cannot be validated to form a valid model.

__init__ uses __pydantic_self__ instead of the more common self for the first arg to allow self as a field name.

Expand source code
class CountByYear(BaseModel):
    """Citation count for a single year.

    Used as the element type of ``ResearchArtifact.counts_by_year``.

    Attributes:
        year (Optional[int]): Year the count refers to. Defaults to None.
        cited_by_count (Optional[int]): Citation count for that year
            (presumably citations received during the year; confirm against
            the upstream data source). Defaults to None.
    """

    year: Optional[int] = None
    cited_by_count: Optional[int] = None

Ancestors

  • pydantic.main.BaseModel

Class variables

var cited_by_count : Optional[int]
var model_config
var model_fields
var year : Optional[int]
class Goal (**data: Any)

Usage docs: https://docs.pydantic.dev/2.4/concepts/models/

A base class for creating Pydantic models.

Attributes

__class_vars__
The names of classvars defined on the model.
__private_attributes__
Metadata about the private attributes of the model.
__signature__
The signature for instantiating the model.
__pydantic_complete__
Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__
The pydantic-core schema used to build the SchemaValidator and SchemaSerializer.
__pydantic_custom_init__
Whether the model has a custom __init__ function.
__pydantic_decorators__
Metadata containing the decorators defined on the model. This replaces Model.__validators__ and Model.__root_validators__ from Pydantic V1.
__pydantic_generic_metadata__
Metadata for generic models; contains data used for a similar purpose to args, origin, parameters in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__
Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__
The name of the post-init method for the model, if defined.
__pydantic_root_model__
Whether the model is a RootModel.
__pydantic_serializer__
The pydantic-core SchemaSerializer used to dump instances of the model.
__pydantic_validator__
The pydantic-core SchemaValidator used to validate instances of the model.
__pydantic_extra__
An instance attribute with the values of extra fields from validation when model_config['extra'] == 'allow'.
__pydantic_fields_set__
An instance attribute with the names of fields explicitly specified during validation.
__pydantic_private__
Instance attribute with the values of private attributes set on the model instance.

Create a new model by parsing and validating input data from keyword arguments.

Raises [ValidationError][pydantic_core.ValidationError] if the input data cannot be validated to form a valid model.

__init__ uses __pydantic_self__ instead of the more common self for the first arg to allow self as a field name.

Expand source code
class Goal(BaseModel):
    """Goal entry carrying an identifier, a display name and a score.

    Used as the element type of
    ``ResearchArtifact.sustainable_development_goals``.

    Attributes:
        goal_id (Optional[str]): Identifier of the goal. Populated from the
            input key ``id`` through the field alias. Defaults to None.
        display_name (Optional[str]): Human-readable name of the goal.
            Defaults to None.
        score (Optional[float]): Score attached to the goal; its scale and
            meaning are not defined in this module. Defaults to None.
    """

    # The attribute is named ``goal_id`` so it does not shadow the ``id``
    # builtin; the alias keeps the incoming key name ``id``.
    goal_id: Optional[str] = Field(alias="id", default=None)
    display_name: Optional[str] = None
    score: Optional[float] = None

Ancestors

  • pydantic.main.BaseModel

Class variables

var display_name : Optional[str]
var goal_id : Optional[str]
var model_config
var model_fields
var score : Optional[float]
class OpenAccess (**data: Any)

Usage docs: https://docs.pydantic.dev/2.4/concepts/models/

A base class for creating Pydantic models.

Attributes

__class_vars__
The names of classvars defined on the model.
__private_attributes__
Metadata about the private attributes of the model.
__signature__
The signature for instantiating the model.
__pydantic_complete__
Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__
The pydantic-core schema used to build the SchemaValidator and SchemaSerializer.
__pydantic_custom_init__
Whether the model has a custom __init__ function.
__pydantic_decorators__
Metadata containing the decorators defined on the model. This replaces Model.__validators__ and Model.__root_validators__ from Pydantic V1.
__pydantic_generic_metadata__
Metadata for generic models; contains data used for a similar purpose to args, origin, parameters in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__
Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__
The name of the post-init method for the model, if defined.
__pydantic_root_model__
Whether the model is a RootModel.
__pydantic_serializer__
The pydantic-core SchemaSerializer used to dump instances of the model.
__pydantic_validator__
The pydantic-core SchemaValidator used to validate instances of the model.
__pydantic_extra__
An instance attribute with the values of extra fields from validation when model_config['extra'] == 'allow'.
__pydantic_fields_set__
An instance attribute with the names of fields explicitly specified during validation.
__pydantic_private__
Instance attribute with the values of private attributes set on the model instance.

Create a new model by parsing and validating input data from keyword arguments.

Raises [ValidationError][pydantic_core.ValidationError] if the input data cannot be validated to form a valid model.

__init__ uses __pydantic_self__ instead of the more common self for the first arg to allow self as a field name.

Expand source code
class OpenAccess(BaseModel):
    """Open-access information attached to a ``ResearchArtifact``.

    Attributes:
        is_oa (Optional[bool]): Whether the artifact is open access.
            ``ResearchArtifact._get_pdf_url`` only returns a URL when this is
            truthy. Defaults to None.
        oa_status (Optional[str]): Open-access status label; the set of valid
            values is not defined in this module. Defaults to None.
        oa_url (Optional[str]): Open-access URL, used by
            ``ResearchArtifact._get_pdf_url`` as the fallback when the best
            open-access location has no PDF URL. Defaults to None.
        any_repository_has_fulltext (Optional[bool]): Presumably whether any
            repository holds the full text (inferred from the name; confirm
            against the upstream data source). Defaults to None.
    """

    is_oa: Optional[bool] = None
    oa_status: Optional[str] = None
    oa_url: Optional[str] = None
    any_repository_has_fulltext: Optional[bool] = None

Ancestors

  • pydantic.main.BaseModel

Class variables

var any_repository_has_fulltext : Optional[bool]
var is_oa : Optional[bool]
var model_config
var model_fields
var oa_status : Optional[str]
var oa_url : Optional[str]
class Prompt (**data: Any)

Prompt to be used in the construction of a KG.

Attributes

concept : str
The concept/key that the answer to the prompt is classified as.
question : str
The actual prompt/question to be used.

Create a new model by parsing and validating input data from keyword arguments.

Raises [ValidationError][pydantic_core.ValidationError] if the input data cannot be validated to form a valid model.

__init__ uses __pydantic_self__ instead of the more common self for the first arg to allow self as a field name.

Expand source code
class Prompt(BaseModel):
    """Prompt to be used in the construction of a KG.

    Both fields are required; neither declares a default.

    Attributes:
        concept (str): The concept/key that the answer to the prompt is
            classified as.
        question (str): The actual prompt/question to be used.
    """

    concept: str
    question: str

Ancestors

  • pydantic.main.BaseModel

Class variables

var concept : str
var model_config
var model_fields
var question : str
class PromptResponse (**data: Any)

Response to a prompt used in the construction of a KG.

Attributes

concept : str
The concept/key that the answer to the prompt is classified as.
prompt_response : str
Response to the prompt/question used.

Create a new model by parsing and validating input data from keyword arguments.

Raises [ValidationError][pydantic_core.ValidationError] if the input data cannot be validated to form a valid model.

__init__ uses __pydantic_self__ instead of the more common self for the first arg to allow self as a field name.

Expand source code
class PromptResponse(BaseModel):
    """Response to a prompt used in the construction of a KG.

    All three fields are required; none declares a default.

    Attributes:
        concept (str): The concept/key that the answer to the prompt is
            classified as.
        score (float): Score attached to the response; its scale and meaning
            are not defined in this module.
        prompt_response (str): Response to the prompt/question used.
    """

    concept: str
    score: float
    prompt_response: str

Ancestors

  • pydantic.main.BaseModel

Class variables

var concept : str
var model_config
var model_fields
var prompt_response : str
var score : float
class ResearchArtifact (**data: Any)

Usage docs: https://docs.pydantic.dev/2.4/concepts/models/

A base class for creating Pydantic models.

Attributes

__class_vars__
The names of classvars defined on the model.
__private_attributes__
Metadata about the private attributes of the model.
__signature__
The signature for instantiating the model.
__pydantic_complete__
Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__
The pydantic-core schema used to build the SchemaValidator and SchemaSerializer.
__pydantic_custom_init__
Whether the model has a custom __init__ function.
__pydantic_decorators__
Metadata containing the decorators defined on the model. This replaces Model.__validators__ and Model.__root_validators__ from Pydantic V1.
__pydantic_generic_metadata__
Metadata for generic models; contains data used for a similar purpose to args, origin, parameters in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__
Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__
The name of the post-init method for the model, if defined.
__pydantic_root_model__
Whether the model is a RootModel.
__pydantic_serializer__
The pydantic-core SchemaSerializer used to dump instances of the model.
__pydantic_validator__
The pydantic-core SchemaValidator used to validate instances of the model.
__pydantic_extra__
An instance attribute with the values of extra fields from validation when model_config['extra'] == 'allow'.
__pydantic_fields_set__
An instance attribute with the names of fields explicitly specified during validation.
__pydantic_private__
Instance attribute with the values of private attributes set on the model instance.

Create a new model by parsing and validating input data from keyword arguments.

Raises [ValidationError][pydantic_core.ValidationError] if the input data cannot be validated to form a valid model.

__init__ uses __pydantic_self__ instead of the more common self for the first arg to allow self as a field name.

Expand source code
class ResearchArtifact(BaseModel):
    """Metadata record for one research artifact, plus optional full text.

    Every field is optional and defaults to ``None``. ``artifact_id`` and
    ``artifact_type`` are populated from the input keys ``id`` and ``type``
    through their aliases. ``full_text`` is filled in by ``get_full_text``;
    ``extracted_concepts`` holds ``PromptResponse`` items.

    NOTE(review): the field set looks like an OpenAlex work record; confirm
    against the code that builds these objects.
    """

    artifact_id: Optional[str] = Field(alias="id", default=None)
    title: Optional[str] = None
    display_name: Optional[str] = None
    publication_year: Optional[int] = None
    publication_date: Optional[date] = None
    ids: Optional[ArtifactID] = None
    language: Optional[str] = None
    primary_location: Optional[ArtifactLocation] = None
    artifact_type: Optional[str] = Field(alias="type", default=None)
    type_crossref: Optional[str] = None
    open_access: Optional[OpenAccess] = None
    authorships: Optional[List[Authorship]] = None
    countries_distinct_count: Optional[int] = None
    institutions_distinct_count: Optional[int] = None
    corresponding_author_ids: Optional[List[str]] = None
    corresponding_institution_ids: Optional[List[str]] = None
    apc_list: Optional[APC] = None
    apc_paid: Optional[APC] = None
    has_fulltext: Optional[bool] = None
    cited_by_count: Optional[int] = None
    biblio: Optional[Biblio] = None
    is_retracted: Optional[bool] = None
    is_paratext: Optional[bool] = None
    concepts: Optional[List[Concept]] = None
    mesh: Optional[List[Any]] = None
    locations_count: Optional[int] = None
    locations: Optional[List[ArtifactLocation]] = None
    best_oa_location: Optional[ArtifactLocation] = None
    sustainable_development_goals: Optional[List[Goal]] = None
    grants: Optional[List[Any]] = None
    referenced_works_count: Optional[int] = None
    referenced_works: Optional[List[str]] = None
    related_works: Optional[List[str]] = None
    ngrams_url: Optional[str] = None
    abstract_inverted_index: Optional[dict] = None
    cited_by_api_url: Optional[str] = None
    counts_by_year: Optional[List[CountByYear]] = None
    updated_date: Optional[datetime] = None
    created_date: Optional[date] = None
    full_text: Optional[str] = None
    extracted_concepts: Optional[List[PromptResponse]] = None

    def _get_pdf_url(self) -> str | None:
        """Return the URL from which the artifact's PDF can be fetched.

        Usage example:
        >>> artifact = ResearchArtifact()
        >>> artifact._get_pdf_url()

        Returns:
            str | None: ``best_oa_location.pdf_url`` when it is set, otherwise
            ``open_access.oa_url``. ``None`` when the artifact carries no
            open-access information or is not flagged as open access.
        """
        access = self.open_access
        # ``open_access`` defaults to None, so guard before reading ``is_oa``.
        if access is None or not access.is_oa:
            return None

        # ``best_oa_location`` is optional as well; fall back to the generic
        # open-access URL when it is missing or has no direct PDF link.
        location = self.best_oa_location
        if location is not None and location.pdf_url is not None:
            return location.pdf_url
        return access.oa_url

    def referenced_works_ids(self) -> List[str]:
        """Return the last ``/``-separated segment of each referenced work.

        Returns:
            List[str]: One entry per item in ``referenced_works``, in the same
            order. An empty list when ``referenced_works`` is None or empty.
        """
        if not self.referenced_works:
            return []
        return [work.split("/")[-1] for work in self.referenced_works]

    def get_full_text(self) -> None:
        """Download the artifact's PDF and store its text in ``full_text``.

        Does nothing beyond logging when ``full_text`` is already set or when
        no PDF URL can be resolved. The PDF is read once, from the URL returned
        by ``_get_pdf_url``, and the pieces returned by
        ``TextLoader.read_pdf_from_url`` are joined with newlines.

        This is a best-effort operation: any exception raised while resolving
        or reading the PDF is logged and swallowed, leaving ``full_text``
        unchanged.
        """
        if self.full_text is not None:
            logger.info("Full text already available.")
            return

        try:
            url = self._get_pdf_url()
            if url is None:
                logger.info("PDF URL not found.")
                return

            # Reuse the single download. Fetching a second time from
            # ``best_oa_location.pdf_url`` would double the network cost and
            # fail when the URL came from the ``oa_url`` fallback.
            pages = TextLoader().read_pdf_from_url(url=url)
            if pages is not None:
                self.full_text = "\n".join(pages)
        except Exception as e:
            # Deliberately broad: a failed download must not abort the caller.
            logger.info("Error while pulling full text. %s", e)

Ancestors

  • pydantic.main.BaseModel

Class variables

var abstract_inverted_index : Optional[dict]
var apc_list : Optional[APC]
var apc_paid : Optional[APC]
var artifact_id : Optional[str]
var artifact_type : Optional[str]
var authorships : Optional[List[Authorship]]
var best_oa_location : Optional[ArtifactLocation]
var biblio : Optional[Biblio]
var cited_by_api_url : Optional[str]
var cited_by_count : Optional[int]
var concepts : Optional[List[Concept]]
var corresponding_author_ids : Optional[List[str]]
var corresponding_institution_ids : Optional[List[str]]
var countries_distinct_count : Optional[int]
var counts_by_year : Optional[List[CountByYear]]
var created_date : Optional[datetime.date]
var display_name : Optional[str]
var extracted_concepts : Optional[List[PromptResponse]]
var full_text : Optional[str]
var grants : Optional[List[Any]]
var has_fulltext : Optional[bool]
var ids : Optional[ArtifactID]
var institutions_distinct_count : Optional[int]
var is_paratext : Optional[bool]
var is_retracted : Optional[bool]
var language : Optional[str]
var locations : Optional[List[ArtifactLocation]]
var locations_count : Optional[int]
var mesh : Optional[List[Any]]
var model_config
var model_fields
var ngrams_url : Optional[str]
var open_access : Optional[OpenAccess]
var primary_location : Optional[ArtifactLocation]
var publication_date : Optional[datetime.date]
var publication_year : Optional[int]
var referenced_works : Optional[List[str]]
var referenced_works_count : Optional[int]
var related_works : Optional[List[str]]
var sustainable_development_goals : Optional[List[Goal]]
var title : Optional[str]
var type_crossref : Optional[str]
var updated_date : Optional[datetime.datetime]

Methods

def get_full_text(self)
Expand source code
def get_full_text(self) -> None:
    """Download the artifact's PDF and store its text in ``full_text``.

    Does nothing beyond logging when ``full_text`` is already set or when no
    PDF URL can be resolved. The PDF is read once, from the URL returned by
    ``_get_pdf_url``, and the pieces returned by
    ``TextLoader.read_pdf_from_url`` are joined with newlines.

    This is a best-effort operation: any exception raised while resolving or
    reading the PDF is logged and swallowed, leaving ``full_text`` unchanged.
    """
    if self.full_text is not None:
        logger.info("Full text already available.")
        return

    try:
        url = self._get_pdf_url()
        if url is None:
            logger.info("PDF URL not found.")
            return

        # Reuse the single download. Fetching a second time from
        # ``best_oa_location.pdf_url`` would double the network cost and fail
        # when the URL came from the ``oa_url`` fallback.
        pages = TextLoader().read_pdf_from_url(url=url)
        if pages is not None:
            self.full_text = "\n".join(pages)
    except Exception as e:
        # Deliberately broad: a failed download must not abort the caller.
        logger.info("Error while pulling full text. %s", e)
def referenced_works_ids(self)
Expand source code
def referenced_works_ids(self) -> List[str]:
    """Return the last ``/``-separated segment of each referenced work.

    Returns:
        List[str]: One entry per item in ``referenced_works``, in the same
        order. An empty list when ``referenced_works`` is None or empty.
    """
    # ``referenced_works`` is optional and defaults to None; iterating None
    # would raise TypeError.
    if not self.referenced_works:
        return []
    return [work.split("/")[-1] for work in self.referenced_works]