Module kgforge.utils.openalex_util

Expand source code
import logging
from typing import Any, List, Optional
from urllib.parse import quote

import requests
from requests import HTTPError

logger = logging.getLogger(__name__)


class OpenAlexUtilConfig:
    """Configuration for OpenAlexUtil.

    Attributes:
        work_endpoint (str): Endpoint to get a specific artifact using OpenAlexID;
            the default contains one ``{}`` placeholder.
        search_endpoint (str): Endpoint to search for artifacts using a query.
            ``OpenAlexUtil.search_works`` fills its two ``{}`` placeholders with
            the search query and the per-page result limit, in that order.
        filter_endpoint (str): Endpoint to filter artifacts.
    """

    def __init__(
        self,
        work_endpoint: str = "https://api.openalex.org/works/{}",
        search_endpoint: str = "https://api.openalex.org/works?search={}&filter=open_access.is_oa:true&per-page={}",
        filter_endpoint: str = "https://api.openalex.org/works?filter=",
    ) -> None:
        """Initializes OpenAlexUtilConfig.

        Usage example:
        >>> oa_config = OpenAlexUtilConfig(work_endpoint="sample-url", search_endpoint="sample-url", filter_endpoint="sample-url")

        Args:
            work_endpoint (str): Endpoint to get a specific artifact using OpenAlexID.
            search_endpoint (str): Endpoint to search for artifacts using a query.
            filter_endpoint (str): Endpoint to filter artifacts.
        """
        self.work_endpoint = work_endpoint
        self.search_endpoint = search_endpoint
        self.filter_endpoint = filter_endpoint

    def __repr__(self) -> str:
        """Returns a readable representation listing the three endpoints."""
        # Without this, logs and generated signatures show only the default
        # "<... object at 0x...>" form, which hides the configured URLs.
        return (
            f"{type(self).__name__}("
            f"work_endpoint={self.work_endpoint!r}, "
            f"search_endpoint={self.search_endpoint!r}, "
            f"filter_endpoint={self.filter_endpoint!r})"
        )


class OpenAlexUtil:
    """Provides functionality to fetch artifacts from OpenAlex."""

    def __init__(self, config: Optional[OpenAlexUtilConfig] = None) -> None:
        """Initializes OpenAlexUtil.

        Args:
            config (Optional[OpenAlexUtilConfig]): Endpoint configuration. When
                omitted (or falsy), a new default OpenAlexUtilConfig is created
                for this instance.
        """
        # The default is built here rather than in the signature so that
        # instances do not all share one config object created at import time.
        self.config = config or OpenAlexUtilConfig()

    def search_works(self, search_query: str, results_limit: int = 25, timeout: float = 30.0) -> List[Any]:
        """Searches for artifacts using a query.

        Errors are not propagated to the caller: any failure is logged and an
        empty list is returned instead.

        Usage example:
        >>> oa_util = OpenAlexUtil()
        >>> oa_util.search_works("sample-query", 25)  # doctest: +SKIP

        Args:
            search_query (str): Query to search for artifacts. It is
                percent-encoded before being placed in the URL.
            results_limit (int): Number of results to return.
            timeout (float): Seconds to wait for the server before giving up.

        Returns:
            List[Any]: The entries of the "results" field of the JSON response.
                An empty list is returned when the request fails, the status
                code is not 200, or the response has no "results" field.
        """
        # Percent-encode the query so characters such as "&", "#" or "%" cannot
        # truncate the search term or add unintended query parameters.
        encoded_query = quote(str(search_query), safe="")
        url = self.config.search_endpoint.format(encoded_query, results_limit)

        try:
            # A timeout keeps an unresponsive server from blocking the caller forever.
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            search_results = response.json().get("results")
        except HTTPError as http_err:
            logger.warning("HTTP error occurred: %s", http_err)
            return []
        except Exception as err:
            # Deliberately broad: the search is best-effort, so connection
            # failures, timeouts and malformed JSON all yield an empty result.
            logger.warning("Other error occurred: %s", err)
            return []

        if response.status_code == 200 and search_results is not None:
            return search_results
        return []

Classes

class OpenAlexUtil (config: OpenAlexUtilConfig = <kgforge.utils.openalex_util.OpenAlexUtilConfig object>)

Provides functionality to fetch artifacts from OpenAlex.

Expand source code
class OpenAlexUtil:
    """Provides functionality to fetch artifacts from OpenAlex."""

    def __init__(self, config: Optional[OpenAlexUtilConfig] = None) -> None:
        """Initializes OpenAlexUtil.

        Args:
            config (Optional[OpenAlexUtilConfig]): Endpoint configuration. When
                omitted (or falsy), a new default OpenAlexUtilConfig is created
                for this instance.
        """
        # The default is built here rather than in the signature so that
        # instances do not all share one config object created at import time.
        self.config = config or OpenAlexUtilConfig()

    def search_works(self, search_query: str, results_limit: int = 25, timeout: float = 30.0) -> List[Any]:
        """Searches for artifacts using a query.

        Errors are not propagated to the caller: any failure is logged and an
        empty list is returned instead.

        Usage example:
        >>> oa_util = OpenAlexUtil()
        >>> oa_util.search_works("sample-query", 25)  # doctest: +SKIP

        Args:
            search_query (str): Query to search for artifacts. It is
                percent-encoded before being placed in the URL.
            results_limit (int): Number of results to return.
            timeout (float): Seconds to wait for the server before giving up.

        Returns:
            List[Any]: The entries of the "results" field of the JSON response.
                An empty list is returned when the request fails, the status
                code is not 200, or the response has no "results" field.
        """
        # Percent-encode the query so characters such as "&", "#" or "%" cannot
        # truncate the search term or add unintended query parameters.
        encoded_query = quote(str(search_query), safe="")
        url = self.config.search_endpoint.format(encoded_query, results_limit)

        try:
            # A timeout keeps an unresponsive server from blocking the caller forever.
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            search_results = response.json().get("results")
        except HTTPError as http_err:
            logger.warning("HTTP error occurred: %s", http_err)
            return []
        except Exception as err:
            # Deliberately broad: the search is best-effort, so connection
            # failures, timeouts and malformed JSON all yield an empty result.
            logger.warning("Other error occurred: %s", err)
            return []

        if response.status_code == 200 and search_results is not None:
            return search_results
        return []

Methods

def search_works(self, search_query: str, results_limit: int = 25) ‑> List[Any]

Searches for artifacts using a query.

Usage example:

>>> oa_util = OpenAlexUtil()
>>> oa_util.search_works("sample-query", 25)

Args

search_query : str
Query to search for artifacts.
results_limit : int
Number of results to return.

Returns

List[Any]
List of raw result entries from the API response that match the query; an empty list if the search fails or yields no results field.

Raises

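HTTPError
Not propagated: an HTTP error that occurs while searching for artifacts is caught inside the method and logged, and an empty list is returned.
Exception
Not propagated: any other error that occurs while searching for artifacts is likewise caught and logged, and an empty list is returned.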
Expand source code
def search_works(self, search_query: str, results_limit: int = 25, timeout: float = 30.0) -> List[Any]:
    """Searches for artifacts using a query.

    Errors are not propagated to the caller: any failure is logged and an
    empty list is returned instead.

    Usage example:
    >>> oa_util = OpenAlexUtil()
    >>> oa_util.search_works("sample-query", 25)  # doctest: +SKIP

    Args:
        search_query (str): Query to search for artifacts. It is
            percent-encoded before being placed in the URL.
        results_limit (int): Number of results to return.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        List[Any]: The entries of the "results" field of the JSON response.
            An empty list is returned when the request fails, the status
            code is not 200, or the response has no "results" field.
    """
    # Percent-encode the query so characters such as "&", "#" or "%" cannot
    # truncate the search term or add unintended query parameters.
    encoded_query = quote(str(search_query), safe="")
    url = self.config.search_endpoint.format(encoded_query, results_limit)

    try:
        # A timeout keeps an unresponsive server from blocking the caller forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        search_results = response.json().get("results")
    except HTTPError as http_err:
        logger.warning("HTTP error occurred: %s", http_err)
        return []
    except Exception as err:
        # Deliberately broad: the search is best-effort, so connection
        # failures, timeouts and malformed JSON all yield an empty result.
        logger.warning("Other error occurred: %s", err)
        return []

    if response.status_code == 200 and search_results is not None:
        return search_results
    return []
class OpenAlexUtilConfig (work_endpoint: str = 'https://api.openalex.org/works/{}', search_endpoint: str = 'https://api.openalex.org/works?search={}&filter=open_access.is_oa:true&per-page={}', filter_endpoint: str = 'https://api.openalex.org/works?filter=')

Configuration for OpenAlexUtil

Attributes

work_endpoint : str
Endpoint to get a specific artifact using OpenAlexID.
search_endpoint : str
Endpoint to search for artifacts using a query.
filter_endpoint : str
Endpoint to filter artifacts.

Initializes OpenAlexUtilConfig

Usage example:

oa_config = OpenAlexUtilConfig(work_endpoint="sample-url", search_endpoint="sample-url", filter_endpoint="sample-url")

Args

work_endpoint : str
Endpoint to get a specific artifact using OpenAlexID.
search_endpoint : str
Endpoint to search for artifacts using a query.
filter_endpoint : str
Endpoint to filter artifacts.

Returns

None
Initializes OpenAlexUtilConfig
Expand source code
class OpenAlexUtilConfig:
    """Configuration for OpenAlexUtil.

    Attributes:
        work_endpoint (str): Endpoint to get a specific artifact using OpenAlexID;
            the default contains one ``{}`` placeholder.
        search_endpoint (str): Endpoint to search for artifacts using a query.
            ``OpenAlexUtil.search_works`` fills its two ``{}`` placeholders with
            the search query and the per-page result limit, in that order.
        filter_endpoint (str): Endpoint to filter artifacts.
    """

    def __init__(
        self,
        work_endpoint: str = "https://api.openalex.org/works/{}",
        search_endpoint: str = "https://api.openalex.org/works?search={}&filter=open_access.is_oa:true&per-page={}",
        filter_endpoint: str = "https://api.openalex.org/works?filter=",
    ) -> None:
        """Initializes OpenAlexUtilConfig.

        Usage example:
        >>> oa_config = OpenAlexUtilConfig(work_endpoint="sample-url", search_endpoint="sample-url", filter_endpoint="sample-url")

        Args:
            work_endpoint (str): Endpoint to get a specific artifact using OpenAlexID.
            search_endpoint (str): Endpoint to search for artifacts using a query.
            filter_endpoint (str): Endpoint to filter artifacts.
        """
        self.work_endpoint = work_endpoint
        self.search_endpoint = search_endpoint
        self.filter_endpoint = filter_endpoint

    def __repr__(self) -> str:
        """Returns a readable representation listing the three endpoints."""
        # Without this, logs and generated signatures show only the default
        # "<... object at 0x...>" form, which hides the configured URLs.
        return (
            f"{type(self).__name__}("
            f"work_endpoint={self.work_endpoint!r}, "
            f"search_endpoint={self.search_endpoint!r}, "
            f"filter_endpoint={self.filter_endpoint!r})"
        )