Module kgforge.kg.kg_construct

Expand source code
import errno
import logging
import os
from typing import List

import matplotlib.pyplot as plt
import networkx as nx
import transformers
from transformers import pipeline
import pickle
import json

from kgforge.config import KGConfig
from kgforge.data_models import Prompt, PromptResponse, ResearchArtifact

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class KnowledgeGraphConfig:
    """Configuration for KnowledgeGraph

    Attributes:
        email (str): Email address of the user. When not supplied, the value of
            the ``OPEN_ALEX_EMAIL`` environment variable is used (or None).
        prompts (List[Prompt]): List of prompts to be used in the construction of the KG.
        model_name (str): Name of the model to be used for answering questions.
    """

    def __init__(
        self,
        email: str = None,
        prompts: List[Prompt] = None,
        model_name: str = "deepset/roberta-base-squad2",
    ) -> None:
        """Initializes KnowledgeGraphConfig

        Usage example:
        >>>config = KnowledgeGraphConfig()
        >>>config.email = "sample-email"
        >>>config.prompts = [Prompt(concept="author", question="Who is the author of this text?")]
        >>>config.model_name = "deepset/roberta-base-squad2"

        Args:
            email (str): Email address of the user.
            prompts (List[Prompt]): List of prompts to be used in the construction of the KG.
                Defaults to a copy of ``KGConfig.DEFAULT_PROMPTS``.
            model_name (str): Name of the model to be used for answering questions.

        Returns:
            None: Initializes KnowledgeGraphConfig
        """
        if prompts is None:
            # Copy the shared defaults: prompts are extended in place later
            # (see KnowledgeGraph.update_prompts), which must not leak into
            # KGConfig.DEFAULT_PROMPTS or into other configs.
            self.prompts = list(KGConfig.DEFAULT_PROMPTS)
        else:
            self.prompts = prompts
        self.email = email or os.environ.get("OPEN_ALEX_EMAIL", None)
        self.model_name = model_name


class KnowledgeGraph:
    """Knowledge graph built using Documents

    Every artifact becomes a node, and every answer obtained for a prompt
    becomes another node connected to its artifact by a directed edge.
    """

    # Class-level default kept for backward compatibility; ``__init__`` rebinds
    # ``self.artifacts`` on every instance.
    artifacts: List[ResearchArtifact] = []

    def __init__(
        self,
        config: KnowledgeGraphConfig = None,
        artifacts: List[ResearchArtifact] = None,
    ):
        """Initializes the knowledge graph with an empty directed graph

        Args:
            config (KnowledgeGraphConfig): Configuration; a default one is created when omitted.
            artifacts (List[ResearchArtifact]): Artifacts to build the graph from.
        """
        self.config = config or KnowledgeGraphConfig()
        self.artifacts = artifacts
        self.graph = nx.DiGraph()

    def clear_prompts(self) -> None:
        """Clears the list of prompts used in the construction of this KG

        Usage example:
        >>>kg = KnowledgeGraph()
        >>>kg.clear_prompts()

        Returns:
            None: ``config.prompts`` is set to None
        """
        self.config.prompts = None

    def update_prompts(self, new_prompts: List[Prompt]) -> None:
        """Appends new prompts to existing prompts

        Usage example:
        >>>kg = KnowledgeGraph()
        >>>kg.update_prompts([Prompt(concept="author", question="Who is the author of this text?")])

        Args:
            new_prompts (List[Prompt]): New prompts to be appended to existing prompts.
                None is treated as an empty list.

        Returns:
            None: Appends prompts to existing prompts
        """
        # Copy so that later in-place extensions never modify the caller's list.
        additions = list(new_prompts or [])
        if self.config.prompts is None:
            self.config.prompts = additions
        else:
            self.config.prompts.extend(additions)

    def answer_question(
        self, artifact: ResearchArtifact, prompt: Prompt
    ) -> PromptResponse:
        """Answers questions based on context.

        Usage example:
        >>>artifact = ResearchArtifact()
        >>>kg = KnowledgeGraph()
        >>>kg.answer_question(artifact, Prompt(concept="author", question="Who is the author of this text?"))

        Args:
            artifact (ResearchArtifact): Artifact to be used for answering the question.
            prompt (Prompt): Question to be answered.

        Returns:
            PromptResponse: Answer to the question. The response is "Unavailable" with
                score 0 when the artifact or its full text is missing, or when the
                pipeline fails.

        Raises:
            ValueError: If no text is found in the question.
        """
        if artifact is None:
            logger.info("Artifact is needed to answer the question.")
            return PromptResponse(
                concept=prompt.concept, score=0, prompt_response="Unavailable"
            )
        # Empty text is treated like missing text: there is no context to query.
        if not artifact.full_text:
            logger.info("Full text not found.")
            return PromptResponse(
                concept=prompt.concept, score=0, prompt_response="Unavailable"
            )
        if not prompt.question:
            raise ValueError("Question cannot be empty")
        try:
            # Building a pipeline loads the model, so reuse it across calls and
            # only rebuild when the configured model name changes.
            model_name = self.config.model_name
            cached = getattr(self, "_qa_pipeline", None)
            if cached is None or cached[0] != model_name:
                cached = (
                    model_name,
                    pipeline(task="question-answering", model=model_name),
                )
                self._qa_pipeline = cached
            res = cached[1](question=prompt.question, context=artifact.full_text)
            return PromptResponse(
                concept=prompt.concept,
                score=res.get("score", 0),
                prompt_response=res.get("answer", "Unavailable"),
            )
        except transformers.pipelines.base.PipelineException:
            logger.exception("Error while answering question")
            return PromptResponse(
                concept=prompt.concept, score=0, prompt_response="Unavailable"
            )

    def construct_kg(self) -> None:
        """Constructs knowledge graph using the list of documents

        Usage example:
        >>>kg = KnowledgeGraph()
        >>>kg.construct_kg()

        Returns:
            None: Builds a knowledge graph in ``self.graph``

        Raises:
            None: Failures during construction are logged, not propagated.
        """
        if self.artifacts is None:
            logger.info("Artifacts are needed to construct the knowledge graph.")
            return

        # Prompts may have been cleared (set to None); treat that as "no prompts".
        prompts = self.config.prompts or []
        try:
            for artifact in self.artifacts:
                self.graph.add_node(artifact.artifact_id)
                for prompt in prompts:
                    prompt_res = self.answer_question(artifact=artifact, prompt=prompt)
                    answer = prompt_res.prompt_response
                    self.graph.add_node(answer)
                    # Compare the prompt's concept, not the Prompt object itself,
                    # against the concept names that point away from the artifact.
                    if prompt.concept in ("contribution", "findings"):
                        self.graph.add_edge(artifact.artifact_id, answer)
                    else:
                        self.graph.add_edge(answer, artifact.artifact_id)

            logger.info("Knowledge Graph constructed successfully.")
        except Exception as e:
            # Best-effort build: keep the error visible (with traceback) without raising.
            logger.exception("Error while constructing the knowledge graph: %s", e)

    def read_graph(self, path: str) -> None:
        """Reads the graph from a file

        Accepts both the JSON layout produced by ``write_graph``
        (``{"nodes": [[node, attrs], ...], "edges": [[source, target], ...]}``)
        and pickled graphs.

        Usage example:
        >>>kg = KnowledgeGraph()
        >>>kg.read_graph("kg.pickle")

        Args:
            path (str): Path to the file where the graph is to be read from

        Returns:
            None: Reads the graph from a file

        Raises:
            ValueError: If the path is empty
            FileNotFoundError: If the file is not found
        """
        if not path:
            raise ValueError("Path cannot be empty")
        if not os.path.isfile(path):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path)

        # Try the JSON layout first; binary or non-JSON content falls through.
        try:
            with open(path, "r", encoding="utf-8") as f:
                payload = json.load(f)
        except (UnicodeDecodeError, json.JSONDecodeError):
            payload = None

        if isinstance(payload, dict) and "nodes" in payload and "edges" in payload:
            graph = nx.DiGraph()
            for node, attrs in payload["nodes"]:
                graph.add_node(node, **attrs)
            for source, target in payload["edges"]:
                graph.add_edge(source, target)
            self.graph = graph
            return

        # NOTE(security): pickle.load can execute arbitrary code; only load
        # pickle files that come from a trusted source.
        with open(path, "rb") as f:
            self.graph = pickle.load(f)

    def write_graph(self, path: str) -> None:
        """Writes the graph to a file

        The graph is stored as JSON:
        ``{"nodes": [[node, attrs], ...], "edges": [[source, target], ...]}``.

        Usage example:
        >>>kg = KnowledgeGraph()
        >>>kg.write_graph("kg.json")

        Args:
            path (str): Path to the file where the graph is to be written

        Returns:
            None: Writes the graph to a file

        Raises:
            ValueError: If the path is empty
        """
        if not path:
            raise ValueError("Path cannot be empty")

        graph_dict = {
            "nodes": list(self.graph.nodes(data=True)),
            "edges": list(self.graph.edges()),
        }
        try:
            # Serialize before opening the file so that a non-serializable node
            # cannot leave a truncated file behind.
            serialized = json.dumps(graph_dict, indent=4)
            with open(path, "w", encoding="utf-8") as f:
                f.write(serialized)
        except (OSError, TypeError, ValueError):
            # Writing stays best-effort, but the failure is no longer silent.
            logger.exception("Failed to write the knowledge graph to %s", path)

    def visualize_kg(self, file_path: str = "graph.png") -> None:
        """Visualizes the knowledge graph

        Usage example:
        >>>kg = KnowledgeGraph()
        >>>kg.visualize_kg()

        Args:
            file_path (str): Path of the PNG image to be written.

        Returns:
            None: Saves a drawing of the knowledge graph to ``file_path``
        """
        # Draw on a dedicated figure so repeated calls do not paint over each
        # other, and close it afterwards so figures do not accumulate.
        fig = plt.figure()
        try:
            pos = nx.spring_layout(self.graph, k=0.7, iterations=50)
            nx.draw(self.graph, pos=pos, with_labels=False, font_weight="bold")
            ax = fig.gca()
            ax.set_aspect("equal")
            ax.set_axis_off()

            fig.savefig(file_path, format="PNG")
        finally:
            plt.close(fig)

Classes

class KnowledgeGraph (config: KnowledgeGraphConfig = None, artifacts: List[ResearchArtifact] = None)

Knowledge graph built using Documents

Expand source code
class KnowledgeGraph:
    """Knowledge graph built using Documents

    Every artifact becomes a node, and every answer obtained for a prompt
    becomes another node connected to its artifact by a directed edge.
    """

    # Class-level default kept for backward compatibility; ``__init__`` rebinds
    # ``self.artifacts`` on every instance.
    artifacts: List[ResearchArtifact] = []

    def __init__(
        self,
        config: KnowledgeGraphConfig = None,
        artifacts: List[ResearchArtifact] = None,
    ):
        """Initializes the knowledge graph with an empty directed graph

        Args:
            config (KnowledgeGraphConfig): Configuration; a default one is created when omitted.
            artifacts (List[ResearchArtifact]): Artifacts to build the graph from.
        """
        self.config = config or KnowledgeGraphConfig()
        self.artifacts = artifacts
        self.graph = nx.DiGraph()

    def clear_prompts(self) -> None:
        """Clears the list of prompts used in the construction of this KG

        Usage example:
        >>>kg = KnowledgeGraph()
        >>>kg.clear_prompts()

        Returns:
            None: ``config.prompts`` is set to None
        """
        self.config.prompts = None

    def update_prompts(self, new_prompts: List[Prompt]) -> None:
        """Appends new prompts to existing prompts

        Usage example:
        >>>kg = KnowledgeGraph()
        >>>kg.update_prompts([Prompt(concept="author", question="Who is the author of this text?")])

        Args:
            new_prompts (List[Prompt]): New prompts to be appended to existing prompts.
                None is treated as an empty list.

        Returns:
            None: Appends prompts to existing prompts
        """
        # Copy so that later in-place extensions never modify the caller's list.
        additions = list(new_prompts or [])
        if self.config.prompts is None:
            self.config.prompts = additions
        else:
            self.config.prompts.extend(additions)

    def answer_question(
        self, artifact: ResearchArtifact, prompt: Prompt
    ) -> PromptResponse:
        """Answers questions based on context.

        Usage example:
        >>>artifact = ResearchArtifact()
        >>>kg = KnowledgeGraph()
        >>>kg.answer_question(artifact, Prompt(concept="author", question="Who is the author of this text?"))

        Args:
            artifact (ResearchArtifact): Artifact to be used for answering the question.
            prompt (Prompt): Question to be answered.

        Returns:
            PromptResponse: Answer to the question. The response is "Unavailable" with
                score 0 when the artifact or its full text is missing, or when the
                pipeline fails.

        Raises:
            ValueError: If no text is found in the question.
        """
        if artifact is None:
            logger.info("Artifact is needed to answer the question.")
            return PromptResponse(
                concept=prompt.concept, score=0, prompt_response="Unavailable"
            )
        # Empty text is treated like missing text: there is no context to query.
        if not artifact.full_text:
            logger.info("Full text not found.")
            return PromptResponse(
                concept=prompt.concept, score=0, prompt_response="Unavailable"
            )
        if not prompt.question:
            raise ValueError("Question cannot be empty")
        try:
            # Building a pipeline loads the model, so reuse it across calls and
            # only rebuild when the configured model name changes.
            model_name = self.config.model_name
            cached = getattr(self, "_qa_pipeline", None)
            if cached is None or cached[0] != model_name:
                cached = (
                    model_name,
                    pipeline(task="question-answering", model=model_name),
                )
                self._qa_pipeline = cached
            res = cached[1](question=prompt.question, context=artifact.full_text)
            return PromptResponse(
                concept=prompt.concept,
                score=res.get("score", 0),
                prompt_response=res.get("answer", "Unavailable"),
            )
        except transformers.pipelines.base.PipelineException:
            logger.exception("Error while answering question")
            return PromptResponse(
                concept=prompt.concept, score=0, prompt_response="Unavailable"
            )

    def construct_kg(self) -> None:
        """Constructs knowledge graph using the list of documents

        Usage example:
        >>>kg = KnowledgeGraph()
        >>>kg.construct_kg()

        Returns:
            None: Builds a knowledge graph in ``self.graph``

        Raises:
            None: Failures during construction are logged, not propagated.
        """
        if self.artifacts is None:
            logger.info("Artifacts are needed to construct the knowledge graph.")
            return

        # Prompts may have been cleared (set to None); treat that as "no prompts".
        prompts = self.config.prompts or []
        try:
            for artifact in self.artifacts:
                self.graph.add_node(artifact.artifact_id)
                for prompt in prompts:
                    prompt_res = self.answer_question(artifact=artifact, prompt=prompt)
                    answer = prompt_res.prompt_response
                    self.graph.add_node(answer)
                    # Compare the prompt's concept, not the Prompt object itself,
                    # against the concept names that point away from the artifact.
                    if prompt.concept in ("contribution", "findings"):
                        self.graph.add_edge(artifact.artifact_id, answer)
                    else:
                        self.graph.add_edge(answer, artifact.artifact_id)

            logger.info("Knowledge Graph constructed successfully.")
        except Exception as e:
            # Best-effort build: keep the error visible (with traceback) without raising.
            logger.exception("Error while constructing the knowledge graph: %s", e)

    def read_graph(self, path: str) -> None:
        """Reads the graph from a file

        Accepts both the JSON layout produced by ``write_graph``
        (``{"nodes": [[node, attrs], ...], "edges": [[source, target], ...]}``)
        and pickled graphs.

        Usage example:
        >>>kg = KnowledgeGraph()
        >>>kg.read_graph("kg.pickle")

        Args:
            path (str): Path to the file where the graph is to be read from

        Returns:
            None: Reads the graph from a file

        Raises:
            ValueError: If the path is empty
            FileNotFoundError: If the file is not found
        """
        if not path:
            raise ValueError("Path cannot be empty")
        if not os.path.isfile(path):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path)

        # Try the JSON layout first; binary or non-JSON content falls through.
        try:
            with open(path, "r", encoding="utf-8") as f:
                payload = json.load(f)
        except (UnicodeDecodeError, json.JSONDecodeError):
            payload = None

        if isinstance(payload, dict) and "nodes" in payload and "edges" in payload:
            graph = nx.DiGraph()
            for node, attrs in payload["nodes"]:
                graph.add_node(node, **attrs)
            for source, target in payload["edges"]:
                graph.add_edge(source, target)
            self.graph = graph
            return

        # NOTE(security): pickle.load can execute arbitrary code; only load
        # pickle files that come from a trusted source.
        with open(path, "rb") as f:
            self.graph = pickle.load(f)

    def write_graph(self, path: str) -> None:
        """Writes the graph to a file

        The graph is stored as JSON:
        ``{"nodes": [[node, attrs], ...], "edges": [[source, target], ...]}``.

        Usage example:
        >>>kg = KnowledgeGraph()
        >>>kg.write_graph("kg.json")

        Args:
            path (str): Path to the file where the graph is to be written

        Returns:
            None: Writes the graph to a file

        Raises:
            ValueError: If the path is empty
        """
        if not path:
            raise ValueError("Path cannot be empty")

        graph_dict = {
            "nodes": list(self.graph.nodes(data=True)),
            "edges": list(self.graph.edges()),
        }
        try:
            # Serialize before opening the file so that a non-serializable node
            # cannot leave a truncated file behind.
            serialized = json.dumps(graph_dict, indent=4)
            with open(path, "w", encoding="utf-8") as f:
                f.write(serialized)
        except (OSError, TypeError, ValueError):
            # Writing stays best-effort, but the failure is no longer silent.
            logger.exception("Failed to write the knowledge graph to %s", path)

    def visualize_kg(self, file_path: str = "graph.png") -> None:
        """Visualizes the knowledge graph

        Usage example:
        >>>kg = KnowledgeGraph()
        >>>kg.visualize_kg()

        Args:
            file_path (str): Path of the PNG image to be written.

        Returns:
            None: Saves a drawing of the knowledge graph to ``file_path``
        """
        # Draw on a dedicated figure so repeated calls do not paint over each
        # other, and close it afterwards so figures do not accumulate.
        fig = plt.figure()
        try:
            pos = nx.spring_layout(self.graph, k=0.7, iterations=50)
            nx.draw(self.graph, pos=pos, with_labels=False, font_weight="bold")
            ax = fig.gca()
            ax.set_aspect("equal")
            ax.set_axis_off()

            fig.savefig(file_path, format="PNG")
        finally:
            plt.close(fig)

Class variables

var artifacts : List[ResearchArtifact]

Methods

def answer_question(self, artifact: ResearchArtifact, prompt: Prompt) ‑> PromptResponse

Answers questions based on context.

Usage example:

artifact = ResearchArtifact() kg = KnowledgeGraph() kg.answer_question(artifact, Prompt(concept="author", question="Who is the author of this text?"))

Args

artifact : ResearchArtifact
Artifact to be used for answering the question.
prompt : Prompt
Question to be answered.

Returns

PromptResponse
Answer to the question.

Raises

ValueError
If no text is found in the question.
Expand source code
def answer_question(
    self, artifact: ResearchArtifact, prompt: Prompt
) -> PromptResponse:
    """Answers questions based on context.

    Usage example:
    >>>artifact = ResearchArtifact()
    >>>kg = KnowledgeGraph()
    >>>kg.answer_question(artifact, Prompt(concept="author", question="Who is the author of this text?"))

    Args:
        artifact (ResearchArtifact): Artifact to be used for answering the question.
        prompt (Prompt): Question to be answered.

    Returns:
        PromptResponse: Answer to the question. The response is "Unavailable" with
            score 0 when the artifact or its full text is missing, or when the
            pipeline fails.

    Raises:
        ValueError: If no text is found in the question.
    """
    if artifact is None:
        logger.info("Artifact is needed to answer the question.")
        return PromptResponse(
            concept=prompt.concept, score=0, prompt_response="Unavailable"
        )
    # Empty text is treated like missing text: there is no context to query.
    if not artifact.full_text:
        logger.info("Full text not found.")
        return PromptResponse(
            concept=prompt.concept, score=0, prompt_response="Unavailable"
        )
    if not prompt.question:
        raise ValueError("Question cannot be empty")
    try:
        # Building a pipeline loads the model, so reuse it across calls and
        # only rebuild when the configured model name changes.
        model_name = self.config.model_name
        cached = getattr(self, "_qa_pipeline", None)
        if cached is None or cached[0] != model_name:
            cached = (
                model_name,
                pipeline(task="question-answering", model=model_name),
            )
            self._qa_pipeline = cached
        res = cached[1](question=prompt.question, context=artifact.full_text)
        return PromptResponse(
            concept=prompt.concept,
            score=res.get("score", 0),
            prompt_response=res.get("answer", "Unavailable"),
        )
    except transformers.pipelines.base.PipelineException:
        logger.exception("Error while answering question")
        return PromptResponse(
            concept=prompt.concept, score=0, prompt_response="Unavailable"
        )
def clear_prompts(self) ‑> None

Clears the list of prompts used in the construction of this KG

Usage example:

kg = KnowledgeGraph() kg.clear_prompts()

Args:

Returns

None

Raises

None

Expand source code
def clear_prompts(self) -> None:
    """Drops every prompt configured for the construction of this KG

    Usage example:
    >>>kg = KnowledgeGraph()
    >>>kg.clear_prompts()

    Args:

    Returns:
        None: The configuration's ``prompts`` attribute is reset to None

    Raises:
        None
    """
    config = self.config
    config.prompts = None
def construct_kg(self) ‑> None

Constructs knowledge graph using the list of documents

Usage example:

kg = KnowledgeGraph() kg.construct_kg()

Args:

Returns

None
Builds a knowledge graph

Raises

ValueError
If no text is found in the document or the question.
Expand source code
def construct_kg(self) -> None:
    """Constructs knowledge graph using the list of documents

    Each artifact id is added as a node. For every configured prompt the
    answer is added as a node and linked to the artifact: artifact -> answer
    for the "contribution" and "findings" concepts, answer -> artifact otherwise.

    Usage example:
    >>>kg = KnowledgeGraph()
    >>>kg.construct_kg()

    Args:

    Returns:
        None: Builds a knowledge graph in ``self.graph``

    Raises:
        None: Failures during construction are logged, not propagated.
    """
    if self.artifacts is None:
        logger.info("Artifacts are needed to construct the knowledge graph.")
        return

    # Prompts may have been cleared (set to None); treat that as "no prompts".
    prompts = self.config.prompts or []
    try:
        for artifact in self.artifacts:
            self.graph.add_node(artifact.artifact_id)
            for prompt in prompts:
                prompt_res = self.answer_question(artifact=artifact, prompt=prompt)
                answer = prompt_res.prompt_response
                self.graph.add_node(answer)
                # Compare the prompt's concept, not the Prompt object itself,
                # against the concept names that point away from the artifact.
                if prompt.concept in ("contribution", "findings"):
                    self.graph.add_edge(artifact.artifact_id, answer)
                else:
                    self.graph.add_edge(answer, artifact.artifact_id)

        logger.info("Knowledge Graph constructed successfully.")
    except Exception as e:
        # Best-effort build: keep the error visible (with traceback) without raising.
        logger.exception("Error while constructing the knowledge graph: %s", e)
def read_graph(self, path: str) ‑> None

Reads the graph from a file

Usage example:

kg = KnowledgeGraph() kg.read_graph("kg.pickle")

Args

path : str
Path to the file where the graph is to be read from

Returns

None
Reads the graph from a file

Raises

ValueError
If the path is empty
FileNotFoundError
If the file is not found
Expand source code
def read_graph(self, path: str) -> None:
    """Reads the graph from a file

    Accepts both a JSON file of the form
    ``{"nodes": [[node, attrs], ...], "edges": [[source, target], ...]}``
    and a pickled graph.

    Usage example:
    >>>kg = KnowledgeGraph()
    >>>kg.read_graph("kg.pickle")

    Args:
        path (str): Path to the file where the graph is to be read from

    Returns:
        None: Reads the graph from a file

    Raises:
        ValueError: If the path is empty
        FileNotFoundError: If the file is not found
    """
    if not path:
        raise ValueError("Path cannot be empty")
    if not os.path.isfile(path):
        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path)

    # Try the JSON layout first; binary or non-JSON content falls through.
    try:
        with open(path, "r", encoding="utf-8") as f:
            payload = json.load(f)
    except (UnicodeDecodeError, json.JSONDecodeError):
        payload = None

    if isinstance(payload, dict) and "nodes" in payload and "edges" in payload:
        graph = nx.DiGraph()
        for node, attrs in payload["nodes"]:
            graph.add_node(node, **attrs)
        for source, target in payload["edges"]:
            graph.add_edge(source, target)
        self.graph = graph
        return

    # NOTE(security): pickle.load can execute arbitrary code; only load
    # pickle files that come from a trusted source.
    with open(path, "rb") as f:
        self.graph = pickle.load(f)
def update_prompts(self, new_prompts: List[Prompt]) ‑> None

Appends new prompts to existing prompts

Usage example:

kg = KnowledgeGraph() kg.update_prompts([Prompt(concept="author", question="Who is the author of this text?")])

Args

new_prompts : List[Prompt]
New prompts to be appended to existing prompts

Returns

None
Appends prompts to existing prompts

Raises

None

Expand source code
def update_prompts(self, new_prompts: List[Prompt]) -> None:
    """Appends new prompts to existing prompts

    Usage example:
    >>>kg = KnowledgeGraph()
    >>>kg.update_prompts([Prompt(concept="author", question="Who is the author of this text?")])

    Args:
        new_prompts (List[Prompt]): New prompts to be appended to existing prompts.
            None is treated as an empty list.

    Returns:
        None: Appends prompts to existing prompts

    Raises:
        None
    """
    # Copy so that later in-place extensions never modify the caller's list.
    additions = list(new_prompts or [])
    if self.config.prompts is None:
        self.config.prompts = additions
    else:
        self.config.prompts.extend(additions)
def visualize_kg(self, file_path: str = 'graph.png')

Visualizes the knowledge graph

Usage example:

kg = KnowledgeGraph() kg.visualize_kg()

Args:

Returns

None
Visualizes the knowledge graph

Raises

None

Expand source code
def visualize_kg(self, file_path: str = "graph.png") -> None:
    """Visualizes the knowledge graph

    Usage example:
    >>>kg = KnowledgeGraph()
    >>>kg.visualize_kg()

    Args:
        file_path (str): Path of the PNG image to be written.

    Returns:
        None: Saves a drawing of the knowledge graph to ``file_path``

    Raises:
        None
    """
    # Draw on a dedicated figure so repeated calls do not paint over each
    # other, and close it afterwards so figures do not accumulate.
    fig = plt.figure()
    try:
        pos = nx.spring_layout(self.graph, k=0.7, iterations=50)
        nx.draw(self.graph, pos=pos, with_labels=False, font_weight="bold")
        ax = fig.gca()
        ax.set_aspect("equal")
        ax.set_axis_off()

        fig.savefig(file_path, format="PNG")
    finally:
        plt.close(fig)
def write_graph(self, path: str) ‑> None

Writes the graph to a file

Usage example:

kg = KnowledgeGraph() kg.write_graph("kg.pickle")

Args

path : str
Path to the file where the graph is to be written

Returns

None
Writes the graph to a file

Raises

ValueError
If the path is empty
Expand source code
def write_graph(self, path: str) -> None:
    """Writes the graph to a file

    The graph is stored as JSON:
    ``{"nodes": [[node, attrs], ...], "edges": [[source, target], ...]}``.

    Usage example:
    >>>kg = KnowledgeGraph()
    >>>kg.write_graph("kg.json")

    Args:
        path (str): Path to the file where the graph is to be written

    Returns:
        None: Writes the graph to a file

    Raises:
        ValueError: If the path is empty
    """
    if not path:
        raise ValueError("Path cannot be empty")

    graph_dict = {
        "nodes": list(self.graph.nodes(data=True)),
        "edges": list(self.graph.edges()),
    }
    try:
        # Serialize before opening the file so that a non-serializable node
        # cannot leave a truncated file behind.
        serialized = json.dumps(graph_dict, indent=4)
        with open(path, "w", encoding="utf-8") as f:
            f.write(serialized)
    except (OSError, TypeError, ValueError):
        # Writing stays best-effort, but the failure is no longer silent.
        logger.exception("Failed to write the knowledge graph to %s", path)
class KnowledgeGraphConfig (email: str = None, prompts: List[Prompt] = None, model_name: str = 'deepset/roberta-base-squad2')

Configuration for KnowledgeGraph

Attributes

email : str
Email address of the user.
prompts : List[Prompt]
List of prompts to be used in the construction of the KG.
model_name : str
Name of the model to be used for answering questions.

Initializes KnowledgeGraphConfig

Usage example:

config = KnowledgeGraphConfig() config.email = "sample-email" config.prompts = [Prompt(concept="author", question="Who is the author of this text?")] config.model_name = "deepset/roberta-base-squad2"

Args

email : str
Email address of the user.
prompts : List[Prompt]
List of prompts to be used in the construction of the KG.

Returns

None
Initializes KnowledgeGraphConfig
Expand source code
class KnowledgeGraphConfig:
    """Configuration for KnowledgeGraph

    Attributes:
        email (str): Email address of the user. When not supplied, the value of
            the ``OPEN_ALEX_EMAIL`` environment variable is used (or None).
        prompts (List[Prompt]): List of prompts to be used in the construction of the KG.
        model_name (str): Name of the model to be used for answering questions.
    """

    def __init__(
        self,
        email: str = None,
        prompts: List[Prompt] = None,
        model_name: str = "deepset/roberta-base-squad2",
    ) -> None:
        """Initializes KnowledgeGraphConfig

        Usage example:
        >>>config = KnowledgeGraphConfig()
        >>>config.email = "sample-email"
        >>>config.prompts = [Prompt(concept="author", question="Who is the author of this text?")]
        >>>config.model_name = "deepset/roberta-base-squad2"

        Args:
            email (str): Email address of the user.
            prompts (List[Prompt]): List of prompts to be used in the construction of the KG.
                Defaults to a copy of ``KGConfig.DEFAULT_PROMPTS``.
            model_name (str): Name of the model to be used for answering questions.

        Returns:
            None: Initializes KnowledgeGraphConfig
        """
        if prompts is None:
            # Copy the shared defaults so that extending this config's prompts
            # in place cannot leak into KGConfig.DEFAULT_PROMPTS or into other
            # configs.
            self.prompts = list(KGConfig.DEFAULT_PROMPTS)
        else:
            self.prompts = prompts
        self.email = email or os.environ.get("OPEN_ALEX_EMAIL", None)
        self.model_name = model_name