Module `lum.clu.processors.directed_graph`

Expand source code

from pydantic import BaseModel, Field
import typing

__all__ = ["DirectedGraph"]




class Edge(BaseModel):
    """
    A single labeled, directed edge between two tokens.

    All three fields are required.  `source` and `destination` are 0-based
    token indices; `relation` is the label attached to the edge.
    """

    # Index of the token the edge starts from.
    source: typing.Annotated[
        int,
        Field(description="0-based index of token serving as relation's source"),
    ]
    # Index of the token the edge points to.
    destination: typing.Annotated[
        int,
        Field(description="0-based index of token serving as relation's destination"),
    ]
    # Label carried by the edge.
    relation: typing.Annotated[
        str,
        Field(description="label for relation"),
    ]

class DirectedGraph(BaseModel):
    """
    Storage class for directed graphs.

    Attributes
    ----------
    roots : list[int]
        A list of indices for the graph's roots.  Generally this is a single token index.

    edges : list[Edge]
        The directed edges that comprise the graph.  Each `Edge` carries the 0-based
        `source` and `destination` token indices and the `relation` label.

    Notes
    -----
    This pydantic model only stores `roots` and `edges`.  The earlier, non-pydantic
    implementation additionally took `kind`, `deps` and `words` in its constructor and
    offered derived views and graph algorithms; none of that has been ported here
    (see the NOTE at the end of the class body).
    """

    # Names of graph kinds.
    # NOTE(review): presumably used as keys identifying which dependency graph
    # a `DirectedGraph` represents -- confirm against callers.
    STANFORD_BASIC_DEPENDENCIES: typing.ClassVar[str] = "stanford-basic"
    STANFORD_COLLAPSED_DEPENDENCIES: typing.ClassVar[str] = "stanford-collapsed"

    roots: list[int] = Field(description="Roots of the directed graph")
    edges: list[Edge] = Field(description="the directed edges that comprise the graph")

    # NOTE: legacy members that existed before the move to pydantic and have
    # NOT been ported (they depended on NLPDatum, DependencyUtils, networkx,
    # Counter and defaultdict, none of which are imported by this module):
    #   - __init__(kind, deps, words), __unicode__, __eq__/__ne__/__hash__ (JSON-based)
    #   - incoming / outgoing adjacency dicts (_build_incoming, _build_outgoing)
    #   - labeled / unlabeled edge strings (_build_labeled, _build_unlabeled)
    #   - shortest_paths(start, end), shortest_path(start, end, scoring_func)
    #   - degree_centrality(), in_degree_centrality(), out_degree_centrality()
    #   - pagerank(...)
    #   - _graph_to_JSON_dict(), to_JSON_dict()
    # Retrieve the old implementation from version control when porting.

Classes

class DirectedGraph (**data: Any)

Usage docs: https://docs.pydantic.dev/2.9/concepts/models/

A base class for creating Pydantic models.

Attributes

__class_vars__: The names of the class variables defined on the model.
__private_attributes__: Metadata about the private attributes of the model.
__signature__: The synthesized __init__ [Signature][inspect.Signature] of the model.
__pydantic_complete__: Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__: The core schema of the model.
__pydantic_custom_init__: Whether the model has a custom __init__ function.
__pydantic_decorators__: Metadata containing the decorators defined on the model. This replaces Model.__validators__ and Model.__root_validators__ from Pydantic V1.
__pydantic_generic_metadata__: Metadata for generic models; contains data used for a similar purpose to args, origin, parameters in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__: Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__: The name of the post-init method for the model, if defined.
__pydantic_root_model__: Whether the model is a [RootModel][pydantic.root_model.RootModel].
__pydantic_serializer__: The pydantic-core SchemaSerializer used to dump instances of the model.
__pydantic_validator__: The pydantic-core SchemaValidator used to validate instances of the model.
__pydantic_extra__: A dictionary containing extra values, if [extra][pydantic.config.ConfigDict.extra] is set to 'allow'.
__pydantic_fields_set__: The names of fields explicitly set during instantiation.
__pydantic_private__: Values of private attributes set on the model instance.

Create a new model by parsing and validating input data from keyword arguments.

Raises [ValidationError][pydantic_core.ValidationError] if the input data cannot be validated to form a valid model.

self is explicitly positional-only to allow self as a field name.

Expand source code

class DirectedGraph(BaseModel):
    """
    Storage class for directed graphs.

    Attributes
    ----------
    roots : list[int]
        A list of indices for the graph's roots.  Generally this is a single token index.

    edges : list[Edge]
        The directed edges that comprise the graph.  Each `Edge` carries the 0-based
        `source` and `destination` token indices and the `relation` label.

    Notes
    -----
    This pydantic model only stores `roots` and `edges`.  The earlier, non-pydantic
    implementation additionally took `kind`, `deps` and `words` in its constructor and
    offered derived views and graph algorithms; none of that has been ported here
    (see the NOTE at the end of the class body).
    """

    # Names of graph kinds.
    # NOTE(review): presumably used as keys identifying which dependency graph
    # a `DirectedGraph` represents -- confirm against callers.
    STANFORD_BASIC_DEPENDENCIES: typing.ClassVar[str] = "stanford-basic"
    STANFORD_COLLAPSED_DEPENDENCIES: typing.ClassVar[str] = "stanford-collapsed"

    roots: list[int] = Field(description="Roots of the directed graph")
    edges: list[Edge] = Field(description="the directed edges that comprise the graph")

    # NOTE: legacy members that existed before the move to pydantic and have
    # NOT been ported (they depended on NLPDatum, DependencyUtils, networkx,
    # Counter and defaultdict, none of which are imported by this module):
    #   - __init__(kind, deps, words), __unicode__, __eq__/__ne__/__hash__ (JSON-based)
    #   - incoming / outgoing adjacency dicts (_build_incoming, _build_outgoing)
    #   - labeled / unlabeled edge strings (_build_labeled, _build_unlabeled)
    #   - shortest_paths(start, end), shortest_path(start, end, scoring_func)
    #   - degree_centrality(), in_degree_centrality(), out_degree_centrality()
    #   - pagerank(...)
    #   - _graph_to_JSON_dict(), to_JSON_dict()
    # Retrieve the old implementation from version control when porting.

Ancestors

pydantic.main.BaseModel

Class variables

var STANFORD_BASIC_DEPENDENCIES : ClassVar[str]

var STANFORD_COLLAPSED_DEPENDENCIES : ClassVar[str]

var edges : list[lum.clu.processors.directed_graph.Edge]

Storage class for directed graphs.

Parameters

kind : str: The name of the directed graph.
deps : dict: A dictionary of {edges: [{source, destination, relation}], roots: [int]}
words : [str]: A list of the word form of the tokens from the originating Sentence.

Attributes

_words : [str]: A list of the word form of the tokens from the originating Sentence.
roots : [int]: A list of indices for the syntactic dependency graph's roots. Generally this is a single token index.
edges : list[lum.clu.processors.directed_graph.Edge]: A list of lum.clu.processors.directed_graph.Edge

incoming : A dictionary of {int -> [int]} encoding the incoming edges for each node in the graph.

outgoing : A dictionary of {int -> [int]} encoding the outgoing edges for each node in the graph.

labeled : [str]: A list of strings where each element in the list represents an edge encoded as source index, relation, and destination index ("source_relation_destination").
unlabeled : [str]: A list of strings where each element in the list represents an edge encoded as source index and destination index ("source_destination").
graph : networkx.Graph: A networkx.graph representation of the DirectedGraph. Used by shortest_path

Methods

bag_of_labeled_dependencies_from_tokens(form): Produces a list of syntactic dependencies where each edge is labeled with its grammatical relation.
bag_of_unlabeled_dependencies_from_tokens(form): Produces a list of syntactic dependencies where each edge is left unlabeled (without its grammatical relation).

var model_computed_fields

var model_config

var model_fields

var roots : list[int]