Module lum.clu.processors.directed_graph
Expand source code
from pydantic import BaseModel, Field
import typing
# Names exported by `from <module> import *`: only DirectedGraph is listed here;
# Edge (defined below) is not, so it has to be imported by name.
__all__ = ["DirectedGraph"]
class Edge(BaseModel):
    # One labeled, directed relation between two tokens.
    # Both endpoints are addressed by 0-based token index.
    source: int = Field(
        description="0-based index of token serving as relation's source",
    )
    destination: int = Field(
        description="0-based index of token serving as relation's destination",
    )
    relation: str = Field(
        description="label for relation",
    )
class DirectedGraph(BaseModel):
    """
    Storage class for directed graphs.

    Attributes
    ----------
    roots : list[int]
        A list of indices for the syntactic dependency graph's roots. Generally this is a single token index.
    edges : list[Edge]
        The directed edges that comprise the graph. Each `Edge` holds a
        0-based `source` token index, a 0-based `destination` token index,
        and a `relation` label.

    Class Attributes
    ----------------
    STANFORD_BASIC_DEPENDENCIES : str
        The string "stanford-basic" (presumably the name under which a basic
        dependency graph is stored -- confirm against callers).
    STANFORD_COLLAPSED_DEPENDENCIES : str
        The string "stanford-collapsed" (presumably the name under which a
        collapsed dependency graph is stored -- confirm against callers).

    Notes
    -----
    The legacy (pre-pydantic) implementation also took `kind`, `deps` and
    `words` constructor arguments and exposed `_words`, `incoming`,
    `outgoing`, `labeled`, `unlabeled` and networkx graph attributes, along
    with path/centrality/pagerank helpers. None of those are part of this
    model; only `roots` and `edges` are fields.
    """

    # ClassVar keeps these constants out of the pydantic field set.
    STANFORD_BASIC_DEPENDENCIES: typing.ClassVar[str] = "stanford-basic"
    STANFORD_COLLAPSED_DEPENDENCIES: typing.ClassVar[str] = "stanford-collapsed"

    roots: list[int] = Field(description="Roots of the directed graph")
    edges: list[Edge] = Field(description="the directed edges that comprise the graph")
# def __init__(self, kind, deps, words):
# NLPDatum.__init__(self)
# self._words = [w.lower() for w in words]
# self.kind = kind
# self.roots = deps.get("roots", [])
# self.edges = [Edge(e["source"], e["destination"], e["relation"]) for e in deps["edges"]]
# self.incoming = self._build_incoming(self.edges)
# self.outgoing = self._build_outgoing(self.edges)
# self.labeled = self._build_labeled()
# self.unlabeled = self._build_unlabeled()
# self.directed_graph = DependencyUtils.build_networkx_graph(roots=self.roots, edges=self.edges, name=self.kind, reverse=False)
# self.undirected_graph = self.directed_graph.to_undirected()
# def __unicode__(self):
# return self.edges
# def __eq__(self, other):
# if isinstance(other, self.__class__):
# return self.to_JSON() == other.to_JSON()
# else:
# return False
# def __ne__(self, other):
# return not self.__eq__(other)
# def __hash__(self):
# return hash(self.to_JSON())
# def shortest_paths(self, start, end):
# """
# Find the shortest paths in the syntactic dependency graph
# between the provided start and end nodes.
# Parameters
# ----------
# start : int or [int]
# A single token index or list of token indices serving as the start of the graph traversal.
# end : int or [int]
# A single token index or list of token indices serving as the end of the graph traversal.
# See Also
# --------
# `processors.paths.DependencyUtils.shortest_path`
# """
# paths = DependencyUtils.shortest_paths(self.undirected_graph, start, end)
# return None if not paths else [DependencyUtils.retrieve_edges(self, path) for path in paths]
# def shortest_path(self, start, end, scoring_func=lambda path: -len(path)):
# """
# Find the shortest path in the syntactic dependency graph
# between the provided start and end nodes.
# Parameters
# ----------
# start : int or [int]
# A single token index or list of token indices serving as the start of the graph traversal.
# end : int or [int]
# A single token index or list of token indices serving as the end of the graph traversal.
# scoring_func : function
# A function that scores each path in a list of [(source index, directed relation, destination index)] paths. Each path has the form [(source index, relation, destination index)].
# The path with the maximum score will be returned.
# See Also
# --------
# `processors.paths.DependencyUtils.shortest_path`
# """
# paths = self.shortest_paths(start, end)
# return None if not paths else max(paths, key=scoring_func)
# def degree_centrality(self):
# """
# Compute the degree centrality for nodes.
# See Also
# --------
# https://networkx.github.io/documentation/development/reference/algorithms.centrality.html
# """
# return Counter(nx.degree_centrality(self.directed_graph))
# def in_degree_centrality(self):
# """
# Compute the in-degree centrality for nodes.
# See Also
# --------
# https://networkx.github.io/documentation/development/reference/algorithms.centrality.html
# """
# return Counter(nx.in_degree_centrality(self.directed_graph))
# def out_degree_centrality(self):
# """
# Compute the out-degree centrality for nodes.
# See Also
# --------
# https://networkx.github.io/documentation/development/reference/algorithms.centrality.html
# """
# return Counter(nx.out_degree_centrality(self.directed_graph))
# def pagerank(self,
# alpha=0.85,
# personalization=None,
# max_iter=1000,
# tol=1e-06,
# nstart=None,
# weight='weight',
# dangling=None,
# use_directed=True,
# reverse=True):
# """
# Measures node activity in a `networkx.Graph` using a thin wrapper around `networkx` implementation of pagerank algorithm (see `networkx.algorithms.link_analysis.pagerank`). Use with `lum.clu.processors.doc.DirectedGraph.graph`.
# Note that by default, the directed graph is reversed in order to highlight predicate-argument nodes (refer to pagerank algorithm to understand why).
# See Also
# --------
# `processors.paths.DependencyUtils.pagerank`
# Method parameters correspond to those of [`networkx.algorithms.link_analysis.pagerank`](https://networkx.github.io/documentation/development/reference/generated/networkx.algorithms.link_analysis.pagerank_alg.pagerank.html#networkx.algorithms.link_analysis.pagerank_alg.pagerank)
# """
# # check whether or not to reverse directed graph
# dg = self.directed_graph if not reverse else DependencyUtils.build_networkx_graph(roots=self.roots, edges=self.edges, name=self.kind, reverse=True)
# # determine graph to use
# graph = dg if use_directed else self.undirected_graph
# return DependencyUtils.pagerank(graph, alpha=alpha, personalization=personalization, max_iter=max_iter, tol=tol, nstart=nstart, weight=weight, dangling=dangling)
# def _build_incoming(self, edges):
# dep_dict = defaultdict(list)
# for edge in edges:
# dep_dict[edge.destination].append((edge.source, edge.relation))
# return dep_dict
# def _build_outgoing(self, edges):
# dep_dict = defaultdict(list)
# for edge in edges:
# dep_dict[edge.source].append((edge.destination, edge.relation))
# return dep_dict
# def _build_labeled(self):
# labeled = []
# for out in self.outgoing:
# for (dest, rel) in self.outgoing[out]:
# labeled.append("{}_{}_{}".format(self._words[out], rel.upper(), self._words[dest]))
# return labeled
# def _build_unlabeled(self):
# unlabeled = []
# for out in self.outgoing:
# for (dest, _) in self.outgoing[out]:
# unlabeled.append("{}_{}".format(self._words[out], self._words[dest]))
# return unlabeled
# def _graph_to_JSON_dict(self):
# dg_dict = dict()
# dg_dict["edges"] = [e.to_JSON_dict() for e in self.edges]
# dg_dict["roots"] = self.roots
# return dg_dict
# def to_JSON_dict(self):
# return {self.kind:self._graph_to_JSON_dict()}
Classes
class DirectedGraph (**data: Any)
-
Usage docs: https://docs.pydantic.dev/2.9/concepts/models/
A base class for creating Pydantic models.
Attributes
__class_vars__
- The names of the class variables defined on the model.
__private_attributes__
- Metadata about the private attributes of the model.
__signature__
- The synthesized
__init__
[Signature
][inspect.Signature] of the model. __pydantic_complete__
- Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__
- The core schema of the model.
__pydantic_custom_init__
- Whether the model has a custom
__init__
function. __pydantic_decorators__
- Metadata containing the decorators defined on the model.
This replaces
Model.__validators__
and Model.__root_validators__
from Pydantic V1. __pydantic_generic_metadata__
- Metadata for generic models; contains data used for a similar purpose to args, origin, parameters in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__
- Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__
- The name of the post-init method for the model, if defined.
__pydantic_root_model__
- Whether the model is a [
RootModel
][pydantic.root_model.RootModel]. __pydantic_serializer__
- The
pydantic-core
SchemaSerializer
used to dump instances of the model. __pydantic_validator__
- The
pydantic-core
SchemaValidator
used to validate instances of the model. __pydantic_extra__
- A dictionary containing extra values, if [
extra
][pydantic.config.ConfigDict.extra] is set to 'allow'
. __pydantic_fields_set__
- The names of fields explicitly set during instantiation.
__pydantic_private__
- Values of private attributes set on the model instance.
Create a new model by parsing and validating input data from keyword arguments.
Raises [
ValidationError
][pydantic_core.ValidationError] if the input data cannot be validated to form a valid model. self
is explicitly positional-only to allow self
as a field name. Expand source code
class DirectedGraph(BaseModel):
    """
    Storage class for directed graphs.

    Attributes
    ----------
    roots : list[int]
        A list of indices for the syntactic dependency graph's roots. Generally this is a single token index.
    edges : list[Edge]
        The directed edges that comprise the graph. Each `Edge` holds a
        0-based `source` token index, a 0-based `destination` token index,
        and a `relation` label.

    Class Attributes
    ----------------
    STANFORD_BASIC_DEPENDENCIES : str
        The string "stanford-basic" (presumably the name under which a basic
        dependency graph is stored -- confirm against callers).
    STANFORD_COLLAPSED_DEPENDENCIES : str
        The string "stanford-collapsed" (presumably the name under which a
        collapsed dependency graph is stored -- confirm against callers).

    Notes
    -----
    The legacy (pre-pydantic) implementation, kept below as commented-out
    code, also took `kind`, `deps` and `words` constructor arguments and
    exposed `_words`, `incoming`, `outgoing`, `labeled`, `unlabeled` and
    networkx graph attributes, along with path/centrality/pagerank helpers.
    None of those are part of this model; only `roots` and `edges` are fields.
    """

    # ClassVar keeps these constants out of the pydantic field set.
    STANFORD_BASIC_DEPENDENCIES: typing.ClassVar[str] = "stanford-basic"
    STANFORD_COLLAPSED_DEPENDENCIES: typing.ClassVar[str] = "stanford-collapsed"

    roots: list[int] = Field(description="Roots of the directed graph")
    edges: list[Edge] = Field(description="the directed edges that comprise the graph")

    # ------------------------------------------------------------------
    # Legacy implementation (not active). It references NLPDatum,
    # DependencyUtils, nx, Counter and defaultdict.
    # ------------------------------------------------------------------
    # def __init__(self, kind, deps, words):
    #     NLPDatum.__init__(self)
    #     self._words = [w.lower() for w in words]
    #     self.kind = kind
    #     self.roots = deps.get("roots", [])
    #     self.edges = [Edge(e["source"], e["destination"], e["relation"]) for e in deps["edges"]]
    #     self.incoming = self._build_incoming(self.edges)
    #     self.outgoing = self._build_outgoing(self.edges)
    #     self.labeled = self._build_labeled()
    #     self.unlabeled = self._build_unlabeled()
    #     self.directed_graph = DependencyUtils.build_networkx_graph(roots=self.roots, edges=self.edges, name=self.kind, reverse=False)
    #     self.undirected_graph = self.directed_graph.to_undirected()

    # def __unicode__(self):
    #     return self.edges

    # def __eq__(self, other):
    #     if isinstance(other, self.__class__):
    #         return self.to_JSON() == other.to_JSON()
    #     else:
    #         return False

    # def __ne__(self, other):
    #     return not self.__eq__(other)

    # def __hash__(self):
    #     return hash(self.to_JSON())

    # def shortest_paths(self, start, end):
    #     """
    #     Find the shortest paths in the syntactic dependency graph
    #     between the provided start and end nodes.
    #     Parameters
    #     ----------
    #     start : int or [int]
    #         A single token index or list of token indices serving as the start of the graph traversal.
    #     end : int or [int]
    #         A single token index or list of token indices serving as the end of the graph traversal.
    #     See Also
    #     --------
    #     `processors.paths.DependencyUtils.shortest_path`
    #     """
    #     paths = DependencyUtils.shortest_paths(self.undirected_graph, start, end)
    #     return None if not paths else [DependencyUtils.retrieve_edges(self, path) for path in paths]

    # def shortest_path(self, start, end, scoring_func=lambda path: -len(path)):
    #     """
    #     Find the shortest path in the syntactic dependency graph
    #     between the provided start and end nodes.
    #     Parameters
    #     ----------
    #     start : int or [int]
    #         A single token index or list of token indices serving as the start of the graph traversal.
    #     end : int or [int]
    #         A single token index or list of token indices serving as the end of the graph traversal.
    #     scoring_func : function
    #         A function that scores each path in a list of [(source index, directed relation, destination index)] paths. Each path has the form [(source index, relation, destination index)].
    #         The path with the maximum score will be returned.
    #     See Also
    #     --------
    #     `processors.paths.DependencyUtils.shortest_path`
    #     """
    #     paths = self.shortest_paths(start, end)
    #     return None if not paths else max(paths, key=scoring_func)

    # def degree_centrality(self):
    #     """
    #     Compute the degree centrality for nodes.
    #     See Also
    #     --------
    #     https://networkx.github.io/documentation/development/reference/algorithms.centrality.html
    #     """
    #     return Counter(nx.degree_centrality(self.directed_graph))

    # def in_degree_centrality(self):
    #     """
    #     Compute the in-degree centrality for nodes.
    #     See Also
    #     --------
    #     https://networkx.github.io/documentation/development/reference/algorithms.centrality.html
    #     """
    #     return Counter(nx.in_degree_centrality(self.directed_graph))

    # def out_degree_centrality(self):
    #     """
    #     Compute the out-degree centrality for nodes.
    #     See Also
    #     --------
    #     https://networkx.github.io/documentation/development/reference/algorithms.centrality.html
    #     """
    #     return Counter(nx.out_degree_centrality(self.directed_graph))

    # def pagerank(self,
    #              alpha=0.85,
    #              personalization=None,
    #              max_iter=1000,
    #              tol=1e-06,
    #              nstart=None,
    #              weight='weight',
    #              dangling=None,
    #              use_directed=True,
    #              reverse=True):
    #     """
    #     Measures node activity in a `networkx.Graph` using a thin wrapper around `networkx` implementation of pagerank algorithm (see `networkx.algorithms.link_analysis.pagerank`). Use with `lum.clu.processors.doc.DirectedGraph.graph`.
    #     Note that by default, the directed graph is reversed in order to highlight predicate-argument nodes (refer to pagerank algorithm to understand why).
    #     See Also
    #     --------
    #     `processors.paths.DependencyUtils.pagerank`
    #     Method parameters correspond to those of [`networkx.algorithms.link_analysis.pagerank`](https://networkx.github.io/documentation/development/reference/generated/networkx.algorithms.link_analysis.pagerank_alg.pagerank.html#networkx.algorithms.link_analysis.pagerank_alg.pagerank)
    #     """
    #     # check whether or not to reverse directed graph
    #     dg = self.directed_graph if not reverse else DependencyUtils.build_networkx_graph(roots=self.roots, edges=self.edges, name=self.kind, reverse=True)
    #     # determine graph to use
    #     graph = dg if use_directed else self.undirected_graph
    #     return DependencyUtils.pagerank(graph, alpha=alpha, personalization=personalization, max_iter=max_iter, tol=tol, nstart=nstart, weight=weight, dangling=dangling)

    # def _build_incoming(self, edges):
    #     dep_dict = defaultdict(list)
    #     for edge in edges:
    #         dep_dict[edge.destination].append((edge.source, edge.relation))
    #     return dep_dict

    # def _build_outgoing(self, edges):
    #     dep_dict = defaultdict(list)
    #     for edge in edges:
    #         dep_dict[edge.source].append((edge.destination, edge.relation))
    #     return dep_dict

    # def _build_labeled(self):
    #     labeled = []
    #     for out in self.outgoing:
    #         for (dest, rel) in self.outgoing[out]:
    #             labeled.append("{}_{}_{}".format(self._words[out], rel.upper(), self._words[dest]))
    #     return labeled

    # def _build_unlabeled(self):
    #     unlabeled = []
    #     for out in self.outgoing:
    #         for (dest, _) in self.outgoing[out]:
    #             unlabeled.append("{}_{}".format(self._words[out], self._words[dest]))
    #     return unlabeled

    # def _graph_to_JSON_dict(self):
    #     dg_dict = dict()
    #     dg_dict["edges"] = [e.to_JSON_dict() for e in self.edges]
    #     dg_dict["roots"] = self.roots
    #     return dg_dict

    # def to_JSON_dict(self):
    #     return {self.kind:self._graph_to_JSON_dict()}
Ancestors
- pydantic.main.BaseModel
Class variables
var STANFORD_BASIC_DEPENDENCIES : ClassVar[str]
var STANFORD_COLLAPSED_DEPENDENCIES : ClassVar[str]
var edges : list[lum.clu.processors.directed_graph.Edge]
-
Storage class for directed graphs.
Parameters
kind
:str
- The name of the directed graph.
deps
:dict
- A dictionary of {edges: [{source, destination, relation}], roots: [int]}
words
:[str]
- A list of the word form of the tokens from the originating
Sentence
.
Attributes
_words
:[str]
- A list of the word form of the tokens from the originating
Sentence
. roots
:[int]
- A list of indices for the syntactic dependency graph's roots. Generally this is a single token index.
edges
:list[lum.clu.processors.directed_graph.Edge]
- A list of
lum.clu.processors.directed_graph.Edge
incoming : A dictionary of {int -> [int]} encoding the incoming edges for each node in the graph.
outgoing : A dictionary of {int -> [int]} encoding the outgoing edges for each node in the graph.
labeled
:[str]
- A list of strings where each element in the list represents an edge encoded as source index, relation, and destination index ("source_relation_destination").
unlabeled
:[str]
- A list of strings where each element in the list represents an edge encoded as source index and destination index ("source_destination").
graph
:networkx.Graph
- A
networkx.graph
representation of the DirectedGraph
. Used by shortest_path
Methods
bag_of_labeled_dependencies_from_tokens(form) Produces a list of syntactic dependencies where each edge is labeled with its grammatical relation. bag_of_unlabeled_dependencies_from_tokens(form) Produces a list of syntactic dependencies where each edge is left unlabeled without its grammatical relation.
var model_computed_fields
var model_config
var model_fields
var roots : list[int]