Skip to content
2 changes: 2 additions & 0 deletions graphrag/config/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ class StorageType(str, Enum):
"""The blob output type."""
cosmosdb = "cosmosdb"
"""The cosmosdb output type"""
postgres = "postgres"
"""The postgres output type."""

def __repr__(self):
"""Get a string representation."""
Expand Down
42 changes: 42 additions & 0 deletions graphrag/config/models/storage_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,45 @@ def validate_base_dir(cls, value, info):
description="The cosmosdb account url to use.",
default=graphrag_config_defaults.storage.cosmosdb_account_url,
)

### PostgreSQL
# Settings below are only meaningful when the storage type is "postgres";
# every field carries a default, so other storage types can ignore them.

# --- connection target ---
host: str = Field(
    default="localhost",
    description="PostgreSQL server host (for postgres type).",
)
port: int = Field(
    default=5432,
    description="PostgreSQL server port (for postgres type).",
)
database: str = Field(
    default="graphrag",
    description="PostgreSQL database name (for postgres type).",
)

# --- credentials (both optional; None means "not supplied") ---
username: str | None = Field(
    default=None,
    description="PostgreSQL username for authentication (for postgres type).",
)
# NOTE(review): the password is held as a plain string here — confirm that it
# is not echoed when the config is logged or dumped.
password: str | None = Field(
    default=None,
    description="PostgreSQL password for authentication (for postgres type).",
)

# --- naming and batching ---
collection_prefix: str = Field(
    default="graphrag_",
    description="Prefix for PostgreSQL collection names (for postgres type).",
)
batch_size: int = Field(
    default=50,
    description="Batch size for database operations (for postgres type).",
)

# --- timeouts ---
# NOTE(review): units are not stated in this file — presumably seconds; confirm
# against the storage implementation that consumes these values.
command_timeout: int = Field(
    default=600,
    description="Command timeout for database operations (for postgres type).",
)
server_timeout: int = Field(
    default=120,
    description="Server timeout for database connections (for postgres type).",
)
connection_timeout: int = Field(
    default=60,
    description="Connection timeout for establishing database connections (for postgres type).",
)
12 changes: 9 additions & 3 deletions graphrag/index/operations/finalize_entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ def finalize_entities(
layout_enabled: bool = False,
) -> pd.DataFrame:
"""All the steps to transform final entities."""
# Drop any pre-existing "degree", "x" and "y" columns for Postgres storage
# compatibility. The entities.merge calls below add them back with freshly
# calculated values.
entities = entities.drop(columns=["degree", "x", "y"], errors="ignore")
graph = create_graph(relationships, edge_attr=["weight"])
graph_embeddings = None
if embed_config is not None and embed_config.enabled:
Expand All @@ -45,9 +48,12 @@ def finalize_entities(
final_entities["degree"] = final_entities["degree"].fillna(0).astype(int)
final_entities.reset_index(inplace=True)
final_entities["human_readable_id"] = final_entities.index
final_entities["id"] = final_entities["human_readable_id"].apply(
lambda _x: str(uuid4())
)

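# Generate ids only when the "id" column is missing or entirely null; existing ids are kept.
# NOTE: if the column is only partially populated, rows with a null id are left unchanged.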
if "id" not in final_entities.columns or final_entities["id"].isna().all():
final_entities["id"] = final_entities["human_readable_id"].apply(
lambda _x: str(uuid4())
)
return final_entities.loc[
:,
ENTITIES_FINAL_COLUMNS,
Expand Down
9 changes: 6 additions & 3 deletions graphrag/index/operations/finalize_relationships.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,12 @@ def finalize_relationships(

final_relationships.reset_index(inplace=True)
final_relationships["human_readable_id"] = final_relationships.index
final_relationships["id"] = final_relationships["human_readable_id"].apply(
lambda _x: str(uuid4())
)

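# Generate ids only when the "id" column is missing or entirely null; existing ids are kept.
# NOTE: if the column is only partially populated, rows with a null id are left unchanged.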
if "id" not in final_relationships.columns or final_relationships["id"].isna().all():
final_relationships["id"] = final_relationships["human_readable_id"].apply(
lambda _x: str(uuid4())
)

return final_relationships.loc[
:,
Expand Down
6 changes: 5 additions & 1 deletion graphrag/index/workflows/create_communities.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,11 @@ def create_communities(

# join it all up and add some new fields
final_communities = all_grouped.merge(entity_ids, on="community", how="inner")
final_communities["id"] = [str(uuid4()) for _ in range(len(final_communities))]

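# Generate ids only when the "id" column is missing or entirely null; existing ids are kept.
# NOTE: if the column is only partially populated, rows with a null id are left unchanged.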
if "id" not in final_communities.columns or final_communities["id"].isna().all():
final_communities["id"] = [str(uuid4()) for _ in range(len(final_communities))]

final_communities["human_readable_id"] = final_communities["community"]
final_communities["title"] = "Community " + final_communities["community"].astype(
str
Expand Down
2 changes: 2 additions & 0 deletions graphrag/storage/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from typing import TYPE_CHECKING, ClassVar

from graphrag.config.enums import StorageType
from graphrag.storage.postgres_pipeline_storage import PostgresPipelineStorage
from graphrag.storage.blob_pipeline_storage import BlobPipelineStorage
from graphrag.storage.cosmosdb_pipeline_storage import CosmosDBPipelineStorage
from graphrag.storage.file_pipeline_storage import FilePipelineStorage
Expand Down Expand Up @@ -81,3 +82,4 @@ def is_supported_type(cls, storage_type: str) -> bool:
StorageFactory.register(StorageType.cosmosdb.value, CosmosDBPipelineStorage)
StorageFactory.register(StorageType.file.value, FilePipelineStorage)
StorageFactory.register(StorageType.memory.value, MemoryPipelineStorage)
StorageFactory.register(StorageType.postgres.value, PostgresPipelineStorage)
Loading
Loading