- Created .dockerignore to exclude unnecessary files from Docker builds.
- Added .repomixignore for managing ignored patterns in Repomix.
- Introduced Dockerfile.dev for development environment setup with Python 3.12.
- Configured docker-compose.yaml to define services, including a PostgreSQL database.
- Established a devcontainer.json for Visual Studio Code integration.
- Implemented postCreate.sh for automatic dependency installation in the dev container.
- Added constants.py to centralize configuration constants for the project (a sketch follows below).
- Updated pyproject.toml to include new development dependencies.
- Created initial documentation files for project overview and style conventions.
- Added tests for new functionality to ensure reliability and correctness.
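
constants.py itself is not shown in this diff; the following is a minimal sketch only, assuming it centralizes the database settings for the docker-compose PostgreSQL service and the embedding dimension used by the initial schema below. Apart from `EMBEDDING_DIM = 1536` and the `noteflow` schema (both taken from the migration), every name, credential, and default here is an illustrative assumption.

```python
"""Hypothetical sketch of constants.py: one place for project-wide configuration."""

import os

# Embedding dimension; matches the vector column created by the initial migration.
EMBEDDING_DIM = 1536

# Schema used by all application tables (as created by the initial migration).
DB_SCHEMA = "noteflow"

# Database DSN; the default assumes the PostgreSQL service from docker-compose.yaml.
# The env-var name, credentials, and port are illustrative assumptions.
DATABASE_URL = os.environ.get(
    "NOTEFLOW_DATABASE_URL",
    "postgresql+psycopg://noteflow:noteflow@localhost:5432/noteflow",
)
```

Keeping these values in a single module lets the migration, the compose service, and application code agree on one source of truth.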
"""initial_schema
|
|
|
|
Revision ID: 6a9d9f408f40
|
|
Revises:
|
|
Create Date: 2025-12-16 19:10:55.135444
|
|
|
|
"""
|
|
|
|
from collections.abc import Sequence
|
|
|
|
import sqlalchemy as sa
|
|
from alembic import op
|
|
from sqlalchemy.dialects import postgresql
|
|
|
|
# revision identifiers, used by Alembic.
|
|
revision: str = "6a9d9f408f40"
|
|
down_revision: str | Sequence[str] | None = None
|
|
branch_labels: str | Sequence[str] | None = None
|
|
depends_on: str | Sequence[str] | None = None
|
|
|
|
# Vector dimension for embeddings (OpenAI compatible)
|
|
EMBEDDING_DIM = 1536
|
|
|
|
|
|
def upgrade() -> None:
|
|
"""Create NoteFlow schema and tables."""
|
|
# Create schema
|
|
op.execute("CREATE SCHEMA IF NOT EXISTS noteflow")
|
|
|
|
# Enable pgvector extension
|
|
try:
|
|
op.execute("CREATE EXTENSION IF NOT EXISTS vector")
|
|
except sa.exc.ProgrammingError as e:
|
|
raise RuntimeError(
|
|
f"Failed to create pgvector extension: {e}. "
|
|
"Ensure the database user has CREATE EXTENSION privileges, or "
|
|
"install pgvector manually: CREATE EXTENSION vector;"
|
|
) from e
|
|
|
|
# Create meetings table
|
|
op.create_table(
|
|
"meetings",
|
|
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
|
|
sa.Column("title", sa.String(255), nullable=False),
|
|
sa.Column("state", sa.Integer(), nullable=False, server_default="1"),
|
|
sa.Column(
|
|
"created_at",
|
|
sa.DateTime(timezone=True),
|
|
nullable=False,
|
|
server_default=sa.text("now()"),
|
|
),
|
|
sa.Column("started_at", sa.DateTime(timezone=True), nullable=True),
|
|
sa.Column("ended_at", sa.DateTime(timezone=True), nullable=True),
|
|
sa.Column(
|
|
"metadata",
|
|
postgresql.JSONB(astext_type=sa.Text()),
|
|
nullable=False,
|
|
server_default="{}",
|
|
),
|
|
sa.Column("wrapped_dek", sa.LargeBinary(), nullable=True),
|
|
schema="noteflow",
|
|
)
|
|
|
|
# Create segments table
|
|
op.create_table(
|
|
"segments",
|
|
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
|
|
sa.Column(
|
|
"meeting_id",
|
|
postgresql.UUID(as_uuid=True),
|
|
sa.ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
|
|
nullable=False,
|
|
),
|
|
sa.Column("segment_id", sa.Integer(), nullable=False),
|
|
sa.Column("text", sa.Text(), nullable=False),
|
|
sa.Column("start_time", sa.Float(), nullable=False),
|
|
sa.Column("end_time", sa.Float(), nullable=False),
|
|
sa.Column("language", sa.String(10), nullable=False, server_default="en"),
|
|
sa.Column("language_confidence", sa.Float(), nullable=False, server_default="0.0"),
|
|
sa.Column("avg_logprob", sa.Float(), nullable=False, server_default="0.0"),
|
|
sa.Column("no_speech_prob", sa.Float(), nullable=False, server_default="0.0"),
|
|
sa.Column(
|
|
"created_at",
|
|
sa.DateTime(timezone=True),
|
|
nullable=False,
|
|
server_default=sa.text("now()"),
|
|
),
|
|
schema="noteflow",
|
|
)
|
|
|
|
# Add vector column for embeddings (pgvector)
|
|
op.execute(f"ALTER TABLE noteflow.segments ADD COLUMN embedding vector({EMBEDDING_DIM})")
|
|
|
|
# Create index for vector similarity search
|
|
op.execute(
|
|
"CREATE INDEX IF NOT EXISTS ix_segments_embedding "
|
|
"ON noteflow.segments USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100)"
|
|
)
|
|
|
|
# Create index for meeting_id lookups
|
|
op.create_index(
|
|
"ix_segments_meeting_id",
|
|
"segments",
|
|
["meeting_id"],
|
|
schema="noteflow",
|
|
)
|
|
|
|
# Create word_timings table
|
|
op.create_table(
|
|
"word_timings",
|
|
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
|
|
sa.Column(
|
|
"segment_pk",
|
|
sa.Integer(),
|
|
sa.ForeignKey("noteflow.segments.id", ondelete="CASCADE"),
|
|
nullable=False,
|
|
),
|
|
sa.Column("word", sa.String(255), nullable=False),
|
|
sa.Column("start_time", sa.Float(), nullable=False),
|
|
sa.Column("end_time", sa.Float(), nullable=False),
|
|
sa.Column("probability", sa.Float(), nullable=False),
|
|
schema="noteflow",
|
|
)
|
|
|
|
# Create index for segment_pk lookups
|
|
op.create_index(
|
|
"ix_word_timings_segment_pk",
|
|
"word_timings",
|
|
["segment_pk"],
|
|
schema="noteflow",
|
|
)
|
|
|
|
# Create summaries table
|
|
op.create_table(
|
|
"summaries",
|
|
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
|
|
sa.Column(
|
|
"meeting_id",
|
|
postgresql.UUID(as_uuid=True),
|
|
sa.ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
|
|
nullable=False,
|
|
unique=True,
|
|
),
|
|
sa.Column("executive_summary", sa.Text(), nullable=True),
|
|
sa.Column(
|
|
"generated_at",
|
|
sa.DateTime(timezone=True),
|
|
nullable=False,
|
|
server_default=sa.text("now()"),
|
|
),
|
|
sa.Column("model_version", sa.String(50), nullable=True),
|
|
schema="noteflow",
|
|
)
|
|
|
|
# Create key_points table
|
|
op.create_table(
|
|
"key_points",
|
|
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
|
|
sa.Column(
|
|
"summary_id",
|
|
sa.Integer(),
|
|
sa.ForeignKey("noteflow.summaries.id", ondelete="CASCADE"),
|
|
nullable=False,
|
|
),
|
|
sa.Column("text", sa.Text(), nullable=False),
|
|
sa.Column("start_time", sa.Float(), nullable=False, server_default="0.0"),
|
|
sa.Column("end_time", sa.Float(), nullable=False, server_default="0.0"),
|
|
sa.Column(
|
|
"segment_ids",
|
|
postgresql.JSONB(astext_type=sa.Text()),
|
|
nullable=False,
|
|
server_default="[]",
|
|
),
|
|
schema="noteflow",
|
|
)
|
|
|
|
# Create index for summary_id lookups
|
|
op.create_index(
|
|
"ix_key_points_summary_id",
|
|
"key_points",
|
|
["summary_id"],
|
|
schema="noteflow",
|
|
)
|
|
|
|
# Create action_items table
|
|
op.create_table(
|
|
"action_items",
|
|
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
|
|
sa.Column(
|
|
"summary_id",
|
|
sa.Integer(),
|
|
sa.ForeignKey("noteflow.summaries.id", ondelete="CASCADE"),
|
|
nullable=False,
|
|
),
|
|
sa.Column("text", sa.Text(), nullable=False),
|
|
sa.Column("assignee", sa.String(255), nullable=False, server_default=""),
|
|
sa.Column("due_date", sa.DateTime(timezone=True), nullable=True),
|
|
sa.Column("priority", sa.Integer(), nullable=False, server_default="0"),
|
|
sa.Column(
|
|
"segment_ids",
|
|
postgresql.JSONB(astext_type=sa.Text()),
|
|
nullable=False,
|
|
server_default="[]",
|
|
),
|
|
schema="noteflow",
|
|
)
|
|
|
|
# Create index for summary_id lookups
|
|
op.create_index(
|
|
"ix_action_items_summary_id",
|
|
"action_items",
|
|
["summary_id"],
|
|
schema="noteflow",
|
|
)
|
|
|
|
|
|
def downgrade() -> None:
|
|
"""Drop all NoteFlow tables and schema."""
|
|
# Drop tables in reverse order (respecting foreign keys)
|
|
op.drop_table("action_items", schema="noteflow")
|
|
op.drop_table("key_points", schema="noteflow")
|
|
op.drop_table("summaries", schema="noteflow")
|
|
op.drop_table("word_timings", schema="noteflow")
|
|
op.drop_table("segments", schema="noteflow")
|
|
op.drop_table("meetings", schema="noteflow")
|
|
|
|
# Drop schema
|
|
op.execute("DROP SCHEMA IF EXISTS noteflow CASCADE")
|