noteflow/src/noteflow/infrastructure/persistence/migrations/versions/6a9d9f408f40_initial_schema.py
Travis Vasceannie b333ea5b23 Add initial Docker and development environment setup
- Created .dockerignore to exclude unnecessary files from Docker builds.
- Added .repomixignore for managing ignored patterns in Repomix.
- Introduced Dockerfile.dev for development environment setup with Python 3.12.
- Configured docker-compose.yaml to define services, including a PostgreSQL database.
- Established a devcontainer.json for Visual Studio Code integration.
- Implemented postCreate.sh for automatic dependency installation in the dev container.
- Added constants.py to centralize configuration constants for the project.
- Updated pyproject.toml to include new development dependencies.
- Created initial documentation files for project overview and style conventions.
- Added tests for new functionalities to ensure reliability and correctness.
2025-12-19 05:02:16 +00:00


"""initial_schema
Revision ID: 6a9d9f408f40
Revises:
Create Date: 2025-12-16 19:10:55.135444
"""
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision: str = "6a9d9f408f40"
down_revision: str | Sequence[str] | None = None
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None

# Vector dimension for embeddings; 1536 matches the output size of OpenAI's
# text-embedding-ada-002 and text-embedding-3-small models.
EMBEDDING_DIM = 1536


def upgrade() -> None:
"""Create NoteFlow schema and tables."""
# Create schema
op.execute("CREATE SCHEMA IF NOT EXISTS noteflow")
# Enable pgvector extension
try:
op.execute("CREATE EXTENSION IF NOT EXISTS vector")
except sa.exc.ProgrammingError as e:
raise RuntimeError(
f"Failed to create pgvector extension: {e}. "
"Ensure the database user has CREATE EXTENSION privileges, or "
"install pgvector manually: CREATE EXTENSION vector;"
) from e
# Create meetings table
op.create_table(
"meetings",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column("title", sa.String(255), nullable=False),
sa.Column("state", sa.Integer(), nullable=False, server_default="1"),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
sa.Column("started_at", sa.DateTime(timezone=True), nullable=True),
sa.Column("ended_at", sa.DateTime(timezone=True), nullable=True),
sa.Column(
"metadata",
postgresql.JSONB(astext_type=sa.Text()),
nullable=False,
server_default="{}",
),
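        # Wrapped data-encryption key for this meeting's content; the name
        # suggests envelope encryption, with the key stored only in wrapped form.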
sa.Column("wrapped_dek", sa.LargeBinary(), nullable=True),
schema="noteflow",
)
# Create segments table
op.create_table(
"segments",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column(
"meeting_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("segment_id", sa.Integer(), nullable=False),
sa.Column("text", sa.Text(), nullable=False),
sa.Column("start_time", sa.Float(), nullable=False),
sa.Column("end_time", sa.Float(), nullable=False),
sa.Column("language", sa.String(10), nullable=False, server_default="en"),
sa.Column("language_confidence", sa.Float(), nullable=False, server_default="0.0"),
sa.Column("avg_logprob", sa.Float(), nullable=False, server_default="0.0"),
sa.Column("no_speech_prob", sa.Float(), nullable=False, server_default="0.0"),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
schema="noteflow",
)
# Add vector column for embeddings (pgvector)
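    # Raw SQL is used here because SQLAlchemy has no built-in vector type;
    # the separate pgvector Python package provides one if ORM-level support
    # is ever needed.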
op.execute(f"ALTER TABLE noteflow.segments ADD COLUMN embedding vector({EMBEDDING_DIM})")
# Create index for vector similarity search
op.execute(
"CREATE INDEX IF NOT EXISTS ix_segments_embedding "
"ON noteflow.segments USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100)"
)
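    # An ivfflat index built while the table is empty has no meaningful
    # centroids, so recall will be poor until the index is rebuilt (REINDEX)
    # after data is loaded. lists = 100 follows pgvector's rule of thumb of
    # roughly rows / 1000 for tables up to ~1M rows.
    # Example of the query shape this index accelerates, where <=> is
    # pgvector's cosine-distance operator:
    #   SELECT id, text FROM noteflow.segments
    #   ORDER BY embedding <=> '[...]'::vector LIMIT 10;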
# Create index for meeting_id lookups
op.create_index(
"ix_segments_meeting_id",
"segments",
["meeting_id"],
schema="noteflow",
)
# Create word_timings table
op.create_table(
"word_timings",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column(
"segment_pk",
sa.Integer(),
sa.ForeignKey("noteflow.segments.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("word", sa.String(255), nullable=False),
sa.Column("start_time", sa.Float(), nullable=False),
sa.Column("end_time", sa.Float(), nullable=False),
sa.Column("probability", sa.Float(), nullable=False),
schema="noteflow",
)
# Create index for segment_pk lookups
op.create_index(
"ix_word_timings_segment_pk",
"word_timings",
["segment_pk"],
schema="noteflow",
)
# Create summaries table
op.create_table(
"summaries",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column(
"meeting_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("noteflow.meetings.id", ondelete="CASCADE"),
nullable=False,
unique=True,
),
sa.Column("executive_summary", sa.Text(), nullable=True),
sa.Column(
"generated_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
sa.Column("model_version", sa.String(50), nullable=True),
schema="noteflow",
)
# Create key_points table
op.create_table(
"key_points",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column(
"summary_id",
sa.Integer(),
sa.ForeignKey("noteflow.summaries.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("text", sa.Text(), nullable=False),
sa.Column("start_time", sa.Float(), nullable=False, server_default="0.0"),
sa.Column("end_time", sa.Float(), nullable=False, server_default="0.0"),
sa.Column(
"segment_ids",
postgresql.JSONB(astext_type=sa.Text()),
nullable=False,
server_default="[]",
),
schema="noteflow",
)
# Create index for summary_id lookups
op.create_index(
"ix_key_points_summary_id",
"key_points",
["summary_id"],
schema="noteflow",
)
# Create action_items table
op.create_table(
"action_items",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column(
"summary_id",
sa.Integer(),
sa.ForeignKey("noteflow.summaries.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("text", sa.Text(), nullable=False),
sa.Column("assignee", sa.String(255), nullable=False, server_default=""),
sa.Column("due_date", sa.DateTime(timezone=True), nullable=True),
sa.Column("priority", sa.Integer(), nullable=False, server_default="0"),
sa.Column(
"segment_ids",
postgresql.JSONB(astext_type=sa.Text()),
nullable=False,
server_default="[]",
),
schema="noteflow",
)
# Create index for summary_id lookups
op.create_index(
"ix_action_items_summary_id",
"action_items",
["summary_id"],
schema="noteflow",
)


def downgrade() -> None:
"""Drop all NoteFlow tables and schema."""
# Drop tables in reverse order (respecting foreign keys)
op.drop_table("action_items", schema="noteflow")
op.drop_table("key_points", schema="noteflow")
op.drop_table("summaries", schema="noteflow")
op.drop_table("word_timings", schema="noteflow")
op.drop_table("segments", schema="noteflow")
op.drop_table("meetings", schema="noteflow")
# Drop schema
op.execute("DROP SCHEMA IF EXISTS noteflow CASCADE")