Files
biz-bud/docs/db/structure.sql
Travis Vasceannie e0bfb7a2f2 feat: enhance coverage reporting and improve tool configuration (#55)
* feat: enhance coverage reporting and improve tool configuration

- Added support for JSON coverage reports in pyproject.toml.
- Updated .gitignore to include coverage.json and task files for better management.
- Introduced a new Type Safety Audit Report to document findings and recommendations for type safety improvements.
- Created a comprehensive coverage configuration guide to assist in understanding coverage reporting setup.
- Refactored tools configuration to utilize environment variables for concurrent scraping settings.

These changes improve the project's testing and reporting capabilities while enhancing overall code quality and maintainability.

* feat: enhance configuration handling and improve error logging

- Introduced a new utility function `_get_env_int` for robust environment variable integer retrieval with validation.
- Updated `WebToolsConfig` and `ToolsConfigModel` to utilize the new utility for environment variable defaults.
- Enhanced logging in `CircuitBreaker` to provide detailed state transition information.
- Improved URL handling in `url_analyzer.py` for better file extension extraction and normalization.
- Added type validation and logging in `SecureInputMixin` to ensure input sanitization and validation consistency.

These changes improve the reliability and maintainability of configuration management and error handling across the codebase.

* refactor: update imports and enhance .gitignore for improved organization

- Updated import paths in various example scripts to reflect the new structure under `biz_bud`.
- Enhanced .gitignore to include clearer formatting for task files.
- Removed obsolete function calls and improved error handling in several scripts.
- Added public alias for backward compatibility in `upload_r2r.py`.

These changes improve code organization, maintainability, and compatibility across the project.

* refactor: update graph paths in langgraph.json for improved organization

- Changed paths for research, catalog, paperless, and url_to_r2r graphs to reflect new directory structure.
- Added new entries for analysis and scraping graphs to enhance functionality.

These changes improve the organization and maintainability of the graph configurations.

* fix: enhance validation and error handling in date range and scraping functions

- Updated date validation in UserFiltersModel to ensure date values are strings.
- Improved error messages in create_scraped_content_dict to clarify conditions for success and failure.
- Enhanced test coverage for date validation and scraping content creation to ensure robustness.

These changes improve input validation and error handling across the application, enhancing overall reliability.

* refactor: streamline graph creation and enhance type annotations in examples

- Simplified graph creation in `catalog_ingredient_research_example.py` and `catalog_tech_components_example.py` by directly compiling the graph.
- Updated type annotations in `catalog_intel_with_config.py` for improved clarity and consistency.
- Enhanced error handling in catalog data processing to ensure robustness against unexpected data types.

These changes improve code readability, maintainability, and error resilience across example scripts.

* Update src/biz_bud/nodes/extraction/extractors.py

Co-authored-by: sourcery-ai[bot] <58596630+sourcery-ai[bot]@users.noreply.github.com>

* Update src/biz_bud/core/validation/pydantic_models.py

Co-authored-by: sourcery-ai[bot] <58596630+sourcery-ai[bot]@users.noreply.github.com>

* refactor: migrate Jina and Tavily clients to use ServiceFactory dependency injection

* refactor: migrate URL processing to provider-based architecture with improved error handling

* feat: add FirecrawlApp compatibility classes and mock implementations

* fix: add thread-safe locking to LazyLoader factory management

* feat: implement service restart and refactor cache decorator helpers

* refactor: move r2r_direct_api_call to tools.clients.r2r_utils and improve HTTP service error handling

* chore: update Sonar task IDs in report configuration

---------

Co-authored-by: sourcery-ai[bot] <58596630+sourcery-ai[bot]@users.noreply.github.com>
2025-08-04 00:54:52 -04:00

131 lines
5.2 KiB
SQL

-- Table: public.rpt_master_products
-- Integer surrogate key `id` (auto-assigned via serial) plus a required
-- `canonical_name`; every other column is optional, several with defaults.
-- (Drop statement intentionally left disabled: DROP TABLE public.rpt_master_products;)
CREATE TABLE public.rpt_master_products (
    id serial NOT NULL,
    canonical_name varchar(500) NOT NULL,
    canonical_description text,
    category varchar(100),
    unit_of_measure varchar(50),
    estimated_unit_price numeric(10, 2),
    is_active boolean DEFAULT true,
    first_seen_date timestamptz DEFAULT now(),
    last_updated timestamptz DEFAULT now(),
    total_occurrences integer DEFAULT 1,
    CONSTRAINT rpt_master_products_pkey PRIMARY KEY (id)
);

-- Secondary b-tree indexes on canonical_name and category.
CREATE INDEX idx_rpt_master_products_canonical_name
    ON public.rpt_master_products USING btree (canonical_name);
CREATE INDEX idx_rpt_master_products_category
    ON public.rpt_master_products USING btree (category);
-- Table: public.rpt_product_variations
-- Each row must point at a row in public.rpt_master_products through
-- `master_product_id` (NOT NULL, foreign key) and must carry an
-- `original_description`; the remaining columns are optional.
-- (Drop statement intentionally left disabled: DROP TABLE public.rpt_product_variations;)
CREATE TABLE public.rpt_product_variations (
    id serial NOT NULL,
    master_product_id integer NOT NULL,
    original_description varchar(500) NOT NULL,
    verified_description varchar(500),
    confidence_score numeric(3, 2),  -- precision allows values with magnitude below 10, two decimals
    verification_source varchar(100),
    search_variations_used text[],   -- array of text values
    successful_variation varchar(500),
    verification_notes text,
    created_at timestamptz DEFAULT now(),
    occurrence_count integer DEFAULT 1,
    CONSTRAINT rpt_product_variations_pkey PRIMARY KEY (id),
    CONSTRAINT rpt_product_variations_master_product_fkey
        FOREIGN KEY (master_product_id)
        REFERENCES public.rpt_master_products (id)
);

-- Secondary b-tree indexes on the foreign key and on original_description.
CREATE INDEX idx_rpt_product_variations_master_id
    ON public.rpt_product_variations USING btree (master_product_id);
CREATE INDEX idx_rpt_product_variations_original
    ON public.rpt_product_variations USING btree (original_description);
-- Ordering note: public.rpt_receipts is created BEFORE
-- public.rpt_receipt_line_items because the line-item table declares a
-- foreign key to rpt_receipts(id); PostgreSQL requires the referenced table
-- to exist when the constraint is defined.

-- Table: public.rpt_receipts
-- `id` is numeric and defaults to the next value of sequence receipts_id_seq.
-- NOTE(review): that sequence is not created in the visible part of this
-- script and is referenced without a schema, so it must already exist and be
-- resolvable via search_path before this statement runs — confirm.
-- `vendor_name` is the only other required column.
-- The primary-key constraint keeps its existing name `receipts_pkey`.
-- (Drop statement intentionally left disabled: DROP TABLE public.rpt_receipts;)
CREATE TABLE public.rpt_receipts (
    id numeric DEFAULT nextval('receipts_id_seq'::regclass) NOT NULL,
    vendor_name varchar(255) NOT NULL,
    vendor_address text,
    transaction_date date,
    transaction_time time,
    receipt_number varchar(100),
    customer_info text,
    subtotal numeric(10, 2),
    tax_amount numeric(10, 2),
    final_total numeric(10, 2),
    total_items integer,
    payment_method varchar(50),
    card_last_four char(4),
    card_type varchar(20),
    raw_receipt_text text,
    created_at timestamptz DEFAULT now(),
    updated_at timestamptz DEFAULT now(),
    CONSTRAINT receipts_pkey PRIMARY KEY (id)
);

-- Secondary b-tree indexes on payment_method, transaction_date and vendor_name.
CREATE INDEX idx_rpt_receipts_payment_method
    ON public.rpt_receipts USING btree (payment_method);
CREATE INDEX idx_rpt_receipts_transaction_date
    ON public.rpt_receipts USING btree (transaction_date);
CREATE INDEX idx_rpt_receipts_vendor_name
    ON public.rpt_receipts USING btree (vendor_name);

-- Table: public.rpt_receipt_line_items
-- `id` defaults to the next value of sequence receipt_line_items_id_seq.
-- NOTE(review): as with receipts_id_seq, this sequence is not created in the
-- visible part of this script and must already exist — confirm.
-- `receipt_id` is numeric to match the type of rpt_receipts.id, which it
-- references. `master_product_id` and `product_variation_id` are optional
-- foreign keys to rpt_master_products and rpt_product_variations.
-- The primary-key constraint keeps its existing name `receipt_line_items_pkey`.
-- (Drop statement intentionally left disabled: DROP TABLE public.rpt_receipt_line_items;)
CREATE TABLE public.rpt_receipt_line_items (
    id integer DEFAULT nextval('receipt_line_items_id_seq'::regclass) NOT NULL,
    receipt_id numeric NOT NULL,
    line_number integer,
    product_name varchar(500) NOT NULL,
    product_code varchar(100),
    quantity numeric(10, 3),
    unit_of_measure varchar(50),
    unit_price numeric(10, 2),
    total_price numeric(10, 2),
    category varchar(100),
    created_at timestamptz DEFAULT now(),
    master_product_id integer,
    product_variation_id integer,
    original_ocr_text varchar(500),
    reconciliation_status varchar(50) DEFAULT 'pending'::character varying,
    reconciliation_confidence numeric(3, 2),
    needs_review boolean DEFAULT false,
    CONSTRAINT receipt_line_items_pkey PRIMARY KEY (id),
    CONSTRAINT rpt_receipt_line_items_master_product_fkey
        FOREIGN KEY (master_product_id)
        REFERENCES public.rpt_master_products (id),
    CONSTRAINT rpt_receipt_line_items_receipt_id_fkey
        FOREIGN KEY (receipt_id)
        REFERENCES public.rpt_receipts (id),
    CONSTRAINT rpt_receipt_line_items_variation_fkey
        FOREIGN KEY (product_variation_id)
        REFERENCES public.rpt_product_variations (id)
);

-- Secondary b-tree indexes on master_product_id, receipt_id and
-- reconciliation_status.
CREATE INDEX idx_rpt_receipt_line_items_master_product
    ON public.rpt_receipt_line_items USING btree (master_product_id);
CREATE INDEX idx_rpt_receipt_line_items_receipt_id
    ON public.rpt_receipt_line_items USING btree (receipt_id);
CREATE INDEX idx_rpt_receipt_line_items_reconciliation_status
    ON public.rpt_receipt_line_items USING btree (reconciliation_status);
-- Table: public.rpt_reconciliation_log
-- Each row must reference a row in public.rpt_receipt_line_items through
-- `receipt_line_item_id` (NOT NULL, foreign key); `suggested_master_product_id`
-- is an optional foreign key to public.rpt_master_products.
-- `reconciled_at` defaults to the insertion time via now().
-- (Drop statement intentionally left disabled: DROP TABLE public.rpt_reconciliation_log;)
CREATE TABLE public.rpt_reconciliation_log (
    id serial NOT NULL,
    receipt_line_item_id integer NOT NULL,
    original_description varchar(500),
    suggested_master_product_id integer,
    action_taken varchar(50),
    confidence_score numeric(3, 2),
    reconciled_by varchar(100),
    reconciled_at timestamptz DEFAULT now(),
    notes text,
    CONSTRAINT rpt_reconciliation_log_pkey PRIMARY KEY (id),
    CONSTRAINT rpt_reconciliation_log_line_item_fkey
        FOREIGN KEY (receipt_line_item_id)
        REFERENCES public.rpt_receipt_line_items (id),
    CONSTRAINT rpt_reconciliation_log_master_product_fkey
        FOREIGN KEY (suggested_master_product_id)
        REFERENCES public.rpt_master_products (id)
);

-- Secondary b-tree index on the line-item foreign key.
CREATE INDEX idx_rpt_reconciliation_log_receipt_item
    ON public.rpt_reconciliation_log USING btree (receipt_line_item_id);