# Generated by Django 5.2.6 on 2025-09-20 16:57

import django.db.models.deletion
import pgvector.django.vector
import uuid
from django.conf import settings
from django.db import migrations, models


class Migration(migrations.Migration):
    """Follow-up schema migration for the ``django_cfg_knowbase`` app.

    Builds on ``0001_initial`` and, roughly in this order:

    * creates the archive models (``ArchiveItem``, ``ArchiveItemChunk``,
      ``DocumentArchive``), ``DocumentCategory`` and the external-data
      models (``ExternalData``, ``ExternalDataChunk``);
    * renames existing indexes on ``chatmessage``, ``chatsession``,
      ``document`` and ``documentchunk``;
    * adds ``document.is_public`` and redefines several cost and metadata
      fields on the existing models;
    * attaches the foreign keys and many-to-many relations of the new models;
    * adds indexes and uniqueness rules for the new models.

    The operation order matters: relations, indexes and constraints are
    added only after the models and fields they refer to exist.
    """

    # Requires the app's initial migration and whichever user model the
    # project has configured via AUTH_USER_MODEL (targeted by the `user` FKs).
    dependencies = [
        ('django_cfg_knowbase', '0001_initial'),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        # ------------------------------------------------------------------
        # 1. New models. They are created without their relational fields;
        #    the ForeignKey / ManyToMany columns are attached further down
        #    with AddField once every target model exists.
        # ------------------------------------------------------------------

        # A single entry inside an uploaded archive.
        # Note: `ordering` (and later indexes) reference `archive`, which is
        # added to this model by an AddField operation below.
        migrations.CreateModel(
            name='ArchiveItem',
            fields=[
                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
                ('created_at', models.DateTimeField(auto_now_add=True, db_index=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
                ('relative_path', models.CharField(help_text='Path within archive', max_length=1024)),
                ('item_name', models.CharField(help_text='Item name', max_length=255)),
                ('item_type', models.CharField(help_text='MIME type', max_length=100)),
                ('content_type', models.CharField(choices=[('document', 'Document'), ('code', 'Code'), ('image', 'Image'), ('data', 'Data'), ('archive', 'Archive'), ('unknown', 'Unknown')], default='unknown', help_text='Content classification', max_length=20)),
                ('file_size', models.PositiveIntegerField(default=0, help_text='Item size in bytes')),
                ('content_hash', models.CharField(help_text='SHA-256 hash of item content', max_length=64)),
                ('raw_content', models.TextField(blank=True, help_text='Extracted text content')),
                ('is_processable', models.BooleanField(default=False, help_text='Whether item can be processed for chunks')),
                ('language', models.CharField(blank=True, help_text='Programming language or document language', max_length=50)),
                ('encoding', models.CharField(default='utf-8', help_text='Character encoding', max_length=50)),
                ('chunks_count', models.PositiveIntegerField(default=0, help_text='Number of chunks created')),
                ('total_tokens', models.PositiveIntegerField(default=0, help_text='Total tokens in all chunks')),
                ('processing_cost', models.FloatField(default=0.0, help_text='Processing cost for this item')),
                ('metadata', models.JSONField(blank=True, default=dict, help_text='Item-specific metadata', null=True)),
            ],
            options={
                'verbose_name': 'Archive Item',
                'verbose_name_plural': 'Archive Items',
                'db_table': 'django_cfg_knowbase_archive_items',
                'ordering': ['archive', 'relative_path'],
            },
        ),
        # A chunk of an archive item's text, with a nullable 1536-dimension
        # pgvector embedding column. `item` (used in `ordering`) is added
        # by an AddField operation below.
        migrations.CreateModel(
            name='ArchiveItemChunk',
            fields=[
                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
                ('created_at', models.DateTimeField(auto_now_add=True, db_index=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
                ('content', models.TextField(help_text='Chunk text content')),
                ('chunk_index', models.PositiveIntegerField(help_text='Sequential chunk number within item')),
                ('chunk_type', models.CharField(choices=[('text', 'Text'), ('code', 'Code'), ('heading', 'Heading'), ('metadata', 'Metadata'), ('table', 'Table'), ('list', 'List')], default='text', help_text='Type of content in chunk', max_length=20)),
                ('context_metadata', models.JSONField(default=dict, help_text='Rich context information for AI processing')),
                ('embedding', pgvector.django.vector.VectorField(dimensions=1536, help_text='Vector embedding for semantic search', null=True)),
                ('token_count', models.PositiveIntegerField(default=0, help_text='Number of tokens in chunk')),
                ('character_count', models.PositiveIntegerField(default=0, help_text='Number of characters in chunk')),
                ('embedding_model', models.CharField(default='text-embedding-ada-002', help_text='Model used for embedding generation', max_length=100)),
                ('embedding_cost', models.FloatField(default=0.0, help_text='Cost in USD for embedding generation')),
            ],
            options={
                'verbose_name': 'Archive Item Chunk',
                'verbose_name_plural': 'Archive Item Chunks',
                'db_table': 'django_cfg_knowbase_archive_item_chunks',
                'ordering': ['item', 'chunk_index'],
            },
        ),
        # An uploaded archive file plus its processing status and counters.
        # NOTE(review): `content_hash` is declared with db_index=True here and
        # also receives an explicit single-column Index further down; one of
        # the two looks redundant - confirm against the model definition.
        migrations.CreateModel(
            name='DocumentArchive',
            fields=[
                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
                ('created_at', models.DateTimeField(auto_now_add=True, db_index=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
                ('title', models.CharField(help_text='Archive title', max_length=512)),
                ('description', models.TextField(blank=True, help_text='Archive description')),
                ('is_public', models.BooleanField(default=True, help_text='Whether this archive is publicly accessible')),
                ('archive_file', models.FileField(help_text='Uploaded archive file', upload_to='archives/%Y/%m/%d/')),
                ('original_filename', models.CharField(help_text='Original uploaded filename', max_length=255)),
                ('file_size', models.PositiveIntegerField(default=0, help_text='Archive size in bytes')),
                ('archive_type', models.CharField(choices=[('zip', 'ZIP'), ('tar', 'TAR'), ('tar.gz', 'TAR.GZ'), ('tar.bz2', 'TAR.BZ2')], help_text='Archive format', max_length=20)),
                ('content_hash', models.CharField(db_index=True, help_text='SHA-256 hash for duplicate detection', max_length=64)),
                ('processing_status', models.CharField(choices=[('pending', 'Pending'), ('processing', 'Processing'), ('completed', 'Completed'), ('failed', 'Failed'), ('cancelled', 'Cancelled')], db_index=True, default='pending', max_length=20)),
                ('processed_at', models.DateTimeField(blank=True, help_text='When processing completed', null=True)),
                ('processing_error', models.TextField(blank=True, default='', help_text='Error message if processing failed')),
                ('processing_duration_ms', models.PositiveIntegerField(default=0, help_text='Processing time in milliseconds')),
                ('total_items', models.PositiveIntegerField(default=0, help_text='Total items in archive')),
                ('processed_items', models.PositiveIntegerField(default=0, help_text='Successfully processed items')),
                ('total_chunks', models.PositiveIntegerField(default=0, help_text='Total chunks created')),
                ('vectorized_chunks', models.PositiveIntegerField(default=0, help_text='Chunks with embeddings')),
                ('total_tokens', models.PositiveIntegerField(default=0, help_text='Total tokens across all chunks')),
                ('total_cost_usd', models.FloatField(default=0.0, help_text='Total processing cost in USD')),
                ('metadata', models.JSONField(blank=True, default=dict, help_text='Additional archive metadata', null=True)),
            ],
            options={
                'verbose_name': 'Document Archive',
                'verbose_name_plural': 'Document Archives',
                'db_table': 'django_cfg_knowbase_document_archives',
                'ordering': ['-created_at'],
            },
        ),
        # A uniquely named category; it is the target of the `categories`
        # many-to-many fields and of `ExternalData.category` added below.
        migrations.CreateModel(
            name='DocumentCategory',
            fields=[
                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
                ('created_at', models.DateTimeField(auto_now_add=True, db_index=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
                ('name', models.CharField(help_text='Category name', max_length=255, unique=True)),
                ('description', models.TextField(blank=True, help_text='Category description')),
                ('is_public', models.BooleanField(default=True, help_text='Whether documents in this category are publicly accessible')),
            ],
            options={
                'verbose_name': 'Document Category',
                'verbose_name_plural': 'Document Categories',
                'db_table': 'django_cfg_knowbase_document_categories',
                'ordering': ['name'],
            },
        ),
        # Content pulled from an external source, together with its chunking
        # settings and processing bookkeeping. Unlike the other new models,
        # `is_public` defaults to False here.
        # NOTE(review): `content_hash` has db_index=True and also gets an
        # explicit Index below - confirm both are intended.
        migrations.CreateModel(
            name='ExternalData',
            fields=[
                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
                ('created_at', models.DateTimeField(auto_now_add=True, db_index=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
                ('title', models.CharField(help_text='Human-readable title for this external data source', max_length=512)),
                ('description', models.TextField(blank=True, help_text='Description of what this external data contains')),
                ('source_type', models.CharField(choices=[('model', 'Django Model'), ('api', 'API Endpoint'), ('database', 'Database Query'), ('file', 'File System'), ('custom', 'Custom Source')], default='model', help_text='Type of external data source', max_length=20)),
                ('source_identifier', models.CharField(blank=True, help_text="Unique identifier for the data source (e.g., 'vehicles_data.Vehicle')", max_length=255)),
                ('source_config', models.JSONField(blank=True, default=dict, help_text='Configuration for data extraction (fields, filters, etc.)')),
                ('content', models.TextField(blank=True, help_text='Extracted text content for vectorization')),
                ('content_hash', models.CharField(blank=True, db_index=True, help_text='SHA256 hash of content for change detection', max_length=64)),
                ('metadata', models.JSONField(blank=True, default=dict, help_text='Additional metadata from the source')),
                ('status', models.CharField(choices=[('pending', 'Pending'), ('processing', 'Processing'), ('completed', 'Completed'), ('failed', 'Failed'), ('outdated', 'Outdated')], default='pending', help_text='Current processing status', max_length=20)),
                ('processing_error', models.TextField(blank=True, help_text='Error message if processing failed')),
                ('chunk_size', models.PositiveIntegerField(default=1000, help_text='Size of text chunks for vectorization')),
                ('overlap_size', models.PositiveIntegerField(default=200, help_text='Overlap between chunks')),
                ('embedding_model', models.CharField(default='text-embedding-ada-002', help_text='Embedding model used for vectorization', max_length=100)),
                ('similarity_threshold', models.FloatField(default=0.5, help_text='Similarity threshold for this external data (0.0-1.0). Lower = more results, higher = more precise')),
                ('processed_at', models.DateTimeField(blank=True, help_text='When the data was last processed', null=True)),
                ('source_updated_at', models.DateTimeField(blank=True, help_text='When the source data was last updated', null=True)),
                ('total_chunks', models.PositiveIntegerField(default=0, help_text='Total number of chunks created')),
                ('total_tokens', models.PositiveIntegerField(default=0, help_text='Total tokens processed')),
                ('processing_cost', models.FloatField(default=0.0, help_text='Total cost for processing this data (USD)')),
                ('tags', models.JSONField(blank=True, default=list, help_text='Tags for categorization and filtering')),
                ('is_active', models.BooleanField(default=True, help_text='Whether this data source is active for search')),
                ('is_public', models.BooleanField(default=False, help_text='Whether this data is publicly searchable')),
            ],
            options={
                'verbose_name': 'External Data',
                'verbose_name_plural': 'External Data',
                'db_table': 'django_cfg_knowbase_external_data',
                'ordering': ['-processed_at', '-created_at'],
            },
        ),
        # A chunk of an ExternalData record, with a nullable 1536-dimension
        # pgvector embedding column. `external_data` (used in `ordering`) is
        # added by an AddField operation below.
        migrations.CreateModel(
            name='ExternalDataChunk',
            fields=[
                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
                ('created_at', models.DateTimeField(auto_now_add=True, db_index=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
                ('content', models.TextField(blank=True, help_text='Text content of the chunk')),
                ('chunk_index', models.PositiveIntegerField(default=0, help_text='Sequential index of this chunk within the external data')),
                ('embedding', pgvector.django.vector.VectorField(blank=True, dimensions=1536, help_text='Vector embedding for semantic search', null=True)),
                ('embedding_model', models.CharField(default='text-embedding-ada-002', help_text='Model used for embedding generation', max_length=100)),
                ('token_count', models.PositiveIntegerField(default=0, help_text='Number of tokens in this chunk')),
                ('character_count', models.PositiveIntegerField(default=0, help_text='Number of characters in this chunk')),
                ('embedding_cost', models.FloatField(default=0.0, help_text='Cost for generating this embedding (USD)')),
                ('chunk_metadata', models.JSONField(blank=True, default=dict, help_text='Additional metadata for this specific chunk')),
            ],
            options={
                'verbose_name': 'External Data Chunk',
                'verbose_name_plural': 'External Data Chunks',
                'db_table': 'django_cfg_knowbase_external_data_chunk',
                'ordering': ['external_data', 'chunk_index'],
            },
        ),

        # ------------------------------------------------------------------
        # 2. Index renames on models that already exist. Only the index
        #    names change: the `django_cfg_knowbase_<xx>_` prefix becomes
        #    `django_cfg__` and the hash suffix differs.
        #    NOTE(review): the reason for the new names is not visible in
        #    this file - presumably the generated names changed; confirm.
        # ------------------------------------------------------------------
        migrations.RenameIndex(
            model_name='chatmessage',
            new_name='django_cfg__session_564129_idx',
            old_name='django_cfg_knowbase_ch_session_782e67_idx',
        ),
        migrations.RenameIndex(
            model_name='chatmessage',
            new_name='django_cfg__role_a6e8b7_idx',
            old_name='django_cfg_knowbase_ch_role_e0d53e_idx',
        ),
        migrations.RenameIndex(
            model_name='chatmessage',
            new_name='django_cfg__created_875295_idx',
            old_name='django_cfg_knowbase_ch_created_49bad9_idx',
        ),
        migrations.RenameIndex(
            model_name='chatsession',
            new_name='django_cfg__user_id_6bf22b_idx',
            old_name='django_cfg_knowbase_ch_user_id_227a9f_idx',
        ),
        migrations.RenameIndex(
            model_name='chatsession',
            new_name='django_cfg__is_acti_12448e_idx',
            old_name='django_cfg_knowbase_ch_is_acti_6fcdc8_idx',
        ),
        migrations.RenameIndex(
            model_name='document',
            new_name='django_cfg__user_id_ea1b86_idx',
            old_name='django_cfg_knowbase_do_user_id_3f12df_idx',
        ),
        migrations.RenameIndex(
            model_name='document',
            new_name='django_cfg__content_ebac19_idx',
            old_name='django_cfg_knowbase_do_content_0655c8_idx',
        ),
        migrations.RenameIndex(
            model_name='document',
            new_name='django_cfg__process_5f6075_idx',
            old_name='django_cfg_knowbase_do_process_f2c2a8_idx',
        ),
        migrations.RenameIndex(
            model_name='documentchunk',
            new_name='django_cfg__user_id_ddd3b5_idx',
            old_name='django_cfg_knowbase_do_user_id_a4e4de_idx',
        ),
        migrations.RenameIndex(
            model_name='documentchunk',
            new_name='django_cfg__documen_394c50_idx',
            old_name='django_cfg_knowbase_do_documen_64aaaa_idx',
        ),

        # ------------------------------------------------------------------
        # 3. Changes to fields of existing models. The previous definitions
        #    live in 0001_initial and are not visible here; each AlterField
        #    states the complete new definition.
        # ------------------------------------------------------------------

        # New visibility flag on documents; the default of True is what
        # existing rows receive when the column is added.
        migrations.AddField(
            model_name='document',
            name='is_public',
            field=models.BooleanField(default=True, help_text='Whether this document is publicly accessible'),
        ),
        # Cost columns are (re)declared as FloatField with a 0.0 default.
        migrations.AlterField(
            model_name='chatmessage',
            name='cost_usd',
            field=models.FloatField(default=0.0, help_text='Cost in USD for this message'),
        ),
        migrations.AlterField(
            model_name='chatsession',
            name='total_cost_usd',
            field=models.FloatField(default=0.0, help_text='Total session cost for monitoring'),
        ),
        # Metadata columns are (re)declared as nullable JSON defaulting to {}.
        migrations.AlterField(
            model_name='document',
            name='metadata',
            field=models.JSONField(blank=True, default=dict, help_text='Additional document metadata', null=True),
        ),
        migrations.AlterField(
            model_name='document',
            name='total_cost_usd',
            field=models.FloatField(default=0.0, help_text='Total processing cost in USD'),
        ),
        migrations.AlterField(
            model_name='documentchunk',
            name='embedding_cost',
            field=models.FloatField(default=0.0, help_text='Cost in USD for embedding generation'),
        ),
        migrations.AlterField(
            model_name='documentchunk',
            name='metadata',
            field=models.JSONField(blank=True, default=dict, help_text='Chunk-specific metadata', null=True),
        ),

        # ------------------------------------------------------------------
        # 4. Relations for the archive models. Every FK uses CASCADE, so
        #    deleting the referenced user, archive or item deletes the
        #    dependent rows as well.
        # ------------------------------------------------------------------
        migrations.AddField(
            model_name='archiveitem',
            name='user',
            field=models.ForeignKey(help_text='Owner of this record', on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
        ),
        migrations.AddField(
            model_name='archiveitemchunk',
            name='item',
            field=models.ForeignKey(help_text='Parent item', on_delete=django.db.models.deletion.CASCADE, related_name='chunks', to='django_cfg_knowbase.archiveitem'),
        ),
        migrations.AddField(
            model_name='archiveitemchunk',
            name='user',
            field=models.ForeignKey(help_text='Owner of this record', on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
        ),
        migrations.AddField(
            model_name='documentarchive',
            name='user',
            field=models.ForeignKey(help_text='Owner of this record', on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
        ),
        # Chunks reference the archive directly in addition to their item;
        # both reverse accessors are named `chunks` (on archive and on item).
        migrations.AddField(
            model_name='archiveitemchunk',
            name='archive',
            field=models.ForeignKey(help_text='Parent archive', on_delete=django.db.models.deletion.CASCADE, related_name='chunks', to='django_cfg_knowbase.documentarchive'),
        ),
        migrations.AddField(
            model_name='archiveitem',
            name='archive',
            field=models.ForeignKey(help_text='Parent archive', on_delete=django.db.models.deletion.CASCADE, related_name='items', to='django_cfg_knowbase.documentarchive'),
        ),
        # Category links: archives and documents can each belong to several
        # categories.
        migrations.AddField(
            model_name='documentarchive',
            name='categories',
            field=models.ManyToManyField(blank=True, help_text='Archive categories (supports multiple)', related_name='archives', to='django_cfg_knowbase.documentcategory'),
        ),
        migrations.AddField(
            model_name='document',
            name='categories',
            field=models.ManyToManyField(blank=True, help_text='Document categories (supports multiple)', related_name='documents', to='django_cfg_knowbase.documentcategory'),
        ),
        # Index for the `document.is_public` column added in section 3,
        # combined with descending creation time.
        migrations.AddIndex(
            model_name='document',
            index=models.Index(fields=['is_public', '-created_at'], name='django_cfg__is_publ_ebcc98_idx'),
        ),

        # ------------------------------------------------------------------
        # 5. Relations for the external-data models.
        # ------------------------------------------------------------------

        # Optional category: SET_NULL keeps the ExternalData row and clears
        # the reference when the category is deleted.
        migrations.AddField(
            model_name='externaldata',
            name='category',
            field=models.ForeignKey(blank=True, help_text='Category for organization', null=True, on_delete=django.db.models.deletion.SET_NULL, to='django_cfg_knowbase.documentcategory'),
        ),
        migrations.AddField(
            model_name='externaldata',
            name='user',
            field=models.ForeignKey(help_text='Owner of this record', on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
        ),
        migrations.AddField(
            model_name='externaldatachunk',
            name='external_data',
            field=models.ForeignKey(help_text='External data this chunk belongs to', on_delete=django.db.models.deletion.CASCADE, related_name='chunks', to='django_cfg_knowbase.externaldata'),
        ),
        migrations.AddField(
            model_name='externaldatachunk',
            name='user',
            field=models.ForeignKey(help_text='Owner of this chunk', on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
        ),

        # ------------------------------------------------------------------
        # 6. Indexes and uniqueness rules for the new models. These come
        #    last because they reference the FK fields added above.
        # ------------------------------------------------------------------

        # ArchiveItemChunk
        migrations.AddIndex(
            model_name='archiveitemchunk',
            index=models.Index(fields=['user'], name='django_cfg__user_id_a41505_idx'),
        ),
        migrations.AddIndex(
            model_name='archiveitemchunk',
            index=models.Index(fields=['archive'], name='django_cfg__archive_176a8a_idx'),
        ),
        migrations.AddIndex(
            model_name='archiveitemchunk',
            index=models.Index(fields=['item', 'chunk_index'], name='django_cfg__item_id_ec8c08_idx'),
        ),
        migrations.AddIndex(
            model_name='archiveitemchunk',
            index=models.Index(fields=['chunk_type'], name='django_cfg__chunk_t_2a4993_idx'),
        ),
        # A chunk index may appear only once per item.
        # NOTE(review): this covers the same columns as the
        # (item, chunk_index) index just above - confirm both are needed.
        migrations.AddConstraint(
            model_name='archiveitemchunk',
            constraint=models.UniqueConstraint(fields=('item', 'chunk_index'), name='unique_item_chunk'),
        ),

        # ArchiveItem
        migrations.AddIndex(
            model_name='archiveitem',
            index=models.Index(fields=['user'], name='django_cfg__user_id_73874f_idx'),
        ),
        migrations.AddIndex(
            model_name='archiveitem',
            index=models.Index(fields=['archive', 'relative_path'], name='django_cfg__archive_42a267_idx'),
        ),
        migrations.AddIndex(
            model_name='archiveitem',
            index=models.Index(fields=['content_type', 'is_processable'], name='django_cfg__content_ea7d23_idx'),
        ),
        migrations.AddIndex(
            model_name='archiveitem',
            index=models.Index(fields=['language'], name='django_cfg__languag_f02bdb_idx'),
        ),
        # A path may appear only once within a given archive.
        # NOTE(review): same columns as the (archive, relative_path) index
        # above - confirm both are needed.
        migrations.AddConstraint(
            model_name='archiveitem',
            constraint=models.UniqueConstraint(fields=('archive', 'relative_path'), name='unique_archive_item_path'),
        ),

        # DocumentArchive
        migrations.AddIndex(
            model_name='documentarchive',
            index=models.Index(fields=['user', 'processing_status'], name='django_cfg__user_id_e4fa6e_idx'),
        ),
        migrations.AddIndex(
            model_name='documentarchive',
            index=models.Index(fields=['content_hash'], name='django_cfg__content_22fdac_idx'),
        ),
        migrations.AddIndex(
            model_name='documentarchive',
            index=models.Index(fields=['-processed_at'], name='django_cfg__process_c78bf9_idx'),
        ),
        migrations.AddIndex(
            model_name='documentarchive',
            index=models.Index(fields=['is_public', '-created_at'], name='django_cfg__is_publ_3c6f28_idx'),
        ),
        # The same content hash may be stored only once per user.
        migrations.AddConstraint(
            model_name='documentarchive',
            constraint=models.UniqueConstraint(fields=('user', 'content_hash'), name='unique_user_archive'),
        ),

        # ExternalData
        migrations.AddIndex(
            model_name='externaldata',
            index=models.Index(fields=['user', 'source_type'], name='django_cfg__user_id_d556c1_idx'),
        ),
        migrations.AddIndex(
            model_name='externaldata',
            index=models.Index(fields=['status'], name='django_cfg__status_7b7d15_idx'),
        ),
        migrations.AddIndex(
            model_name='externaldata',
            index=models.Index(fields=['is_active'], name='django_cfg__is_acti_9d3ce2_idx'),
        ),
        migrations.AddIndex(
            model_name='externaldata',
            index=models.Index(fields=['processed_at'], name='django_cfg__process_fa1f31_idx'),
        ),
        migrations.AddIndex(
            model_name='externaldata',
            index=models.Index(fields=['source_identifier'], name='django_cfg__source__e5ed3f_idx'),
        ),
        migrations.AddIndex(
            model_name='externaldata',
            index=models.Index(fields=['content_hash'], name='django_cfg__content_f05aea_idx'),
        ),
        # One record per (user, source_identifier).
        # NOTE(review): `source_identifier` is declared blank=True, so two
        # records of the same user with an empty identifier would collide
        # under this rule - confirm that is intended.
        migrations.AlterUniqueTogether(
            name='externaldata',
            unique_together={('user', 'source_identifier')},
        ),

        # ExternalDataChunk
        migrations.AddIndex(
            model_name='externaldatachunk',
            index=models.Index(fields=['user', 'external_data'], name='django_cfg__user_id_7247ba_idx'),
        ),
        migrations.AddIndex(
            model_name='externaldatachunk',
            index=models.Index(fields=['embedding_model'], name='django_cfg__embeddi_5947c8_idx'),
        ),
        migrations.AddIndex(
            model_name='externaldatachunk',
            index=models.Index(fields=['token_count'], name='django_cfg__token_c_f8fa52_idx'),
        ),
        migrations.AddIndex(
            model_name='externaldatachunk',
            index=models.Index(fields=['chunk_index'], name='django_cfg__chunk_i_8ecf40_idx'),
        ),
        # A chunk index may appear only once per external-data record.
        migrations.AlterUniqueTogether(
            name='externaldatachunk',
            unique_together={('external_data', 'chunk_index')},
        ),
    ]
