2025-12-22 20:40:55 +05:30
|
|
|
## Problems
|
|
|
|
|
|
|
|
|
|
### **Data Split**
|
|
|
|
|
- `MasterSettingsModel` stores: key, question, instructions, category etc....
|
|
|
|
|
- `ChecklistItem` stores: answer (expected value)
|
|
|
|
|
- To get a complete "checklist item", need to JOIN both tables.
|
|
|
|
|
- Also current `MasterSettings` store same keyterm for multiple checklists/playbooks.
|
|
|
|
|
- meaning if a keyterm is changed but for one checklists it will change for others too
|
|
|
|
|
- to prevent such scenarios, funny strategies need to be implemented in code
|
|
|
|
|
- **conclusion** unnecessary complexity is introduced.
|
|
|
|
|
|
|
|
|
|
### **complex queries for simple things**
|
|
|
|
|
|
|
|
|
|
```python
|
|
|
|
|
# Current: Get checklist with all items (3 queries + manual merge)
|
|
|
|
|
checklist = ChecklistMetadata.objects.get(checklist_id=checklist_id)
|
|
|
|
|
checklist_items = ChecklistItem.objects.filter(checklist_id=checklist_id)
|
|
|
|
|
keyterm_ids = [item["keyterm_id_id"] for item in checklist_items]
|
|
|
|
|
keyterms = MasterSettingsModel.objects.filter(RowKey__in=keyterm_ids)
|
|
|
|
|
# Then manually merge...
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### **`is_active` for checklists**
|
|
|
|
|
|
|
|
|
|
- can be null/none - no active checklist found
|
|
|
|
|
- race conditions when switching active checklist
|
|
|
|
|
- scope - active for which team?? which contract type???
|
|
|
|
|
- can delete active checklist accidentally
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
### Goals
|
|
|
|
|
|
|
|
|
|
- **Reduce Joins and Complexity**
|
|
|
|
|
- **Versioning**: Full history for keyterms, checklists, and contracts
|
|
|
|
|
|
|
|
|
|
### Document (File Storage)
|
|
|
|
|
|
|
|
|
|
```python
|
|
|
|
|
class Document():
|
|
|
|
|
class DocumentType(models.TextChoices):
|
|
|
|
|
CONTRACT = "CONTRACT", "Contract"
|
|
|
|
|
CHECKLIST = "CHECKLIST", "Checklist"
|
|
|
|
|
PLAYBOOK = "PLAYBOOK", "Playbook"
|
|
|
|
|
|
|
|
|
|
class FileType(models.TextChoices):
|
|
|
|
|
DOC = "DOC", "Word Document (Legacy)"
|
|
|
|
|
PDF = "PDF", "PDF"
|
|
|
|
|
DOCX = "DOCX", "Word Document"
|
|
|
|
|
CSV = "CSV", "CSV"
|
|
|
|
|
XLSX = "XLSX", "Excel"
|
|
|
|
|
|
|
|
|
|
id = models.UUIDField(db_index=True)
|
|
|
|
|
|
|
|
|
|
organization = models.ForeignKey(
|
|
|
|
|
"Organization",
|
|
|
|
|
on_delete=models.CASCADE,
|
|
|
|
|
related_name="documents"
|
|
|
|
|
)
|
|
|
|
|
user = models.ForeignKey(
|
|
|
|
|
"User",
|
|
|
|
|
on_delete=models.SET_NULL,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
document_type = models.CharField(
|
|
|
|
|
max_length=20,
|
|
|
|
|
choices=DocumentType.choices
|
2025-12-26 22:39:54 +05:30
|
|
|
) name = models.CharField(max_length=500) blob_url = models.URLField(max_length=2000) file_type = models.CharField(max_length=20, choices=FileType.choices)
|
2025-12-22 20:40:55 +05:30
|
|
|
size_bytes = models.BigIntegerField()
|
|
|
|
|
total_pages = models.IntegerField(null=True, blank=True)
|
|
|
|
|
|
|
|
|
|
# versioning ezzz
|
|
|
|
|
version = models.IntegerField(default=1)
|
|
|
|
|
previous_version = models.ForeignKey(
|
|
|
|
|
"self",
|
|
|
|
|
on_delete=models.SET_NULL,
|
|
|
|
|
null=True,
|
|
|
|
|
blank=True,
|
|
|
|
|
related_name="next_versions"
|
|
|
|
|
)
|
|
|
|
|
is_current = models.BooleanField(default=True)
|
|
|
|
|
|
|
|
|
|
def create_new_version(
|
|
|
|
|
self,
|
|
|
|
|
blob_url: str,
|
|
|
|
|
uploaded_by,
|
|
|
|
|
**file_metadata
|
|
|
|
|
) -> "Document":
|
|
|
|
|
self.is_current = False
|
|
|
|
|
self.save(update_fields=["is_current", "updated_at"])
|
|
|
|
|
|
|
|
|
|
return Document.objects.create(
|
|
|
|
|
id=self.id,
|
|
|
|
|
previous_version=self,
|
|
|
|
|
version=self.version + 1,
|
|
|
|
|
is_current=True,
|
|
|
|
|
organization=self.organization,
|
|
|
|
|
uploaded_by=uploaded_by,
|
|
|
|
|
document_type=self.document_type,
|
|
|
|
|
name=file_metadata.get("name", self.name),
|
|
|
|
|
blob_url=blob_url,
|
|
|
|
|
file_type=file_metadata.get("file_type", self.file_type),
|
|
|
|
|
size_bytes=file_metadata.get("size_bytes", self.size_bytes),
|
|
|
|
|
total_pages=file_metadata.get("total_pages"),
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
@classmethod
|
|
|
|
|
def get_current(cls, id: str) -> "Document":
|
|
|
|
|
return cls.objects.get(original_id=original_id, is_current=True)
|
|
|
|
|
|
|
|
|
|
@classmethod
|
|
|
|
|
def get_version_history(cls, original_id: str):
|
|
|
|
|
return cls.objects.filter(original_id=original_id).order_by("version")
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
|
def size_kb(self) -> float:
|
|
|
|
|
return self.size_bytes / 1024
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Checklist
|
|
|
|
|
|
|
|
|
|
```python
|
|
|
|
|
class Checklist():
|
2025-12-26 22:39:54 +05:30
|
|
|
id = models.UUIDField()
|
|
|
|
|
|
|
|
|
|
name = models.CharField(max_length=200)
|
|
|
|
|
description = models.TextField(blank=True)
|
|
|
|
|
|
|
|
|
|
document = models.ForeignKey("Document", on_delete=models.SET_NULL)
|
|
|
|
|
|
2025-12-22 20:40:55 +05:30
|
|
|
organization = models.ForeignKey(
|
|
|
|
|
"Organization",
|
|
|
|
|
on_delete=models.CASCADE,
|
|
|
|
|
)
|
2025-12-26 22:39:54 +05:30
|
|
|
|
2025-12-22 20:40:55 +05:30
|
|
|
created_by = models.ForeignKey(
|
|
|
|
|
"User",
|
|
|
|
|
on_delete=models.SET_NULL,
|
|
|
|
|
null=True,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Keyterm
|
|
|
|
|
|
|
|
|
|
```python
|
|
|
|
|
class Keyterm():
|
|
|
|
|
class KeytermType(models.TextChoices):
|
|
|
|
|
ANALYSIS = "ANALYSIS", "Analysis"
|
|
|
|
|
CHECKLIST = "CHECKLIST", "Checklist"
|
|
|
|
|
METADATA = "METADATA", "Metadata"
|
|
|
|
|
|
|
|
|
|
id = models.UUIDField(db_index=True)
|
2025-12-26 22:39:54 +05:30
|
|
|
|
|
|
|
|
key = models.CharField(max_length=500)
|
|
|
|
|
question = models.TextField()
|
|
|
|
|
instructions = models.TextField(blank=True)
|
2025-12-22 20:40:55 +05:30
|
|
|
|
|
|
|
|
organization = models.ForeignKey(
|
|
|
|
|
"Organization",
|
|
|
|
|
on_delete=models.CASCADE,
|
|
|
|
|
)
|
2025-12-26 22:39:54 +05:30
|
|
|
|
2025-12-22 20:40:55 +05:30
|
|
|
type = models.CharField(
|
|
|
|
|
choices=KeytermType.choices,
|
|
|
|
|
default=KeytermType.ANALYSIS
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
expected_answer = models.TextField(null=True, blank=True)
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Contract
|
|
|
|
|
|
|
|
|
|
```python
|
|
|
|
|
class Contract():
|
|
|
|
|
class AnalysisStatus(models.TextChoices):
|
|
|
|
|
PENDING = "PENDING", "Pending"
|
|
|
|
|
ACCEPTED = "ACCEPTED", "Accepted"
|
|
|
|
|
REJECTED = "REJECTED", "Rejected"
|
|
|
|
|
|
|
|
|
|
class ContractType(models.TextChoices):
|
|
|
|
|
MSA = "MSA", "msa"
|
|
|
|
|
NDA = "NDA", "NDA"
|
|
|
|
|
# .... many more
|
|
|
|
|
|
|
|
|
|
document = models.OneToOneField(
|
|
|
|
|
"Document",
|
|
|
|
|
on_delete=models.CASCADE,
|
|
|
|
|
related_name="contract"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
organization = models.ForeignKey(
|
|
|
|
|
"Organization",
|
|
|
|
|
on_delete=models.CASCADE,
|
|
|
|
|
related_name="contracts"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
contract_type = models.CharField(choices=ContractType)
|
|
|
|
|
|
|
|
|
|
analysis_status = models.CharField(
|
|
|
|
|
choices=AnalysisStatus.choices,
|
|
|
|
|
default=AnalysisStatus.PENDING
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
analyzed_at = models.DateTimeField(null=True, blank=True)
|
|
|
|
|
analyzed_by = models.ForeignKey(
|
|
|
|
|
"User",
|
|
|
|
|
on_delete=models.SET_NULL,
|
|
|
|
|
null=True,
|
|
|
|
|
related_name="analyzed_contracts"
|
|
|
|
|
)
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### **Analysis MODEL**
|
|
|
|
|
- large unstructured textual data
|
|
|
|
|
- Also while fetching might need to fetch multiple rows of data for same contract
|
|
|
|
|
- Hence it makes sense to store this data in the Mongo DB document storage.
|
|
|
|
|
- Easy to store unstructured data because its just JSON
|
|
|
|
|
- Easy to query analysis for any contract
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
### Contract specific data
|
|
|
|
|
|
|
|
|
|
**Problem**: COI has tenant_name, property_name etc. MSA has parties, term_length. different contracts have different fields.
|
|
|
|
|
|
|
|
|
|
**Options**:
|
|
|
|
|
- One table with all nullable fields, but it will be sparse and kinda messy
|
|
|
|
|
- Separate table per type (COI, MSA) -> lmaoo more schema tables than users
|
|
|
|
|
- JSON field -> flexible but no validations + gets messy after short amount of time
|
|
|
|
|
|
|
|
|
|
**SOLUTION**:
|
|
|
|
|
|
|
|
|
|
```python
|
|
|
|
|
class Contract():
|
|
|
|
|
document = models.OneToOneField("Document", ...)
|
|
|
|
|
organization = models.ForeignKey("Organization", ...)
|
|
|
|
|
contract_type = models.CharField(choices=ContractType)
|
|
|
|
|
analysis_status = models.CharField(...)
|
|
|
|
|
...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class COIContract():
|
|
|
|
|
contract = models.OneToOneField(Contract, on_delete=models.CASCADE, related_name="coi_details")
|
|
|
|
|
tenant_name = models.CharField()
|
|
|
|
|
property_name = models.CharField()
|
|
|
|
|
property_unit = models.CharField()
|
|
|
|
|
expiry_date = models.DateField()
|
|
|
|
|
# ..... ezzzzz
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class MSAContract():
|
|
|
|
|
contract = models.OneToOneField(Contract, on_delete=models.CASCADE, related_name="msa_details")
|
|
|
|
|
party_a = models.CharField(max_length=500)
|
|
|
|
|
party_b = models.CharField(max_length=500)
|
|
|
|
|
term_months = models.IntegerField(null=True)
|
|
|
|
|
auto_renewal = models.BooleanField(default=False)
|
|
|
|
|
```
|
|
|
|
|
|
2025-12-26 22:39:54 +05:30
|
|
|
**Benefits**:
|
|
|
|
|
- `Contract` table stays clean
|
|
|
|
|
- type specific data is normalized
|
|
|
|
|
- no changes to existing code when adding new types, adding new contract type = new detail table + new handler
|
|
|
|
|
- lastly, easy to test handlers in isolation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
**Concern**: Separate tables per contract type could lead to "schema explosion"
|
|
|
|
|
BUT....
|
|
|
|
|
- Current system has 10-15 distinct contract **categories**
|
|
|
|
|
- Many categories share similar metadata structures
|
|
|
|
|
- Most contract types might only need generic metadata storage
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
### **Strategy Pattern** for structuring code.
|
2025-12-22 20:40:55 +05:30
|
|
|
|
|
|
|
|
```python
|
|
|
|
|
class IContractHandler():
|
|
|
|
|
def create_metadata(self, contract: Contract, metadata: dict) -> Any: ...
|
|
|
|
|
def get_metadata(self, contract: Contract) -> dict: ...
|
|
|
|
|
|
|
|
|
|
class COIHandler:
|
|
|
|
|
def create_metadata(self, contract: Contract, metadata: dict) -> COIDetails:
|
|
|
|
|
return COIDetails.objects.create(
|
|
|
|
|
contract=contract,
|
|
|
|
|
tenant_name=metadata.get("tenant_name", ""),
|
|
|
|
|
property_name=metadata.get("property_name", ""),
|
|
|
|
|
property_unit=metadata.get("property_unit", ""),
|
|
|
|
|
expiry_date=metadata.get("expiry_date"),
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
def get_metadata(self, contract: Contract) -> dict:
|
|
|
|
|
details = contract.coi_details
|
|
|
|
|
return {
|
|
|
|
|
"tenant_name": details.tenant_name,
|
|
|
|
|
"property_name": details.property_name,
|
|
|
|
|
"expiry_date": details.expiry_date,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# ... similar methods
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
```python
|
|
|
|
|
class ContractService:
|
|
|
|
|
def __init__(self, ...):
|
|
|
|
|
|
|
|
|
|
@transaction.atomic
|
|
|
|
|
def create_contract(self, file: BinaryIO, team_id: str = None, **metadata) -> Contract:
|
|
|
|
|
document = self.document_service.upload(file=file, ...)
|
|
|
|
|
contract_type = self._detect_contract_type(document)
|
|
|
|
|
|
|
|
|
|
contract = Contract.objects.create(
|
|
|
|
|
document=document,
|
|
|
|
|
organization=self.organization,
|
|
|
|
|
contract_type=contract_type,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# implementation of strategy pattern in actual methods
|
|
|
|
|
handler = self.handler.get(contract_type)
|
|
|
|
|
handler.create_details(contract, metadata)
|
|
|
|
|
|
|
|
|
|
return contract
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Dependency Injection
|
|
|
|
|
|
|
|
|
|
```python
|
|
|
|
|
class IDocumentService(Protocol):
|
|
|
|
|
def upload(self, file: BinaryIO, document_type: str) -> Document: ...
|
|
|
|
|
def upload_new_version(self, document_id: str, file: BinaryIO) -> Document: ...
|
|
|
|
|
|
|
|
|
|
class IChecklistService(Protocol):
|
|
|
|
|
def get_active_checklist(self, organization_id: str, contract_type: str, team_id: str = None) -> Checklist: ...
|
|
|
|
|
|
|
|
|
|
class IContractService(Protocol):
|
|
|
|
|
def create_contract(self, file: BinaryIO, team_id: str = None, **metadata) -> Contract: ...
|
|
|
|
|
def replace_document(self, contract_id: str, file: BinaryIO) -> Contract: ...
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
```python
|
|
|
|
|
class DocumentService:
|
|
|
|
|
def __init__(self, organization: Organization, user: User):
|
|
|
|
|
self.organization = organization
|
|
|
|
|
self.user = user
|
|
|
|
|
|
|
|
|
|
def upload(self, file: BinaryIO, document_type: str) -> Document:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
def upload_new_version(self, document_id: str, file: BinaryIO) -> Document:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
class ContractService:
|
|
|
|
|
def __init__(
|
|
|
|
|
self,
|
|
|
|
|
organization: Organization,
|
|
|
|
|
user: User,
|
|
|
|
|
document_service: IDocumentService,
|
|
|
|
|
checklist_service: IChecklistService
|
|
|
|
|
):
|
|
|
|
|
self.organization = organization
|
|
|
|
|
self.user = user
|
|
|
|
|
self.document_service = document_service
|
|
|
|
|
self.checklist_service = checklist_service
|
|
|
|
|
|
|
|
|
|
@transaction.atomic
|
|
|
|
|
def create_contract(self, file: BinaryIO, team_id: str = None, **metadata) -> Contract:
|
|
|
|
|
document = self.document_service.upload(
|
|
|
|
|
file=file,
|
|
|
|
|
document_type=Document.DocumentType.CONTRACT,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
contract_type = self._detect_contract_type(document)
|
|
|
|
|
|
|
|
|
|
checklist = self.checklist_service.get_active_checklist(
|
|
|
|
|
organization_id=str(self.organization.id),
|
|
|
|
|
contract_type=contract_type,
|
|
|
|
|
team_id=team_id
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
return Contract.objects.create(
|
|
|
|
|
document=document,
|
|
|
|
|
organization=self.organization,
|
|
|
|
|
checklist=checklist,
|
|
|
|
|
contract_type=contract_type,
|
|
|
|
|
**metadata
|
|
|
|
|
)
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
```python
|
|
|
|
|
class ServiceFactory:
|
|
|
|
|
def __init__(self, organization: Organization, user: User):
|
|
|
|
|
self.organization = organization
|
|
|
|
|
self.user = user
|
|
|
|
|
self._document_service = None
|
|
|
|
|
self._checklist_service = None
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
|
def document_service(self) -> DocumentService:
|
|
|
|
|
if not self._document_service:
|
|
|
|
|
self._document_service = DocumentService(self.organization, self.user)
|
|
|
|
|
return self._document_service
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
|
def checklist_service(self) -> ChecklistService:
|
|
|
|
|
if not self._checklist_service:
|
|
|
|
|
self._checklist_service = ChecklistService(self.organization)
|
|
|
|
|
return self._checklist_service
|
|
|
|
|
|
|
|
|
|
def contract_service(self) -> ContractService:
|
|
|
|
|
return ContractService(
|
|
|
|
|
organization=self.organization,
|
|
|
|
|
user=self.user,
|
|
|
|
|
document_service=self.document_service,
|
|
|
|
|
checklist_service=self.checklist_service
|
|
|
|
|
)
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### usage in views or anything
|
|
|
|
|
|
|
|
|
|
```python
|
|
|
|
|
@api_view(["POST"])
|
|
|
|
|
@permission_classes([IsAuthenticated])
|
|
|
|
|
def upload_contract(request):
|
|
|
|
|
user = request.user
|
|
|
|
|
organization = user.organization
|
|
|
|
|
|
|
|
|
|
factory = ServiceFactory(organization, user)
|
|
|
|
|
contract_service = factory.contract_service()
|
|
|
|
|
|
|
|
|
|
contract = contract_service.create_contract(
|
|
|
|
|
file=request.FILES["file"],
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
return Response({"contract_id": str(contract.id)})
|
|
|
|
|
```
|
2025-12-26 22:39:54 +05:30
|
|
|
|