WOW
This commit is contained in:
466
01-schema-refactoring.md
Normal file
466
01-schema-refactoring.md
Normal file
@@ -0,0 +1,466 @@
|
||||
## Problems
|
||||
|
||||
### **Data Split**
|
||||
- `MasterSettingsModel` stores: key, question, instructions, category, etc.
|
||||
- `ChecklistItem` stores: answer (expected value)
|
||||
- To get a complete "checklist item", you need to JOIN both tables.
|
||||
- Also, `MasterSettings` currently stores the same keyterm for multiple checklists/playbooks.
|
||||
- This means that if a keyterm is changed for one checklist, it changes for all the others too.
|
||||
- To prevent such scenarios, awkward workarounds need to be implemented in code.
|
||||
- **Conclusion:** unnecessary complexity is introduced.
|
||||
|
||||
### **Ad-hoc versioning**
|
||||
- Versioning means keeping older and newer versions and maintaining a relation between them, so we know what changed and how.
|
||||
- Currently, no such versioning system exists.
|
||||
|
||||
|
||||
### **complex queries for simple things**
|
||||
|
||||
```python
|
||||
# Current: Get checklist with all items (3 queries + manual merge)
checklist = ChecklistMetadata.objects.get(checklist_id=checklist_id)
checklist_items = ChecklistItem.objects.filter(checklist_id=checklist_id)
# filter() yields model instances, which are not subscriptable, so the raw
# foreign-key column is read as an attribute.
keyterm_ids = [item.keyterm_id_id for item in checklist_items]
keyterms = MasterSettingsModel.objects.filter(RowKey__in=keyterm_ids)
# Then manually merge...
|
||||
```
|
||||
|
||||
### **`is_active` for checklists**
|
||||
|
||||
- can be null/none - no active checklist found
|
||||
- race conditions when switching active checklist
|
||||
- Scope is unclear: active for which team? Which contract type?
|
||||
- can delete active checklist accidentally
|
||||
|
||||
---
|
||||
|
||||
### Goals
|
||||
|
||||
- **Reduce Joins and Complexity**
|
||||
- **Versioning**: Full history for keyterms, checklists, and contracts
|
||||
|
||||
### Document (File Storage)
|
||||
|
||||
```python
|
||||
class Document(models.Model):
    """A stored file (contract, checklist or playbook) with linear versioning.

    Each upload is its own row. Every version of the same logical document
    carries the same ``original_id``; ``previous_version`` links a row to the
    one it replaced and ``is_current`` marks the latest row.
    """

    class DocumentType(models.TextChoices):
        CONTRACT = "CONTRACT", "Contract"
        CHECKLIST = "CHECKLIST", "Checklist"
        PLAYBOOK = "PLAYBOOK", "Playbook"

    class FileType(models.TextChoices):
        DOC = "DOC", "Word Document (Legacy)"
        PDF = "PDF", "PDF"
        DOCX = "DOCX", "Word Document"
        CSV = "CSV", "CSV"
        XLSX = "XLSX", "Excel"

    # Shared by all versions of one logical document, so it cannot be the
    # primary key; Django's implicit ``id`` column stays the per-row key.
    # No default: the caller supplies it when creating the first version.
    original_id = models.UUIDField(db_index=True)

    organization = models.ForeignKey(
        "Organization",
        on_delete=models.CASCADE,
        related_name="documents",
    )
    # Uploader of this version. SET_NULL requires a nullable column.
    user = models.ForeignKey(
        "User",
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
    )

    document_type = models.CharField(
        max_length=20,
        choices=DocumentType.choices,
    )
    name = models.CharField(max_length=500)
    blob_url = models.URLField(max_length=2000)
    file_type = models.CharField(max_length=20, choices=FileType.choices)
    size_bytes = models.BigIntegerField()
    total_pages = models.IntegerField(null=True, blank=True)

    # Versioning
    version = models.IntegerField(default=1)
    previous_version = models.ForeignKey(
        "self",
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name="next_versions",
    )
    is_current = models.BooleanField(default=True)

    # Timestamps; create_new_version() lists ``updated_at`` in update_fields
    # so the auto_now value is actually written on a partial save.
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    def create_new_version(
        self,
        blob_url: str,
        uploaded_by,
        **file_metadata,
    ) -> "Document":
        """Retire this row and create the next version of the same document.

        Args:
            blob_url: Storage URL of the newly uploaded file.
            uploaded_by: User recorded as the uploader of the new version.
            **file_metadata: Optional ``name``, ``file_type``, ``size_bytes``
                and ``total_pages``. The first three fall back to this
                version's values; ``total_pages`` falls back to ``None``.

        Returns:
            The newly created row, flagged as the current version.
        """
        # Two separate writes: callers should run this inside a transaction
        # so a failed create cannot leave the document without a current row.
        self.is_current = False
        self.save(update_fields=["is_current", "updated_at"])

        return Document.objects.create(
            original_id=self.original_id,
            previous_version=self,
            version=self.version + 1,
            is_current=True,
            organization=self.organization,
            user=uploaded_by,
            document_type=self.document_type,
            name=file_metadata.get("name", self.name),
            blob_url=blob_url,
            file_type=file_metadata.get("file_type", self.file_type),
            size_bytes=file_metadata.get("size_bytes", self.size_bytes),
            total_pages=file_metadata.get("total_pages"),
        )

    @classmethod
    def get_current(cls, id: str) -> "Document":
        """Return the current version of the document whose shared id is ``id``.

        ``id`` is matched against ``original_id``, the identifier shared by
        all versions, not against the per-row primary key.
        """
        return cls.objects.get(original_id=id, is_current=True)

    @classmethod
    def get_version_history(cls, original_id: str):
        """Return all versions sharing ``original_id``, oldest first."""
        return cls.objects.filter(original_id=original_id).order_by("version")

    @property
    def size_kb(self) -> float:
        """File size in kibibytes (``size_bytes / 1024``)."""
        return self.size_bytes / 1024
|
||||
```
|
||||
|
||||
### Checklist
|
||||
|
||||
```python
|
||||
class Checklist(models.Model):
    """A versioned checklist owned by an organization."""

    # Optional source file this checklist was created from.
    document = models.ForeignKey(
        "Document",
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
    )

    # Identifier shared by all versions of one checklist. Django only allows
    # a field named ``id`` when it is the primary key, and a value shared
    # across rows cannot be one, so it is named ``original_id`` (the name the
    # Document version helpers already filter on).
    original_id = models.UUIDField(db_index=True)

    # Versioning
    version = models.IntegerField(default=1)
    previous_version = models.ForeignKey(
        "self",
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name="next_versions",
    )
    is_current = models.BooleanField(default=True)

    organization = models.ForeignKey(
        "Organization",
        on_delete=models.CASCADE,
    )

    created_by = models.ForeignKey(
        "User",
        on_delete=models.SET_NULL,
        null=True,
    )

    name = models.CharField(max_length=200)
    description = models.TextField(blank=True)
|
||||
```
|
||||
|
||||
### Keyterm
|
||||
|
||||
```python
|
||||
class Keyterm(models.Model):
    """A versioned key term: the question asked plus its expected answer."""

    class KeytermType(models.TextChoices):
        ANALYSIS = "ANALYSIS", "Analysis"
        CHECKLIST = "CHECKLIST", "Checklist"
        METADATA = "METADATA", "Metadata"

    # Identifier shared by all versions of one key term. Django only allows
    # a field named ``id`` when it is the primary key, and a value shared
    # across rows cannot be one, so it is named ``original_id`` (the name the
    # Document version helpers already filter on).
    original_id = models.UUIDField(db_index=True)

    # Versioning
    version = models.IntegerField(default=1)
    previous_version = models.ForeignKey(
        "self",
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name="next_versions",
    )
    is_current = models.BooleanField(default=True)

    organization = models.ForeignKey(
        "Organization",
        on_delete=models.CASCADE,
    )

    key = models.CharField(max_length=500)
    question = models.TextField()
    instructions = models.TextField(blank=True)

    # CharField needs an explicit max_length; 20 matches the width used for
    # Document.document_type and fits every KeytermType value.
    type = models.CharField(
        max_length=20,
        choices=KeytermType.choices,
        default=KeytermType.ANALYSIS,
    )

    expected_answer = models.TextField(null=True, blank=True)

    is_active = models.BooleanField(default=True)
|
||||
```
|
||||
|
||||
### Contract
|
||||
|
||||
```python
|
||||
class Contract(models.Model):
    """Contract-level data attached one-to-one to an uploaded Document."""

    class AnalysisStatus(models.TextChoices):
        PENDING = "PENDING", "Pending"
        ACCEPTED = "ACCEPTED", "Accepted"
        REJECTED = "REJECTED", "Rejected"

    class ContractType(models.TextChoices):
        MSA = "MSA", "MSA"
        NDA = "NDA", "NDA"
        # .... many more

    document = models.OneToOneField(
        "Document",
        on_delete=models.CASCADE,
        related_name="contract",
    )

    organization = models.ForeignKey(
        "Organization",
        on_delete=models.CASCADE,
        related_name="contracts",
    )

    # ``.choices`` is used for consistency with every other choices field in
    # this document; max_length mirrors Document.document_type.
    contract_type = models.CharField(
        max_length=20,
        choices=ContractType.choices,
    )

    analysis_status = models.CharField(
        max_length=20,
        choices=AnalysisStatus.choices,
        default=AnalysisStatus.PENDING,
    )

    analyzed_at = models.DateTimeField(null=True, blank=True)
    analyzed_by = models.ForeignKey(
        "User",
        on_delete=models.SET_NULL,
        null=True,
        related_name="analyzed_contracts",
    )

    def get_version_history(self):
        """Return every version of the underlying document, oldest first."""
        return Document.get_version_history(self.document.original_id)
|
||||
```
|
||||
|
||||
### **Analysis MODEL**
|
||||
- large unstructured textual data
|
||||
- Also, fetching may require reading multiple rows of data for the same contract
|
||||
- Hence it makes sense to store this data in MongoDB document storage.
|
||||
- Easy to store unstructured data because it's just JSON
|
||||
- Easy to query analysis for any contract
|
||||
|
||||
|
||||
### Contract specific data
|
||||
|
||||
**Problem**: COI has tenant_name, property_name, etc. MSA has parties, term_length. Different contract types have different fields.
|
||||
|
||||
**Options**:
|
||||
- One table with all nullable fields, but it will be sparse and messy
|
||||
- Separate table per type (COI, MSA) -> the number of tables grows with every contract type (potentially more tables than users)
|
||||
- JSON field -> flexible but no validations + gets messy after a short amount of time
|
||||
|
||||
**SOLUTION**:
|
||||
|
||||
```python
|
||||
class Contract(models.Model):
    """Abbreviated view of the shared Contract model (common fields only)."""

    document = models.OneToOneField("Document", ...)
    organization = models.ForeignKey("Organization", ...)
    # ``.choices`` matches how the other choices fields here are declared.
    contract_type = models.CharField(choices=ContractType.choices)
    analysis_status = models.CharField(...)
    ...
|
||||
|
||||
|
||||
class COIContract(models.Model):
    """COI-specific fields, stored one-to-one next to the shared Contract."""

    contract = models.OneToOneField(
        Contract,
        on_delete=models.CASCADE,
        related_name="coi_details",
    )
    # CharField needs an explicit max_length; 500 matches the party fields
    # on MSAContract.
    tenant_name = models.CharField(max_length=500)
    property_name = models.CharField(max_length=500)
    property_unit = models.CharField(max_length=500)
    # Nullable because COIHandler.create_metadata passes
    # metadata.get("expiry_date"), which is None when the key is absent.
    expiry_date = models.DateField(null=True, blank=True)
    # ... further COI-specific fields
|
||||
|
||||
|
||||
class MSAContract(models.Model):
    """MSA-specific fields, stored one-to-one next to the shared Contract."""

    contract = models.OneToOneField(
        Contract,
        on_delete=models.CASCADE,
        related_name="msa_details",
    )
    party_a = models.CharField(max_length=500)
    party_b = models.CharField(max_length=500)
    term_months = models.IntegerField(null=True)
    auto_renewal = models.BooleanField(default=False)
|
||||
```
|
||||
|
||||
### **Strategy Pattern** for type specific logic
|
||||
|
||||
```python
|
||||
class IContractHandler(Protocol):
    """Structural interface for per-contract-type strategy objects.

    Declared as a Protocol, like the service interfaces in this document, so
    handlers satisfy it by shape without having to inherit from it.
    """

    def create_metadata(self, contract: Contract, metadata: dict) -> Any:
        """Persist the type-specific details of ``contract`` from ``metadata``."""
        ...

    def get_metadata(self, contract: Contract) -> dict:
        """Return the type-specific details of ``contract`` as a dict."""
        ...
|
||||
|
||||
class COIHandler:
    """Strategy for COI contracts: reads and writes the COIContract row."""

    def create_metadata(self, contract: Contract, metadata: dict) -> COIContract:
        """Create the COI detail row for ``contract``.

        Missing text fields default to an empty string; a missing
        ``expiry_date`` is passed through as ``None``.
        """
        return COIContract.objects.create(
            contract=contract,
            tenant_name=metadata.get("tenant_name", ""),
            property_name=metadata.get("property_name", ""),
            property_unit=metadata.get("property_unit", ""),
            expiry_date=metadata.get("expiry_date"),
        )

    def get_metadata(self, contract: Contract) -> dict:
        """Return the stored COI fields of ``contract`` as a plain dict."""
        # ``coi_details`` is the related_name declared on COIContract.contract.
        details = contract.coi_details
        return {
            "tenant_name": details.tenant_name,
            "property_name": details.property_name,
            # Returned as well so that every field written by
            # create_metadata can be read back.
            "property_unit": details.property_unit,
            "expiry_date": details.expiry_date,
        }
|
||||
|
||||
# ... similar methods
|
||||
```
|
||||
|
||||
```python
|
||||
class ContractService:
    """Creates contracts and delegates type-specific data to a handler."""

    def __init__(self, organization, document_service, handler):
        """Store the collaborators used by create_contract.

        Args:
            organization: Organization that owns the created contracts.
            document_service: Object providing ``upload(file=..., document_type=...)``.
            handler: Mapping from contract type to the handler for that type.
        """
        self.organization = organization
        self.document_service = document_service
        self.handler = handler

    @transaction.atomic
    def create_contract(self, file: BinaryIO, team_id: str = None, **metadata) -> Contract:
        """Upload ``file``, create its Contract and store type-specific details.

        Args:
            file: Uploaded contract file.
            team_id: Accepted for interface compatibility; unused here.
            **metadata: Type-specific values forwarded to the handler.

        Returns:
            The newly created Contract.

        Raises:
            ValueError: If no handler is registered for the detected type.
        """
        document = self.document_service.upload(
            file=file,
            document_type=Document.DocumentType.CONTRACT,
        )
        contract_type = self._detect_contract_type(document)

        contract = Contract.objects.create(
            document=document,
            organization=self.organization,
            contract_type=contract_type,
        )

        # Strategy pattern: the handler registered for this contract type
        # persists the type-specific details.
        handler = self.handler.get(contract_type)
        if handler is None:
            # Raising inside the atomic block rolls back the Contract row.
            raise ValueError(
                f"No handler registered for contract type {contract_type!r}"
            )
        handler.create_metadata(contract, metadata)

        return contract
|
||||
```
|
||||
|
||||
**Benefits**:
|
||||
- `Contract` table stays clean
|
||||
- type specific data is normalized
|
||||
- No changes to existing code when adding new types: a new contract type = new detail table + new handler
|
||||
- lastly, easy to test handlers in isolation
|
||||
|
||||
---
|
||||
|
||||
### Dependency Injection
|
||||
|
||||
```python
|
||||
class IDocumentService(Protocol):
    """Structural interface for storing files as Document rows."""

    def upload(self, file: BinaryIO, document_type: str) -> Document:
        """Store ``file`` as a new document of the given ``document_type``."""

    def upload_new_version(self, document_id: str, file: BinaryIO) -> Document:
        """Store ``file`` as a new version of the document ``document_id``."""
|
||||
|
||||
class IChecklistService(Protocol):
    """Structural interface for looking up checklists."""

    def get_active_checklist(
        self,
        organization_id: str,
        contract_type: str,
        team_id: str = None,
    ) -> Checklist:
        """Return the active checklist for the organization and contract type.

        ``team_id`` is optional and defaults to ``None``.
        """
|
||||
|
||||
class IContractService(Protocol):
    """Structural interface for creating contracts and replacing their files."""

    def create_contract(
        self,
        file: BinaryIO,
        team_id: str = None,
        **metadata,
    ) -> Contract:
        """Create a contract from ``file``; extra keywords are metadata."""

    def replace_document(self, contract_id: str, file: BinaryIO) -> Contract:
        """Replace the file behind the contract ``contract_id`` with ``file``."""
|
||||
```
|
||||
|
||||
```python
|
||||
class DocumentService:
    """Document operations bound to one organization and one acting user."""

    def __init__(self, organization: Organization, user: User):
        """Remember the organization and user this service acts for."""
        self.user = user
        self.organization = organization

    def upload(self, file: BinaryIO, document_type: str) -> Document:
        """Placeholder: not implemented in this sketch (returns ``None``)."""

    def upload_new_version(self, document_id: str, file: BinaryIO) -> Document:
        """Placeholder: not implemented in this sketch (returns ``None``)."""
|
||||
|
||||
class ContractService:
    """Contract creation with its collaborators injected via the constructor."""

    def __init__(
        self,
        organization: Organization,
        user: User,
        document_service: IDocumentService,
        checklist_service: IChecklistService,
    ):
        """Store the owning organization, acting user and injected services."""
        self.checklist_service = checklist_service
        self.document_service = document_service
        self.user = user
        self.organization = organization

    @transaction.atomic
    def create_contract(self, file: BinaryIO, team_id: str = None, **metadata) -> Contract:
        """Upload ``file`` and create a Contract linked to the active checklist.

        The file is uploaded as a CONTRACT document, its contract type is
        detected, the active checklist for that type is looked up, and the
        Contract row is created with ``metadata`` passed through as extra
        field values.
        """
        uploaded = self.document_service.upload(
            file=file,
            document_type=Document.DocumentType.CONTRACT,
        )
        detected_type = self._detect_contract_type(uploaded)

        owner_id = str(self.organization.id)
        active_checklist = self.checklist_service.get_active_checklist(
            organization_id=owner_id,
            contract_type=detected_type,
            team_id=team_id,
        )

        # NOTE(review): the Contract model sketched in this document declares
        # no ``checklist`` field; confirm it exists before relying on this.
        return Contract.objects.create(
            document=uploaded,
            organization=self.organization,
            checklist=active_checklist,
            contract_type=detected_type,
            **metadata,
        )
|
||||
```
|
||||
|
||||
```python
|
||||
class ServiceFactory:
    """Builds services for one organization/user pair.

    The document and checklist services are created on first access and
    reused afterwards; a new ContractService is built on every call.
    """

    def __init__(self, organization: Organization, user: User):
        """Remember the context and start with no services created."""
        self.organization = organization
        self.user = user
        self._document_service = None
        self._checklist_service = None

    @property
    def document_service(self) -> DocumentService:
        """The shared DocumentService, created on first access."""
        # ``is None`` rather than truthiness: a service object that happens
        # to be falsy must not be rebuilt on every access.
        if self._document_service is None:
            self._document_service = DocumentService(self.organization, self.user)
        return self._document_service

    @property
    def checklist_service(self) -> ChecklistService:
        """The shared ChecklistService, created on first access."""
        if self._checklist_service is None:
            self._checklist_service = ChecklistService(self.organization)
        return self._checklist_service

    def contract_service(self) -> ContractService:
        """Return a new ContractService wired with the shared services."""
        return ContractService(
            organization=self.organization,
            user=self.user,
            document_service=self.document_service,
            checklist_service=self.checklist_service,
        )
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Usage in views (or any other caller)
|
||||
|
||||
```python
|
||||
@api_view(["POST"])
@permission_classes([IsAuthenticated])
def upload_contract(request):
    """Create a contract from the uploaded ``file`` and return its id.

    Responds with HTTP 400 when the request carries no ``file`` part,
    instead of failing with an unhandled KeyError.
    """
    uploaded_file = request.FILES.get("file")
    if uploaded_file is None:
        return Response(
            {"error": "No file was uploaded under the 'file' field."},
            status=400,
        )

    user = request.user
    organization = user.organization

    factory = ServiceFactory(organization, user)
    contract_service = factory.contract_service()

    contract = contract_service.create_contract(
        file=uploaded_file,
    )

    return Response({"contract_id": str(contract.id)})
|
||||
```
|
||||
Reference in New Issue
Block a user