mirror of
https://github.com/hoshikawa2/oci_vision_invoice.git
synced 2026-03-03 16:09:39 +00:00
First Commit
This commit is contained in:
12
.idea/.gitignore
generated
vendored
Normal file
12
.idea/.gitignore
generated
vendored
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# Default ignored files
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
||||||
|
# Editor-based HTTP Client requests
|
||||||
|
/httpRequests/
|
||||||
|
# Environment-dependent path to Maven home directory
|
||||||
|
/mavenHomeManager.xml
|
||||||
|
# Datasource local storage ignored files
|
||||||
|
/dataSources/
|
||||||
|
/dataSources.local.xml
|
||||||
|
# Zeppelin ignored files
|
||||||
|
/ZeppelinRemoteNotebooks/
|
||||||
6
.idea/misc.xml
generated
Normal file
6
.idea/misc.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectRootManager" version="2" languageLevel="JDK_23" default="true" project-jdk-name="23" project-jdk-type="JavaSDK">
|
||||||
|
<output url="file://$PROJECT_DIR$/out" />
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
8
.idea/modules.xml
generated
Normal file
8
.idea/modules.xml
generated
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/oci_vision_invoice.iml" filepath="$PROJECT_DIR$/.idea/oci_vision_invoice.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
9
.idea/oci_vision_invoice.iml
generated
Normal file
9
.idea/oci_vision_invoice.iml
generated
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="JAVA_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager" inherit-compiler-output="true">
|
||||||
|
<exclude-output />
|
||||||
|
<content url="file://$MODULE_DIR$" />
|
||||||
|
<orderEntry type="inheritedJdk" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
</module>
|
||||||
6
.idea/vcs.xml
generated
Normal file
6
.idea/vcs.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
24
.oca/custom_code_review_guidelines.txt
Normal file
24
.oca/custom_code_review_guidelines.txt
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
# Sample guideline, please follow similar structure for guideline with code samples
|
||||||
|
# 1. Suggest using streams instead of simple loops for better readability.
|
||||||
|
# <example>
|
||||||
|
# *Comment:
|
||||||
|
# Category: Minor
|
||||||
|
# Issue: Use streams instead of a loop for better readability.
|
||||||
|
# Code Block:
|
||||||
|
#
|
||||||
|
# ```java
|
||||||
|
# // Calculate squares of numbers
|
||||||
|
# List<Integer> squares = new ArrayList<>();
|
||||||
|
# for (int number : numbers) {
|
||||||
|
# squares.add(number * number);
|
||||||
|
# }
|
||||||
|
# ```
|
||||||
|
# Recommendation:
|
||||||
|
#
|
||||||
|
# ```java
|
||||||
|
# // Calculate squares of numbers
|
||||||
|
# List<Integer> squares = Arrays.stream(numbers)
|
||||||
|
# .map(n -> n * n) // Map each number to its square
|
||||||
|
# .toList();
|
||||||
|
# ```
|
||||||
|
# </example>
|
||||||
221
README.md
Normal file
221
README.md
Normal file
@@ -0,0 +1,221 @@
|
|||||||
|
# 📄 Automatic Invoice Processing with OCI Vision and OCI Generative AI
|
||||||
|
|
||||||
|
## 🧠 Objective
|
||||||
|
|
||||||
|
This tutorial demonstrates how to implement an automated pipeline that monitors a bucket in Oracle Cloud Infrastructure (OCI) for incoming invoice images, extracts textual content using **OCI Vision**, and then applies **OCI Generative AI** (LLM) to extract structured fiscal data like invoice number, customer, and item list.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🚀 Use Cases
|
||||||
|
|
||||||
|
- Automating invoice ingestion from Object Storage.
|
||||||
|
- Extracting structured data from semi-structured scanned documents.
|
||||||
|
- Integrating OCR and LLM in real-time pipelines using OCI AI services.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🧱 Oracle Cloud Services Used
|
||||||
|
|
||||||
|
| Service | Purpose |
|
||||||
|
|----------------------------|-------------------------------------------------------------------------|
|
||||||
|
| **OCI Vision** | Performs OCR (Optical Character Recognition) on uploaded invoice images.|
|
||||||
|
| **OCI Generative AI** | Extracts structured JSON data from raw OCR text using few-shot prompts. |
|
||||||
|
| **Object Storage** | Stores input invoice images and output JSON results. |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## ⚙️ Prerequisites
|
||||||
|
|
||||||
|
1. An OCI account with access to:
|
||||||
|
- Vision AI
|
||||||
|
- Generative AI
|
||||||
|
- Object Storage
|
||||||
|
2. Python 3.10 or later.
|
||||||
|
3. A bucket for input images (e.g., `input-bucket`) and another for output files (e.g., `output-bucket`).
|
||||||
|
4. A [config](./files/config) with:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"oci_profile": "DEFAULT",
|
||||||
|
"namespace": "your_namespace",
|
||||||
|
"input_bucket": "input-bucket",
|
||||||
|
"output_bucket": "output-bucket",
|
||||||
|
"compartment_id": "ocid1.compartment.oc1..xxxx",
|
||||||
|
"llm_endpoint": "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🛠️ How to Run
|
||||||
|
|
||||||
|
1. Install the dependencies listed in [requirements.txt](./files/requirements.txt) with:
|
||||||
|
|
||||||
|
|
||||||
|
pip install -r requirements.txt
|
||||||
|
|
||||||
|
2. Run the Python script [main.py](./files/main.py).
|
||||||
|
3. Upload invoice images (e.g., `.png`, `.jpg`) to your input bucket.
|
||||||
|
4. Wait for the image to be processed and the extracted JSON saved in the output bucket.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🧩 Code Walkthrough
|
||||||
|
|
||||||
|
### 1. Load Configuration
|
||||||
|
|
||||||
|
```python
|
||||||
|
with open("./config", "r") as f:
|
||||||
|
config_data = json.load(f)
|
||||||
|
```
|
||||||
|
|
||||||
|
> Loads all required configuration values such as namespace, bucket names, compartment ID, and LLM endpoint.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 2. Initialize OCI Clients
|
||||||
|
|
||||||
|
```python
|
||||||
|
oci_config = oci.config.from_file("~/.oci/config", PROFILE)
|
||||||
|
object_storage = oci.object_storage.ObjectStorageClient(oci_config)
|
||||||
|
ai_vision_client = oci.ai_vision.AIServiceVisionClient(oci_config)
|
||||||
|
```
|
||||||
|
|
||||||
|
> Sets up the OCI SDK clients to access Object Storage and AI Vision services.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 3. Initialize LLM
|
||||||
|
|
||||||
|
```python
|
||||||
|
llm = ChatOCIGenAI(
|
||||||
|
model_id="meta.llama-3.1-405b-instruct",
|
||||||
|
service_endpoint=LLM_ENDPOINT,
|
||||||
|
compartment_id=COMPARTMENT_ID,
|
||||||
|
auth_profile=PROFILE,
|
||||||
|
model_kwargs={"temperature": 0.7, "top_p": 0.75, "max_tokens": 2000},
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
> Initializes the OCI Generative AI model for natural language understanding and text-to-structure conversion.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 4. Few-shot Prompt
|
||||||
|
|
||||||
|
```python
|
||||||
|
few_shot_examples = [ ... ]
|
||||||
|
instruction = """
|
||||||
|
You are a fiscal data extractor.
|
||||||
|
...
|
||||||
|
"""
|
||||||
|
```
|
||||||
|
|
||||||
|
> Uses few-shot prompting: the prompt includes one worked example of the expected output so the model learns how to extract structured fields such as `nf` (invoice number), `customer`, `location`, and `items`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 5. OCR with OCI Vision
|
||||||
|
|
||||||
|
```python
|
||||||
|
def perform_ocr(file_name):
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
> This function:
|
||||||
|
> - Sends the image to OCI Vision.
|
||||||
|
> - Requests text detection.
|
||||||
|
> - Returns the analysis result (`response.data`), which is later embedded in the LLM prompt.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 6. Data Extraction with LLM
|
||||||
|
|
||||||
|
```python
|
||||||
|
def extract_data_with_llm(ocr_text, file_name):
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
> This function:
|
||||||
|
> - Combines instructions + few-shot example + OCR text.
|
||||||
|
> - Sends it to OCI Generative AI.
|
||||||
|
> - Receives structured JSON fields (as string).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 7. Save Output to Object Storage
|
||||||
|
|
||||||
|
```python
|
||||||
|
def save_output(result, file_name):
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
> Uploads the structured result into the output bucket using the original filename (with `.json` extension).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 8. Main Loop: Monitor and Process
|
||||||
|
|
||||||
|
```python
|
||||||
|
def monitor_bucket():
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
> Main routine that:
|
||||||
|
> - Monitors the input bucket every 30 seconds.
|
||||||
|
> - Detects new `.png`, `.jpg`, `.jpeg` files.
|
||||||
|
> - Runs OCR + LLM + Upload in sequence.
|
||||||
|
> - Keeps track of already processed files in memory.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 9. Entry Point
|
||||||
|
|
||||||
|
```python
|
||||||
|
if __name__ == "__main__":
|
||||||
|
monitor_bucket()
|
||||||
|
```
|
||||||
|
|
||||||
|
> Starts the bucket watcher and begins processing invoices automatically.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## ✅ Expected Output
|
||||||
|
|
||||||
|
For each uploaded invoice image:
|
||||||
|
- A corresponding `.json` file is generated with structured content like:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"file": "nota123.png",
|
||||||
|
"result": "{ \"nf\": \"NF102030\", \"customer\": \"Comercial ABC Ltda\", ... }",
|
||||||
|
"timestamp": "2025-07-21T12:34:56.789012"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🧪 Testing Suggestions
|
||||||
|
|
||||||
|
- Use real or dummy invoices with legible product lines and issuer (*emitente*) details.
|
||||||
|
- Upload multiple images in sequence to see automated processing.
|
||||||
|
- Log into OCI Console > Object Storage to verify results in both buckets.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📌 Notes
|
||||||
|
|
||||||
|
- OCI Vision supports Portuguese OCR (`language="POR"` can be used instead of `"ENG"`).
|
||||||
|
- LLM prompt can be adjusted to extract other fields like `CNPJ`, `quantidade`, `data de emissão`, etc.
|
||||||
|
- Consider persisting `processed_files` with a database or file to make the process fault-tolerant.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📚 References
|
||||||
|
|
||||||
|
- [OCI Vision Documentation](https://docs.oracle.com/en-us/iaas/vision/)
|
||||||
|
- [OCI Generative AI Documentation](https://docs.oracle.com/en-us/iaas/generative-ai/)
|
||||||
|
- [LangChain OCI Integration](https://python.langchain.com/docs/integrations/chat/oci_gen_ai/)
|
||||||
|
|
||||||
|
## Acknowledgments
|
||||||
|
|
||||||
|
- **Author** - Cristiano Hoshikawa (Oracle LAD A-Team Solution Engineer)
|
||||||
8
files/config
Normal file
8
files/config
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"oci_profile": "DEFAULT",
|
||||||
|
"compartment_id": "<YOUR COMPARTMENT OCID>",
|
||||||
|
"namespace": "<YOUR OBJECT STORAGE NAMESPACE>",
|
||||||
|
"input_bucket": "<YOUR INVOICES IMAGES BUCKET NAME>",
|
||||||
|
"output_bucket": "<YOUR OUTPUT JSON FILES BUCKET NAME>",
|
||||||
|
"llm_endpoint": "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com"
|
||||||
|
}
|
||||||
150
files/main.py
Normal file
150
files/main.py
Normal file
@@ -0,0 +1,150 @@
|
|||||||
|
import time
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
import oci
|
||||||
|
from langchain_community.chat_models.oci_generative_ai import ChatOCIGenAI
|
||||||
|
from langchain.schema import HumanMessage
|
||||||
|
|
||||||
|
# ====================
# 1. Load Configuration
# ====================
# The settings file is JSON and is looked up relative to the current working
# directory, so the script must be started from the folder that contains it.
with open("./config", "r") as f:
    config_data = json.loads(f.read())

# Bind every required setting to its module-level constant in one step.
# A missing key raises KeyError at import time.
(
    NAMESPACE,
    INPUT_BUCKET,
    OUTPUT_BUCKET,
    PROFILE,
    COMPARTMENT_ID,
    LLM_ENDPOINT,
) = (
    config_data[setting]
    for setting in (
        "namespace",
        "input_bucket",
        "output_bucket",
        "oci_profile",
        "compartment_id",
        "llm_endpoint",
    )
)
|
||||||
|
|
||||||
|
# ====================
# 2. Initialize OCI Clients
# ====================
# Both service clients share the same SDK configuration, read from the
# profile named in the settings file.
oci_config = oci.config.from_file(
    file_location="~/.oci/config",
    profile_name=PROFILE,
)
object_storage = oci.object_storage.ObjectStorageClient(config=oci_config)
ai_vision_client = oci.ai_vision.AIServiceVisionClient(config=oci_config)
|
||||||
|
|
||||||
|
# ====================
# 3. Initialize LLM
# ====================
# Chat model used by extract_data_with_llm to turn OCR output into JSON fields.
llm = ChatOCIGenAI(
    auth_profile=PROFILE,
    compartment_id=COMPARTMENT_ID,
    service_endpoint=LLM_ENDPOINT,
    model_id="meta.llama-3.1-405b-instruct",
    model_kwargs=dict(temperature=0.7, top_p=0.75, max_tokens=2000),
)
|
||||||
|
|
||||||
|
# ====================
# 4. Few-shot Prompt Base
# ====================
# Worked examples that extract_data_with_llm joins with newlines and places
# between the instruction text and the OCR output of the invoice being processed.
# Each example pairs a sample "Invoice text" with the JSON the model should emit.
# NOTE(review): the example maps "customer" to the EMITENTE (issuer) name rather
# than the DESTINATÁRIO (recipient) — confirm this is the intended meaning.
few_shot_examples = [
"""
Invoice text:
"EMITENTE": "Comercial ABC Ltda - Rua A, 123 - Belo Horizonte - MG"
"NF": "NF102030"
"DESTINATÁRIO": "Distribuidora XYZ - São Paulo - SP"
"DESCRIÇÃO DO PRODUTO":
"Cabo HDMI 2.0 2m, preto" | PRICE: 39.90
"Teclado Mecânico RGB ABNT2" | PRICE: 199.99
"Mouse Gamer 3200DPI" | PRICE: 89.50

Extracted fields (JSON format):
{
"nf": "NF102030",
"customer": "Comercial ABC Ltda",
"location": "MG",
"items": [
{"description": "Cabo HDMI 2.0 2m, preto", "price": 39.90},
{"description": "Teclado Mecânico RGB ABNT2", "price": 199.99},
{"description": "Mouse Gamer 3200DPI", "price": 89.50}
]
}
"""
]

# Task description placed at the very start of every prompt; it names the four
# output fields ('nf', 'customer', 'location', 'items') the model must produce.
instruction = """
You are a fiscal data extractor.

Your goal is to:
- Extract the invoice number (field 'nf')
- Extract the customer name (field 'customer')
- Extract the state (field 'location') — ⚠️ use **only** the state of the EMITTER company, based on its name and address.
- Extract the list of products and prices (field 'items')
"""
|
||||||
|
|
||||||
|
# ====================
# 5. Bucket Monitoring and Processing
# ====================
# Names of input objects that have already been processed successfully.
# The set lives only in memory, so it starts empty on every run.
processed_files: set[str] = set()
|
||||||
|
|
||||||
|
def perform_ocr(file_name, language="ENG"):
    """Run OCI Vision text detection on an image stored in the input bucket.

    Args:
        file_name: Object name of the image inside ``INPUT_BUCKET``.
        language: Language code forwarded to OCI Vision. Defaults to
            ``"ENG"``, the value that used to be hard-coded here.

    Returns:
        The ``data`` attribute of the ``analyze_document`` response, unchanged.
    """
    print(f"📄 Performing OCR on: {file_name}")

    # Use the SDK model that actually represents TEXT_DETECTION instead of
    # building a table-detection feature and overriding its feature type.
    text_detection = oci.ai_vision.models.DocumentTextDetectionFeature()

    # The image is read by the service directly from Object Storage; nothing
    # is downloaded locally.
    document = oci.ai_vision.models.ObjectStorageDocumentDetails(
        source="OBJECT_STORAGE",
        namespace_name=NAMESPACE,
        bucket_name=INPUT_BUCKET,
        object_name=file_name,
    )

    response = ai_vision_client.analyze_document(
        analyze_document_details=oci.ai_vision.models.AnalyzeDocumentDetails(
            features=[text_detection],
            document=document,
            compartment_id=COMPARTMENT_ID,
            language=language,
            document_type="INVOICE",
        )
    )

    print(response.data)

    return response.data
|
||||||
|
|
||||||
|
def extract_data_with_llm(ocr_text, file_name):
    """Ask the LLM to extract structured invoice fields from OCR output.

    The prompt is the module-level ``instruction``, followed by the few-shot
    examples, followed by ``ocr_text`` and a trailing cue for the JSON answer.

    Args:
        ocr_text: OCR output for the invoice; it is interpolated into the
            prompt with ``str()`` formatting.
        file_name: Name of the source object, echoed back in the result.

    Returns:
        A dict with the keys ``"file"``, ``"result"`` (the model's reply
        content, not parsed) and ``"timestamp"`` (naive UTC, ISO 8601).
    """
    # Imported locally so this function does not depend on the file-level
    # import list being extended.
    from datetime import timezone

    prompt = (
        instruction
        + "\n"
        + "\n".join(few_shot_examples)
        + f"\nInvoice text:\n{ocr_text}\nExtracted fields (JSON format):"
    )

    # invoke() is the supported entry point for chat models; calling the model
    # object directly is deprecated in current LangChain releases.
    response = llm.invoke([HumanMessage(content=prompt)])

    print(response.content)

    # datetime.utcnow() is deprecated since Python 3.12. Dropping tzinfo keeps
    # the exact timestamp format this function has always produced.
    utc_now = datetime.now(timezone.utc).replace(tzinfo=None)

    return {
        "file": file_name,
        "result": response.content,
        "timestamp": utc_now.isoformat(),
    }
|
||||||
|
|
||||||
|
def save_output(result, file_name):
    """Upload ``result`` to the output bucket as a UTF-8 encoded JSON object.

    The object is named after the stem of ``file_name`` with a ``.json``
    suffix. Non-ASCII characters are written as-is rather than escaped.
    """
    output_name = f"{Path(file_name).stem}.json"
    payload = json.dumps(result, ensure_ascii=False).encode("utf-8")

    object_storage.put_object(
        namespace_name=NAMESPACE,
        bucket_name=OUTPUT_BUCKET,
        object_name=output_name,
        put_object_body=payload,
    )
    print(f"✅ Result saved as {output_name} in the output bucket.")
|
||||||
|
|
||||||
|
def _list_input_objects():
    """Return every object summary in the input bucket, following pagination."""
    summaries = []
    next_start = None
    while True:
        # Only pass `start` once the service has returned a continuation marker.
        paging = {"start": next_start} if next_start else {}
        page = object_storage.list_objects(
            namespace_name=NAMESPACE,
            bucket_name=INPUT_BUCKET,
            **paging,
        ).data
        summaries.extend(page.objects)
        next_start = page.next_start_with
        if not next_start:
            return summaries


def monitor_bucket():
    """Poll the input bucket forever and process each new invoice image.

    Every 30 seconds the whole bucket is listed (all pages). Each object whose
    name ends in ``.png``, ``.jpg`` or ``.jpeg`` (case-insensitive) and that is
    not yet in ``processed_files`` goes through OCR, LLM extraction and upload.
    A file is marked as processed only after all three steps succeed, so a
    failed file is retried on the next pass. This function never returns.
    """
    image_suffixes = (".png", ".jpg", ".jpeg")

    print("📡 Monitoring input bucket...")
    while True:
        for obj in _list_input_objects():
            file_name = obj.name
            # Compare in lower case so names such as "SCAN.JPG" are picked up too.
            is_image = file_name.lower().endswith(image_suffixes)
            if not is_image or file_name in processed_files:
                continue

            try:
                ocr_text = perform_ocr(file_name)
                result = extract_data_with_llm(ocr_text, file_name)
                save_output(result, file_name)
                processed_files.add(file_name)
            except Exception as e:
                # Best effort: report the failure and carry on with the next file.
                print(f"❌ Error processing {file_name}: {e}")

        time.sleep(30)  # Wait 30 seconds before checking again
|
||||||
|
|
||||||
|
# Script entry point: start the polling loop only when this file is run
# directly, not when it is imported as a module.
if __name__ == "__main__":
    monitor_bucket()
|
||||||
13
files/requirements.txt
Normal file
13
files/requirements.txt
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
langchain==0.3.23
|
||||||
|
langchain_community~=0.3.12
|
||||||
|
langchain_cohere
|
||||||
|
oci-cli~=3.58.0
|
||||||
|
langchain-core~=0.3.56
|
||||||
|
langchain-text-splitters~=0.3.8
|
||||||
|
ollama
|
||||||
|
llama_index
|
||||||
|
langgraph==0.3.25
|
||||||
|
requests==2.32.3
|
||||||
|
oci~=2.154.0
|
||||||
|
setuptools~=79.0.1
|
||||||
|
tqdm~=4.67.1
|
||||||
Reference in New Issue
Block a user