diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..bb84ef7 Binary files /dev/null and b/.DS_Store differ diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..7bc07ec --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,10 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Environment-dependent path to Maven home directory +/mavenHomeManager.xml +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..89ee753 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..ad755dc --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/qlora_training.iml b/.idea/qlora_training.iml new file mode 100644 index 0000000..d6ebd48 --- /dev/null +++ b/.idea/qlora_training.iml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.oca/custom_code_review_guidelines.txt b/.oca/custom_code_review_guidelines.txt new file mode 100644 index 0000000..a0a3b63 --- /dev/null +++ b/.oca/custom_code_review_guidelines.txt @@ -0,0 +1,24 @@ +# Sample guideline, please follow similar structure for guideline with code samples +# 1. Suggest using streams instead of simple loops for better readability. +# +# *Comment: +# Category: Minor +# Issue: Use streams instead of a loop for better readability. 
# -*- coding: utf-8 -*-
"""Interactive 4-bit quantized inference for a base (non-fine-tuned) causal LM."""
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# Base model path (no fine-tuning applied)
base_model_name = "mistralai/Mistral-7B-Instruct-v0.2"

# 4-bit quantization config: NF4 + double quantization, fp16 compute
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the base model's tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
# Mistral ships no pad token; reuse EOS so padding-dependent code paths work
tokenizer.pad_token = tokenizer.eos_token

# Load the base model with 4-bit quantization, placed automatically on available devices
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
model.eval()


def gerar_resposta(prompt, max_tokens=2000):
    """Generate a model response for *prompt*.

    Args:
        prompt: User text fed directly to the model.
        max_tokens: Maximum number of NEW tokens to generate.

    Returns:
        The decoded text. NOTE: the full sequence is decoded, so the
        returned string includes the original prompt.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            do_sample=True,
            top_p=0.9,
            temperature=0.1,
            # Explicit pad id: silences the recurring "Setting pad_token_id
            # to eos_token_id" warning emitted on every generate() call.
            pad_token_id=tokenizer.eos_token_id,
        )
    resposta = tokenizer.decode(output[0], skip_special_tokens=True)
    return resposta


# Example usage: minimal interactive REPL
if __name__ == "__main__":
    while True:
        try:
            prompt = input("\nDigite sua pergunta (ou 'sair'): ")
        except (EOFError, KeyboardInterrupt):
            # Exit cleanly on Ctrl-D / Ctrl-C instead of dumping a traceback
            break
        # strip() so trailing/leading whitespace doesn't defeat the exit keyword
        if prompt.strip().lower() == "sair":
            break
        resultado = gerar_resposta(prompt)
        print("\n📎 Resposta gerada:")
        print(resultado)