import os

from loguru import logger

from langchain_core.messages import HumanMessage
from langchain_ollama import ChatOllama

# When a model must be auto-downloaded, pull it from ModelScope;
# otherwise it would be fetched from HuggingFace.
os.environ['VLLM_USE_MODELSCOPE'] = 'True'
||
def vl_test(base_url: str = "http://192.168.10.11:11434",
            model: str = "llava-phi3:latest",
            temperature: float = 0.7) -> str:
    """Smoke-test a vision LLM served by Ollama via LangChain.

    Sends a single text-only prompt to the configured model and returns
    the model's reply text.

    Args:
        base_url: Base URL of the Ollama server.
        model: Name of the Ollama model to query (e.g. "qwen3-vl:8b").
        temperature: Sampling temperature passed to the model.

    Returns:
        The ``content`` of the model's response message.

    Raises:
        Exception: re-raises whatever the underlying client raises
            after logging it with a traceback.
    """
    logger.info("vl_test")

    # Build the client using LangChain 1.x's ChatOllama wrapper.
    client = ChatOllama(
        base_url=base_url,
        model=model,
        temperature=temperature,
    )

    try:
        # LangChain 1.x message format: content is a list of typed parts.
        message = HumanMessage(
            content=[
                {
                    "type": "text",
                    "text": "请描述这张图片的内容"
                },
                # To attach an image, append a part shaped like:
                # {
                #     "type": "image_url",
                #     "image_url": {
                #         "url": "https://example.com/image.jpg"  # or base64 image data
                #     }
                # }
            ]
        )

        # Invoke the model and extract the text of its reply.
        response = client.invoke([message])
        result = response.content
        logger.info(f"qwen3-vl:8b响应: {result}")
        return result
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception(f"调用qwen3-vl:8b失败: {e}")
        # Bare `raise` preserves the original traceback (`raise e` would
        # restart it from this line).
        raise
|
||
|
||
|
||
# When this file is executed directly, run the test.
if __name__ == "__main__":
    vl_test()