# python  <- NOTE(review): stray markdown code-fence language tag left over from a
# copy-paste; commented out so this file parses as Python.
import requests
import json
import time
def chat_completion_generator(query, timeout=50):
    """Stream a chat completion from the LLM HTTP API and print it to stdout.

    Sends *query* to the ``/v1/chat-messages`` endpoint in streaming mode and,
    for every ``message`` event received, prints the ``answer`` text one
    character at a time (with a small delay to mimic live typing).

    Args:
        query: The user prompt to send to the model.
        timeout: Seconds to wait for the HTTP connection/response (default 50).

    Returns:
        None. All output and all errors are printed; nothing is raised to the
        caller.
    """
    url = 'https://llm_ip/v1/chat-messages'
    # SECURITY NOTE(review): hardcoded bearer token checked into source — move
    # this into an environment variable or secret store.
    headers = {
        'Authorization': 'Bearer app-2IdfEuDM0EwoKGVxEjC8',
        'Content-Type': 'application/json'
    }
    payload = {
        "inputs": {},
        "query": query,
        "response_mode": "streaming",  # the API is assumed to support streaming
        "user": "abc-123"
    }
    try:
        # stream=True enables incremental reads; the with-block guarantees the
        # underlying connection is released even on error (it leaked before).
        with requests.post(url, headers=headers, json=payload,
                           timeout=timeout, stream=True) as response:
            if response.status_code == 200:
                # Consume the response incrementally, line by line.
                for raw_line in response.iter_lines():
                    if not raw_line:
                        continue
                    line = raw_line.decode('utf-8').strip()
                    # SSE-style frames carry a "data:" prefix — strip it.
                    if line.startswith("data:"):
                        line = line[5:].strip()
                    try:
                        # Keep the try minimal: only json.loads can raise here.
                        event_data = json.loads(line)
                    except json.JSONDecodeError as e:
                        print(f"Failed to decode JSON: {e}")
                        continue
                    if event_data.get('event') == 'message':
                        # Emit the answer character by character for a
                        # live-typing effect.
                        for char in event_data.get('answer', ''):
                            print(char, end='', flush=True)
                            time.sleep(0.05)
            else:
                print(f"Error: {response.status_code}")
    except requests.RequestException as e:
        # Narrowed from a bare Exception: only network/HTTP failures belong
        # here; programming errors should surface instead of being swallowed.
        print(f"Request failed, error: {e}")
# Example invocation — guarded so importing this module no longer fires a
# network request as a side effect; behavior when run as a script is unchanged.
if __name__ == "__main__":
    query = "你能告诉我今天天气怎么样吗?"
    chat_completion_generator(query)
# NOTE(review): stray prose pasted into this script (unrelated to the code
# above) — commented out so the file parses; original text preserved below.
# Moshi 结合了一个大规模文本 LLM(Helium)和一个小型音频语言模型,实现了语音到语音的直接理解和生成。通过分层流式架构和多流音频处理,模型首次实现了全双工对话能力(可以在边输出对话的时候,同时还在监听说话人说话,可以做到打断)。