模型名称映射
对于所有 CoreML(ANE)模型,需要填写对应的插件 ID;GGUF 格式模型运行于 CPU/GPU,无需填写插件 ID 或模型名称(plugin 参数可省略)。
| 模型名称 | 插件 ID | HuggingFace 仓库名 |
|---|---|---|
| NexaAI/EmbedNeural-ANE | ane | NexaAI/EmbedNeural-ANE |
| NexaAI/parakeet-tdt-0.6b-v3-ane | ane | NexaAI/parakeet-tdt-0.6b-v3-ane |
ASR 用法
端侧语音识别,将音频转写为文本。
基础用法
import NexaSdk
// Load the ASR model from a local model directory.
// NOTE(review): `load(from:)` is documented below as taking a URL; here a String
// path is passed — confirm whether a String overload exists in the SDK.
let asr = try Asr()
try await asr.load(from: "<path/to/model/dir>")
// Transcribe an audio file; the response wraps the transcription result
// (`asrResult`) plus optional profiling data.
let result = try await asr.transcribe(options: .init(audioPath: "<your-audio-path>"))
print(result.asrResult)
ASR 流式模式
import NexaSdk
// Load the ASR model.
let asr = try Asr()
try await asr.load(from: "<path/to/model/dir>")
do {
// Start microphone recording and stream transcription output.
// NOTE(review): the API listing below shows `startRecordingStream` with no
// return value and no `throws`; this example uses `try` and iterates a stream —
// confirm the actual signature against the SDK.
let stream = try asr.startRecordingStream()
for try await content in stream {
print(content)
}
// Stop the stream after normal completion.
asr.stopRecordingStream()
} catch {
// Also stop the stream on error so recording does not keep running.
asr.stopRecordingStream()
}
API 说明
核心方法
func load(from repoFolder: URL) async throws
- 从 HuggingFace 本地仓库目录加载 ASR 模型
- 参数:
repoFolder 模型文件所在目录
- 返回:无
- 抛出:加载失败时抛错
- 说明:异步方法,需要使用 `await` 调用
func startRecordingStream(config: ASRStreamConfig = .init(), block tapBlock: AVAudioNodeTapBlock? = nil)
- 启动录音并开启 ASR 流式识别
- 参数:
config 流配置;tapBlock 可选音频采样回调
- 返回:无
func stopRecordingStream()
func startRecording(block tapBlock: AVAudioNodeTapBlock? = nil)
- 仅开启录音
- 参数:可选采样回调
- 抛出:音频会话或引擎启动失败时
func stopRecording()
func startStream(config: ASRStreamConfig = .init()) throws -> AsyncThrowingStream<String, Error>
- 启动 ASR 流式模式
- 返回:异步流,持续产出转写文本
func stopStream(graceful: Bool = true)
- 停止流式识别
- 参数:
graceful=true 处理缓冲后停止;false 立即停止
func streamPushSamples(samples: [Float]) throws
- 将原始音频采样推入流式管线
- 参数:
samples PCM 采样数组
AsrResult
/// ASR transcription result
public struct AsrResult: Codable {
/// Transcribed text
public let transcript: String
/// Confidence scores for each unit
public let confidences: [Float]
/// Timestamp pairs: [start, end] for each unit
// NOTE(review): the type is a flat [Float]; presumably pairs are interleaved as
// [start0, end0, start1, end1, ...] — confirm against the SDK implementation.
public let timestamps: [Float]
}
AsrResponse
/// Full response returned by a transcription call: the transcription result
/// plus optional performance metrics.
public struct AsrResponse: Codable {
/// The transcription result (text, confidences, timestamps).
public let asrResult: AsrResult
/// Performance metrics; presumably `nil` when profiling was not collected — confirm.
public let profileData: ProfileData?
}
AsrOptions
/// Options for a transcription request.
public struct AsrOptions: Codable {
// Filesystem path to the model.
public let modelPath: String
// Language of the audio to transcribe.
public let language: Language
}
/// Supported transcription languages.
public enum Language: String, Codable {
// English
case en
// Chinese — NOTE(review): ISO 639-1 for Chinese is "zh"; "ch" is what this SDK
// uses as the raw value, so it is kept as-is.
case ch
}
ASR 流式配置
/// Configuration for streaming ASR recognition.
public struct ASRStreamConfig {
// Timestamp granularity for emitted results.
public enum TimestampMode: String {
case segment
case word
case none
}
// Transcription language (defaults to English).
public var language: Language = .en
// Duration of each audio chunk in seconds (default 4.0).
public var chunkDuration: Float
// Overlap between consecutive chunks in seconds (default 3.0).
public var overlapDuration: Float
// Audio sample rate in Hz (default 16000).
public var sampleRate: Int32
// Maximum number of chunks held in the processing queue (default 10).
public var maxQueueSize: Int32
// Input buffer size (default 512) — units (samples vs frames) not specified here; confirm.
public var bufferSize: Int32
// Timestamp mode: "none" | "segment" | "word"
public var timestamps: TimestampMode
// Beam size for decoding (default 5).
public var beamSize: Int32
}
Embedding 用法
生成向量嵌入,支持语义检索/RAG。
基础用法
import NexaSdk
import Foundation
// Load an ANE-backed Embedder.
// Use URL(fileURLWithPath:) for local filesystem paths: URL(string:) expects a
// well-formed URL string (and returns nil for invalid input, making the force
// unwrap a crash risk), and a bare path would not produce a file URL anyway.
let repoDir: URL = URL(fileURLWithPath: "path/to/model/dir")
let embedder = try Embedder(from: repoDir, plugin: .ane)
// Generate embeddings for multiple texts in a single batch.
let texts = ["<your-text1>", "<your-text2>"]
let result = try embedder.embed(texts: texts, config: .init(batchSize: Int32(texts.count)))
// Print the first 10 dimensions of each embedding vector.
for (i, vec) in result.embeddings.enumerated() {
    let head = vec.prefix(10)
    print("[\(i)]", Array(head))
}
API 说明
核心方法
convenience init(from repoFolder: URL, plugin: Plugin = .cpu_gpu)
- 从本地仓库目录初始化
- 参数:
repoFolder 模型目录;plugin 后端插件(默认 cpu_gpu)
func embed(inputIds: [[Int32]], config: EmbeddingConfig) throws -> EmbedResult
- 对预分词的 token ID 生成向量;仅支持 `cpu_gpu` 插件
func embed(texts: [String], config: EmbeddingConfig) throws -> EmbedResult
func embed(imagePaths: [String], config: EmbeddingConfig) throws -> EmbedResult
func dim() throws -> Int32
EmbeddingConfig
/// Configuration for an embedding request.
public struct EmbeddingConfig {
/// Batch size
public var batchSize: Int32
/// Normalization method: "l2", "mean"
/// `nil` means no normalization
public var normalizeMethod: NormalizeMethod?
}
EmbedResult
/// Result of an embedding call.
public struct EmbedResult {
// Embedding vectors produced for the inputs (presumably one per input, in
// input order — confirm against the SDK implementation).
public let embeddings: [[Float]]
// Performance metrics for the call.
public let profileData: ProfileData
}
LLM 用法
用于文本生成与对话。
流式对话(CPU/GPU,GGUF)
import NexaSdk
let llm = try LLM()
// Load the model from a GGUF file path.
try await llm.load(.init(modelPath: "<path/to/model/file>"))
let system = "You are a helpful AI assistant"
let userMsgs = [
"Tell me a long story, about 100 words",
"How are you"
]
var messages = [ChatMessage]()
messages.append(.init(role: .system, content: system))
for userMsg in userMsgs {
messages.append(.init(role: .user, content: userMsg))
// Stream a reply for the conversation so far.
// NOTE(review): the API listing shows an `options:` parameter on
// generateAsyncStream; presumably it has a default value — confirm.
let stream = try await llm.generateAsyncStream(messages: messages)
var response = ""
for try await token in stream {
print(token, terminator: "")
response += token
}
// Append the assistant reply so the next turn carries full history.
messages.append(.init(role: .assistant, content: response))
print("\n\n")
}
多模态用法
VLM 视觉语言模型,用于图像理解与多模态对话。
流式对话(CPU/GPU,GGUF)
import NexaSdk
let vlm = try VLM()
// Load the GGUF model together with its multimodal projector (mmproj) file.
try await vlm.load(.init(modelPath: "<path/to/model/file>", mmprojPath: "<path/to/mmproj/file>"))
let images = ["<path/to/your/image>"]
// The image paths are set both on the generation config and on the message.
var config = GenerationConfig.default
config.imagePaths = images
let message = ChatMessage(role: .user, content: "What do you see in this image", images: images)
let stream = try await vlm.generateAsyncStream(messages: [message], options: .init(config: config))
for try await token in stream {
print(token, terminator: "")
}
print()
API 说明
核心方法
func load(_ options: ModelOptions) async throws
func load(from repoFolder: URL, modelFileName: String = "", mmprojFileName: String = "") throws
- 从本地 HuggingFace 仓库目录加载;文件名留空则用默认
func applyChatTemplate(messages: [ChatMessage], options: ChatTemplateOptions) async throws -> String
func generateAsyncStream(messages: [ChatMessage], options: GenerationOptions) async throws -> AsyncThrowingStream<String, any Error>
func generate(prompt: String, config: GenerationConfig) async throws -> GenerateResult
func reset()
func stopStream()
func saveKVCache(to path: String) / func loadKVCache(from path: String)
GenerationConfig
/// Generation parameters for LLM/VLM text generation.
public struct GenerationConfig {
// Maximum number of tokens to generate.
public var maxTokens: Int32
// Stop sequences that terminate generation when emitted.
public var stop: [String]
// NOTE(review): presumably the number of past (KV-cache) tokens to reuse — confirm.
public var nPast: Int32
// Sampling parameters (temperature, top-p, penalties, ...).
public var samplerConfig: SamplerConfig
// Image file paths for multimodal (VLM) input.
public var imagePaths: [String]
// Audio file paths for multimodal input.
public var audioPaths: [String]
}
SamplerConfig
/// Token-sampling parameters used during generation.
public struct SamplerConfig {
// Sampling temperature.
public var temperature: Float
// Nucleus (top-p) sampling threshold.
public var topP: Float
// Top-k sampling cutoff.
public var topK: Int32
// Minimum-probability (min-p) cutoff.
public var minP: Float
// Penalty applied to repeated tokens.
public var repetitionPenalty: Float
// Presence penalty.
public var presencePenalty: Float
// Frequency penalty.
public var frequencyPenalty: Float
// Random seed for reproducible sampling.
public var seed: Int32
// Optional path to a grammar file constraining output — presumably GBNF; confirm.
public var grammarPath: String?
}
ProfileData
/// Performance metrics reported alongside SDK results.
/// NOTE(review): time units (ms vs µs) are not specified in this document — confirm.
public struct ProfileData: CustomStringConvertible, Codable {
// Time to first token.
public let ttft: Int64
// Time spent processing the prompt.
public let promptTime: Int64
// Time spent decoding generated tokens.
public let decodeTime: Int64
// Number of prompt tokens.
public let promptTokens: Int64
// Number of generated tokens.
public let generatedTokens: Int64
// Duration of the input audio (ASR).
public let audioDuration: Int64
// Prefill speed — presumably tokens per second; confirm.
public let prefillSpeed: Double
// Decoding speed — presumably tokens per second; confirm.
public let decodingSpeed: Double
// Real-time factor — presumably processing time / audio duration (ASR); confirm.
public let realTimeFactor: Double
// Reason generation stopped.
public let stopReason: String
}
重排(Rerank)用法
提升检索相关性,对文档列表进行重排序。
基础用法
import NexaSdk
import Foundation
// Load a CPU/GPU rerank model.
// URL(string:) returns nil for strings containing characters such as "<" and ">",
// so force-unwrapping it here would crash at runtime. URL(fileURLWithPath:) is the
// correct constructor for a local filesystem path and never fails.
let repoDir = URL(fileURLWithPath: "<path/to/model/dir>")
let reranker = try Reranker(from: repoDir)
let query = "What is machine learning?"
// The first document was accidentally split across two array elements by a
// wrapped line (note the trailing space); it is rejoined into one string here.
let documents = [
    "Machine learning is a subset of artificial intelligence that enables computers to learn and make decisions without being explicitly programmed.",
    "Machine learning algorithms build mathematical models based on sample data to make predictions or decisions.",
    "Deep learning is a subset of machine learning that uses neural networks with multiple layers.",
    "Python is a popular programming language for machine learning and data science.",
    "The weather today is sunny and warm."
]
// Scores are returned in the same order as `documents`.
let result = try await reranker.rerank(query, documents: documents)
print(result.scores)
API 说明
核心方法
init(modelPath: String, tokenizerPath: String? = nil, deviceId: String? = nil, plugin: Plugin = .cpu_gpu) throws
func rerank(_ query: String, documents: [String], config: RerankConfig = .init()) async throws -> RerankerResult
convenience init(from repoFolder: URL, plugin: Plugin = .cpu_gpu) throws
RerankerResult
/// Result of a rerank call.
public struct RerankerResult {
// Relevance score for each input document (presumably in input order — confirm).
public let scores: [Float]
// Performance metrics for the call.
public let profileData: ProfileData
}
RerankConfig
/// Configuration for a rerank request.
public struct RerankConfig {
/// Batch size
public var batchSize: Int32
/// Normalization: "softmax" | "min-max" | "l2"; `nil` means no normalization
public var normalizeMethod: NormalizeMethod?
/// Supported score-normalization methods.
public enum NormalizeMethod: String, CaseIterable {
case softmax
case minMax = "min-max"
case l2
}
}
如何选择 CPU/GPU 与 ANE
Nexa iOS / macOS SDK 提供两种加速模式:CPU/GPU 与 ANE。每个模型仅适配特定硬件,请查阅对应 HuggingFace 模型卡。
CPU/GPU 模式
// CPU/GPU embedder — uses a GGUF-format model.
// Embedder(from:) is documented as taking a URL (`convenience init(from repoFolder: URL, ...)`),
// so wrap the local path with URL(fileURLWithPath:) instead of passing a String literal.
let embedder = try Embedder(from: URL(fileURLWithPath: "<path/to/model/dir>"), plugin: .cpu_gpu)
ANE 模式
// ANE embedder — uses a CoreML-format model.
// Embedder(from:) is documented as taking a URL (`convenience init(from repoFolder: URL, ...)`),
// so wrap the local path with URL(fileURLWithPath:) instead of passing a String literal.
let embedder = try Embedder(from: URL(fileURLWithPath: "<path/to/model/dir>"), plugin: .ane)
Embedder 模块同时支持 CPU/GPU 与 ANE;LLM、VLM、Reranker 目前仅支持 CPU/GPU;ASR 仅支持 ANE,不支持 CPU/GPU。
需要帮助?
加入社区获取支持、分享项目并与其他开发者交流。