Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions Examples/transformers-cli/Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ let package = Package(
// If you copy this manifest as a template, use the following line instead
//.package(url: "https://github.com/huggingface/swift-transformers", from: "1.0.0"),
.package(url: "https://github.com/apple/swift-argument-parser", from: "1.3.0"),
.package(url: "https://github.com/apple/swift-container-plugin", from: "1.0.0"),
],
targets: [
.executableTarget(
Expand All @@ -19,6 +20,14 @@ let package = Package(
.product(name: "Transformers", package: "swift-transformers"),
.product(name: "ArgumentParser", package: "swift-argument-parser"),
]
),
.executableTarget(
name: "transformers-cli-linux",
dependencies: [
.product(name: "Hub", package: "swift-transformers"),
.product(name: "Tokenizers", package: "swift-transformers"),
.product(name: "ArgumentParser", package: "swift-argument-parser"),
]
)
]
)
227 changes: 227 additions & 0 deletions Examples/transformers-cli/Sources/transformers-cli-linux/main.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
import ArgumentParser
import Foundation
import Hub
import Tokenizers

/// Builds a `HubApi`, preferring persistent storage on WendyOS.
///
/// When `/mnt/app` exists (the WendyOS persistent volume), downloads are
/// cached under `/mnt/app/huggingface`; otherwise the library's default
/// cache location is used.
func createHubApi() -> HubApi {
    let persistentRoot = "/mnt/app"
    guard FileManager.default.fileExists(atPath: persistentRoot) else {
        return HubApi()
    }
    let cacheBase = URL(filePath: persistentRoot).appending(component: "huggingface")
    return HubApi(downloadBase: cacheBase)
}

// Root command for the cross-platform (Linux-friendly) CLI. ArgumentParser
// dispatches to one of the subcommands below; invoking the binary with no
// subcommand runs `Demo`.
@main
struct TransformersLinuxCLI: AsyncParsableCommand {
    static let configuration = CommandConfiguration(
        commandName: "transformers-cli-linux",
        abstract: "Cross-platform CLI for HuggingFace Transformers (tokenization & Hub)",
        version: "0.0.1",
        // Order here is the order subcommands appear in `--help` output.
        subcommands: [Demo.self, Tokenize.self, Decode.self, Download.self, ChatTemplate.self],
        defaultSubcommand: Demo.self
    )
}

/// Default subcommand: loads a tokenizer and walks through three
/// tokenization examples, then lists the other available commands.
struct Demo: AsyncParsableCommand {
    static let configuration = CommandConfiguration(
        abstract: "Run a demo showcasing tokenizer capabilities"
    )

    @Option(name: .shortAndLong, help: "HuggingFace model ID")
    var model: String = "bert-base-uncased"

    func run() async throws {
        print("Swift Transformers Demo")
        print("=======================\n")

        let api = createHubApi()
        print("Loading tokenizer for '\(model)'...")
        print("Cache location: \(api.downloadBase.path)")
        let tok = try await AutoTokenizer.from(pretrained: model, hubApi: api)
        print("Tokenizer loaded successfully!\n")

        // Demo 1: encode a simple sentence and round-trip it back to text.
        let basicText = "Hello, world! Welcome to Swift Transformers."
        print("Demo 1: Basic Tokenization")
        print("---------------------------")
        print("Input: \"\(basicText)\"")
        let basicIds = tok.encode(text: basicText)
        print("Tokens: \(basicIds)")
        print("Token count: \(basicIds.count)")
        print("Decoded: \"\(tok.decode(tokens: basicIds))\"\n")

        // Demo 2: words outside the vocabulary get split into subword pieces.
        let subwordText = "Tokenization handles unknownwords and subwords nicely."
        print("Demo 2: Subword Tokenization")
        print("-----------------------------")
        print("Input: \"\(subwordText)\"")
        let subwordIds = tok.encode(text: subwordText)
        print("Tokens: \(subwordIds)")
        print("Token count: \(subwordIds.count)\n")

        // Demo 3: encode() includes the model's special tokens in its output.
        let specialText = "Testing special tokens"
        print("Demo 3: Encoding with Special Tokens")
        print("-------------------------------------")
        print("Input: \"\(specialText)\"")
        let specialIds = tok.encode(text: specialText)
        print("Tokens (with special tokens): \(specialIds)")
        print("Token count: \(specialIds.count)\n")

        print("Demo complete! Try other commands:")
        print(" tokenize <text> - Tokenize custom text")
        print(" decode <ids> - Decode token IDs")
        print(" download <model> - Download a tokenizer")
        print(" chat-template <msg> - Apply chat template")
    }
}

/// Encodes a piece of text into token IDs using a Hub-hosted or local tokenizer.
struct Tokenize: AsyncParsableCommand {
    static let configuration = CommandConfiguration(
        abstract: "Tokenize text using a HuggingFace tokenizer"
    )

    @Argument(help: "Text to tokenize")
    var text: String

    @Option(name: .shortAndLong, help: "HuggingFace model ID (e.g., 'bert-base-uncased')")
    var model: String = "bert-base-uncased"

    @Option(name: .shortAndLong, help: "Path to local tokenizer folder")
    var localPath: String?

    @Flag(name: .shortAndLong, help: "Show token strings alongside IDs")
    var verbose: Bool = false

    func run() async throws {
        let api = createHubApi()
        // A local folder takes precedence over downloading from the Hub.
        let tokenizer: Tokenizer =
            if let localPath {
                try await AutoTokenizer.from(
                    modelFolder: URL(filePath: localPath, directoryHint: .isDirectory),
                    hubApi: api
                )
            } else {
                try await AutoTokenizer.from(pretrained: model, hubApi: api)
            }

        let ids = tokenizer.encode(text: text)

        print("Input: \"\(text)\"")
        print("Token count: \(ids.count)")
        print("Token IDs: \(ids)")

        // NOTE(review): despite the flag's help text, this prints the full
        // decoded string rather than per-token strings — confirm whether a
        // per-token lookup API should be used here instead.
        if verbose {
            let roundTrip = tokenizer.decode(tokens: ids)
            print("Decoded: \"\(roundTrip)\"")
        }
    }
}

/// Decodes a comma-separated list of token IDs back into text.
struct Decode: AsyncParsableCommand {
    static let configuration = CommandConfiguration(
        abstract: "Decode token IDs back to text"
    )

    @Argument(help: "Token IDs to decode (comma-separated)")
    var tokenIds: String

    @Option(name: .shortAndLong, help: "HuggingFace model ID")
    var model: String = "bert-base-uncased"

    @Option(name: .shortAndLong, help: "Path to local tokenizer folder")
    var localPath: String?

    /// Loads the tokenizer (local folder wins over Hub), validates the ID
    /// list, and prints the decoded text.
    ///
    /// - Throws: `ValidationError` if any comma-separated entry is not an
    ///   integer (previously such entries were silently dropped by
    ///   `compactMap`, which could skew the output after a typo).
    func run() async throws {
        let hub = createHubApi()
        let tokenizer: Tokenizer
        if let localPath {
            let url = URL(filePath: localPath, directoryHint: .isDirectory)
            tokenizer = try await AutoTokenizer.from(modelFolder: url, hubApi: hub)
        } else {
            tokenizer = try await AutoTokenizer.from(pretrained: model, hubApi: hub)
        }

        // Parse strictly: collect malformed entries instead of ignoring them.
        var ids: [Int] = []
        var invalid: [String] = []
        for piece in tokenIds.split(separator: ",") {
            let trimmed = piece.trimmingCharacters(in: .whitespaces)
            if trimmed.isEmpty { continue }
            if let id = Int(trimmed) {
                ids.append(id)
            } else {
                invalid.append(trimmed)
            }
        }
        guard invalid.isEmpty else {
            throw ValidationError("Not valid integer token IDs: \(invalid.joined(separator: ", "))")
        }

        let decoded = tokenizer.decode(tokens: ids)

        print("Token IDs: \(ids)")
        print("Decoded: \"\(decoded)\"")
    }
}

/// Downloads tokenizer files for a model from the HuggingFace Hub and
/// verifies the result by loading the tokenizer and encoding a test string.
struct Download: AsyncParsableCommand {
    static let configuration = CommandConfiguration(
        abstract: "Download a tokenizer from HuggingFace Hub"
    )

    @Argument(help: "HuggingFace model ID to download")
    var model: String

    @Option(name: .shortAndLong, help: "Output directory (defaults to HF cache)")
    var output: String?

    func run() async throws {
        // Honor --output when given (previously the option was parsed but
        // ignored); otherwise fall back to the default/WendyOS cache.
        let hub: HubApi
        if let output {
            hub = HubApi(downloadBase: URL(filePath: output, directoryHint: .isDirectory))
        } else {
            hub = createHubApi()
        }
        print("Downloading tokenizer for '\(model)'...")
        print("Cache location: \(hub.downloadBase.path)")

        let repo = Hub.Repo(id: model)

        // Try each known tokenizer artifact; which ones exist depends on the
        // tokenizer type (e.g. BPE has merges.txt, WordPiece has vocab.txt).
        let files = ["tokenizer.json", "tokenizer_config.json", "vocab.txt", "vocab.json", "merges.txt"]

        for file in files {
            do {
                let url = try await hub.snapshot(from: repo, matching: [file])
                print(" Downloaded: \(file) -> \(url.path)")
            } catch {
                // Deliberate best-effort: a missing file is expected for
                // tokenizer types that don't ship it, so skip silently.
            }
        }

        print("Done! Tokenizer cached locally.")

        // Verify the cached files actually load into a working tokenizer.
        let tokenizer = try await AutoTokenizer.from(pretrained: model, hubApi: hub)
        let testTokens = tokenizer.encode(text: "Hello, world!")
        print("Verification: \"Hello, world!\" -> \(testTokens.count) tokens")
    }
}

/// Renders a user (and optional system) message through a model's chat template.
struct ChatTemplate: AsyncParsableCommand {
    static let configuration = CommandConfiguration(
        abstract: "Apply a chat template to messages"
    )

    @Option(name: .shortAndLong, help: "HuggingFace model ID")
    var model: String = "microsoft/Phi-3-mini-4k-instruct"

    @Option(name: .shortAndLong, help: "System message")
    var system: String?

    @Argument(help: "User message")
    var message: String

    func run() async throws {
        let api = createHubApi()
        let tokenizer = try await AutoTokenizer.from(pretrained: model, hubApi: api)

        // Assemble the conversation: optional system turn first, then the user turn.
        var conversation: [[String: String]] = []
        if let system {
            conversation.append(["role": "system", "content": system])
        }
        conversation.append(["role": "user", "content": message])

        let promptIds = try tokenizer.applyChatTemplate(messages: conversation)
        let rendered = tokenizer.decode(tokens: promptIds)

        print("Formatted prompt:")
        print("---")
        print(rendered)
        print("---")
        print("\nToken count: \(promptIds.count)")
    }
}
2 changes: 2 additions & 0 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ let package = Package(
dependencies: [
.package(url: "https://github.com/huggingface/swift-jinja.git", from: "2.0.0"),
.package(url: "https://github.com/apple/swift-collections.git", from: "1.0.0"),
.package(url: "https://github.com/apple/swift-crypto.git", "3.0.0"..<"5.0.0"),
],
targets: [
.target(name: "Generation", dependencies: ["Tokenizers"]),
Expand All @@ -27,6 +28,7 @@ let package = Package(
dependencies: [
.product(name: "Jinja", package: "swift-jinja"),
.product(name: "OrderedCollections", package: "swift-collections"),
.product(name: "Crypto", package: "swift-crypto"),
],
resources: [
.process("Resources")
Expand Down
22 changes: 10 additions & 12 deletions Sources/Generation/Generation.swift
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,6 @@
// Created by Pedro Cuenca on 7/5/23.
//

#if canImport(CoreML)
import CoreML

import CoreML
import Tokenizers

/// Supported text generation modes.
public enum GenerationMode {
/// Contrastive search generation mode
Expand All @@ -33,6 +27,16 @@ public typealias InputTokens = [Int]
/// Array of token IDs representing generated output tokens.
public typealias GenerationOutput = [Int]

/// Callback for receiving generated tokens during streaming.
public typealias PredictionTokensCallback = (GenerationOutput) -> Void

/// Callback for receiving generated text during streaming.
public typealias PredictionStringCallback = (String) -> Void

#if canImport(CoreML)
import CoreML
import Tokenizers

/// A callable model that predicts the next token after a given sequence.
///
/// - Parameter tokens: Input token sequence
Expand All @@ -41,12 +45,6 @@ public typealias GenerationOutput = [Int]
@available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, watchOS 11.0, *)
public typealias NextTokenModel = (MLTensor, GenerationConfig) async -> MLTensor

/// Callback for receiving generated tokens during streaming.
public typealias PredictionTokensCallback = (GenerationOutput) -> Void

/// Callback for receiving generated text during streaming.
public typealias PredictionStringCallback = (String) -> Void

/// Protocol for text generation implementations.
@available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, watchOS 11.0, *)
public protocol Generation {
Expand Down
32 changes: 21 additions & 11 deletions Sources/Hub/Config.swift
Original file line number Diff line number Diff line change
Expand Up @@ -234,31 +234,41 @@ public struct Config: Hashable, Sendable,

private static func convertToBinaryDistinctKeys(_ object: Any) -> Config {
if let dict = object as? [NSString: Any] {
Config(Dictionary(uniqueKeysWithValues: dict.map { (BinaryDistinctString($0.key), convertToBinaryDistinctKeys($0.value)) }))
return Config(Dictionary(uniqueKeysWithValues: dict.map { (BinaryDistinctString($0.key), convertToBinaryDistinctKeys($0.value)) }))
} else if let array = object as? [Any] {
Config(array.map { convertToBinaryDistinctKeys($0) })
return Config(array.map { convertToBinaryDistinctKeys($0) })
} else {
switch object {
case let obj as String:
Config(obj)
return Config(obj)
case let obj as Int:
Config(obj)
return Config(obj)
case let obj as Float:
Config(obj)
return Config(obj)
case let obj as Bool:
Config(obj)
return Config(obj)
case let obj as NSNumber:
#if os(macOS) || os(iOS) || os(tvOS) || os(watchOS) || os(visionOS)
if CFNumberIsFloatType(obj) {
Config(obj.floatValue)
return Config(obj.floatValue)
} else {
Config(obj.intValue)
return Config(obj.intValue)
}
#else
// On Linux, check objCType to determine if it's a floating point number
let type = String(cString: obj.objCType)
if type == "f" || type == "d" {
return Config(obj.floatValue)
} else {
return Config(obj.intValue)
}
#endif
case _ as NSNull:
Config()
return Config()
case let obj as Config:
obj
return obj
case let obj as (UInt, String):
Config((obj.0, BinaryDistinctString(obj.1)))
return Config((obj.0, BinaryDistinctString(obj.1)))
default:
fatalError("unknown type: \(type(of: object)) \(object)")
}
Expand Down
Loading
Loading