Add scan flow MVP and local Axiom skill workspace

This snapshot establishes the camera-to-result recognition flow and related tests while checking in the project skill/docs assets required for the configured local tooling.
This commit is contained in:
Matthias
2026-04-19 21:11:32 +02:00
parent 577214d474
commit a60a76b797
679 changed files with 138964 additions and 73 deletions

View File

@@ -0,0 +1,83 @@
import CoreGraphics
import Vision
/// One line of text produced by OCR, with its score and location.
struct RecognizedTextLine: Equatable {
    /// The recognized string for this line.
    let text: String

    /// Recognition confidence for `text`.
    /// NOTE(review): presumably in 0...1 (it is filled from a Vision candidate's confidence in this file) — confirm.
    let confidence: Double

    /// Bounding rectangle of the line. In this file it is filled from the Vision
    /// observation's `boundingBox`; the name suggests normalized image coordinates.
    let normalizedBounds: CGRect
}
/// Aggregate result of one OCR pass over an image.
struct OCRTextPayload: Equatable {
    /// All recognized text as a single string. `VisionCardOCRService` in this file
    /// builds it by joining the line texts with newline separators.
    let rawText: String

    /// The individual recognized lines that make up the payload.
    let lines: [RecognizedTextLine]

    /// Overall confidence for the payload. `VisionCardOCRService` in this file
    /// computes it as the arithmetic mean of the per-line confidences.
    let averageConfidence: Double
}
/// Abstraction over a component that extracts text from a prepared image.
protocol CardTextRecognizing {
    /// Recognizes text in `image`.
    ///
    /// - Parameter image: The prepared image to analyze.
    /// - Returns: The recognized text together with its per-line details.
    /// - Throws: An implementation-defined error when recognition fails.
    func recognizeText(in image: PreparedImage) async throws -> OCRTextPayload
}
/// Errors raised by `VisionCardOCRService` itself (as opposed to errors forwarded from Vision).
enum VisionCardOCRServiceError: LocalizedError {
    /// The OCR pass finished but produced no usable text lines.
    case noRecognizedText

    /// Human-readable description exposed through `LocalizedError`.
    var errorDescription: String? {
        let message: String
        // Exhaustive switch without `default` so a new case forces a message to be added.
        switch self {
        case .noRecognizedText:
            message = "Vision OCR could not find readable card text."
        }
        return message
    }
}
/// `CardTextRecognizing` implementation backed by Vision's `VNRecognizeTextRequest`.
struct VisionCardOCRService: CardTextRecognizing {
    /// Domain vocabulary passed to the request's `customWords` to supplement
    /// language correction with card-specific terms.
    private let customWords = [
        "Pokémon", "Charizard", "Glurak", "Pikachu", "Blastoise", "Venusaur",
        "Illustration", "Scarlet", "Violet", "Trainer", "Holo", "Rare", "Ultra",
        "VMAX", "GX", "ex", "Base", "Set", "Promo", "Shiny"
    ]

    /// Runs accurate-level text recognition on `image.analysisCGImage` and
    /// collects the top candidate of every observation.
    ///
    /// - Parameter image: The prepared image whose `analysisCGImage` is analyzed.
    /// - Returns: The recognized lines, their newline-joined text, and the mean line confidence.
    /// - Throws: The error reported by Vision (through the request's completion
    ///   handler or by `perform`), or `VisionCardOCRServiceError.noRecognizedText`
    ///   when no observation yields a candidate.
    func recognizeText(in image: PreparedImage) async throws -> OCRTextPayload {
        try await withCheckedThrowingContinuation { continuation in
            // A checked continuation must be resumed exactly once. A failing request
            // can be reported both to the completion handler and as a thrown error
            // from `perform`, so every resume goes through this guard and only the
            // first outcome is delivered.
            var hasResumed = false
            func finish(with result: Result<OCRTextPayload, Error>) {
                guard !hasResumed else { return }
                hasResumed = true
                continuation.resume(with: result)
            }

            let request = VNRecognizeTextRequest { request, error in
                if let error {
                    finish(with: .failure(error))
                    return
                }

                let observations = (request.results as? [VNRecognizedTextObservation]) ?? []
                // Observations without any candidate are dropped.
                let lines = observations.compactMap { observation -> RecognizedTextLine? in
                    guard let candidate = observation.topCandidates(1).first else { return nil }
                    return RecognizedTextLine(
                        text: candidate.string,
                        confidence: Double(candidate.confidence),
                        normalizedBounds: observation.boundingBox
                    )
                }

                // Guarding on emptiness also protects the division below.
                guard !lines.isEmpty else {
                    finish(with: .failure(VisionCardOCRServiceError.noRecognizedText))
                    return
                }

                let average = lines.map(\.confidence).reduce(0, +) / Double(lines.count)
                finish(with: .success(OCRTextPayload(
                    rawText: lines.map(\.text).joined(separator: "\n"),
                    lines: lines,
                    averageConfidence: average
                )))
            }
            request.recognitionLevel = .accurate
            request.recognitionLanguages = ["en-US", "de-DE"]
            request.usesLanguageCorrection = true
            request.customWords = customWords

            let handler = VNImageRequestHandler(cgImage: image.analysisCGImage, options: [:])
            do {
                try handler.perform([request])
            } catch {
                // Ignored by `finish` if the completion handler already delivered an outcome.
                finish(with: .failure(error))
            }
        }
    }
}