Add scan flow MVP and local Axiom skill workspace
This snapshot establishes the camera-to-result recognition flow and related tests while checking in the project skill/docs assets required for the configured local tooling.
This commit is contained in:
83
StackDex/Services/VisionCardOCRService.swift
Normal file
83
StackDex/Services/VisionCardOCRService.swift
Normal file
@@ -0,0 +1,83 @@
|
||||
import CoreGraphics
|
||||
import Vision
|
||||
|
||||
/// One line of text produced by OCR, together with its score and location.
struct RecognizedTextLine {
    /// The recognized string for this line.
    let text: String

    /// Recognition confidence reported for `text`.
    let confidence: Double

    /// Bounding rectangle of the line; `VisionCardOCRService` fills this with
    /// the Vision observation's `boundingBox`.
    let normalizedBounds: CGRect
}

// Equality is synthesized member-wise from the three stored properties.
extension RecognizedTextLine: Equatable {}
|
||||
|
||||
/// Aggregate result of one OCR pass over an image.
struct OCRTextPayload {
    /// All recognized text as a single string; `VisionCardOCRService` builds it
    /// by joining the line texts with `"\n"`.
    let rawText: String

    /// The individual recognized lines, in the order the recognizer returned them.
    let lines: [RecognizedTextLine]

    /// Mean of the per-line confidence values in `lines`, as computed by
    /// `VisionCardOCRService`.
    let averageConfidence: Double
}

// Equality is synthesized member-wise from the three stored properties.
extension OCRTextPayload: Equatable {}
|
||||
|
||||
/// A type that can extract text from a prepared image.
protocol CardTextRecognizing {
    /// Recognizes the text contained in `image`.
    ///
    /// - Parameter image: The prepared image to analyze.
    /// - Returns: The recognized text as an `OCRTextPayload`.
    /// - Throws: An error when recognition fails; the concrete errors are up to
    ///   the conforming type.
    func recognizeText(in image: PreparedImage) async throws -> OCRTextPayload
}
|
||||
|
||||
/// Errors raised by `VisionCardOCRService` itself (as opposed to errors
/// forwarded from Vision).
enum VisionCardOCRServiceError: Error {
    /// Recognition finished but yielded no usable text line.
    case noRecognizedText
}

// MARK: - LocalizedError

extension VisionCardOCRServiceError: LocalizedError {
    /// Human-readable description of the failure.
    var errorDescription: String? {
        switch self {
        case .noRecognizedText:
            return "Vision OCR could not find readable card text."
        }
    }
}
|
||||
|
||||
/// `CardTextRecognizing` implementation backed by Vision's
/// `VNRecognizeTextRequest`.
struct VisionCardOCRService: CardTextRecognizing {
    /// Extra vocabulary passed to Vision as `customWords` to supplement the
    /// language-correction dictionary with card-specific terms.
    private let customWords = [
        "Pokémon", "Charizard", "Glurak", "Pikachu", "Blastoise", "Venusaur",
        "Illustration", "Scarlet", "Violet", "Trainer", "Holo", "Rare", "Ultra",
        "VMAX", "GX", "ex", "Base", "Set", "Promo", "Shiny"
    ]

    /// Runs accurate-level text recognition (English and German, with language
    /// correction) on `image.analysisCGImage`.
    ///
    /// - Parameter image: The prepared image whose `analysisCGImage` is analyzed.
    /// - Returns: A payload containing every recognized line (top candidate
    ///   only), the lines joined by `"\n"`, and the mean line confidence.
    /// - Throws: Any error thrown by `VNImageRequestHandler.perform(_:)`, or
    ///   `VisionCardOCRServiceError.noRecognizedText` when no line was
    ///   recognized.
    func recognizeText(in image: PreparedImage) async throws -> OCRTextPayload {
        let request = VNRecognizeTextRequest()
        request.recognitionLevel = .accurate
        request.recognitionLanguages = ["en-US", "de-DE"]
        request.usesLanguageCorrection = true
        request.customWords = customWords

        let handler = VNImageRequestHandler(cgImage: image.analysisCGImage, options: [:])

        // `perform` is synchronous, so the results are available on the request
        // as soon as it returns. Reading them here instead of in a completion
        // handler bridged through a continuation avoids resuming that
        // continuation twice: on failure Vision both invokes the request's
        // completion handler with the error and throws from `perform`.
        try handler.perform([request])

        let observations = request.results ?? []
        let lines = observations.compactMap { observation -> RecognizedTextLine? in
            // Keep only the best candidate; skip observations without one.
            guard let candidate = observation.topCandidates(1).first else { return nil }
            return RecognizedTextLine(
                text: candidate.string,
                confidence: Double(candidate.confidence),
                normalizedBounds: observation.boundingBox
            )
        }

        // Also protects the division below from a zero line count.
        guard !lines.isEmpty else {
            throw VisionCardOCRServiceError.noRecognizedText
        }

        let average = lines.map(\.confidence).reduce(0, +) / Double(lines.count)
        return OCRTextPayload(
            rawText: lines.map(\.text).joined(separator: "\n"),
            lines: lines,
            averageConfidence: average
        )
    }
}
|
||||
Reference in New Issue
Block a user