This snapshot establishes the camera-to-result recognition flow and related tests while checking in the project skill/docs assets required for the configured local tooling.
84 lines
2.8 KiB
Swift
84 lines
2.8 KiB
Swift
import CoreGraphics
|
|
import Vision
|
|
|
|
/// One line of text produced by OCR, together with how sure the recognizer
/// was about it and where on the image it was found.
///
/// Two values compare equal only when text, confidence and bounds all match
/// (synthesized `Equatable`).
struct RecognizedTextLine: Equatable {
    /// The recognized string for this line.
    let text: String

    /// Recognizer confidence for `text`. `VisionCardOCRService` stores the
    /// top candidate's `confidence` here, widened to `Double`.
    let confidence: Double

    /// Rectangle locating the line on the analyzed image.
    /// `VisionCardOCRService` stores the observation's `boundingBox` here
    /// without any conversion.
    let normalizedBounds: CGRect
}
|
|
|
|
/// Aggregate result of one OCR pass over an image.
///
/// Equality is synthesized and therefore compares all three fields.
struct OCRTextPayload: Equatable {
    /// All recognized text as a single string. `VisionCardOCRService` builds
    /// it by joining the text of `lines` with `"\n"`.
    let rawText: String

    /// The individual recognized lines, in the order the recognizer
    /// reported them.
    let lines: [RecognizedTextLine]

    /// Summary confidence for the whole pass. `VisionCardOCRService` sets it
    /// to the arithmetic mean of the `confidence` of every entry in `lines`.
    let averageConfidence: Double
}
|
|
|
|
/// Abstraction over an OCR engine that can read text from a prepared image.
///
/// Having the recognizer behind a protocol lets callers depend on this
/// interface instead of a concrete engine such as `VisionCardOCRService`.
protocol CardTextRecognizing {
    /// Recognizes the text contained in `image`.
    ///
    /// - Parameter image: The prepared image to analyze.
    /// - Returns: The recognized text and per-line details.
    /// - Throws: Any error the conforming recognizer reports.
    func recognizeText(in image: PreparedImage) async throws -> OCRTextPayload
}
|
|
|
|
/// Failures raised by `VisionCardOCRService` itself (as opposed to errors
/// forwarded from Vision).
enum VisionCardOCRServiceError: Error {
    /// The recognition pass finished but yielded no usable text lines.
    case noRecognizedText
}

// MARK: - LocalizedError

extension VisionCardOCRServiceError: LocalizedError {
    /// Human-readable description of the failure.
    var errorDescription: String? {
        // Exhaustive switch on purpose: adding a case must force a new message.
        switch self {
        case .noRecognizedText:
            return "Vision OCR could not find readable card text."
        }
    }
}
|
|
|
|
/// `CardTextRecognizing` implementation that runs Vision's
/// `VNRecognizeTextRequest` on a prepared image's `analysisCGImage`.
struct VisionCardOCRService: CardTextRecognizing {
    /// Vocabulary handed to the request's `customWords` so that card-specific
    /// terms are available to Vision's language correction.
    private let customWords = [
        "Pokémon", "Charizard", "Glurak", "Pikachu", "Blastoise", "Venusaur",
        "Illustration", "Scarlet", "Violet", "Trainer", "Holo", "Rare", "Ultra",
        "VMAX", "GX", "ex", "Base", "Set", "Promo", "Shiny"
    ]

    /// Recognizes text in `image` using accurate-level recognition for
    /// English (`en-US`) and German (`de-DE`) with language correction enabled.
    ///
    /// - Parameter image: Prepared image whose `analysisCGImage` is analyzed.
    /// - Returns: The recognized lines, their newline-joined text and the
    ///   mean line confidence.
    /// - Throws: The error Vision reports for the request or for `perform`,
    ///   or `VisionCardOCRServiceError.noRecognizedText` when no line yields
    ///   a candidate.
    func recognizeText(in image: PreparedImage) async throws -> OCRTextPayload {
        // `perform` is synchronous and calls the completion handler before it
        // returns, so the handler only records what happened. The single
        // throw/return decision is made afterwards. Resuming a continuation
        // from both the handler and the `catch` around `perform` could resume
        // it twice when a request fails, which is a fatal misuse.
        var observations: [VNRecognizedTextObservation] = []
        var requestError: Error?

        let request = VNRecognizeTextRequest { finishedRequest, error in
            requestError = error
            observations = (finishedRequest.results as? [VNRecognizedTextObservation]) ?? []
        }
        request.recognitionLevel = .accurate
        request.recognitionLanguages = ["en-US", "de-DE"]
        request.usesLanguageCorrection = true
        request.customWords = customWords

        let handler = VNImageRequestHandler(cgImage: image.analysisCGImage, options: [:])

        // Failures surfaced by `perform` itself propagate directly.
        try handler.perform([request])

        // A failure reported only through the completion handler must still
        // reach the caller instead of being masked as "no text".
        if let requestError {
            throw requestError
        }

        return try makePayload(from: observations)
    }

    /// Converts Vision observations into an `OCRTextPayload`, keeping the top
    /// candidate of each observation and skipping observations without one.
    ///
    /// - Throws: `VisionCardOCRServiceError.noRecognizedText` when no
    ///   observation produced a candidate.
    private func makePayload(from observations: [VNRecognizedTextObservation]) throws -> OCRTextPayload {
        let lines = observations.compactMap { observation -> RecognizedTextLine? in
            guard let candidate = observation.topCandidates(1).first else { return nil }
            return RecognizedTextLine(
                text: candidate.string,
                confidence: Double(candidate.confidence),
                normalizedBounds: observation.boundingBox
            )
        }

        // Also guarantees a non-zero divisor for the average below.
        guard !lines.isEmpty else {
            throw VisionCardOCRServiceError.noRecognizedText
        }

        let average = lines.map(\.confidence).reduce(0, +) / Double(lines.count)
        return OCRTextPayload(
            rawText: lines.map(\.text).joined(separator: "\n"),
            lines: lines,
            averageConfidence: average
        )
    }
}
|