Add scan flow MVP and local Axiom skill workspace

This snapshot establishes the camera-to-result recognition flow and related tests while checking in the project skill/docs assets required for the configured local tooling.
This commit is contained in:
Matthias
2026-04-19 21:11:32 +02:00
parent 577214d474
commit a60a76b797
679 changed files with 138964 additions and 73 deletions

View File

@@ -0,0 +1,64 @@
import UIKit
/// Errors surfaced by the card recognition pipeline.
enum CardRecognitionPipelineError: LocalizedError {
    /// OCR could not be performed; the associated string is the message
    /// reported through `errorDescription`.
    case ocrUnavailable(String)

    /// Human-readable description: the message carried by the case, returned unchanged.
    var errorDescription: String? {
        switch self {
        case let .ocrUnavailable(message):
            return message
        }
    }
}
/// Turns a captured card image into a `RecognitionSession`.
///
/// The pipeline prepares the image, tries cloud OCR when the network status
/// provider reports being online, and otherwise (or when the cloud attempt
/// yields no usable text) runs the fallback recognizer on the prepared image.
/// In both paths the recognized text goes through the heuristic extractor and
/// then the field enhancer.
struct CardRecognitionPipeline {
    /// Prepares the input image; its result supplies the upload and thumbnail JPEG data.
    let preprocessor: ImagePreprocessor
    /// Reports connectivity; decides whether cloud OCR is attempted and how a fallback is labelled.
    let networkStatusProvider: any NetworkStatusProviding
    /// Cloud OCR backend; only the `markdown` of its response is read here.
    let cloudOCRClient: any CloudOCRClient
    /// Recognizer used whenever the cloud path is skipped or produces nothing usable.
    let fallbackOCR: any CardTextRecognizing
    /// Converts an OCR text payload into a draft.
    let extractor: CardTextHeuristicExtractor
    /// Post-processes the extracted draft using the raw OCR text.
    let enhancer: any CardFieldEnhancing

    /// Creates a pipeline.
    ///
    /// Only `networkStatusProvider` is required; every other dependency has a
    /// default (stub cloud client, Vision-based fallback OCR, no-op enhancer).
    init(
        preprocessor: ImagePreprocessor = ImagePreprocessor(),
        networkStatusProvider: any NetworkStatusProviding,
        cloudOCRClient: any CloudOCRClient = StubCloudOCRClient(),
        fallbackOCR: any CardTextRecognizing = VisionCardOCRService(),
        extractor: CardTextHeuristicExtractor = CardTextHeuristicExtractor(),
        enhancer: any CardFieldEnhancing = NoOpCardFieldEnhancer()
    ) {
        self.preprocessor = preprocessor
        self.networkStatusProvider = networkStatusProvider
        self.cloudOCRClient = cloudOCRClient
        self.fallbackOCR = fallbackOCR
        self.extractor = extractor
        self.enhancer = enhancer
    }

    /// Recognizes the card shown in `image`.
    ///
    /// - Parameter image: The captured image to analyze.
    /// - Returns: A session holding the enhanced draft and the prepared thumbnail data.
    /// - Throws: Errors from image preparation and from the fallback recognizer.
    ///   Errors thrown by the cloud client are discarded and trigger the fallback instead.
    func recognizeCard(in image: UIImage) async throws -> RecognitionSession {
        let preparedImage = try preprocessor.prepare(image)

        // Cloud path: taken only when online AND the client returns non-blank markdown.
        // `try?` deliberately turns any cloud failure into "use the fallback".
        // NOTE(review): this also discards cancellation errors from the client — confirm
        // that continuing with local OCR after cancellation is intended.
        if networkStatusProvider.isOnline,
           let cloudResponse = try? await cloudOCRClient.recognizeText(
               from: CloudOCRRequest(jpegData: preparedImage.uploadJPEGData)
           ),
           !cloudResponse.markdown.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
            // Every non-empty markdown line becomes a recognized line with a fixed
            // 0.95 confidence and zero bounds; the same 0.95 is used as the average.
            let cloudLines = cloudResponse.markdown
                .split(whereSeparator: \.isNewline)
                .map { RecognizedTextLine(text: String($0), confidence: 0.95, normalizedBounds: .zero) }
            let cloudPayload = OCRTextPayload(
                rawText: cloudResponse.markdown,
                lines: cloudLines,
                averageConfidence: 0.95
            )
            let extracted = extractor.extract(payload: cloudPayload, source: .cloud)
            let enhanced = await enhancer.enhance(draft: extracted, rawText: cloudPayload.rawText)
            return RecognitionSession(draft: enhanced, thumbnailJPEGData: preparedImage.thumbnailJPEGData)
        }

        // Fallback path. Connectivity is read exactly once here so the note and the
        // recorded source always describe the same state; two separate reads could
        // disagree if the status changes in between.
        let source: RecognitionSource
        let note: String
        if networkStatusProvider.isOnline {
            source = .onDeviceFallback
            note = "Cloud OCR is stubbed in this MVP, so StackDex used the local Vision pipeline."
        } else {
            source = .onDeviceOffline
            note = "Offline erkannt — Ergebnis kann weniger genau sein."
        }

        let localPayload = try await fallbackOCR.recognizeText(in: preparedImage)
        let extracted = extractor.extract(payload: localPayload, source: source, notes: [note])
        let enhanced = await enhancer.enhance(draft: extracted, rawText: localPayload.rawText)
        return RecognitionSession(draft: enhanced, thumbnailJPEGData: preparedImage.thumbnailJPEGData)
    }
}