Add scan flow MVP and local Axiom skill workspace

This snapshot establishes the camera-to-result recognition flow and related tests while checking in the project skill/docs assets required for the configured local tooling.
This commit is contained in:
Matthias
2026-04-19 21:11:32 +02:00
parent 577214d474
commit a60a76b797
679 changed files with 138964 additions and 73 deletions

View File

@@ -5,57 +5,35 @@
// Created by Matthias Meister on 18.04.26.
//
import SwiftUI
import SwiftData
import SwiftUI
struct ContentView: View {
@Environment(\.modelContext) private var modelContext
@Query private var items: [Item]
@StateObject private var flowModel = ScanFlowModel()
var body: some View {
NavigationSplitView {
List {
ForEach(items) { item in
NavigationLink {
Text("Item at \(item.timestamp, format: Date.FormatStyle(date: .numeric, time: .standard))")
} label: {
Text(item.timestamp, format: Date.FormatStyle(date: .numeric, time: .standard))
NavigationStack(path: $flowModel.path) {
ScanView(flowModel: flowModel)
.navigationDestination(for: ScanRoute.self) { route in
switch route {
case .result:
ResultEditorView(flowModel: flowModel, mode: .recognized)
case .manual:
ResultEditorView(flowModel: flowModel, mode: .manual)
}
}
.onDelete(perform: deleteItems)
}
.toolbar {
ToolbarItem(placement: .navigationBarTrailing) {
EditButton()
.task {
flowModel.startObservers()
await flowModel.prepareVisibleServices()
}
ToolbarItem {
Button(action: addItem) {
Label("Add Item", systemImage: "plus")
}
.onDisappear {
flowModel.stopObservers()
}
}
} detail: {
Text("Select an item")
}
}
private func addItem() {
withAnimation {
let newItem = Item(timestamp: Date())
modelContext.insert(newItem)
}
}
private func deleteItems(offsets: IndexSet) {
withAnimation {
for index in offsets {
modelContext.delete(items[index])
}
}
}
}
#Preview {
ContentView()
.modelContainer(for: Item.self, inMemory: true)
.modelContainer(for: ConfirmedScanRecord.self, inMemory: true)
}

View File

@@ -0,0 +1,132 @@
import Combine
import Foundation
import SwiftUI
/// Destinations that can be pushed onto the scan flow's navigation path:
/// `result` for a recognized draft, `manual` for manual entry.
enum ScanRoute: Hashable {
    case result, manual
}
/// Main-actor view model that coordinates the scan flow.
///
/// Owns the camera, photo-library and network services, feeds captured or
/// imported images through the `CardRecognitionPipeline`, and publishes the
/// navigation path together with the session currently being edited.
@MainActor
final class ScanFlowModel: ObservableObject {
    /// Navigation path of the scan flow.
    @Published var path: [ScanRoute] = []
    /// Session currently being reviewed or edited, if any.
    @Published var currentSession: RecognitionSession?
    /// `true` while a capture or import is being processed.
    @Published var isRecognizing = false
    /// Latest user-facing message (for example an error description), if any.
    @Published var transientMessage: String?

    let cameraService: CameraService
    let photoLibraryService: PhotoLibraryService
    let networkMonitor: NetworkMonitor
    private let pipeline: CardRecognitionPipeline

    /// Creates the services and wires the network monitor into the pipeline.
    init() {
        let cameraService = CameraService()
        let photoLibraryService = PhotoLibraryService()
        let networkMonitor = NetworkMonitor()
        self.cameraService = cameraService
        self.photoLibraryService = photoLibraryService
        self.networkMonitor = networkMonitor
        self.pipeline = CardRecognitionPipeline(
            networkStatusProvider: networkMonitor,
            cloudOCRClient: StubCloudOCRClient(),
            fallbackOCR: VisionCardOCRService(),
            enhancer: NoOpCardFieldEnhancer()
        )
    }

    /// Starts network monitoring.
    func startObservers() {
        networkMonitor.startMonitoring()
    }

    /// Stops network monitoring and the camera session.
    func stopObservers() {
        networkMonitor.stopMonitoring()
        cameraService.stopSession()
    }

    /// Prepares the camera (if already authorized) and refreshes recent photos.
    func prepareVisibleServices() async {
        await cameraService.prepareIfAuthorized()
        await photoLibraryService.refreshRecentsIfPossible()
    }

    /// Requests camera access if needed and starts the camera.
    func enableCamera() async {
        await cameraService.requestAccessAndStart()
    }

    /// Captures a photo and runs recognition on it.
    ///
    /// Failures are surfaced through `transientMessage`. The call is ignored
    /// while another capture or import is still in progress, because both
    /// paths suspend and would otherwise race on `path` and `currentSession`.
    func capturePhoto() async {
        guard !isRecognizing else { return }
        isRecognizing = true
        defer { isRecognizing = false }
        do {
            let image = try await cameraService.capturePhoto()
            try await recognize(image: image)
        } catch {
            transientMessage = error.localizedDescription
        }
    }

    /// Loads a photo from the library and runs recognition on it.
    ///
    /// If recognition throws, the flow falls back to manual entry, keeping the
    /// raw text and thumbnail of the session that was current at that point.
    /// The call is ignored while another capture or import is in progress.
    func importRecentPhoto(_ item: RecentPhotoItem) async {
        guard !isRecognizing else { return }
        isRecognizing = true
        defer { isRecognizing = false }
        guard let image = await photoLibraryService.loadImage(for: item) else {
            transientMessage = "The selected photo could not be loaded."
            return
        }
        do {
            try await recognize(image: image)
        } catch {
            currentSession = RecognitionSession(
                draft: CardRecognitionDraft.manualPrefill(rawText: currentSession?.draft.rawText ?? ""),
                thumbnailJPEGData: currentSession?.thumbnailJPEGData
            )
            path = [.manual]
            transientMessage = error.localizedDescription
        }
    }

    /// Opens manual entry as the only route, creating an empty draft when no
    /// session exists and downgrading the existing draft otherwise.
    func startManualEntry() {
        if currentSession == nil {
            currentSession = RecognitionSession(draft: .manualPrefill())
        } else {
            downgradeCurrentDraftToManual()
        }
        path = [.manual]
    }

    /// Pushes manual entry on top of the current path, keeping the draft's
    /// confidence and only switching its source to manual.
    func openManualEntryFromResult() {
        guard currentSession != nil else {
            startManualEntry()
            return
        }
        currentSession?.draft.source = .manual
        if path.last != .manual {
            path.append(.manual)
        }
    }

    /// Replaces the draft of the current session, if there is one.
    func updateDraft(_ draft: CardRecognitionDraft) {
        currentSession?.draft = draft
    }

    /// Discards the current session and pops back to the scan screen.
    /// - Parameter message: Optional message to show; `nil` clears any previous one.
    func returnToScan(message: String? = nil) {
        currentSession = nil
        path = []
        transientMessage = message
    }

    /// Runs the pipeline and routes to the result editor, or to manual entry
    /// when the draft contains no detected content.
    private func recognize(image: UIImage) async throws {
        let session = try await pipeline.recognizeCard(in: image)
        currentSession = session
        if session.draft.hasDetectedContent {
            path = [.result]
        } else {
            downgradeCurrentDraftToManual()
            path = [.manual]
        }
    }

    /// Marks the current draft as a low-confidence manual entry.
    private func downgradeCurrentDraftToManual() {
        currentSession?.draft.source = .manual
        currentSession?.draft.confidence = .low
    }
}

View File

@@ -1,18 +0,0 @@
//
// Item.swift
// StackDex
//
// Created by Matthias Meister on 18.04.26.
//
import Foundation
import SwiftData
/// SwiftData model holding a single timestamp.
@Model
final class Item {
/// Timestamp stored for this item.
var timestamp: Date
/// Creates an item with the given timestamp.
init(timestamp: Date) {
self.timestamp = timestamp
}
}

View File

@@ -0,0 +1,138 @@
import Foundation
/// Where the contents of a recognition draft came from.
enum RecognitionSource: String, Codable, CaseIterable, Identifiable {
    case cloud, onDeviceOffline, onDeviceFallback, manual

    /// The raw value doubles as the stable identifier.
    var id: String { rawValue }

    /// Short display name of the source.
    var title: String {
        switch self {
        case .cloud: "Cloud OCR"
        case .onDeviceOffline: "On-Device (Offline)"
        case .onDeviceFallback: "On-Device Fallback"
        case .manual: "Manual Entry"
        }
    }

    /// One-sentence explanation shown alongside the title.
    var detail: String {
        switch self {
        case .cloud: "Prepared client boundary for future Convex/Mistral OCR."
        case .onDeviceOffline: "Offline erkannt — Ergebnis kann weniger genau sein."
        case .onDeviceFallback: "Cloud path unavailable — used the on-device Vision fallback."
        case .manual: "No image is persisted after confirmation."
        }
    }
}
/// Coarse confidence rating of a recognition draft, ordered low < medium < high.
enum ConfidenceLevel: String, Codable, CaseIterable, Comparable {
    case low, medium, high

    /// Capitalized raw value, e.g. "Medium".
    var title: String { rawValue.capitalized }

    /// Guidance text shown to the user for this level.
    var helperText: String {
        switch self {
        case .low: "Low confidence — manual corrections are recommended."
        case .medium: "Please review the extracted fields before confirming."
        case .high: "Looks solid — a quick confirmation should be enough."
        }
    }

    /// Ordinal rank backing the `Comparable` conformance.
    private var rank: Int {
        switch self {
        case .low: 0
        case .medium: 1
        case .high: 2
        }
    }

    static func < (lhs: ConfidenceLevel, rhs: ConfidenceLevel) -> Bool {
        rhs.rank > lhs.rank
    }
}
/// Rarity tiers offered in the editor; raw values are the display strings.
enum CardRarity: String, CaseIterable, Identifiable {
    // Placeholder used when no rarity was recognized.
    case unknown = "Unknown"
    // Basic tiers.
    case common = "Common", uncommon = "Uncommon", rare = "Rare"
    // Premium tiers.
    case holoRare = "Holo Rare", ultraRare = "Ultra Rare"
    case illustrationRare = "Illustration Rare", specialArtRare = "Special Art Rare"
    case hyperRare = "Hyper Rare", secretRare = "Secret Rare"

    /// The raw value doubles as the stable identifier.
    var id: String { rawValue }
}
/// Editable card fields produced by OCR extraction or manual entry.
struct CardRecognitionDraft: Equatable {
    var cardName: String
    var cardNumber: String
    var setIdentifier: String
    /// Rarity as a display string; `CardRarity.unknown.rawValue` when none was found.
    var rarity: String
    var source: RecognitionSource
    var confidence: ConfidenceLevel
    /// Hints shown to the user alongside the fields.
    var notes: [String]
    /// Recognized text the fields were derived from.
    var rawText: String

    /// `true` when at least one field carries recognized content.
    ///
    /// The placeholder rarity "Unknown" does not count as content. Drafts are
    /// always created with a rarity string, so counting the placeholder would
    /// make this property unconditionally `true`.
    var hasDetectedContent: Bool {
        let hasKnownRarity = rarity.trimmedNilIfEmpty.map { $0 != CardRarity.unknown.rawValue } ?? false
        return !cardName.isBlank || !cardNumber.isBlank || !setIdentifier.isBlank || hasKnownRarity
    }

    /// Card number and set joined by " · ", skipping whichever part is blank.
    var combinedNumberAndSet: String {
        [cardNumber, setIdentifier]
            .compactMap(\.trimmedNilIfEmpty)
            .joined(separator: " · ")
    }

    /// Creates an empty, low-confidence manual draft.
    /// - Parameter rawText: OCR text to carry over into the draft, if any.
    static func manualPrefill(rawText: String = "") -> CardRecognitionDraft {
        CardRecognitionDraft(
            cardName: "",
            cardNumber: "",
            setIdentifier: "",
            rarity: CardRarity.unknown.rawValue,
            source: .manual,
            confidence: .low,
            notes: ["Manual entry is always available when OCR misses fields."],
            rawText: rawText
        )
    }
}
/// One recognition attempt: an editable draft plus an optional JPEG thumbnail.
struct RecognitionSession: Identifiable, Equatable {
/// Identity of the session.
let id: UUID
/// Draft being reviewed or edited.
var draft: CardRecognitionDraft
/// JPEG-encoded thumbnail of the scanned image, if one is available.
var thumbnailJPEGData: Data?
/// Creates a session; `id` defaults to a fresh UUID and the thumbnail to `nil`.
init(id: UUID = UUID(), draft: CardRecognitionDraft, thumbnailJPEGData: Data? = nil) {
self.id = id
self.draft = draft
self.thumbnailJPEGData = thumbnailJPEGData
}
}
private extension String {
    /// `true` when the string is empty or contains only whitespace/newlines.
    var isBlank: Bool {
        trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
    }

    /// The string with surrounding whitespace/newlines removed, or `nil` when
    /// nothing remains.
    var trimmedNilIfEmpty: String? {
        let stripped = trimmingCharacters(in: .whitespacesAndNewlines)
        guard !stripped.isEmpty else { return nil }
        return stripped
    }
}

View File

@@ -0,0 +1,46 @@
import Foundation
import SwiftData
/// SwiftData record of a confirmed scan. Only text fields are stored.
@Model
final class ConfirmedScanRecord {
    var confirmedAt: Date
    var cardName: String
    var cardNumber: String
    var setIdentifier: String
    var rarity: String
    /// Raw value of the `RecognitionSource` the draft had when confirmed.
    var recognitionSource: String
    /// Raw value of the draft's `ConfidenceLevel`.
    var confidence: String
    /// Leading excerpt of the OCR text.
    var rawTextPreview: String

    /// Memberwise initializer; `confirmedAt` defaults to the current date.
    init(
        confirmedAt: Date = .now,
        cardName: String,
        cardNumber: String,
        setIdentifier: String,
        rarity: String,
        recognitionSource: String,
        confidence: String,
        rawTextPreview: String
    ) {
        self.confirmedAt = confirmedAt
        self.cardName = cardName
        self.cardNumber = cardNumber
        self.setIdentifier = setIdentifier
        self.rarity = rarity
        self.recognitionSource = recognitionSource
        self.confidence = confidence
        self.rawTextPreview = rawTextPreview
    }

    /// Builds a record from a draft, keeping at most the first 240 characters
    /// of its raw text.
    convenience init(draft: CardRecognitionDraft) {
        let sourceName = draft.source.rawValue
        let confidenceName = draft.confidence.rawValue
        let preview = String(draft.rawText.prefix(240))
        self.init(
            cardName: draft.cardName,
            cardNumber: draft.cardNumber,
            setIdentifier: draft.setIdentifier,
            rarity: draft.rarity,
            recognitionSource: sourceName,
            confidence: confidenceName,
            rawTextPreview: preview
        )
    }
}

View File

@@ -0,0 +1,172 @@
import AVFoundation
import Combine
import OSLog
import UIKit
/// Tracks how many begin/commit configuration pairs are currently open.
struct CameraSessionState {
    /// Number of `beginConfiguration()` calls not yet committed; never negative.
    private(set) var configurationDepth = 0

    /// `true` only when no configuration is open.
    var canStartSession: Bool {
        configurationDepth == 0
    }

    /// Opens one configuration level.
    mutating func beginConfiguration() {
        configurationDepth += 1
    }

    /// Closes one configuration level; unbalanced commits leave the depth at zero.
    mutating func commitConfiguration() {
        guard configurationDepth > 0 else { return }
        configurationDepth -= 1
    }
}
/// Failures reported by `CameraService`, each with a user-facing description.
enum CameraServiceError: LocalizedError {
    case unauthorized, unavailable, captureFailed

    var errorDescription: String? {
        switch self {
        case .unauthorized: "Camera access is required to capture a card photo."
        case .unavailable: "The device camera is unavailable."
        case .captureFailed: "The photo could not be captured."
        }
    }
}
/// Main-actor wrapper around an `AVCaptureSession` configured for still photos.
///
/// Publishes authorization and session state and exposes an async
/// `capturePhoto()` that bridges the photo-capture delegate callback.
@MainActor
final class CameraService: NSObject, ObservableObject {
    private static let logger = Logger(subsystem: "dev.matthiasmeister.StackDex", category: "CameraService")

    @Published private(set) var authorizationStatus: AVAuthorizationStatus = AVCaptureDevice.authorizationStatus(for: .video)
    @Published private(set) var isSessionRunning = false
    @Published private(set) var isConfigured = false

    /// Capture session, exposed so a preview layer can attach to it.
    let session = AVCaptureSession()
    private let photoOutput = AVCapturePhotoOutput()
    /// Continuation of the capture currently in flight; resumed and cleared by
    /// the photo-capture delegate callback.
    private var captureContinuation: CheckedContinuation<UIImage, Error>?
    private var sessionState = CameraSessionState()

    /// Refreshes the authorization status and starts the session only when
    /// access has already been granted. Never prompts the user.
    func prepareIfAuthorized() async {
        authorizationStatus = AVCaptureDevice.authorizationStatus(for: .video)
        guard authorizationStatus == .authorized else { return }
        await configureAndStartSessionIfNeeded()
    }

    /// Requests camera access when it is still undetermined, then starts the
    /// session if access is granted.
    func requestAccessAndStart() async {
        authorizationStatus = AVCaptureDevice.authorizationStatus(for: .video)
        if authorizationStatus == .notDetermined {
            authorizationStatus = await AVCaptureDevice.requestAccess(for: .video) ? .authorized : .denied
        }
        guard authorizationStatus == .authorized else { return }
        await configureAndStartSessionIfNeeded()
    }

    /// Stops the session if it is running.
    func stopSession() {
        if session.isRunning {
            session.stopRunning()
            isSessionRunning = false
        }
    }

    /// Captures a still photo.
    /// - Returns: The captured image.
    /// - Throws: `CameraServiceError.unauthorized` without camera access,
    ///   `.unavailable` when the session could not be configured and started,
    ///   `.captureFailed` when another capture is already in flight, or the
    ///   error reported by the capture delegate.
    func capturePhoto() async throws -> UIImage {
        guard authorizationStatus == .authorized else {
            throw CameraServiceError.unauthorized
        }
        if !isSessionRunning {
            await configureAndStartSessionIfNeeded()
        }
        // Configuration may have failed (e.g. no camera device); do not ask
        // the photo output to capture from a session that is not running.
        guard isSessionRunning else {
            throw CameraServiceError.unavailable
        }
        // Overwriting a pending continuation would leave the earlier caller
        // suspended forever, so reject overlapping captures instead.
        guard captureContinuation == nil else {
            throw CameraServiceError.captureFailed
        }
        return try await withCheckedThrowingContinuation { continuation in
            captureContinuation = continuation
            photoOutput.capturePhoto(with: AVCapturePhotoSettings(), delegate: self)
        }
    }

    /// Configures the session once, then starts it. On later calls only the
    /// start step runs.
    private func configureAndStartSessionIfNeeded() async {
        if isConfigured {
            startSessionIfNeeded()
            return
        }
        var shouldStartSession = false
        sessionState.beginConfiguration()
        Self.logger.debug("Camera session beginConfiguration depth=\(self.sessionState.configurationDepth)")
        session.beginConfiguration()
        do {
            try attachCameraInputAndPhotoOutput()
            isConfigured = true
            shouldStartSession = true
        } catch {
            isConfigured = false
            Self.logger.error("Camera session configuration failed: \(String(describing: error), privacy: .public)")
        }
        // Always balance beginConfiguration, including on the failure path.
        session.commitConfiguration()
        sessionState.commitConfiguration()
        Self.logger.debug("Camera session commitConfiguration depth=\(self.sessionState.configurationDepth)")
        if shouldStartSession {
            startSessionIfNeeded()
        }
    }

    /// Adds the back wide-angle camera and the photo output to the session.
    /// - Throws: `CameraServiceError.unavailable` when the device is missing or
    ///   the session refuses the input/output, or the error from creating the
    ///   device input.
    private func attachCameraInputAndPhotoOutput() throws {
        session.sessionPreset = .photo
        guard let device = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .back) else {
            throw CameraServiceError.unavailable
        }
        let hasDeviceInput = session.inputs.contains { ($0 as? AVCaptureDeviceInput)?.device == device }
        if !hasDeviceInput {
            let input = try AVCaptureDeviceInput(device: device)
            guard session.canAddInput(input) else {
                throw CameraServiceError.unavailable
            }
            session.addInput(input)
        }
        if !session.outputs.contains(photoOutput) {
            guard session.canAddOutput(photoOutput) else {
                throw CameraServiceError.unavailable
            }
            session.addOutput(photoOutput)
        }
    }

    /// Starts the session unless a configuration is still open or it is
    /// already running.
    private func startSessionIfNeeded() {
        guard sessionState.canStartSession else {
            let message = "Attempted to start AVCaptureSession before commitConfiguration completed."
            Self.logger.fault("\(message, privacy: .public)")
            assertionFailure(message)
            return
        }
        guard !session.isRunning else { return }
        Self.logger.debug("Starting camera session")
        // NOTE(review): startRunning() blocks until the session has started and
        // this class is main-actor isolated — consider moving the call to a
        // dedicated session queue.
        session.startRunning()
        isSessionRunning = true
    }
}
extension CameraService: AVCapturePhotoCaptureDelegate {
    /// Converts the capture callback into a single result and hands it to the
    /// pending continuation on the main actor, then clears the continuation.
    nonisolated func photoOutput(_ output: AVCapturePhotoOutput, didFinishProcessingPhoto photo: AVCapturePhoto, error: Error?) {
        Task { @MainActor in
            let outcome: Result<UIImage, Error>
            if let error {
                outcome = .failure(error)
            } else if let data = photo.fileDataRepresentation(), let image = UIImage(data: data) {
                outcome = .success(image)
            } else {
                // No encodable data or undecodable bytes.
                outcome = .failure(CameraServiceError.captureFailed)
            }
            captureContinuation?.resume(with: outcome)
            captureContinuation = nil
        }
    }
}

View File

@@ -0,0 +1,64 @@
import UIKit
/// Errors raised by the recognition pipeline.
enum CardRecognitionPipelineError: LocalizedError {
    /// OCR could not be performed; the payload is the user-facing message.
    case ocrUnavailable(String)

    var errorDescription: String? {
        switch self {
        case let .ocrUnavailable(message): message
        }
    }
}
/// Turns an image into a `RecognitionSession`.
///
/// The image is preprocessed, sent to the cloud OCR client when online, and
/// otherwise (or when the cloud yields nothing) recognized on device. The
/// recognized text is then run through the heuristic extractor and the enhancer.
struct CardRecognitionPipeline {
    /// Confidence assigned to cloud results, which carry no per-line confidence.
    private static let assumedCloudConfidence = 0.95

    let preprocessor: ImagePreprocessor
    let networkStatusProvider: any NetworkStatusProviding
    let cloudOCRClient: any CloudOCRClient
    let fallbackOCR: any CardTextRecognizing
    let extractor: CardTextHeuristicExtractor
    let enhancer: any CardFieldEnhancing

    init(
        preprocessor: ImagePreprocessor = ImagePreprocessor(),
        networkStatusProvider: any NetworkStatusProviding,
        cloudOCRClient: any CloudOCRClient = StubCloudOCRClient(),
        fallbackOCR: any CardTextRecognizing = VisionCardOCRService(),
        extractor: CardTextHeuristicExtractor = CardTextHeuristicExtractor(),
        enhancer: any CardFieldEnhancing = NoOpCardFieldEnhancer()
    ) {
        self.preprocessor = preprocessor
        self.networkStatusProvider = networkStatusProvider
        self.cloudOCRClient = cloudOCRClient
        self.fallbackOCR = fallbackOCR
        self.extractor = extractor
        self.enhancer = enhancer
    }

    /// Recognizes the card in `image`.
    /// - Returns: A session holding the extracted draft and a thumbnail.
    /// - Throws: Errors from preprocessing or from the on-device recognizer.
    ///   Cloud failures are not thrown; they fall through to the on-device path.
    func recognizeCard(in image: UIImage) async throws -> RecognitionSession {
        let preparedImage = try preprocessor.prepare(image)

        // Read connectivity once. The status can change while the calls below
        // are suspended, and the source label and note must agree.
        let isOnline = networkStatusProvider.isOnline

        if isOnline, let cloudPayload = await cloudPayload(for: preparedImage) {
            var draft = extractor.extract(payload: cloudPayload, source: .cloud)
            draft = await enhancer.enhance(draft: draft, rawText: cloudPayload.rawText)
            return RecognitionSession(draft: draft, thumbnailJPEGData: preparedImage.thumbnailJPEGData)
        }

        let note = isOnline
            ? "Cloud OCR is stubbed in this MVP, so StackDex used the local Vision pipeline."
            : "Offline erkannt — Ergebnis kann weniger genau sein."
        let source: RecognitionSource = isOnline ? .onDeviceFallback : .onDeviceOffline
        let payload = try await fallbackOCR.recognizeText(in: preparedImage)
        var draft = extractor.extract(payload: payload, source: source, notes: [note])
        draft = await enhancer.enhance(draft: draft, rawText: payload.rawText)
        return RecognitionSession(draft: draft, thumbnailJPEGData: preparedImage.thumbnailJPEGData)
    }

    /// Asks the cloud client for text and converts it into an OCR payload.
    /// - Returns: `nil` when the client throws, returns nothing, or returns
    ///   only whitespace.
    private func cloudPayload(for preparedImage: PreparedImage) async -> OCRTextPayload? {
        let request = CloudOCRRequest(jpegData: preparedImage.uploadJPEGData)
        // Best effort by design: any cloud error falls back to on-device OCR.
        guard let response = try? await cloudOCRClient.recognizeText(from: request),
              !response.markdown.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else {
            return nil
        }
        // Cloud lines have no geometry, so their bounds are `.zero`.
        let lines = response.markdown
            .split(whereSeparator: \.isNewline)
            .map { RecognizedTextLine(text: String($0), confidence: Self.assumedCloudConfidence, normalizedBounds: .zero) }
        return OCRTextPayload(rawText: response.markdown, lines: lines, averageConfidence: Self.assumedCloudConfidence)
    }
}

View File

@@ -0,0 +1,155 @@
import CoreGraphics
import Foundation
/// Derives card fields (name, number, set, rarity) and a confidence rating
/// from recognized text lines using simple heuristics.
struct CardTextHeuristicExtractor {
    /// Matches collector numbers such as `12/102` or `12 / 102`.
    private let numberRegex = try? NSRegularExpression(pattern: #"\b\d{1,3}\s*/\s*\d{1,3}\b"#)

    /// Rarity phrases in match order: multi-word phrases first, and "uncommon"
    /// before "common" so the longer word is not shadowed by its substring.
    private let rarityKeywords: [(needle: String, rarity: CardRarity)] = [
        ("special art rare", .specialArtRare),
        ("illustration rare", .illustrationRare),
        ("hyper rare", .hyperRare),
        ("secret rare", .secretRare),
        ("ultra rare", .ultraRare),
        ("holo rare", .holoRare),
        ("uncommon", .uncommon),
        ("common", .common),
        ("rare", .rare),
    ]

    // NOTE(review): in the reviewed text the three symbol needles and one
    // separator needle were empty string literals, most likely glyphs lost in
    // transit. An empty needle either never matches or always matches,
    // depending on which `contains` overload is picked. They are restored here
    // as explicit escapes, presumed to be the star / diamond / circle rarity
    // symbols and a bullet separator — confirm against the original file.
    private let holoGlyph = "\u{2605}"      // ★
    private let uncommonGlyph = "\u{25C6}"  // ◆
    private let commonGlyph = "\u{25CF}"    // ●
    /// Separator glyphs replaced by a space when isolating the set identifier.
    private let separatorGlyphs = ["\u{2022}", "·"]

    /// Extracts a draft from an OCR payload.
    /// - Parameters:
    ///   - payload: Recognized lines plus their average confidence.
    ///   - source: Recognition source recorded on the draft.
    ///   - notes: Notes to carry into the draft; further hints may be appended.
    /// - Returns: A draft whose `rawText` is the normalized, non-empty lines
    ///   joined by newlines, and whose notes keep their insertion order
    ///   without duplicates.
    func extract(payload: OCRTextPayload, source: RecognitionSource, notes: [String] = []) -> CardRecognitionDraft {
        let cleanedLines = payload.lines
            .map { line in
                RecognizedTextLine(
                    text: normalize(line.text),
                    confidence: line.confidence,
                    normalizedBounds: line.normalizedBounds
                )
            }
            .filter { !$0.text.isEmpty }
        let rawText = cleanedLines.map(\.text).joined(separator: "\n")

        let cardNumber = extractCardNumber(from: rawText)
        let rarity = extractRarity(from: rawText)
        let cardName = extractCardName(from: cleanedLines)
        let setIdentifier = extractSetIdentifier(from: cleanedLines, cardNumber: cardNumber, rarity: rarity)

        // The "Unknown" placeholder does not count as a found field.
        let foundCount = [cardName, cardNumber, setIdentifier, rarity == CardRarity.unknown.rawValue ? "" : rarity]
            .filter { !$0.isEmpty }
            .count

        let confidence: ConfidenceLevel
        if foundCount >= 3 && payload.averageConfidence >= 0.68 {
            confidence = .high
        } else if foundCount >= 2 && payload.averageConfidence >= 0.45 {
            confidence = .medium
        } else {
            confidence = .low
        }

        var draftNotes = notes
        if confidence != .high {
            draftNotes.append(confidence.helperText)
        }
        if foundCount == 0 {
            draftNotes.append("No structured match was found — manual entry is prefilled with OCR fragments.")
        }

        // De-duplicate while keeping order. Round-tripping through a Set would
        // shuffle the hints between runs and make equal drafts compare unequal.
        var seenNotes = Set<String>()
        let uniqueNotes = draftNotes.filter { seenNotes.insert($0).inserted }

        return CardRecognitionDraft(
            cardName: cardName,
            cardNumber: cardNumber,
            setIdentifier: setIdentifier,
            rarity: rarity,
            source: source,
            confidence: confidence,
            notes: uniqueNotes,
            rawText: rawText
        )
    }

    /// Picks the highest-positioned plausible line as the card name.
    ///
    /// Only lines with `midY > 0.45` are considered, highest `midY` first and
    /// higher confidence first on ties. Lines containing digits, "hp" or
    /// "trainer", or with fewer than three characters, are skipped. Falls back
    /// to the first non-empty line.
    /// NOTE(review): assumes normalized bounds with a bottom-left origin, as
    /// produced by Vision bounding boxes — confirm for other recognizers.
    private func extractCardName(from lines: [RecognizedTextLine]) -> String {
        let upperLines = lines
            .filter { $0.normalizedBounds.midY > 0.45 }
            .sorted { lhs, rhs in
                if lhs.normalizedBounds.midY == rhs.normalizedBounds.midY {
                    return lhs.confidence > rhs.confidence
                }
                return lhs.normalizedBounds.midY > rhs.normalizedBounds.midY
            }
        let candidate = upperLines.first { line in
            let text = line.text
            return text.rangeOfCharacter(from: .decimalDigits) == nil &&
                !text.localizedCaseInsensitiveContains("hp") &&
                !text.localizedCaseInsensitiveContains("trainer") &&
                text.count > 2
        }
        return candidate?.text ?? lines.first(where: { !$0.text.isEmpty })?.text ?? ""
    }

    /// Returns the first `n/m` collector number with spaces removed, or "".
    private func extractCardNumber(from rawText: String) -> String {
        guard let numberRegex else { return "" }
        let range = NSRange(rawText.startIndex..., in: rawText)
        guard let match = numberRegex.firstMatch(in: rawText, options: [], range: range),
              let matchRange = Range(match.range, in: rawText) else {
            return ""
        }
        return String(rawText[matchRange]).replacingOccurrences(of: " ", with: "")
    }

    /// Finds the set identifier.
    ///
    /// Prefers the remainder of the line that contains the card number. If
    /// that leaves fewer than three characters, the first low-positioned line
    /// (`midY < 0.35`) that has letters and mentions neither the rarity nor
    /// "hp" is used instead.
    private func extractSetIdentifier(from lines: [RecognizedTextLine], cardNumber: String, rarity: String) -> String {
        guard !lines.isEmpty else { return "" }

        if let lineContainingNumber = lines.first(where: { !cardNumber.isEmpty && $0.text.contains(cardNumber) }) {
            var remainder = lineContainingNumber.text.replacingOccurrences(of: cardNumber, with: "")
            for glyph in separatorGlyphs {
                remainder = remainder.replacingOccurrences(of: glyph, with: " ")
            }
            let stripped = remainder.trimmingCharacters(in: .whitespacesAndNewlines)
            if stripped.count > 2 {
                return stripped
            }
        }

        let bottomCandidates = lines
            .filter { $0.normalizedBounds.midY < 0.35 }
            .map(\.text)
            .filter {
                !$0.localizedCaseInsensitiveContains(rarity) &&
                    $0.rangeOfCharacter(from: .letters) != nil &&
                    !$0.localizedCaseInsensitiveContains("hp")
            }
        return bottomCandidates.first ?? ""
    }

    /// Maps rarity glyphs and keywords in `rawText` to a `CardRarity` raw value.
    /// Glyphs and "holo" are checked first, then the keyword table in order.
    private func extractRarity(from rawText: String) -> String {
        let lowered = rawText.lowercased()
        if lowered.contains(holoGlyph) || lowered.contains("holo") {
            return CardRarity.holoRare.rawValue
        }
        if lowered.contains(uncommonGlyph) {
            return CardRarity.uncommon.rawValue
        }
        if lowered.contains(commonGlyph) {
            return CardRarity.common.rawValue
        }
        for keyword in rarityKeywords where lowered.contains(keyword.needle) {
            return keyword.rarity.rawValue
        }
        return CardRarity.unknown.rawValue
    }

    /// Trims the text and collapses every run of whitespace (including
    /// non-breaking spaces) into a single space.
    ///
    /// The replacement as shown in the reviewed text swapped a space for a
    /// space and therefore did nothing.
    private func normalize(_ value: String) -> String {
        value
            .split(whereSeparator: \.isWhitespace)
            .joined(separator: " ")
    }
}

View File

@@ -0,0 +1,19 @@
import Foundation
/// Input for a cloud OCR call.
struct CloudOCRRequest {
/// JPEG-encoded image bytes to recognize.
let jpegData: Data
}
/// Text returned by a cloud OCR call.
struct CloudOCRTextResponse {
/// Recognized text; the name suggests Markdown formatting — TODO confirm with the backend contract.
let markdown: String
}
/// Boundary for a remote OCR backend.
protocol CloudOCRClient {
/// Recognizes text in the request's image.
/// - Returns: The response, or `nil` when the client has no result to offer.
/// - Throws: Implementation-defined errors.
func recognizeText(from request: CloudOCRRequest) async throws -> CloudOCRTextResponse?
}
/// Placeholder client that never produces a cloud result.
struct StubCloudOCRClient: CloudOCRClient {
    /// Always returns `nil`; the request is ignored.
    func recognizeText(from request: CloudOCRRequest) async throws -> CloudOCRTextResponse? {
        let noResult: CloudOCRTextResponse? = nil
        return noResult
    }
}

View File

@@ -0,0 +1,14 @@
import Foundation
/// Post-processing hook that can refine an extracted draft.
protocol CardFieldEnhancing {
/// Returns a (possibly refined) version of `draft`, given the recognized raw text.
func enhance(draft: CardRecognitionDraft, rawText: String) async -> CardRecognitionDraft
}
/// Enhancer that leaves the draft untouched.
struct NoOpCardFieldEnhancer: CardFieldEnhancing {
    /// Returns `draft` unchanged; `rawText` is ignored.
    func enhance(draft: CardRecognitionDraft, rawText: String) async -> CardRecognitionDraft {
        // TODO: Swap in a Foundation Models-backed enhancer once the runtime
        // integration is ready and verified locally. Because the boundary is
        // injectable, the call site stays stable while the MVP compiles today.
        return draft
    }
}

View File

@@ -0,0 +1,99 @@
import CoreImage
import UIKit
/// Output of `ImagePreprocessor.prepare(_:)`: one image in the forms the pipeline needs.
struct PreparedImage {
/// The preprocessed image as a `UIImage`.
let normalizedImage: UIImage
/// `CGImage` form of the preprocessed image, handed to the on-device recognizer.
let analysisCGImage: CGImage
/// JPEG encoding of the preprocessed image, sent with cloud OCR requests.
let uploadJPEGData: Data
/// JPEG encoding of a small thumbnail, or `nil` if encoding failed.
let thumbnailJPEGData: Data?
}
/// Failures raised while preparing an image for OCR.
enum ImagePreprocessorError: LocalizedError {
    /// No bitmap could be produced from the input image.
    case unableToCreateImage

    var errorDescription: String? {
        switch self {
        case .unableToCreateImage: "The selected image could not be prepared for OCR."
        }
    }
}
/// Prepares a photo for OCR: fixes orientation, caps the pixel size, applies
/// a mild contrast/sharpen pass and produces upload and thumbnail encodings.
struct ImagePreprocessor {
    private let ciContext = CIContext()

    /// Prepares `image` for recognition.
    /// - Returns: The enhanced image, its `CGImage`, a JPEG for upload
    ///   (quality 0.82, longest side at most 2048 px) and a thumbnail JPEG
    ///   (quality 0.65, longest side at most 240 px).
    /// - Throws: `ImagePreprocessorError.unableToCreateImage` when the image
    ///   is empty or no bitmap can be produced.
    func prepare(_ image: UIImage) throws -> PreparedImage {
        // A zero-sized image cannot be rendered into a bitmap.
        guard image.size.width > 0, image.size.height > 0 else {
            throw ImagePreprocessorError.unableToCreateImage
        }

        let upright = normalized(image)
        let resized = resizedImage(from: upright, maxDimension: 2_048)
        let enhanced = enhancedImage(from: resized) ?? resized
        guard let cgImage = makeCGImage(from: enhanced) else {
            throw ImagePreprocessorError.unableToCreateImage
        }

        return PreparedImage(
            normalizedImage: enhanced,
            analysisCGImage: cgImage,
            // Best effort: an encoding failure yields empty upload data rather
            // than failing the whole preparation.
            uploadJPEGData: enhanced.jpegData(compressionQuality: 0.82) ?? Data(),
            thumbnailJPEGData: resizedImage(from: enhanced, maxDimension: 240).jpegData(compressionQuality: 0.65)
        )
    }

    /// Renderer format with an explicit scale.
    ///
    /// The default format renders at the screen scale, which would multiply
    /// the pixel size of every bitmap produced here by 2 or 3.
    private func rendererFormat(scale: CGFloat) -> UIGraphicsImageRendererFormat {
        let format = UIGraphicsImageRendererFormat.default()
        format.scale = scale
        return format
    }

    /// Redraws the image so its orientation is `.up`, keeping its pixel size.
    private func normalized(_ image: UIImage) -> UIImage {
        guard image.imageOrientation != .up else { return image }
        let renderer = UIGraphicsImageRenderer(size: image.size, format: rendererFormat(scale: image.scale))
        return renderer.image { _ in
            image.draw(in: CGRect(origin: .zero, size: image.size))
        }
    }

    /// Scales the image down so its longest side is at most `maxDimension`
    /// pixels; smaller images are returned unchanged.
    private func resizedImage(from image: UIImage, maxDimension: CGFloat) -> UIImage {
        let pixelWidth = image.size.width * image.scale
        let pixelHeight = image.size.height * image.scale
        let largestDimension = max(pixelWidth, pixelHeight)
        guard largestDimension > maxDimension else { return image }

        let factor = maxDimension / largestDimension
        let targetSize = CGSize(width: pixelWidth * factor, height: pixelHeight * factor)
        // Scale 1 makes the target size the exact pixel size of the result.
        let renderer = UIGraphicsImageRenderer(size: targetSize, format: rendererFormat(scale: 1))
        return renderer.image { _ in
            image.draw(in: CGRect(origin: .zero, size: targetSize))
        }
    }

    /// Applies a slight contrast boost, desaturation, brightening and
    /// luminance sharpening; returns `nil` when Core Image cannot process the image.
    private func enhancedImage(from image: UIImage) -> UIImage? {
        guard let ciImage = CIImage(image: image) else { return nil }
        let adjusted = ciImage
            .applyingFilter("CIColorControls", parameters: [
                kCIInputContrastKey: 1.08,
                kCIInputSaturationKey: 0.96,
                kCIInputBrightnessKey: 0.01,
            ])
            .applyingFilter("CISharpenLuminance", parameters: [
                kCIInputSharpnessKey: 0.35,
            ])
        guard let cgImage = ciContext.createCGImage(adjusted, from: adjusted.extent) else {
            return nil
        }
        return UIImage(cgImage: cgImage)
    }

    /// Returns a `CGImage` for `image`, rendering one if the image is backed
    /// by neither a `CGImage` nor a `CIImage`.
    private func makeCGImage(from image: UIImage) -> CGImage? {
        if let cgImage = image.cgImage {
            return cgImage
        }
        if let ciImage = image.ciImage {
            return ciContext.createCGImage(ciImage, from: ciImage.extent)
        }
        let renderer = UIGraphicsImageRenderer(size: image.size, format: rendererFormat(scale: image.scale))
        let rendered = renderer.image { _ in
            image.draw(in: CGRect(origin: .zero, size: image.size))
        }
        return rendered.cgImage
    }
}

View File

@@ -0,0 +1,37 @@
import Combine
import Foundation
import Network
/// Source of the current connectivity state.
protocol NetworkStatusProviding: AnyObject {
/// Whether the device is currently considered online.
var isOnline: Bool { get }
/// Begins observing connectivity changes.
func startMonitoring()
/// Stops observing connectivity changes.
func stopMonitoring()
}
/// Publishes connectivity changes reported by `NWPathMonitor`.
@MainActor
final class NetworkMonitor: ObservableObject, NetworkStatusProviding {
    /// Last known connectivity; assumed online until the first path update.
    @Published private(set) var isOnline = true

    /// Monitor of the current monitoring period, or `nil` while stopped.
    ///
    /// A cancelled `NWPathMonitor` delivers no further updates, so a fresh
    /// instance is created for every start instead of reusing one.
    private var monitor: NWPathMonitor?
    private let queue = DispatchQueue(label: "StackDex.NetworkMonitor")

    /// Starts monitoring; does nothing when already monitoring.
    func startMonitoring() {
        guard monitor == nil else { return }
        let pathMonitor = NWPathMonitor()
        pathMonitor.pathUpdateHandler = { [weak self] path in
            let isSatisfied = path.status == .satisfied
            // The handler runs on `queue`; publish on the main queue.
            DispatchQueue.main.async {
                self?.isOnline = isSatisfied
            }
        }
        pathMonitor.start(queue: queue)
        monitor = pathMonitor
    }

    /// Stops monitoring; does nothing when not monitoring.
    func stopMonitoring() {
        guard let pathMonitor = monitor else { return }
        pathMonitor.cancel()
        monitor = nil
    }
}

View File

@@ -0,0 +1,120 @@
import Combine
import Photos
import PhotosUI
import SwiftUI
import UIKit
/// A recent photo-library asset together with a preloaded thumbnail.
struct RecentPhotoItem: Identifiable, Equatable {
/// Identifier of the item.
let id: String
/// Photo-library asset the full image is loaded from.
let asset: PHAsset
/// Thumbnail image for display.
let thumbnail: UIImage
}
/// Main-actor service exposing photo-library authorization state and the
/// most recent image assets with thumbnails.
@MainActor
final class PhotoLibraryService: NSObject, ObservableObject {
    @Published private(set) var authorizationStatus: PHAuthorizationStatus = PHPhotoLibrary.authorizationStatus(for: .readWrite)
    @Published private(set) var recentPhotos: [RecentPhotoItem] = []
    @Published private(set) var isLoading = false

    private let imageManager = PHCachingImageManager()

    /// `true` when the library can be read, fully or with limited access.
    var canBrowseRecents: Bool {
        switch authorizationStatus {
        case .authorized, .limited:
            return true
        default:
            return false
        }
    }

    /// Short label describing the current authorization state.
    var statusMessage: String {
        switch authorizationStatus {
        case .authorized: "Recent photos"
        case .limited: "Limited photo access"
        case .denied: "Photo access denied"
        case .restricted: "Photo access restricted"
        case .notDetermined: "Show recent photos"
        @unknown default: "Photo access unavailable"
        }
    }

    /// Prompts for read/write access when undetermined, then reloads recents.
    func requestAccessAndLoad() async {
        if authorizationStatus == .notDetermined {
            authorizationStatus = await PHPhotoLibrary.requestAuthorization(for: .readWrite)
        }
        await refreshRecentsIfPossible()
    }

    /// Re-reads the authorization status and loads up to 12 of the newest
    /// image assets; clears the list when the library cannot be read.
    func refreshRecentsIfPossible() async {
        authorizationStatus = PHPhotoLibrary.authorizationStatus(for: .readWrite)
        guard canBrowseRecents else {
            recentPhotos = []
            return
        }

        isLoading = true
        defer { isLoading = false }

        let fetchOptions = PHFetchOptions()
        fetchOptions.sortDescriptors = [NSSortDescriptor(key: "creationDate", ascending: false)]
        fetchOptions.fetchLimit = 12
        let fetched = PHAsset.fetchAssets(with: .image, options: fetchOptions)

        var items: [RecentPhotoItem] = []
        fetched.enumerateObjects { asset, _, _ in
            // Assets without a thumbnail are skipped.
            if let item = self.thumbnailItem(for: asset) {
                items.append(item)
            }
        }
        recentPhotos = items
    }

    /// Loads the full image data for `item`, allowing network download.
    /// - Returns: The decoded image, or `nil` when no decodable data arrived.
    func loadImage(for item: RecentPhotoItem) async -> UIImage? {
        let requestOptions = PHImageRequestOptions()
        requestOptions.deliveryMode = .highQualityFormat
        requestOptions.resizeMode = .fast
        requestOptions.isNetworkAccessAllowed = true
        return await withCheckedContinuation { continuation in
            imageManager.requestImageDataAndOrientation(for: item.asset, options: requestOptions) { data, _, _, _ in
                let image = data.flatMap(UIImage.init(data:))
                continuation.resume(returning: image)
            }
        }
    }

    /// Presents the limited-library picker from the key window's root view
    /// controller of the first connected scene, if there is one.
    func presentLimitedLibraryPicker() {
        let firstScene = UIApplication.shared.connectedScenes.first as? UIWindowScene
        guard let presenter = firstScene?.keyWindow?.rootViewController else {
            return
        }
        PHPhotoLibrary.shared().presentLimitedLibraryPicker(from: presenter)
    }

    /// Opens the app's page in the Settings app.
    func openSettings() {
        if let settingsURL = URL(string: UIApplication.openSettingsURLString) {
            UIApplication.shared.open(settingsURL)
        }
    }

    /// Synchronously requests a 180×180 aspect-fill thumbnail for `asset`.
    /// - Returns: The item, or `nil` when no thumbnail was delivered.
    private func thumbnailItem(for asset: PHAsset) -> RecentPhotoItem? {
        let requestOptions = PHImageRequestOptions()
        requestOptions.deliveryMode = .opportunistic
        requestOptions.resizeMode = .fast
        requestOptions.isSynchronous = true

        var delivered: UIImage?
        imageManager.requestImage(
            for: asset,
            targetSize: CGSize(width: 180, height: 180),
            contentMode: .aspectFill,
            options: requestOptions
        ) { image, _ in
            delivered = image
        }

        guard let delivered else { return nil }
        return RecentPhotoItem(id: asset.localIdentifier, asset: asset, thumbnail: delivered)
    }
}
private extension UIWindowScene {
    /// First window of the scene that reports itself as the key window.
    var keyWindow: UIWindow? {
        windows.first { $0.isKeyWindow }
    }
}

View File

@@ -0,0 +1,83 @@
import CoreGraphics
import Vision
/// One line of recognized text.
struct RecognizedTextLine: Equatable {
/// Recognized string.
let text: String
/// Recognizer confidence for this line.
let confidence: Double
/// Bounding box of the line; presumably in normalized image coordinates, as the name suggests — confirm per recognizer.
let normalizedBounds: CGRect
}
/// Result of one OCR pass.
struct OCRTextPayload: Equatable {
/// Full recognized text.
let rawText: String
/// Individual recognized lines.
let lines: [RecognizedTextLine]
/// Average confidence across `lines`.
let averageConfidence: Double
}
/// Text recognizer operating on a prepared image.
protocol CardTextRecognizing {
/// Recognizes text in `image`.
/// - Throws: Implementation-defined errors when recognition fails.
func recognizeText(in image: PreparedImage) async throws -> OCRTextPayload
}
/// Failures specific to the Vision-based recognizer.
enum VisionCardOCRServiceError: LocalizedError {
    /// Recognition finished without any text lines.
    case noRecognizedText

    var errorDescription: String? {
        switch self {
        case .noRecognizedText: "Vision OCR could not find readable card text."
        }
    }
}
/// On-device text recognizer backed by Vision's `VNRecognizeTextRequest`.
struct VisionCardOCRService: CardTextRecognizing {
    /// Vocabulary hints passed to the request as custom words.
    private let customWords = [
        "Pokémon", "Charizard", "Glurak", "Pikachu", "Blastoise", "Venusaur",
        "Illustration", "Scarlet", "Violet", "Trainer", "Holo", "Rare", "Ultra",
        "VMAX", "GX", "ex", "Base", "Set", "Promo", "Shiny"
    ]

    /// Recognizes text in the prepared image.
    /// - Returns: The top candidate of every observation, the lines joined by
    ///   newlines, and the mean line confidence.
    /// - Throws: The error thrown by the Vision request handler, or
    ///   `VisionCardOCRServiceError.noRecognizedText` when no line was found.
    func recognizeText(in image: PreparedImage) async throws -> OCRTextPayload {
        // `perform` runs the request synchronously, so results are read from
        // the request afterwards. Using a completion handler together with a
        // continuation could resume twice when the handler receives an error
        // and `perform` throws as well.
        let request = VNRecognizeTextRequest(completionHandler: nil)
        request.recognitionLevel = .accurate
        request.recognitionLanguages = ["en-US", "de-DE"]
        request.usesLanguageCorrection = true
        request.customWords = customWords

        let handler = VNImageRequestHandler(cgImage: image.analysisCGImage, options: [:])
        try handler.perform([request])

        let observations = request.results ?? []
        let lines = observations.compactMap { observation -> RecognizedTextLine? in
            guard let candidate = observation.topCandidates(1).first else { return nil }
            return RecognizedTextLine(
                text: candidate.string,
                confidence: Double(candidate.confidence),
                normalizedBounds: observation.boundingBox
            )
        }
        guard !lines.isEmpty else {
            throw VisionCardOCRServiceError.noRecognizedText
        }

        let average = lines.map(\.confidence).reduce(0, +) / Double(lines.count)
        return OCRTextPayload(
            rawText: lines.map(\.text).joined(separator: "\n"),
            lines: lines,
            averageConfidence: average
        )
    }
}

View File

@@ -12,7 +12,7 @@ import SwiftData
struct StackDexApp: App {
var sharedModelContainer: ModelContainer = {
let schema = Schema([
Item.self,
ConfirmedScanRecord.self,
])
let modelConfiguration = ModelConfiguration(schema: schema, isStoredInMemoryOnly: false)

View File

@@ -0,0 +1,28 @@
import AVFoundation
import SwiftUI
/// SwiftUI wrapper showing the live preview of a capture session.
struct CameraPreviewView: UIViewRepresentable {
    let session: AVCaptureSession

    /// Creates the preview view and attaches the session with aspect-fill gravity.
    func makeUIView(context: Context) -> PreviewView {
        let preview = PreviewView()
        let layer = preview.previewLayer
        layer.session = session
        layer.videoGravity = .resizeAspectFill
        return preview
    }

    /// Re-attaches the session on every SwiftUI update.
    func updateUIView(_ uiView: PreviewView, context: Context) {
        let layer = uiView.previewLayer
        layer.session = session
    }
}
/// `UIView` whose backing layer is an `AVCaptureVideoPreviewLayer`.
final class PreviewView: UIView {
    override class var layerClass: AnyClass { AVCaptureVideoPreviewLayer.self }

    /// The backing layer, typed as a preview layer. Traps if the layer has an
    /// unexpected type.
    var previewLayer: AVCaptureVideoPreviewLayer {
        if let captureLayer = layer as? AVCaptureVideoPreviewLayer {
            return captureLayer
        }
        fatalError("PreviewView requires AVCaptureVideoPreviewLayer.")
    }
}

View File

@@ -0,0 +1,171 @@
import SwiftData
import SwiftUI
import UIKit
/// Form for reviewing the current session's card draft (or entering one manually)
/// and logging it as a `ConfirmedScanRecord`.
struct ResultEditorView: View {
    /// How the editor was reached; selects the title and the secondary buttons.
    enum Mode {
        case recognized
        case manual

        /// Navigation title for this mode.
        var title: String {
            switch self {
            case .recognized: "Review Result"
            case .manual: "Manual Entry"
            }
        }
    }

    @Environment(\.modelContext) private var modelContext
    @ObservedObject var flowModel: ScanFlowModel
    let mode: Mode

    var body: some View {
        Group {
            if let session = flowModel.currentSession {
                // Read the draft once per body evaluation; the read-only sections share it.
                let draft = draftBinding.wrappedValue
                Form {
                    if let jpegData = session.thumbnailJPEGData, let thumbnail = UIImage(data: jpegData) {
                        Section {
                            Image(uiImage: thumbnail)
                                .resizable()
                                .scaledToFit()
                                .frame(maxWidth: .infinity)
                                .clipShape(RoundedRectangle(cornerRadius: 18))
                        }
                    }
                    recognitionSection(for: draft)
                    cardDetailsSection
                    if !draft.notes.isEmpty {
                        Section("Hints") {
                            ForEach(Array(draft.notes.enumerated()), id: \.offset) { entry in
                                Text(entry.element)
                                    .font(.subheadline)
                            }
                        }
                    }
                    if !draft.rawText.isEmpty {
                        Section("OCR fragments") {
                            Text(draft.rawText)
                                .font(.footnote.monospaced())
                                .foregroundStyle(.secondary)
                                .textSelection(.enabled)
                        }
                    }
                    actionButtonsSection
                }
            } else {
                ContentUnavailableView(
                    "No scan loaded",
                    systemImage: "rectangle.and.text.magnifyingglass",
                    description: Text("Capture or import a card image first.")
                )
            }
        }
        .navigationTitle(mode.title)
        .navigationBarTitleDisplayMode(.inline)
    }

    /// Read-only summary of the draft's source and confidence, with their explanatory texts.
    private func recognitionSection(for draft: CardRecognitionDraft) -> some View {
        Section("Recognition") {
            LabeledContent("Source", value: draft.source.title)
            LabeledContent("Confidence", value: draft.confidence.title)
            Text(draft.confidence.helperText)
                .font(.footnote)
                .foregroundStyle(.secondary)
            Text(draft.source.detail)
                .font(.footnote)
                .foregroundStyle(.secondary)
        }
    }

    /// Editable card fields, each bound to the current draft.
    private var cardDetailsSection: some View {
        Section("Card details") {
            TextField("Card name", text: draftBinding.cardName)
                .textInputAutocapitalization(.words)
            TextField("Card number", text: draftBinding.cardNumber)
                .textInputAutocapitalization(.never)
            TextField("Set", text: draftBinding.setIdentifier)
                .textInputAutocapitalization(.words)
            Picker("Rarity", selection: draftBinding.rarity) {
                ForEach(CardRarity.allCases) { option in
                    Text(option.rawValue).tag(option.rawValue)
                }
            }
        }
    }

    /// Confirm button plus the mode-dependent secondary actions.
    private var actionButtonsSection: some View {
        Section {
            Button("Confirm and Log") {
                confirmDraft()
            }
            .buttonStyle(.borderedProminent)
            switch mode {
            case .recognized:
                Button("Manual Entry") {
                    flowModel.openManualEntryFromResult()
                }
                Button("Re-scan") {
                    flowModel.returnToScan()
                }
                .foregroundStyle(.red)
            case .manual:
                Button("Re-scan") {
                    flowModel.returnToScan()
                }
            }
        }
    }

    /// Binding onto the current session's draft.
    ///
    /// Reads fall back to `.manualPrefill()` when no draft is available; writes are
    /// forwarded to `flowModel.updateDraft(_:)`.
    private var draftBinding: Binding<CardRecognitionDraft> {
        Binding {
            flowModel.currentSession?.draft ?? .manualPrefill()
        } set: { updatedDraft in
            flowModel.updateDraft(updatedDraft)
        }
    }

    /// Inserts a record built from the current draft and returns to the scan screen with a message.
    private func confirmDraft() {
        let record = ConfirmedScanRecord(draft: draftBinding.wrappedValue)
        modelContext.insert(record)
        flowModel.returnToScan(message: "Saved to temporary local log.")
    }
}
/// Per-field string bindings derived from a binding to a whole `CardRecognitionDraft`.
///
/// Every setter copies the current draft, changes one field, and writes the whole
/// draft back through the parent binding.
private extension Binding where Value == CardRecognitionDraft {
    /// Binding to the draft's `cardName`.
    var cardName: Binding<String> {
        Binding<String> {
            wrappedValue.cardName
        } set: { newName in
            var updated = wrappedValue
            updated.cardName = newName
            wrappedValue = updated
        }
    }

    /// Binding to the draft's `cardNumber`.
    var cardNumber: Binding<String> {
        Binding<String> {
            wrappedValue.cardNumber
        } set: { newNumber in
            var updated = wrappedValue
            updated.cardNumber = newNumber
            wrappedValue = updated
        }
    }

    /// Binding to the draft's `setIdentifier`.
    var setIdentifier: Binding<String> {
        Binding<String> {
            wrappedValue.setIdentifier
        } set: { newIdentifier in
            var updated = wrappedValue
            updated.setIdentifier = newIdentifier
            wrappedValue = updated
        }
    }

    /// Binding to the draft's `rarity` string.
    var rarity: Binding<String> {
        Binding<String> {
            wrappedValue.rarity
        } set: { newRarity in
            var updated = wrappedValue
            updated.rarity = newRarity
            wrappedValue = updated
        }
    }
}

View File

@@ -0,0 +1,255 @@
import Photos
import SwiftData
import SwiftUI
/// First screen of the scan flow: camera preview with a shutter button, recent-photo
/// import, a manual-entry fallback, and the latest confirmed records.
struct ScanView: View {
    @ObservedObject var flowModel: ScanFlowModel
    /// Confirmed records, newest first.
    @Query(sort: \ConfirmedScanRecord.confirmedAt, order: .reverse) private var confirmedRecords: [ConfirmedScanRecord]

    var body: some View {
        ScrollView {
            VStack(alignment: .leading, spacing: 20) {
                cameraSection
                recentPhotosSection
                actionSection
                temporaryLogSection
            }
            .padding(20)
        }
        .navigationTitle("Scan Card")
        .navigationBarTitleDisplayMode(.inline)
        .overlay {
            if flowModel.isRecognizing {
                ProgressView("Recognizing card…")
                    .padding(.horizontal, 18)
                    .padding(.vertical, 14)
                    .background(.ultraThinMaterial, in: RoundedRectangle(cornerRadius: 16))
            }
        }
        .alert("StackDex", isPresented: messageIsPresented) {
            Button("OK", role: .cancel) {
                flowModel.transientMessage = nil
            }
        } message: {
            Text(flowModel.transientMessage ?? "")
        }
    }

    /// Live preview while the session is running (placeholder otherwise), a dashed
    /// framing guide, and a shutter button that appears only when access is authorized.
    private var cameraSection: some View {
        VStack(alignment: .leading, spacing: 12) {
            Text("Camera")
                .font(.headline)
            ZStack {
                RoundedRectangle(cornerRadius: 24)
                    .fill(.black.opacity(0.92))
                if flowModel.cameraService.isSessionRunning {
                    CameraPreviewView(session: flowModel.cameraService.session)
                        .clipShape(RoundedRectangle(cornerRadius: 24))
                } else {
                    cameraPlaceholder
                }
                RoundedRectangle(cornerRadius: 20)
                    .stroke(.white.opacity(0.7), style: StrokeStyle(lineWidth: 2, dash: [10, 8]))
                    .padding(28)
            }
            .frame(height: 440)
            if flowModel.cameraService.authorizationStatus == .authorized {
                HStack {
                    Spacer()
                    Button {
                        Task { await flowModel.capturePhoto() }
                    } label: {
                        ZStack {
                            Circle().fill(.white).frame(width: 76, height: 76)
                            Circle().stroke(.black.opacity(0.85), lineWidth: 3).frame(width: 62, height: 62)
                        }
                    }
                    .accessibilityLabel("Capture card photo")
                    Spacer()
                }
            }
        }
    }

    /// Shown in place of the preview: status text plus, when the camera is not
    /// authorized, a button to enable it or to open Settings.
    @ViewBuilder
    private var cameraPlaceholder: some View {
        VStack(spacing: 14) {
            Image(systemName: "camera.viewfinder")
                .font(.system(size: 44, weight: .semibold))
                .foregroundStyle(.white)
            Text(cameraPlaceholderText)
                .font(.headline)
                .foregroundStyle(.white)
                .multilineTextAlignment(.center)
            Text("Scan starts after an explicit shutter tap. No image is stored after recognition.")
                .font(.subheadline)
                .foregroundStyle(.white.opacity(0.75))
                .multilineTextAlignment(.center)
                .padding(.horizontal, 24)
            if flowModel.cameraService.authorizationStatus != .authorized {
                Button(cameraButtonTitle) {
                    // Mirror the cases in `cameraButtonTitle` so the action always matches
                    // the label: "Open Settings" for denied/restricted, "Enable Camera" otherwise.
                    switch flowModel.cameraService.authorizationStatus {
                    case .denied, .restricted:
                        // Uses the photo-library service's settings entry point, as the original did.
                        flowModel.photoLibraryService.openSettings()
                    default:
                        Task { await flowModel.enableCamera() }
                    }
                }
                .buttonStyle(.borderedProminent)
            }
        }
        .padding(24)
    }

    /// Recent-photo strip whose content depends on the photo-library authorization status.
    private var recentPhotosSection: some View {
        VStack(alignment: .leading, spacing: 12) {
            HStack {
                Text("Recent photos")
                    .font(.headline)
                Spacer()
                if flowModel.photoLibraryService.authorizationStatus == .limited {
                    Button("Manage") {
                        flowModel.photoLibraryService.presentLimitedLibraryPicker()
                    }
                }
            }
            switch flowModel.photoLibraryService.authorizationStatus {
            case .authorized, .limited:
                if flowModel.photoLibraryService.isLoading {
                    ProgressView()
                        .frame(maxWidth: .infinity, minHeight: 72)
                } else if flowModel.photoLibraryService.recentPhotos.isEmpty {
                    secondaryPanel("No recent photos available.")
                } else {
                    ScrollView(.horizontal, showsIndicators: false) {
                        HStack(spacing: 12) {
                            ForEach(flowModel.photoLibraryService.recentPhotos) { item in
                                Button {
                                    Task { await flowModel.importRecentPhoto(item) }
                                } label: {
                                    Image(uiImage: item.thumbnail)
                                        .resizable()
                                        .scaledToFill()
                                        .frame(width: 88, height: 88)
                                        .clipShape(RoundedRectangle(cornerRadius: 14))
                                }
                                .buttonStyle(.plain)
                            }
                        }
                    }
                }
            case .denied, .restricted:
                secondaryPanel("Photo access is unavailable. You can keep scanning with the camera or open Settings to allow recent-photo import.") {
                    Button("Open Settings") {
                        flowModel.photoLibraryService.openSettings()
                    }
                    .buttonStyle(.bordered)
                }
            case .notDetermined:
                secondaryPanel("Recent photos stay optional and are requested only when you ask for them.") {
                    Button("Show Recents") {
                        Task { await flowModel.photoLibraryService.requestAccessAndLoad() }
                    }
                    .buttonStyle(.borderedProminent)
                }
            @unknown default:
                secondaryPanel("Recent photos are unavailable on this device.")
            }
        }
    }

    /// Online/offline explanation with the manual-entry button.
    private var actionSection: some View {
        VStack(alignment: .leading, spacing: 12) {
            Text("Fallback")
                .font(.headline)
            secondaryPanel(flowModel.networkMonitor.isOnline ? "Cloud OCR is prepared as an injectable boundary, but the default client is intentionally stubbed for this local MVP. Vision OCR remains fully functional." : "You’re offline, so StackDex will use the on-device Vision pipeline.") {
                Button("Enter Manually") {
                    flowModel.startManualEntry()
                }
                .buttonStyle(.bordered)
            }
        }
    }

    /// Up to three of the newest confirmed records, or an explanatory panel when there are none.
    private var temporaryLogSection: some View {
        VStack(alignment: .leading, spacing: 12) {
            Text("Temporary confirmations")
                .font(.headline)
            if confirmedRecords.isEmpty {
                secondaryPanel("Confirmed cards are logged locally in SwiftData so the MVP can prove the end-to-end flow without storing source images.")
            } else {
                VStack(spacing: 10) {
                    ForEach(Array(confirmedRecords.prefix(3))) { record in
                        HStack(alignment: .top) {
                            VStack(alignment: .leading, spacing: 4) {
                                Text(record.cardName.isEmpty ? "Unnamed card" : record.cardName)
                                    .font(.subheadline.weight(.semibold))
                                // Show only the non-empty parts, separated by a middle dot.
                                Text([record.cardNumber, record.setIdentifier].filter { !$0.isEmpty }.joined(separator: " · "))
                                    .font(.caption)
                                    .foregroundStyle(.secondary)
                            }
                            Spacer()
                            Text(record.confirmedAt, style: .time)
                                .font(.caption2)
                                .foregroundStyle(.secondary)
                        }
                        .padding(12)
                        .background(.thinMaterial, in: RoundedRectangle(cornerRadius: 14))
                    }
                }
            }
        }
    }

    /// Presents the alert while `transientMessage` is non-nil; dismissing clears the message.
    private var messageIsPresented: Binding<Bool> {
        Binding(
            get: { flowModel.transientMessage != nil },
            set: { if !$0 { flowModel.transientMessage = nil } }
        )
    }

    /// Headline shown in the camera placeholder for the current camera authorization status.
    private var cameraPlaceholderText: String {
        switch flowModel.cameraService.authorizationStatus {
        case .authorized:
            return "Starting camera preview…"
        case .denied:
            return "Camera access is denied"
        case .restricted:
            return "Camera access is restricted"
        case .notDetermined:
            return "Enable the camera to scan a card"
        @unknown default:
            return "Camera unavailable"
        }
    }

    /// Title of the placeholder button; the button's action switches on the same cases.
    private var cameraButtonTitle: String {
        switch flowModel.cameraService.authorizationStatus {
        case .denied, .restricted:
            return "Open Settings"
        default:
            return "Enable Camera"
        }
    }

    /// Rounded secondary-text panel with optional action views beneath the text.
    ///
    /// - Parameters:
    ///   - text: Message shown in the panel.
    ///   - actions: Views placed below the message; defaults to nothing.
    private func secondaryPanel(_ text: String, @ViewBuilder actions: () -> some View = { EmptyView() }) -> some View {
        VStack(alignment: .leading, spacing: 10) {
            Text(text)
                .font(.subheadline)
                .foregroundStyle(.secondary)
            actions()
        }
        .frame(maxWidth: .infinity, alignment: .leading)
        .padding(14)
        .background(.thinMaterial, in: RoundedRectangle(cornerRadius: 16))
    }
}