MediaWiki:Gadget-LabelScan.js: Unterschied zwischen den Versionen
Erscheinungsbild
Admin (Diskussion | Beiträge) Keine Bearbeitungszusammenfassung |
Admin (Diskussion | Beiträge) Keine Bearbeitungszusammenfassung Markierung: Zurückgesetzt |
||
| Zeile 3: | Zeile 3: | ||
'use strict'; | 'use strict'; | ||
// ================== | // ========= KONFIG ========= | ||
// Wenn du enger in Kategorien suchen willst, trage sie hier ein. | |||
// | |||
const ADOS_CATEGORIES = [ | const ADOS_CATEGORIES = [ | ||
'Alle A Dream of Scotland Abfüllungen', | 'Alle A Dream of Scotland Abfüllungen', | ||
| Zeile 18: | Zeile 15: | ||
]; | ]; | ||
// Wörter, die oft auf ADOS-Labels stehen und uns beim Fuzzy-Match helfen | |||
const KNOWN_TOKENS = [ | const KNOWN_TOKENS = [ | ||
// | // Marken/Distilleries (Auszug – erweiterbar) | ||
'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore', | 'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore', | ||
'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn','Lagavulin', | 'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn','Lagavulin', | ||
'Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet','Inchgower', | 'Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet','Inchgower', | ||
' | 'Bunnahabhain','Springbank','Caperdonich','Linkwood','Glen Scotia', | ||
// Serien/ADOS-Sprache | |||
'A Dream of Scotland','A Dream of Ireland','The Tasteful 8','Heroes of Childhood', | |||
'Cask Strength',' | 'Cask Strength','Single Malt','Unicorn','Space Girls','Whisky Elfen', | ||
'The Fine Art of Whisky','Friendly Mr. Z','Rumbastic' | |||
]; | ]; | ||
// ========= | // ========= UI HILFSFUNKTIONEN ========= | ||
function hasUI () { | function hasUI () { | ||
return !!document.getElementById('ados-scan-run') && | return !!document.getElementById('ados-scan-run') && | ||
!!document.getElementById('ados-scan-file'); | !!document.getElementById('ados-scan-file'); | ||
} | } | ||
function setStatus (t) { | function setStatus (t) { | ||
const el = document.getElementById('ados-scan-status'); | |||
if (el) el.textContent = t || ''; | if (el) el.textContent = t || ''; | ||
} | } | ||
function setProgress (p) { | function setProgress (p) { | ||
const bar = document.getElementById('ados-scan-progress'); | |||
if (!bar) return; | if (!bar) return; | ||
if (p == null) { bar.hidden = true; bar.value = 0; } | if (p == null) { bar.hidden = true; bar.value = 0; } | ||
else { bar.hidden = false; bar.value = Math.max(0, Math.min(1, p)); } | else { bar.hidden = false; bar.value = Math.max(0, Math.min(1, p)); } | ||
} | } | ||
function showPreview (file) { | function showPreview (file) { | ||
const url = URL.createObjectURL(file); | |||
const prev = document.getElementById('ados-scan-preview'); | |||
if (prev) { | if (prev) { | ||
prev.innerHTML = '<img alt="Vorschau" src="' + url + '">'; | prev.innerHTML = '<img alt="Vorschau" src="' + url + '">'; | ||
| Zeile 63: | Zeile 51: | ||
} | } | ||
} | } | ||
const dbg = (msg) => { try { console.log('[LabelScan]', msg); } catch(e){} }; | |||
function | // ========= TESSERACT WORKER (einmalig) ========= | ||
let workerPromise = null; | |||
function ensureWorker () { | |||
if (workerPromise) return workerPromise; | |||
workerPromise = new Promise((resolve, reject) => { | |||
if (window.Tesseract && Tesseract.createWorker) { | |||
const worker = Tesseract.createWorker({ | |||
logger: m => { | |||
if (m?.status === 'recognizing text' && typeof m.progress === 'number') { | |||
setProgress(m.progress); | |||
} | |||
} | |||
}); | |||
(async () => { | |||
try { | |||
await worker.load(); | |||
await worker.loadLanguage('eng+deu'); // englisch + deutsch | |||
await worker.initialize('eng+deu'); | |||
// OCR-Parameter: eher „Block Text“ | |||
await worker.setParameters({ | |||
tessedit_pageseg_mode: '6', // PSM 6: ein Block mit Text | |||
preserve_interword_spaces: '1', | |||
user_defined_dpi: '300' | |||
}); | |||
resolve(worker); | |||
} catch (e) { | |||
reject(e); | |||
} | |||
})(); | |||
} else { | |||
// Fallback: Bibliothek nachladen | |||
const s = document.createElement('script'); | |||
s.src = 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js'; | |||
s.async = true; | |||
s.onload = () => { | |||
if (!Tesseract?.createWorker) return reject(new Error('Tesseract lädt, aber createWorker fehlt')); | |||
resolve(ensureWorker()); | |||
}; | |||
s.onerror = () => reject(new Error('Tesseract konnte nicht geladen werden')); | |||
document.head.appendChild(s); | |||
} | |||
}); | |||
return workerPromise; | |||
} | } | ||
// ================== | // ========= BILD-VORVERARBEITUNG ========= | ||
function toCanvasFromImage (img, maxSide) { | |||
const MAX = maxSide || 1800; | |||
const scale = Math.min(1, (img.width > img.height) ? (MAX / img.width) : (MAX / img.height)); | |||
const w = Math.max(1, Math.round(img.width * scale)); | |||
function | const h = Math.max(1, Math.round(img.height * scale)); | ||
const c = document.createElement('canvas'); | |||
c.width = w; c.height = h; | |||
const ctx = c.getContext('2d', { willReadFrequently: true }); | |||
ctx.imageSmoothingEnabled = true; | |||
ctx.drawImage(img, 0, 0, w, h); | |||
return c; | |||
return | |||
} | } | ||
function grayscaleContrastUnsharp (canvas) { | |||
const ctx = canvas.getContext('2d', { willReadFrequently: true }); | |||
const { width: w, height: h } = canvas; | |||
const id = ctx.getImageData(0, 0, w, h); | |||
const d = id.data; | |||
// 1) Graustufen + Kontrast | |||
let min=255, max=0; | |||
const gray = new Uint8ClampedArray(w*h); | |||
for (let i=0, j=0; i<d.length; i+=4, j++) { | |||
const g = 0.2126*d[i] + 0.7152*d[i+1] + 0.0722*d[i+2]; | |||
gray[j] = g; | |||
if (g<min) min=g; if (g>max) max=g; | |||
} | |||
const span = Math.max(1, max-min); | |||
for (let j=0; j<gray.length; j++) { | |||
let v = (gray[j]-min)/span; // 0..1 | |||
v = Math.pow(v, 0.9); // leichte Gamma-Korrektur | |||
const | gray[j] = Math.max(0, Math.min(255, Math.round(v*255))); | ||
const | |||
} | } | ||
// | // 2) Leichtes Unsharp Mask | ||
// einfacher 3x3-Box-Blur und dann Original + Amount*(Original-Blur) | |||
const | const blur = new Uint8ClampedArray(gray.length); | ||
const | const off = [-w-1,-w,-w+1, -1,0,1, w-1,w,w+1]; | ||
for (let y=1;y<h-1;y++){ | for (let y=1;y<h-1;y++){ | ||
for (let x=1;x<w-1;x++){ | for (let x=1;x<w-1;x++){ | ||
let s=0; | |||
const idx=y*w+x; | |||
const | for (let k=0;k<9;k++) s += gray[idx+off[k]]; | ||
blur[idx] = s/9; | |||
} | } | ||
} | } | ||
const amount=0.65; | |||
for (let i=0;i<gray.length;i++){ | |||
let v = gray[i] + amount*(gray[i] - (blur[i]||gray[i])); | |||
gray[i] = v<0?0:v>255?255:v; | |||
} | |||
// | // 3) Adaptive Schwelle light (global + lokale Korrektur) | ||
// global threshold um den Mittelwert, dann leichte Aufhellung dunkler Zeichen | |||
let sum=0; for (let i=0;i<gray.length;i++) sum+=gray[i]; | |||
const mean = sum/gray.length; | |||
for (let i=0, p=0; i<d.length; i+=4, p++) { | |||
const v = gray[p] < mean*0.97 ? 0 : 255; // binär | |||
d[i]=d[i+1]=d[i+2]=v; d[i+3]=255; | |||
} | } | ||
ctx.putImageData(id, 0, 0); | |||
return canvas; | |||
} | |||
function rotateCanvas (src, deg) { | |||
const rad = deg * Math.PI/180; | |||
const w = src.width, h = src.height; | |||
const swap = (deg % 180) !== 0; | |||
const c = document.createElement('canvas'); | |||
c.width = swap ? h : w; | |||
c.height = swap ? w : h; | |||
const ctx = c.getContext('2d'); | |||
ctx.translate(c.width/2, c.height/2); | |||
ctx.rotate(rad); | |||
ctx.drawImage(src, -w/2, -h/2); | |||
return c; | return c; | ||
} | } | ||
function cropCanvas (src, x, y, w, h) { | |||
function | const c = document.createElement('canvas'); | ||
const c = document.createElement('canvas'); c.width=w; c.height=h; | c.width = w; c.height = h; | ||
c.getContext('2d').drawImage( | c.getContext('2d').drawImage(src, x, y, w, h, 0, 0, w, h); | ||
return c; | return c; | ||
} | } | ||
function buildCandidates (base) { | |||
// Vollbild + zentrale & untere Bänder, je Rotation 0/+90/-90 | |||
// | const L = []; | ||
const rotations = [0, 90, -90]; | |||
rotations.forEach((deg) => { | |||
const r = deg ? rotateCanvas(base, deg) : base; | |||
const w = r.width, h = r.height; | |||
const full = grayscaleContrastUnsharp(r.cloneNode ? r.cloneNode(true) : r); | |||
L.push(full); | |||
// zentral ~70% Bereich | |||
const cw = Math.round(w*0.8), ch = Math.round(h*0.7); | |||
const cx = Math.round((w-cw)/2), cy = Math.round((h-ch)/2); | |||
L.push(grayscaleContrastUnsharp(cropCanvas(r, cx, cy, cw, ch))); | |||
// unteres Band (viele ADOS haben unten Textblöcke) | |||
const bh = Math.round(h*0.38); | |||
L.push(grayscaleContrastUnsharp(cropCanvas(r, 0, h-bh, w, bh))); | |||
}); | }); | ||
return | return L; | ||
} | } | ||
// ========= | // ========= OCR PIPELINE ========= | ||
async function runOCR (file) { | |||
const worker = await ensureWorker(); | |||
setProgress(0); | |||
// Bild laden → Canvas → Kandidaten erzeugen | |||
const img = await new Promise((res, rej) => { | |||
const o = new Image(); | |||
const base = | o.onload = () => res(o); | ||
o.onerror = () => rej(new Error('Bild konnte nicht geladen werden')); | |||
o.src = URL.createObjectURL(file); | |||
}); | |||
const base = toCanvasFromImage(img, 1800); | |||
const candidates = buildCandidates(base); | |||
let best = { text: '', conf: 0 }; | |||
// Zwei PSM-Modi probieren (6 → Block, 7 → eine Textzeile – robust gegen plakative Schriften) | |||
const PSMs = ['6','7']; | |||
for (const c of candidates) { | |||
for (const psm of PSMs) { | |||
for (const | |||
for (const | |||
try { | try { | ||
const | await worker.setParameters({ tessedit_pageseg_mode: psm }); | ||
const { data } = await worker.recognize(c); | |||
} catch(e){ / | const text = data?.text ? String(data.text) : ''; | ||
const conf = (data?.confidence || 0); | |||
// Heuristik: genug Buchstaben? | |||
const letters = (text.match(/[A-Za-zÄÖÜäöüß]{2,}/g) || []).length; | |||
const score = conf + letters*1.5; | |||
if (score > (best.conf + (best.letters||0)*1.5)) { | |||
best = { text, conf, letters }; | |||
} | |||
// Wenn sehr gut: früh abbrechen | |||
if (conf > 75 && letters > 15) break; | |||
} catch (e) { | |||
// einfach nächsten Kandidaten probieren | |||
} | |||
} | } | ||
} | } | ||
setProgress(null); | setProgress(null); | ||
// Debug-Ausgabe | |||
return | const dbgEl = document.getElementById('ados-scan-ocr'); | ||
if (dbgEl) dbgEl.textContent = best.text || '(kein Text erkannt)'; | |||
return best.text || ''; | |||
} | } | ||
// ================== | // ========= HINWEISE EXTRAHIEREN & SUCHE ========= | ||
function extractHints (text) { | function extractHints (text) { | ||
const raw = String(text || '').replace(/\s+/g, ' ').trim(); | const raw = String(text || '').replace(/\s+/g, ' ').trim(); | ||
const | const names = []; | ||
KNOWN_TOKENS.forEach(t => { | KNOWN_TOKENS.forEach(t => { | ||
const re = new RegExp('\\b' + t.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + '\\b', 'i'); | const re = new RegExp('\\b' + t.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + '\\b', 'i'); | ||
if (re.test(raw)) | if (re.test(raw)) names.push(t); | ||
}); | }); | ||
const ages = []; | const ages = []; | ||
const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi | let m; const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi; | ||
while ((m = ageRe.exec(raw)) !== null) { const n = m[1]; if (!ages.includes(n)) ages.push(n); } | |||
while ((m = ageRe.exec(raw)) !== null) { | |||
const years = []; | const years = []; | ||
const yearRe = /\b(19|20)\d{2}\b/g; | const yearRe = /\b(19|20)\d{2}\b/g; | ||
while ((m = yearRe.exec(raw)) !== null) { | while ((m = yearRe.exec(raw)) !== null) { const y = m[0]; if (!years.includes(y)) years.push(y); } | ||
const words = []; | |||
const seen = new Set(); | |||
const wordRe = /\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{3,}\b/g; | const wordRe = /\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{3,}\b/g; | ||
while ((m = wordRe.exec(raw)) !== null) { const w = m[0]; if (!seen.has(w)) { seen.add(w); words.push(w); if (words.length>=8) break; } } | |||
while (( | |||
return { names | return { names, ages, years, words, raw }; | ||
} | } | ||
async function searchWikiSmart (hints, limit) { | |||
await mw.loader.using('mediawiki.api'); | |||
const api = new mw.Api(); | |||
const ns0 = 0, MAX = limit || 12; | |||
const incats = ADOS_CATEGORIES.length | |||
? ADOS_CATEGORIES.map(c => 'incategory:"' + c + '"').join(' ') | |||
: ''; | |||
const buckets = []; | |||
// PASS 1: intitle | // PASS 1: sehr eng (intitle + Kategorien) | ||
if (hints.names.length) { | if (hints.names.length) { | ||
hints.names.forEach(n => { | hints.names.forEach(n => { | ||
if (hints.ages.length) hints.ages.forEach(a => | if (hints.ages.length) hints.ages.forEach(a => buckets.push(`intitle:"${n}" intitle:${a} ${incats}`.trim())); | ||
if (hints.years.length) hints.years.forEach(y => | if (hints.years.length) hints.years.forEach(y => buckets.push(`intitle:"${n}" "${y}" ${incats}`.trim())); | ||
buckets.push(`intitle:"${n}" ${incats}`.trim()); | |||
}); | }); | ||
} | } | ||
// PASS 2: keyword-bündel | |||
const key = [].concat(hints.names.slice(0,2), hints.ages.slice(0,1), hints.years.slice(0,1), hints.words.slice(0,3)) | |||
.map(x => `"${x}"`).join(' '); | |||
if (key) buckets.push(`${key} ${incats}`.trim()); | |||
// PASS | // PASS 3: ohne Kategorien (breiter Fallback) | ||
if (ADOS_CATEGORIES.length) { | |||
if (hints.names.length) hints.names.forEach(n => buckets.push(`intitle:"${n}"`)); | |||
if (key) buckets.push(key); | |||
} | |||
// PASS | // PASS 4: Prefix | ||
if (hints.names.length) buckets.push(hints.names[0]); | |||
if (hints.names.length) | else if (hints.words.length) buckets.push(hints.words[0]); | ||
if ( | |||
const seen = new Set(); const out = []; | const seen = new Set(); const out = []; | ||
async function runSearch (q) { | |||
async function | if (!q) return; | ||
const r = await api.get({ action: 'query', list: 'search', srsearch: q, srnamespace: ns0, srlimit: MAX, formatversion: 2 }); | const r = await api.get({ action:'query', list:'search', srsearch:q, srnamespace: ns0, srlimit: MAX, formatversion:2 }); | ||
(r.query?.search || []).forEach(it => { | (r.query?.search || []).forEach(it => { if (!seen.has(it.title)) { seen.add(it.title); out.push(it); } }); | ||
} | } | ||
for (const q of buckets) { | |||
for (const q of | await runSearch(q); | ||
if (out.length >= MAX) break; | if (out.length >= MAX) break; | ||
} | } | ||
if (out.length < MAX) { | |||
// Prefix-Fallback | |||
const p = hints.names[0] || hints.words[0] || ''; | |||
if (p) { | |||
const r = await api.get({ action:'query', list:'prefixsearch', pssearch:p, psnamespace: ns0, pslimit: MAX }); | |||
(r.query?.prefixsearch || []).forEach(it => { | |||
const title = it.title || it['*']; | |||
if (!seen.has(title)) { seen.add(title); out.push({ title, snippet:'' }); } | |||
}); | |||
} | |||
} | |||
return out.slice(0, MAX); | return out.slice(0, MAX); | ||
} | } | ||
function esc (s) { return mw.html.escape(String(s||'')); } | |||
function | |||
function renderResults (items) { | function renderResults (items) { | ||
const box = document.getElementById('ados-scan-results'); | |||
if (!box) return; | if (!box) return; | ||
box.innerHTML = ''; | box.innerHTML = ''; | ||
| Zeile 386: | Zeile 348: | ||
return; | return; | ||
} | } | ||
items.slice(0, 12).forEach | items.slice(0, 12).forEach(it => { | ||
const title = it.title || ''; | |||
const link = mw.util.getUrl(title.replace(/ /g,'_')); | |||
const snip = String(it.snippet || '').replace(/<\/?span[^>]*>/g, '').replace(/"/g, '"'); | |||
const div = document.createElement('div'); | |||
div.className = 'ados-hit'; | div.className = 'ados-hit'; | ||
div.innerHTML = | div.innerHTML = | ||
'<b><a href="' + link + '">' + esc(title) + '</a></b>' + | '<b><a href="'+link+'">'+esc(title)+'</a></b>' + | ||
(snip ? '<div class="meta">' + snip + '</div>' : ''); | (snip ? '<div class="meta">'+snip+'</div>' : ''); | ||
box.appendChild(div); | box.appendChild(div); | ||
}); | }); | ||
} | } | ||
// ================== | // ========= EVENT-BINDING ========= | ||
let BOUND=false; | |||
function bind() { | |||
function bind () { | |||
if (BOUND || !hasUI()) return; | if (BOUND || !hasUI()) return; | ||
const runBtn = document.getElementById('ados-scan-run'); | |||
const bigBtn = document.getElementById('ados-scan-bigbtn'); | |||
const fileIn = document.getElementById('ados-scan-file'); | |||
if (!runBtn || !fileIn) return; | if (!runBtn || !fileIn) return; | ||
BOUND=true; | |||
if (bigBtn) bigBtn.addEventListener('click', | if (bigBtn) bigBtn.addEventListener('click', () => fileIn.click()); | ||
fileIn.addEventListener('change', function () { | fileIn.addEventListener('change', function(){ | ||
if (this.files && this.files[0]) showPreview(this.files[0]); | if (this.files && this.files[0]) showPreview(this.files[0]); | ||
}); | }); | ||
function | runBtn.addEventListener('click', async function (ev) { | ||
ev.preventDefault(); | ev.preventDefault(); | ||
if (!(fileIn.files && fileIn.files[0])) { alert('Bitte ein Foto auswählen oder aufnehmen.'); return; } | if (!(fileIn.files && fileIn.files[0])) { alert('Bitte ein Foto auswählen oder aufnehmen.'); return; } | ||
try { | |||
runBtn.disabled = true; runBtn.textContent = 'Erkenne …'; | |||
setStatus('Vorverarbeitung & Texterkennung …'); | |||
const text = await runOCR(fileIn.files[0]); | |||
setStatus('Suche im Wiki …'); | |||
const hints = extractHints(text); | |||
const hits = await searchWikiSmart(hints, 12); | |||
renderResults(hits); | |||
setStatus('Fertig.'); | |||
} catch (e) { | |||
console.error('[LabelScan]', e); | |||
setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.'); | |||
} finally { | |||
runBtn.disabled = false; runBtn.textContent = '🔍 Erkennen & suchen'; | |||
} | |||
}); | |||
} | |||
} | |||
} | } | ||
if (document.readyState === 'loading') | if (document.readyState === 'loading') document.addEventListener('DOMContentLoaded', bind); | ||
else bind(); | |||
setTimeout(bind, 250); setTimeout(bind, 1000); | |||
new MutationObserver(() => { if (!BOUND) bind(); }) | |||
.observe(document.documentElement || document.body, { childList:true, subtree:true }); | |||
setTimeout(bind, 250); | |||
})(); | })(); | ||
Version vom 6. November 2025, 00:09 Uhr
/* global mw, Tesseract */
(function () {
'use strict';
// ========= CONFIG =========
// Wiki categories used by searchWikiSmart to narrow its search queries.
// Add further category names here to restrict the search; with an empty list
// no category filter is applied.
const ADOS_CATEGORIES = [
'Alle A Dream of Scotland Abfüllungen',
'Alle A Dream of Ireland Abfüllungen',
'Alle A Dream of... – Der Rest der Welt Abfüllungen',
'Friendly Mr. Z Whiskytainment Abfüllungen',
'Die Whisky Elfen Abfüllungen',
'The Fine Art of Whisky Abfüllungen',
'Alle Rumbastic Abfüllungen'
];
// Words and phrases that often appear on ADOS labels. extractHints tests the
// OCR text against each entry (case-insensitive, whole phrase) and reports the
// hits as `names`.
const KNOWN_TOKENS = [
// Brands / distilleries (excerpt – extend as needed)
'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore',
'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn','Lagavulin',
'Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet','Inchgower',
'Bunnahabhain','Springbank','Caperdonich','Linkwood','Glen Scotia',
// Series names / ADOS vocabulary
'A Dream of Scotland','A Dream of Ireland','The Tasteful 8','Heroes of Childhood',
'Cask Strength','Single Malt','Unicorn','Space Girls','Whisky Elfen',
'The Fine Art of Whisky','Friendly Mr. Z','Rumbastic'
];
// ========= UI HILFSFUNKTIONEN =========
/**
 * Reports whether the scan widget's two mandatory controls are in the DOM.
 * @returns {boolean} true only when both the run button and the file input exist.
 */
function hasUI () {
  const requiredIds = ['ados-scan-run', 'ados-scan-file'];
  return requiredIds.every((id) => Boolean(document.getElementById(id)));
}
/**
 * Writes a status message into the status element, if that element exists.
 * @param {string} [t] Message to show; any falsy value clears the text.
 */
function setStatus (t) {
  const statusNode = document.getElementById('ados-scan-status');
  if (!statusNode) return;
  statusNode.textContent = t || '';
}
/**
 * Updates the progress bar element, if it exists.
 * @param {?number} p Fraction in 0..1 (clamped); null/undefined hides and resets the bar.
 */
function setProgress (p) {
  const bar = document.getElementById('ados-scan-progress');
  if (!bar) return;

  if (p === null || p === undefined) {
    bar.hidden = true;
    bar.value = 0;
    return;
  }

  bar.hidden = false;
  const capped = Math.min(1, p);
  bar.value = Math.max(0, capped);
}
/**
 * Shows the selected photo inside the preview container.
 *
 * The object URL is only created when the preview container exists and is
 * released again as soon as the image has loaded (or failed to load), so
 * repeated selections do not pile up blob URLs for the lifetime of the page.
 *
 * @param {Blob} file Image chosen by the user (passed to URL.createObjectURL).
 */
function showPreview (file) {
  const prev = document.getElementById('ados-scan-preview');
  if (!prev) return;

  const url = URL.createObjectURL(file);
  const release = () => URL.revokeObjectURL(url);

  const img = document.createElement('img');
  img.alt = 'Vorschau';
  // An already loaded image keeps rendering after its blob URL is revoked.
  img.onload = release;
  img.onerror = release;
  img.src = url;

  // Replace any previous preview without going through an HTML string.
  prev.textContent = '';
  prev.appendChild(img);
  prev.setAttribute('aria-hidden', 'false');
}
/**
 * Best-effort debug logger: prefixes the message with the gadget tag and never throws.
 * @param {*} msg Value to log.
 */
const dbg = (msg) => {
  try {
    console.log('[LabelScan]', msg);
  } catch (ignored) {
    // Logging problems must never affect the gadget.
  }
};
// ========= TESSERACT WORKER (einmalig) =========
// Cached promise for the single shared OCR worker; reset to null when setup fails.
let workerPromise = null;

/**
 * Resolves with the global Tesseract.js namespace, injecting the CDN build
 * when the page has not provided one.
 * @returns {Promise<object>} The `window.Tesseract` namespace.
 */
function loadTesseractLibrary () {
  if (window.Tesseract && window.Tesseract.createWorker) {
    return Promise.resolve(window.Tesseract);
  }
  return new Promise((resolve, reject) => {
    const s = document.createElement('script');
    s.src = 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js';
    s.async = true;
    s.onload = () => {
      if (window.Tesseract?.createWorker) resolve(window.Tesseract);
      else reject(new Error('Tesseract lädt, aber createWorker fehlt'));
    };
    s.onerror = () => reject(new Error('Tesseract konnte nicht geladen werden'));
    document.head.appendChild(s);
  });
}

/**
 * Creates a worker for English + German and applies the OCR parameters.
 * @param {object} lib Tesseract.js namespace exposing `createWorker`.
 * @returns {Promise<object>} The ready-to-use worker.
 */
async function createConfiguredWorker (lib) {
  const langs = 'eng+deu';
  // v5 signature: createWorker(langs, oem, options). `await` also copes with
  // builds whose createWorker returns the worker synchronously.
  // NOTE(review): pre-v5 builds expect the options object as first argument,
  // so progress reporting may be unavailable there — confirm if such a build
  // is ever preloaded on the page.
  const worker = await lib.createWorker(langs, 1, {
    logger: (m) => {
      if (m?.status === 'recognizing text' && typeof m.progress === 'number') {
        setProgress(m.progress);
      }
    }
  });

  // Workers without `reinitialize` are assumed to be pre-v5 and still need
  // the explicit load / loadLanguage / initialize sequence.
  const isLegacy = typeof worker.reinitialize !== 'function' &&
    typeof worker.loadLanguage === 'function';
  if (isLegacy) {
    if (typeof worker.load === 'function') await worker.load();
    await worker.loadLanguage(langs);
    if (typeof worker.initialize === 'function') await worker.initialize(langs);
  }

  await worker.setParameters({
    tessedit_pageseg_mode: '6', // PSM 6: a single uniform block of text
    preserve_interword_spaces: '1',
    user_defined_dpi: '300'
  });
  return worker;
}

/**
 * Returns the shared OCR worker, creating it on first use.
 *
 * Concurrent callers share one in-flight promise. If loading or initialising
 * fails, the cache is cleared so a later call can retry instead of receiving
 * the same rejection forever.
 *
 * @returns {Promise<object>} Resolves with the initialised Tesseract worker.
 */
function ensureWorker () {
  if (!workerPromise) {
    workerPromise = loadTesseractLibrary()
      .then(createConfiguredWorker)
      .catch((err) => {
        workerPromise = null;
        throw err;
      });
  }
  return workerPromise;
}
// ========= BILD-VORVERARBEITUNG =========
/**
 * Draws an image onto a new canvas, shrinking it so that its longer side does
 * not exceed the given limit. Images are never enlarged.
 *
 * @param {{width: number, height: number}} img Source image (anything drawImage accepts).
 * @param {number} [maxSide] Maximum length of the longer side; falsy values mean 1800.
 * @returns {HTMLCanvasElement} Canvas of at least 1x1 pixels containing the image.
 */
function toCanvasFromImage (img, maxSide) {
  const limit = maxSide || 1800;
  const longerSide = img.width > img.height ? img.width : img.height;
  const factor = Math.min(1, limit / longerSide);

  const canvas = document.createElement('canvas');
  canvas.width = Math.max(1, Math.round(img.width * factor));
  canvas.height = Math.max(1, Math.round(img.height * factor));

  const ctx = canvas.getContext('2d', { willReadFrequently: true });
  ctx.imageSmoothingEnabled = true;
  ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
  return canvas;
}
/**
 * Pre-processes a canvas in place for OCR: grayscale conversion with contrast
 * stretch, a light unsharp mask, then binarisation against a global threshold.
 *
 * @param {HTMLCanvasElement} canvas Canvas whose pixels are overwritten.
 * @returns {HTMLCanvasElement} The same canvas, now pure black/white and opaque.
 */
function grayscaleContrastUnsharp (canvas) {
  const ctx = canvas.getContext('2d', { willReadFrequently: true });
  const w = canvas.width;
  const h = canvas.height;
  const imageData = ctx.getImageData(0, 0, w, h);
  const rgba = imageData.data;

  // 1) Luminance per pixel, tracking the darkest and brightest value.
  const gray = new Uint8ClampedArray(w * h);
  let lo = 255;
  let hi = 0;
  for (let px = 0; px * 4 < rgba.length; px++) {
    const o = px * 4;
    const lum = 0.2126 * rgba[o] + 0.7152 * rgba[o + 1] + 0.0722 * rgba[o + 2];
    gray[px] = lum;
    if (lum < lo) lo = lum;
    if (lum > hi) hi = lum;
  }

  // Stretch to the full range and apply a mild gamma correction.
  const span = Math.max(1, hi - lo);
  for (let px = 0; px < gray.length; px++) {
    const stretched = Math.pow((gray[px] - lo) / span, 0.9);
    gray[px] = Math.max(0, Math.min(255, Math.round(stretched * 255)));
  }

  // 2) Unsharp mask: 3x3 box blur, then original + amount * (original - blur).
  //    Border pixels are never blurred and keep a blur value of 0.
  const blur = new Uint8ClampedArray(gray.length);
  for (let y = 1; y < h - 1; y++) {
    for (let x = 1; x < w - 1; x++) {
      const center = y * w + x;
      let total = 0;
      for (let dy = -1; dy <= 1; dy++) {
        for (let dx = -1; dx <= 1; dx++) {
          total += gray[center + dy * w + dx];
        }
      }
      blur[center] = total / 9;
    }
  }
  const amount = 0.65;
  for (let px = 0; px < gray.length; px++) {
    // A blur value of 0 falls back to the pixel itself, i.e. no sharpening.
    const reference = blur[px] || gray[px];
    const sharpened = gray[px] + amount * (gray[px] - reference);
    if (sharpened < 0) gray[px] = 0;
    else if (sharpened > 255) gray[px] = 255;
    else gray[px] = sharpened;
  }

  // 3) Binarise: everything darker than 97% of the mean becomes black.
  const total = gray.reduce((acc, value) => acc + value, 0);
  const mean = total / gray.length;
  for (let o = 0, px = 0; o < rgba.length; o += 4, px++) {
    const bw = gray[px] < mean * 0.97 ? 0 : 255;
    rgba[o] = bw;
    rgba[o + 1] = bw;
    rgba[o + 2] = bw;
    rgba[o + 3] = 255;
  }

  ctx.putImageData(imageData, 0, 0);
  return canvas;
}
/**
 * Returns a new canvas with the source drawn rotated around its centre.
 * Width and height are swapped whenever the angle is not a multiple of 180.
 *
 * @param {HTMLCanvasElement} src Canvas to rotate (left untouched).
 * @param {number} deg Rotation angle in degrees.
 * @returns {HTMLCanvasElement} The rotated copy.
 */
function rotateCanvas (src, deg) {
  const { width: srcW, height: srcH } = src;
  const swapSides = (deg % 180) !== 0;

  const out = document.createElement('canvas');
  out.width = swapSides ? srcH : srcW;
  out.height = swapSides ? srcW : srcH;

  const ctx = out.getContext('2d');
  ctx.translate(out.width / 2, out.height / 2);
  ctx.rotate(deg * Math.PI / 180);
  ctx.drawImage(src, -srcW / 2, -srcH / 2);
  return out;
}
/**
 * Copies a rectangular region of a canvas into a new canvas of the same size.
 *
 * @param {HTMLCanvasElement} src Source canvas (left untouched).
 * @param {number} x Left edge of the region.
 * @param {number} y Top edge of the region.
 * @param {number} w Region width.
 * @param {number} h Region height.
 * @returns {HTMLCanvasElement} New canvas holding the region at 1:1 scale.
 */
function cropCanvas (src, x, y, w, h) {
  const region = document.createElement('canvas');
  Object.assign(region, { width: w, height: h });
  const ctx = region.getContext('2d');
  ctx.drawImage(src, x, y, w, h, 0, 0, w, h);
  return region;
}
/**
 * Builds the list of pre-processed image variants that are fed to the OCR.
 *
 * For each rotation (0°, +90°, -90°) three variants are produced, in this
 * order: the full frame, a centred region (80% width, 70% height) and the
 * bottom band (38% height, full width), where many labels carry text blocks.
 *
 * Every variant is drawn into its own canvas before it is binarised.
 * `cloneNode` must not be used for the full frame: cloning a canvas copies the
 * element but not its painted bitmap, which would yield a blank candidate.
 * Processing the source canvas itself would destroy the pixels the later crops
 * and rotations are taken from.
 *
 * @param {HTMLCanvasElement} base Scaled source image; it is not modified.
 * @returns {HTMLCanvasElement[]} Nine binarised candidate canvases.
 */
function buildCandidates (base) {
  const candidates = [];
  for (const deg of [0, 90, -90]) {
    const rotated = deg ? rotateCanvas(base, deg) : base;
    const w = rotated.width;
    const h = rotated.height;

    // Full frame, copied pixel for pixel.
    candidates.push(grayscaleContrastUnsharp(cropCanvas(rotated, 0, 0, w, h)));

    // Centred region.
    const cw = Math.round(w * 0.8);
    const ch = Math.round(h * 0.7);
    const cx = Math.round((w - cw) / 2);
    const cy = Math.round((h - ch) / 2);
    candidates.push(grayscaleContrastUnsharp(cropCanvas(rotated, cx, cy, cw, ch)));

    // Bottom band.
    const bh = Math.round(h * 0.38);
    candidates.push(grayscaleContrastUnsharp(cropCanvas(rotated, 0, h - bh, w, bh)));
  }
  return candidates;
}
// ========= OCR PIPELINE =========
/**
 * Runs the OCR on a photo and returns the best recognised text.
 *
 * The photo is scaled, turned into several pre-processed candidates and each
 * candidate is recognised with page segmentation modes 6 (text block) and 7
 * (single line). Passes are ranked by `confidence + 1.5 * wordCount`, where
 * wordCount is the number of letter runs of length >= 2. As soon as one pass
 * is clearly good (confidence > 75 and more than 15 such runs) the whole scan
 * stops.
 *
 * The recognised text is also written to the `ados-scan-ocr` element when it exists.
 *
 * @param {Blob} file Photo selected by the user.
 * @returns {Promise<string>} Best text found, or '' when nothing was recognised.
 * @throws {Error} When the worker cannot be created or the image cannot be loaded.
 */
async function runOCR (file) {
  const worker = await ensureWorker();
  setProgress(0);

  let best = { text: '', conf: 0, letters: 0, score: 0 };
  try {
    // Load image -> canvas; the blob URL is released once the pixels are copied.
    const url = URL.createObjectURL(file);
    let base;
    try {
      const img = await new Promise((res, rej) => {
        const o = new Image();
        o.onload = () => res(o);
        o.onerror = () => rej(new Error('Bild konnte nicht geladen werden'));
        o.src = url;
      });
      base = toCanvasFromImage(img, 1800);
    } finally {
      URL.revokeObjectURL(url);
    }

    const candidates = buildCandidates(base);
    const PSMs = ['6', '7'];

    scan:
    for (const c of candidates) {
      for (const psm of PSMs) {
        try {
          await worker.setParameters({ tessedit_pageseg_mode: psm });
          const { data } = await worker.recognize(c);
          const text = data?.text ? String(data.text) : '';
          const conf = data?.confidence || 0;
          const letters = (text.match(/[A-Za-zÄÖÜäöüß]{2,}/g) || []).length;
          const score = conf + letters * 1.5;
          if (score > best.score) {
            best = { text, conf, letters, score };
          }
          // Good enough: leave both loops, not just the PSM loop.
          if (conf > 75 && letters > 15) break scan;
        } catch (e) {
          // Best effort: move on to the next pass, but leave a trace.
          console.warn('[LabelScan]', 'OCR-Durchlauf fehlgeschlagen', e);
        }
      }
    }
  } finally {
    // Hide the progress bar on failure as well.
    setProgress(null);
  }

  // Debug output
  const dbgEl = document.getElementById('ados-scan-ocr');
  if (dbgEl) dbgEl.textContent = best.text || '(kein Text erkannt)';
  return best.text || '';
}
// ========= HINWEISE EXTRAHIEREN & SUCHE =========
/**
 * Derives search hints from recognised label text.
 *
 * @param {string} text Raw OCR output (falsy values are treated as '').
 * @returns {{names: string[], ages: string[], years: string[], words: string[], raw: string}}
 *   names – KNOWN_TOKENS entries found in the text (case-insensitive);
 *   ages  – distinct age numbers followed by years/yo/jahr(e);
 *   years – distinct four-digit years starting with 19 or 20;
 *   words – up to 8 distinct capitalised words of at least 4 characters;
 *   raw   – the text with whitespace collapsed and trimmed.
 */
function extractHints (text) {
  const raw = String(text || '').replace(/\s+/g, ' ').trim();

  // Distinct values of one capture group, in order of first appearance.
  const distinctMatches = (pattern, group) =>
    [...new Set(Array.from(raw.matchAll(pattern), (hit) => hit[group]))];

  const names = KNOWN_TOKENS.filter((token) => {
    const literal = token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    return new RegExp('\\b' + literal + '\\b', 'i').test(raw);
  });

  const ages = distinctMatches(/\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi, 1);
  const years = distinctMatches(/\b(19|20)\d{2}\b/g, 0);
  const words = distinctMatches(/\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{3,}\b/g, 0).slice(0, 8);

  return { names, ages, years, words, raw };
}
/**
 * Searches the wiki's main namespace for pages matching the extracted hints.
 *
 * Queries run from narrow to broad until enough distinct titles are collected:
 *   1. title matches (name + age / year) restricted to the ADOS categories,
 *   2. a quoted keyword bundle restricted to the ADOS categories,
 *   3. the same queries without the category restriction,
 *   4. the first name (or first word) as a plain search term,
 * followed by a prefix search if there is still room.
 *
 * The category restriction is a single `incategory:"A|B|…"` clause, which
 * matches pages in ANY listed category. Separate space-joined `incategory:`
 * clauses would require a page to be in every category at once.
 *
 * @param {{names: string[], ages: string[], years: string[], words: string[]}} hints
 *   Output of extractHints.
 * @param {number} [limit] Maximum number of results; falsy values mean 12.
 * @returns {Promise<Array<{title: string, snippet: string}>>} Distinct hits, best first.
 */
async function searchWikiSmart (hints, limit) {
  await mw.loader.using('mediawiki.api');
  const api = new mw.Api();
  const ns0 = 0;
  const MAX = limit || 12;
  const incats = ADOS_CATEGORIES.length
    ? `incategory:"${ADOS_CATEGORIES.join('|')}"`
    : '';

  // Ordered, de-duplicated list of queries so no API call is issued twice.
  const buckets = [];
  const addQuery = (q) => {
    const query = String(q || '').trim();
    if (query && !buckets.includes(query)) buckets.push(query);
  };

  // PASS 1: very narrow (intitle + categories)
  hints.names.forEach((n) => {
    hints.ages.forEach((a) => addQuery(`intitle:"${n}" intitle:${a} ${incats}`));
    hints.years.forEach((y) => addQuery(`intitle:"${n}" "${y}" ${incats}`));
    addQuery(`intitle:"${n}" ${incats}`);
  });

  // PASS 2: keyword bundle
  const key = [
    ...hints.names.slice(0, 2),
    ...hints.ages.slice(0, 1),
    ...hints.years.slice(0, 1),
    ...hints.words.slice(0, 3)
  ].map((x) => `"${x}"`).join(' ');
  if (key) addQuery(`${key} ${incats}`);

  // PASS 3: without categories (broad fallback)
  if (ADOS_CATEGORIES.length) {
    hints.names.forEach((n) => addQuery(`intitle:"${n}"`));
    if (key) addQuery(key);
  }

  // PASS 4: first name or word as a plain term
  addQuery(hints.names[0] || hints.words[0] || '');

  const seen = new Set();
  const out = [];
  for (const q of buckets) {
    const r = await api.get({ action: 'query', list: 'search', srsearch: q, srnamespace: ns0, srlimit: MAX, formatversion: 2 });
    (r.query?.search || []).forEach((it) => {
      if (!seen.has(it.title)) { seen.add(it.title); out.push(it); }
    });
    if (out.length >= MAX) break;
  }

  if (out.length < MAX) {
    // Prefix fallback
    const p = hints.names[0] || hints.words[0] || '';
    if (p) {
      const r = await api.get({ action: 'query', list: 'prefixsearch', pssearch: p, psnamespace: ns0, pslimit: MAX });
      (r.query?.prefixsearch || []).forEach((it) => {
        const title = it.title || it['*'];
        if (!seen.has(title)) { seen.add(title); out.push({ title, snippet: '' }); }
      });
    }
  }
  return out.slice(0, MAX);
}
/**
 * HTML-escapes a value via mw.html.escape.
 * @param {*} s Value to escape; falsy values become the empty string.
 * @returns {string} Escaped text.
 */
function esc (s) {
  const text = String(s || '');
  return mw.html.escape(text);
}
/**
 * Renders up to 12 search hits into the results container.
 *
 * Each hit becomes `<div class="ados-hit"><b><a href>title</a></b><div class="meta">snippet</div></div>`.
 * The nodes are built through the DOM and filled with textContent, so titles,
 * links and snippets coming from the API are never interpreted as markup. The
 * snippet's highlighting tags and HTML entities are reduced to plain text by
 * parsing it into an inert document.
 *
 * @param {Array<{title: string, snippet?: string}>} items Search hits; an empty or
 *   missing list shows a "no clear hits" message instead.
 */
function renderResults (items) {
  const box = document.getElementById('ados-scan-results');
  if (!box) return;
  box.textContent = '';

  if (!items || !items.length) {
    const empty = document.createElement('div');
    empty.className = 'ados-hit';
    empty.textContent = 'Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.';
    box.appendChild(empty);
    return;
  }

  const parser = new DOMParser();
  items.slice(0, 12).forEach((it) => {
    const title = String(it.title || '');

    const anchor = document.createElement('a');
    anchor.href = mw.util.getUrl(title.replace(/ /g, '_'));
    anchor.textContent = title;

    const strong = document.createElement('b');
    strong.appendChild(anchor);

    const hit = document.createElement('div');
    hit.className = 'ados-hit';
    hit.appendChild(strong);

    // Documents created by DOMParser do not run scripts or load resources.
    const snip = it.snippet
      ? parser.parseFromString(String(it.snippet), 'text/html').body.textContent
      : '';
    if (snip) {
      const meta = document.createElement('div');
      meta.className = 'meta';
      meta.textContent = snip;
      hit.appendChild(meta);
    }

    box.appendChild(hit);
  });
}
// ========= EVENT-BINDING =========
// Set once the event handlers have been attached; makes bind() idempotent.
let BOUND = false;

/**
 * Attaches the gadget's event handlers to the scan widget.
 *
 * Does nothing when the handlers are already attached or the widget is not in
 * the DOM yet, so it can safely be called repeatedly.
 */
function bind () {
  if (BOUND) return;
  if (!hasUI()) return;

  const byId = (id) => document.getElementById(id);
  const runBtn = byId('ados-scan-run');
  const bigBtn = byId('ados-scan-bigbtn');
  const fileIn = byId('ados-scan-file');
  if (!(runBtn && fileIn)) return;
  BOUND = true;

  // Run button: OCR -> hints -> wiki search -> render.
  const onRun = async (ev) => {
    ev.preventDefault();
    const photo = fileIn.files && fileIn.files[0];
    if (!photo) {
      alert('Bitte ein Foto auswählen oder aufnehmen.');
      return;
    }
    try {
      runBtn.disabled = true;
      runBtn.textContent = 'Erkenne …';
      setStatus('Vorverarbeitung & Texterkennung …');
      const text = await runOCR(photo);
      setStatus('Suche im Wiki …');
      const hits = await searchWikiSmart(extractHints(text), 12);
      renderResults(hits);
      setStatus('Fertig.');
    } catch (e) {
      console.error('[LabelScan]', e);
      setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
    } finally {
      runBtn.disabled = false;
      runBtn.textContent = '🔍 Erkennen & suchen';
    }
  };

  // The optional big button simply opens the file picker.
  if (bigBtn) {
    bigBtn.addEventListener('click', () => fileIn.click());
  }

  // Show a preview as soon as a photo has been picked.
  fileIn.addEventListener('change', () => {
    const picked = fileIn.files && fileIn.files[0];
    if (picked) showPreview(picked);
  });

  runBtn.addEventListener('click', onRun);
}
// Bootstrap: bind right away if the DOM is ready, otherwise once it is.
if (document.readyState === 'loading') {
  document.addEventListener('DOMContentLoaded', bind);
} else {
  bind();
}
// Retry shortly afterwards in case the widget markup arrives a little late.
setTimeout(bind, 250);
setTimeout(bind, 1000);
// Keep watching for the widget until the handlers are attached, then stop
// observing so the callback does not run on every DOM mutation for the rest
// of the page's lifetime.
if (!BOUND) {
  const uiObserver = new MutationObserver(() => {
    bind();
    if (BOUND) uiObserver.disconnect();
  });
  uiObserver.observe(document.documentElement || document.body, { childList: true, subtree: true });
}
})();