MediaWiki:Gadget-LabelScan.js: Unterschied zwischen den Versionen
Erscheinungsbild
Admin (Diskussion | Beiträge) Keine Bearbeitungszusammenfassung |
Admin (Diskussion | Beiträge) Keine Bearbeitungszusammenfassung |
||
| Zeile 3: | Zeile 3: | ||
'use strict'; | 'use strict'; | ||
// ========== | // ============================= | ||
// KONFIGURATION | |||
// ============================= | |||
// ← Für Tests leer lassen: const ADOS_CATEGORIES = []; | |||
const ADOS_CATEGORIES = [ | |||
'Alle A Dream of Scotland Abfüllungen', | |||
'Alle A Dream of Ireland Abfüllungen', | |||
'Alle A Dream of... – Der Rest der Welt Abfüllungen', | |||
'Friendly Mr. Z Whiskytainment Abfüllungen', | |||
'Die Whisky Elfen Abfüllungen', | |||
'The Fine Art of Whisky Abfüllungen', | |||
'Alle Rumbastic Abfüllungen' | |||
]; | |||
const KNOWN_TOKENS = [ | const KNOWN_TOKENS = [ | ||
// Serien / Reihen | |||
'The Tasteful 8', 'Heroes of Childhood', 'A Dream of Scotland', | |||
'A Dream of Ireland', 'The Fine Art of Whisky', 'Friendly Mr. Z', | |||
'Die Whisky Elfen', 'Rumbastic', | |||
// Brennereien / Regionen | |||
'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore', | 'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore', | ||
'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn','Lagavulin', | 'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn','Lagavulin', | ||
'Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet','Inchgower', | 'Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet','Inchgower', | ||
'Islay','Speyside','Highland','Lowland','Campbeltown','Ireland' | 'Islay','Speyside','Highland','Lowland','Campbeltown','Ireland','Irland', | ||
// typische Label-Wörter | |||
'Cask Strength','First Fill','Bourbon Barrel','Sherry','PX','Oloroso' | |||
]; | ]; | ||
// ========== | // ============================= | ||
// UI-Hilfen | |||
// ============================= | |||
function hasUI () { | function hasUI () { | ||
return !!document.getElementById('ados-scan-run') && | return !!document.getElementById('ados-scan-run') && | ||
!!document.getElementById('ados-scan-file'); | !!document.getElementById('ados-scan-file'); | ||
} | } | ||
function setStatus (t) { | function setStatus (t) { | ||
var el = document.getElementById('ados-scan-status'); | var el = document.getElementById('ados-scan-status'); | ||
if (el) el.textContent = t || ''; | if (el) el.textContent = t || ''; | ||
} | } | ||
function setProgress (p) { | function setProgress (p) { | ||
var bar = document.getElementById('ados-scan-progress'); | var bar = document.getElementById('ados-scan-progress'); | ||
| Zeile 77: | Zeile 54: | ||
else { bar.hidden = false; bar.value = Math.max(0, Math.min(1, p)); } | else { bar.hidden = false; bar.value = Math.max(0, Math.min(1, p)); } | ||
} | } | ||
function showPreview (file) { | function showPreview (file) { | ||
var url = URL.createObjectURL(file); | var url = URL.createObjectURL(file); | ||
| Zeile 86: | Zeile 64: | ||
} | } | ||
// ========== Tesseract | function showOCRText (t) { | ||
var el = document.getElementById('ados-scan-ocr'); | |||
if (el) el.textContent = (t || '').trim(); | |||
} | |||
// ============================= | |||
// Tesseract laden (nur 1x) | |||
// ============================= | |||
var tesseractReady; | var tesseractReady; | ||
function ensureTesseract () { | function ensureTesseract () { | ||
| Zeile 109: | Zeile 95: | ||
} | } | ||
// ========== | // ============================= | ||
// Vorverarbeitung (OCR) | |||
// Graustufen + Unsharp + adaptive Schwelle | |||
// ============================= | |||
async function preprocessImage (file) { | async function preprocessImage (file) { | ||
const img = await new Promise((res, rej) => { | const img = await new Promise((res, rej) => { | ||
| Zeile 118: | Zeile 108: | ||
}); | }); | ||
const MAX = 1800; | const MAX = 1800; | ||
const | const s = Math.min(1, (img.width > img.height) ? MAX / img.width : MAX / img.height); | ||
const w = Math.round(img.width * | const w = Math.round(img.width * s), h = Math.round(img.height * s); | ||
const h = | |||
const c = document.createElement('canvas'); c.width = w; c.height = h; | |||
const g = c.getContext('2d', { willReadFrequently: true }); | |||
g.imageSmoothingEnabled = true; | |||
g.drawImage(img, 0, 0, w, h); | |||
// → Graustufen | |||
let id = g.getImageData(0, 0, w, h), d = id.data; | |||
for (let i=0;i<d.length;i+=4){ | |||
const y = 0.2126*d[i] + 0.7152*d[i+1] + 0.0722*d[i+2]; | |||
d[i]=d[i+1]=d[i+2]=y; | |||
} | |||
g.putImageData(id, 0, 0); | |||
const | // → Unsharp (leichter Hochpass) | ||
id = g.getImageData(0,0,w,h); d = id.data; | |||
const copy = new Uint8ClampedArray(d); | |||
const idx = (x,y)=>4*(y*w+x); | |||
for (let y=1;y<h-1;y++){ | |||
for (let x=1;x<w-1;x++){ | |||
const i0=idx(x,y), a=copy[i0], b=copy[idx(x-1,y)], c0=copy[idx(x+1,y)], | |||
d0=copy[idx(x,y-1)], e=copy[idx(x,y+1)]; | |||
const lap = 4*a - b - c0 - d0 - e; | |||
const v = Math.max(0, Math.min(255, a + 0.3*lap)); | |||
d[i0]=d[i0+1]=d[i0+2]=v; | |||
} | |||
} | |||
g.putImageData(id,0,0); | |||
// | // → adaptive Schwelle (lokaler Mittelwert) | ||
const id = | const win = 25, half = (win|0); | ||
id = g.getImageData(0,0,w,h); d = id.data; | |||
for (let | for (let y=0;y<h;y++){ | ||
for (let x=0;x<w;x++){ | |||
let sum=0, cnt=0; | |||
for (let yy=Math.max(0,y-half); yy<=Math.min(h-1,y+half); yy+=5){ | |||
for (let xx=Math.max(0,x-half); xx<=Math.min(w-1,x+half); xx+=5){ | |||
sum += d[4*(yy*w+xx)]; | |||
cnt++; | |||
} | |||
} | |||
const thr = (sum/cnt) - 6; | |||
const i = 4*(y*w+x); | |||
const v = d[i] < thr ? 0 : 255; | |||
d[i]=d[i+1]=d[i+2]=v; | |||
} | |||
} | } | ||
g.putImageData(id,0,0); | |||
return c; | |||
} | |||
return c; // | // Hilfsfunktionen für Varianten | ||
function crop(canvas, x, y, w, h){ | |||
const c = document.createElement('canvas'); c.width=w; c.height=h; | |||
c.getContext('2d').drawImage(canvas, x, y, w, h, 0, 0, w, h); | |||
return c; | |||
} | |||
function rotate(canvas, deg){ | |||
const r = document.createElement('canvas'); | |||
const ctx = r.getContext('2d'); | |||
if (deg % 180 === 0){ r.width=canvas.width; r.height=canvas.height; } | |||
else { r.width=canvas.height; r.height=canvas.width; } | |||
ctx.translate(r.width/2, r.height/2); | |||
ctx.rotate(deg*Math.PI/180); | |||
ctx.drawImage(canvas, -canvas.width/2, -canvas.height/2); | |||
return r; | |||
} | } | ||
async function ocrOne(canvas, lang) { | |||
async function | const res = await Tesseract.recognize(canvas, lang, { | ||
// Sparse text funktioniert bei Labels (verschieden orientierte Textblöcke) | |||
tessedit_pageseg_mode: 11, | |||
preserve_interword_spaces: 1 | |||
const res = await Tesseract.recognize(canvas, | |||
tessedit_pageseg_mode: | |||
preserve_interword_spaces: 1 | |||
}); | }); | ||
return { text: (res?.data?.text||'').trim(), conf: res?.data?.confidence||0 }; | |||
} | |||
// ============================= | |||
// Mehrfach-OCR (Rotationen/Regionen) + Fallback-Sprache | |||
// ============================= | |||
async function runOCR(file){ | |||
await ensureTesseract(); | |||
setProgress(0.01); | |||
const base = await preprocessImage(file); | |||
// Kandidatenflächen | |||
const variants = []; | |||
variants.push(base); // komplett | |||
variants.push(crop(base, 0, 0, Math.round(base.width*0.4), base.height)); // linke Spalte | |||
variants.push(crop(base, 0, Math.round(base.height*0.72), base.width, Math.round(base.height*0.28))); // unteres Banner | |||
// + Rotationen | |||
const more = []; | |||
for (const v of variants){ | |||
more.push(v, rotate(v, 90), rotate(v, -90)); | |||
} | |||
// zwei Sprachmodi testen | |||
const results = []; | |||
for (const canv of more){ | |||
for (const lang of ['deu+eng','eng']){ | |||
try { | |||
const r = await ocrOne(canv, lang); | |||
results.push(r); | |||
} catch(e){ /* einzelne Fehlschläge ignorieren */ } | |||
} | |||
} | |||
setProgress(null); | setProgress(null); | ||
results.sort((a,b)=> (b.conf||0)-(a.conf||0)); | |||
return (results[0]?.text)||''; | |||
return ( | |||
} | } | ||
// ============================= | |||
// Hinweise aus OCR | |||
// ============================= | |||
function extractHints (text) { | function extractHints (text) { | ||
const raw = String(text || '').replace(/\s+/g, ' ').trim(); | const raw = String(text || '').replace(/\s+/g, ' ').trim(); | ||
const foundNames = []; | const foundNames = []; | ||
KNOWN_TOKENS.forEach(t => { | KNOWN_TOKENS.forEach(t => { | ||
| Zeile 181: | Zeile 239: | ||
}); | }); | ||
const ages = []; | |||
const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi; | const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi; | ||
let m; | |||
while ((m = ageRe.exec(raw)) !== null){ const n = m[1]; if (!ages.includes(n)) ages.push(n); } | while ((m = ageRe.exec(raw)) !== null) { | ||
const n = m[1]; if (!ages.includes(n)) ages.push(n); | |||
} | |||
const years = []; | |||
const yearRe = /\b(19|20)\d{2}\b/g; | const yearRe = /\b(19|20)\d{2}\b/g; | ||
while ((m = yearRe.exec(raw)) !== null) { | |||
if (!years.includes(m[0])) years.push(m[0]); | |||
} | |||
const wordRe = /\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{3,}\b/g; | |||
const | const uniq = new Set(); let w; const words = []; | ||
const | while ((w = wordRe.exec(raw)) !== null) { | ||
const s = w[0]; | |||
if (!uniq.has(s)) { uniq.add(s); words.push(s); if (words.length >= 8) break; } | |||
} | |||
return { names: foundNames, ages, years, words, raw }; | |||
} | |||
// ============================= | |||
// Suche (3 Pässe) + Fallbacks | |||
// ============================= | |||
function esc (s) { return mw.html.escape(String(s || '')); } | |||
return | function incatStr () { | ||
return (ADOS_CATEGORIES || []).map(c => 'incategory:"' + c + '"').join(' '); | |||
} | } | ||
async function searchWikiSmart (hints, limit) { | async function searchWikiSmart (hints, limit) { | ||
await mw.loader.using('mediawiki.api'); | await mw.loader.using(['mediawiki.api','mediawiki.util','mediawiki.html']); | ||
const api = new mw.Api(); | const api = new mw.Api(); | ||
const ns0 = 0; | const ns0 = 0; | ||
const MAX = limit || 12; | const MAX = limit || 12; | ||
// PASS 1: intitle-Kombis (präzise) | // PASS 1: intitle-Kombis (präzise) | ||
| Zeile 285: | Zeile 317: | ||
for (const p of pass3) { | for (const p of pass3) { | ||
const r = await api.get({ action: 'query', list: 'prefixsearch', pssearch: p, psnamespace: ns0, pslimit: MAX }); | const r = await api.get({ action: 'query', list: 'prefixsearch', pssearch: p, psnamespace: ns0, pslimit: MAX }); | ||
(r.query?.prefixsearch || []).forEach(it=>{ | (r.query?.prefixsearch || []).forEach(it => { | ||
const title = it.title || it['*']; | const title = it.title || it['*']; | ||
const k = title; if (seen.has(k)) return; seen.add(k); | const k = title; | ||
if (seen.has(k)) return; | |||
seen.add(k); | |||
out.push({ title, snippet: '' }); | out.push({ title, snippet: '' }); | ||
}); | }); | ||
| Zeile 296: | Zeile 330: | ||
} | } | ||
// | // ganz einfacher Fuzzy-Fallback auf Suchergebnissen | ||
async function | function scoreTitle(title, hints){ | ||
const t = String(title||'').toLowerCase(); | |||
let s = 0; | |||
hints.names.forEach(n => { if (t.includes(n.toLowerCase())) s += 1.0; }); | |||
hints.words.forEach(n => { if (t.includes(n.toLowerCase())) s += 0.4; }); | |||
hints.ages.forEach(a => { if (t.includes(String(a))) s += 0.4; }); | |||
hints.years.forEach(y => { if (t.includes(String(y))) s += 0.4; }); | |||
return s; | |||
} | |||
async function fallbackFuzzyTitles(hints, limit){ | |||
await mw.loader.using('mediawiki.api'); | await mw.loader.using('mediawiki.api'); | ||
const api = new mw.Api(); | const api = new mw.Api(); | ||
const | const MAX = limit || 12; | ||
// Breite Suche mit Tokens (mit/ohne Kategorie) | |||
const q1 = [] | |||
.concat(hints.names.slice(0,2), hints.ages.slice(0,1), hints.years.slice(0,1), hints.words.slice(0,3)) | |||
.map(x => `"${x}"`).join(' '); | |||
const q = `${q1} ${incatStr()}`.trim(); | |||
const r = await api.get({ action:'query', list:'search', srsearch:q || hints.raw.split(/\s+/).slice(0,6).join(' '), srlimit:50, formatversion:2 }); | |||
const items = (r.query?.search || []); | |||
const scored = items.map(it => ({ ...it, _score: scoreTitle(it.title, hints) })); | |||
scored.sort((a,b)=> b._score - a._score); | |||
const top = scored.slice(0, MAX).filter(x=> x._score >= 0.10); // großzügiger | |||
return top; | |||
return | |||
} | } | ||
function | async function broadSearchNoCategory(hints, limit){ | ||
await mw.loader.using('mediawiki.api'); | |||
const api = new mw.Api(); | |||
const MAX = limit || 12; | |||
const parts = [] | |||
const | .concat(hints.names.slice(0,2), hints.ages.slice(0,1), hints.years.slice(0,1), hints.words.slice(0,3)) | ||
.map(x => `"${x}"`); | |||
const q = parts.length ? parts.join(' ') : hints.raw.split(/\s+/).slice(0,6).join(' '); | |||
const r = await api.get({ action:'query', list:'search', srsearch:q, srlimit:MAX, formatversion:2 }); | |||
return (r.query?.search || []); | |||
} | } | ||
// ============================= | |||
// Ergebnisse rendern | |||
// ============================= | |||
function renderResults (items) { | function renderResults (items) { | ||
var box = document.getElementById('ados-scan-results'); | var box = document.getElementById('ados-scan-results'); | ||
| Zeile 379: | Zeile 388: | ||
items.slice(0, 12).forEach(function (it) { | items.slice(0, 12).forEach(function (it) { | ||
var title = it.title || ''; | var title = it.title || ''; | ||
var link = | var link = mw.util.getUrl(title.replace(/ /g, '_')); | ||
var snip = String(it.snippet || '').replace(/<\/?span[^>]*>/g, '').replace(/"/g, '"'); | var snip = String(it.snippet || '').replace(/<\/?span[^>]*>/g, '').replace(/"/g, '"'); | ||
var div = document.createElement('div'); | var div = document.createElement('div'); | ||
div.className = 'ados-hit'; | div.className = 'ados-hit'; | ||
div.innerHTML = | div.innerHTML = | ||
'<b><a href="' + link + '">' + | '<b><a href="' + link + '">' + esc(title) + '</a></b>' + | ||
(snip ? '<div class="meta">' + snip + '</div>' : ''); | (snip ? '<div class="meta">' + snip + '</div>' : ''); | ||
box.appendChild(div); | box.appendChild(div); | ||
| Zeile 390: | Zeile 399: | ||
} | } | ||
// ========== Binding ========== | // ============================= | ||
// Binding | |||
// ============================= | |||
var BOUND = false; | var BOUND = false; | ||
function bind () { | function bind () { | ||
| Zeile 398: | Zeile 410: | ||
var fileIn = document.getElementById('ados-scan-file'); | var fileIn = document.getElementById('ados-scan-file'); | ||
var bigBtn = document.getElementById('ados-scan-bigbtn'); | var bigBtn = document.getElementById('ados-scan-bigbtn'); | ||
var form | var form = document.getElementById('ados-scan-form'); | ||
if (!runBtn || !fileIn) return; | if (!runBtn || !fileIn) return; | ||
| Zeile 408: | Zeile 420: | ||
if (this.files && this.files[0]) showPreview(this.files[0]); | if (this.files && this.files[0]) showPreview(this.files[0]); | ||
}); | }); | ||
function onSubmit(ev){ | |||
ev.preventDefault(); | ev.preventDefault(); | ||
if (!(fileIn.files && fileIn.files[0])) { alert('Bitte ein Foto auswählen oder aufnehmen.'); return; } | if (!(fileIn.files && fileIn.files[0])) { alert('Bitte ein Foto auswählen oder aufnehmen.'); return; } | ||
var f = fileIn.files[0]; | var f = fileIn.files[0]; | ||
try { | (async function(){ | ||
try { | |||
runBtn.disabled = true; runBtn.textContent = 'Erkenne …'; | |||
setStatus('Erkenne Label …'); | |||
const text = await runOCR(f); | |||
showOCRText(text); | |||
setStatus('Suche im Wiki …'); | |||
const hints = extractHints(text); | |||
if (!hits || !hits.length) { | let hits = await searchWikiSmart(hints, 12); | ||
if (!hits || !hits.length) { | |||
setStatus('Kein direkter Treffer – Fuzzy über Kategorien …'); | |||
} | hits = await fallbackFuzzyTitles(hints, 12); | ||
} | |||
if (!hits || !hits.length) { | |||
setStatus('Kein Treffer – breite Suche ohne Kategorien …'); | |||
hits = await broadSearchNoCategory(hints, 12); | |||
} | |||
renderResults(hits); | |||
setStatus('Fertig.'); | |||
} catch (e) { | |||
} | console.error('[LabelScan]', e); | ||
setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.'); | |||
} finally { | |||
runBtn.disabled = false; runBtn.textContent = 'Erkennen & suchen'; | |||
} | |||
})(); | |||
} | |||
runBtn.addEventListener('click', onSubmit); | |||
if (form) form.addEventListener('submit', onSubmit); | |||
// Sicherheit | |||
// Sicherheit | |||
var wrap = document.getElementById('ados-labelscan'); | var wrap = document.getElementById('ados-labelscan'); | ||
if (wrap) wrap.style.position = 'relative'; | if (wrap) wrap.style.position = 'relative'; | ||
| Zeile 458: | Zeile 467: | ||
} | } | ||
if (document.readyState === 'loading') { | if (document.readyState === 'loading') { | ||
document.addEventListener('DOMContentLoaded', bind); | document.addEventListener('DOMContentLoaded', bind); | ||
| Zeile 468: | Zeile 476: | ||
var mo = new MutationObserver(function () { if (!BOUND) bind(); }); | var mo = new MutationObserver(function () { if (!BOUND) bind(); }); | ||
mo.observe(document.documentElement || document.body, { childList: true, subtree: true }); | mo.observe(document.documentElement || document.body, { childList: true, subtree: true }); | ||
})(); | })(); | ||
Version vom 5. November 2025, 23:20 Uhr
/* global mw, Tesseract */
(function () {
'use strict';
// =============================
// CONFIGURATION
// =============================
// Wiki category names that incatStr() turns into `incategory:` search filters.
// ← For testing, leave empty: const ADOS_CATEGORIES = [];
const ADOS_CATEGORIES = [
'Alle A Dream of Scotland Abfüllungen',
'Alle A Dream of Ireland Abfüllungen',
'Alle A Dream of... – Der Rest der Welt Abfüllungen',
'Friendly Mr. Z Whiskytainment Abfüllungen',
'Die Whisky Elfen Abfüllungen',
'The Fine Art of Whisky Abfüllungen',
'Alle Rumbastic Abfüllungen'
];
// Vocabulary that extractHints() looks for (case-insensitively, on word
// boundaries) in the OCR text. Order matters: it is the order of `hints.names`.
const KNOWN_TOKENS = [
  // series / ranges
  ...[
    'The Tasteful 8',
    'Heroes of Childhood',
    'A Dream of Scotland',
    'A Dream of Ireland',
    'The Fine Art of Whisky',
    'Friendly Mr. Z',
    'Die Whisky Elfen',
    'Rumbastic'
  ],
  // distilleries / regions
  ...[
    'Ardbeg', 'Ardmore', 'Arran', 'Auchroisk', 'Ben Nevis', 'Blair Athol', 'Bowmore',
    'Caol Ila', 'Clynelish', 'Glenallachie', 'Glenrothes', 'Longmorn', 'Lagavulin',
    'Tullibardine', 'Dalmore', 'Benrinnes', 'Mortlach', 'Glenlivet', 'Inchgower',
    'Islay', 'Speyside', 'Highland', 'Lowland', 'Campbeltown', 'Ireland', 'Irland'
  ],
  // typical label words
  ...[
    'Cask Strength', 'First Fill', 'Bourbon Barrel', 'Sherry', 'PX', 'Oloroso'
  ]
];
// =============================
// UI helpers
// =============================
/**
 * Reports whether the scan UI is present.
 * @returns {boolean} true only when both the run button and the file input exist.
 */
function hasUI () {
  const requiredIds = ['ados-scan-run', 'ados-scan-file'];
  return requiredIds.every(function (id) {
    return !!document.getElementById(id);
  });
}
/**
 * Writes a status message into #ados-scan-status, if that element exists.
 * @param {string} [t] message; any falsy value clears the line.
 */
function setStatus (t) {
  const target = document.getElementById('ados-scan-status');
  if (!target) return;
  target.textContent = t || '';
}
/**
 * Updates the #ados-scan-progress element, if it exists.
 * @param {?number} p fraction done, clamped to [0, 1]; null/undefined hides
 *   the bar and resets its value to 0.
 */
function setProgress (p) {
  const meter = document.getElementById('ados-scan-progress');
  if (!meter) return;
  const idle = (p === null || p === undefined);
  meter.hidden = idle;
  meter.value = idle ? 0 : Math.max(0, Math.min(1, p));
}
/**
 * Shows a preview of the selected image inside #ados-scan-preview.
 *
 * The blob URL is only created when the preview container exists, and it is
 * revoked as soon as the <img> has loaded (or failed), so repeated selections
 * do not pile up object URLs for the lifetime of the page.
 *
 * @param {Blob} file the image chosen in the file input.
 */
function showPreview (file) {
  const prev = document.getElementById('ados-scan-preview');
  if (!prev) return;

  const url = URL.createObjectURL(file);
  const img = document.createElement('img');
  const release = function () { URL.revokeObjectURL(url); };
  img.alt = 'Vorschau';
  img.onload = release;
  img.onerror = release;
  img.src = url;

  // Replace whatever preview was shown before.
  prev.innerHTML = '';
  prev.appendChild(img);
  prev.setAttribute('aria-hidden', 'false');
}
/**
 * Displays the recognised text (trimmed) in #ados-scan-ocr, if present.
 * @param {string} [t] OCR output; falsy values are shown as an empty string.
 */
function showOCRText (t) {
  const out = document.getElementById('ados-scan-ocr');
  if (!out) return;
  out.textContent = (t || '').trim();
}
// =============================
// Load Tesseract (only once)
// =============================
// Pending or fulfilled load promise; cleared again when loading fails.
var tesseractReady;
/**
 * Makes sure the tesseract.js script is loaded, trying each CDN in turn.
 *
 * Concurrent and later callers share one promise. If every CDN fails the
 * cached promise is dropped, so the next call starts a fresh attempt instead
 * of returning the same rejection until the page is reloaded.
 *
 * @returns {Promise} resolves once `window.Tesseract` is available (or a
 *   script finished loading); rejects with an Error when no source loads.
 */
function ensureTesseract () {
  if (tesseractReady) return tesseractReady;

  const SOURCES = [
    'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js',
    'https://unpkg.com/tesseract.js@5/dist/tesseract.min.js'
  ];

  const attempt = new Promise(function (resolve, reject) {
    if (window.Tesseract) return resolve();

    function loadFrom (index) {
      if (index >= SOURCES.length) {
        reject(new Error('Tesseract konnte nicht geladen werden'));
        return;
      }
      const s = document.createElement('script');
      s.src = SOURCES[index];
      s.async = true;
      s.onload = resolve;
      s.onerror = function () { loadFrom(index + 1); };
      document.head.appendChild(s);
    }
    loadFrom(0);
  });

  tesseractReady = attempt;
  // Forget a failed attempt so a later click can retry.
  attempt.catch(function () {
    if (tesseractReady === attempt) tesseractReady = undefined;
  });
  return attempt;
}
// =============================
// Preprocessing (OCR)
// grayscale + unsharp + adaptive threshold
// =============================
/**
 * Decodes an image file and returns a binarised canvas for OCR.
 *
 * Steps: downscale so the longest edge is at most 1800 px, convert to
 * grayscale (Rec. 709 weights), sharpen with a 4-neighbour Laplacian, then
 * apply a local-mean threshold so every pixel becomes black (0) or white (255).
 *
 * Each stage reads from an immutable copy of the previous stage. In
 * particular the threshold compares against means of the *sharpened* image,
 * not against pixels that were already binarised earlier in the same pass.
 *
 * @param {Blob} file image to prepare.
 * @returns {Promise<HTMLCanvasElement>} canvas holding the binarised image.
 * @throws {Error} when the image cannot be decoded.
 */
async function preprocessImage (file) {
  const MAX = 1800;    // longest edge after downscaling, in px
  const SHARPEN = 0.3; // weight of the Laplacian added back to the pixel
  const RADIUS = 25;   // threshold neighbourhood reaches ±25 px around the pixel
  const STEP = 5;      // the neighbourhood is sampled on every 5th row/column
  const BIAS = 6;      // pixel must be this much darker than the mean to be black

  const url = URL.createObjectURL(file);
  let img;
  try {
    img = await new Promise((res, rej) => {
      const o = new Image();
      o.onload = () => res(o);
      o.onerror = () => rej(new Error('Bild konnte nicht geladen werden'));
      o.src = url;
    });
  } finally {
    // The decoded image no longer needs the blob URL.
    URL.revokeObjectURL(url);
  }

  const s = Math.min(1, MAX / Math.max(img.width, img.height));
  // At least 1 px per side: a very elongated image must not round down to 0.
  const w = Math.max(1, Math.round(img.width * s));
  const h = Math.max(1, Math.round(img.height * s));

  const c = document.createElement('canvas');
  c.width = w;
  c.height = h;
  const g = c.getContext('2d', { willReadFrequently: true });
  g.imageSmoothingEnabled = true;
  g.drawImage(img, 0, 0, w, h);

  const id = g.getImageData(0, 0, w, h);
  const d = id.data;
  const count = w * h;

  // → grayscale (one byte per pixel; Uint8ClampedArray rounds and clamps)
  const gray = new Uint8ClampedArray(count);
  for (let p = 0, i = 0; p < count; p++, i += 4) {
    gray[p] = 0.2126 * d[i] + 0.7152 * d[i + 1] + 0.0722 * d[i + 2];
  }

  // → unsharp (light high-pass); border pixels are left as they are
  const sharp = new Uint8ClampedArray(gray);
  for (let y = 1; y < h - 1; y++) {
    for (let x = 1; x < w - 1; x++) {
      const p = y * w + x;
      const a = gray[p];
      const lap = 4 * a - gray[p - 1] - gray[p + 1] - gray[p - w] - gray[p + w];
      sharp[p] = a + SHARPEN * lap;
    }
  }

  // → adaptive threshold (local mean of the sharpened image)
  for (let y = 0; y < h; y++) {
    const y0 = Math.max(0, y - RADIUS);
    const y1 = Math.min(h - 1, y + RADIUS);
    for (let x = 0; x < w; x++) {
      const x0 = Math.max(0, x - RADIUS);
      const x1 = Math.min(w - 1, x + RADIUS);
      let sum = 0;
      let cnt = 0;
      for (let yy = y0; yy <= y1; yy += STEP) {
        for (let xx = x0; xx <= x1; xx += STEP) {
          sum += sharp[yy * w + xx];
          cnt++;
        }
      }
      const p = y * w + x;
      const v = sharp[p] < (sum / cnt) - BIAS ? 0 : 255;
      const i = 4 * p;
      d[i] = d[i + 1] = d[i + 2] = v; // alpha stays untouched
    }
  }

  g.putImageData(id, 0, 0);
  return c;
}
// Helpers for building OCR variants
/**
 * Copies a rectangle of `canvas` onto a new canvas of exactly that size.
 * @param {HTMLCanvasElement} canvas source canvas.
 * @param {number} x left edge of the region.
 * @param {number} y top edge of the region.
 * @param {number} w region width.
 * @param {number} h region height.
 * @returns {HTMLCanvasElement} new w×h canvas containing the region.
 */
function crop(canvas, x, y, w, h){
  const region = document.createElement('canvas');
  region.width = w;
  region.height = h;
  const pen = region.getContext('2d');
  pen.drawImage(canvas, x, y, w, h, 0, 0, w, h);
  return region;
}
/**
 * Draws `canvas` rotated about its centre onto a new canvas.
 * Angles that are multiples of 180° keep the source dimensions; every other
 * angle swaps width and height.
 * @param {HTMLCanvasElement} canvas source canvas.
 * @param {number} deg rotation in degrees.
 * @returns {HTMLCanvasElement} the rotated copy.
 */
function rotate(canvas, deg){
  const turned = document.createElement('canvas');
  const pen = turned.getContext('2d');
  const keepsShape = (deg % 180 === 0);
  turned.width = keepsShape ? canvas.width : canvas.height;
  turned.height = keepsShape ? canvas.height : canvas.width;
  // Rotate around the centre of the target, then draw the source centred.
  pen.translate(turned.width/2, turned.height/2);
  pen.rotate(deg*Math.PI/180);
  pen.drawImage(canvas, -canvas.width/2, -canvas.height/2);
  return turned;
}
/**
 * Runs one OCR pass over a canvas.
 *
 * The one-shot `Tesseract.recognize(image, langs, options)` helper treats its
 * third argument as worker options, so engine parameters passed there never
 * reach Tesseract. This uses an explicit worker and `setParameters` so the
 * page-segmentation mode actually takes effect. A worker is created and
 * terminated per call, which is what the one-shot helper does internally.
 *
 * NOTE(review): assumes the tesseract.js v5 API that ensureTesseract() loads.
 *
 * @param {HTMLCanvasElement} canvas image to recognise.
 * @param {string} lang Tesseract language spec, e.g. 'deu+eng'.
 * @returns {Promise<{text: string, conf: number}>} trimmed text and confidence.
 */
async function ocrOne(canvas, lang) {
  const worker = await Tesseract.createWorker(lang);
  try {
    await worker.setParameters({
      // Sparse text suits labels (text blocks with differing orientation)
      tessedit_pageseg_mode: (Tesseract.PSM && Tesseract.PSM.SPARSE_TEXT) || '11',
      preserve_interword_spaces: '1'
    });
    const res = await worker.recognize(canvas);
    return { text: (res?.data?.text || '').trim(), conf: res?.data?.confidence || 0 };
  } finally {
    // Always release the worker, also when recognition fails.
    await worker.terminate();
  }
}
// =============================
// Multi-pass OCR (rotations/regions) + fallback language
// =============================
/**
 * Recognises the text on a label photo.
 *
 * The preprocessed image is OCR'd as a whole, as its left 40 % column and as
 * its bottom 28 % banner, each at 0°, +90° and -90°, and each with the
 * language sets 'deu+eng' and 'eng'. The non-empty result with the highest
 * confidence wins.
 *
 * Single failing passes are logged and skipped (best effort). The progress
 * bar advances per pass and is always hidden again, also when preprocessing
 * throws.
 *
 * @param {Blob} file the photo to scan.
 * @returns {Promise<string>} best recognised text, or '' if nothing was read.
 */
async function runOCR(file){
  await ensureTesseract();
  setProgress(0.01);
  try {
    const base = await preprocessImage(file);

    // Candidate regions (never smaller than 1 px, even for tiny images)
    const regions = [
      base, // whole image
      crop(base, 0, 0, Math.max(1, Math.round(base.width*0.4)), base.height), // left column
      crop(base, 0, Math.round(base.height*0.72), base.width, Math.max(1, Math.round(base.height*0.28))) // bottom banner
    ];

    // + rotations
    const candidates = [];
    for (const region of regions){
      candidates.push(region, rotate(region, 90), rotate(region, -90));
    }

    // try both language modes on every candidate
    const langs = ['deu+eng','eng'];
    const total = candidates.length * langs.length;
    const results = [];
    let done = 0;
    for (const canv of candidates){
      for (const lang of langs){
        try {
          results.push(await ocrOne(canv, lang));
        } catch (e) {
          // One failed pass must not abort the scan, but should be visible.
          console.warn('[LabelScan] OCR-Durchlauf fehlgeschlagen', e);
        }
        done++;
        setProgress(done / total);
      }
    }

    // An empty reading is never the best answer, whatever its confidence.
    const usable = results.filter(r => r && r.text);
    usable.sort((a,b)=> (b.conf||0)-(a.conf||0));
    return usable.length ? usable[0].text : '';
  } finally {
    setProgress(null);
  }
}
// =============================
// Hints from OCR text
// =============================
/**
 * Extracts search hints from raw OCR text.
 *
 * Word detection uses explicit letter classes instead of `\b`. `\b` only
 * knows ASCII word characters, so it never starts a word at Ä/Ö/Ü
 * ("Österreich") and cuts or drops words ending in ß ("Whiskyfaß").
 *
 * @param {string} text OCR output (any falsy value counts as empty).
 * @returns {{names: string[], ages: string[], years: string[], words: string[], raw: string}}
 *   names: KNOWN_TOKENS entries found (case-insensitive), in KNOWN_TOKENS order;
 *   ages:  distinct age statements such as "12 years" / "12 Jahre", digits only;
 *   years: distinct four-digit years 19xx/20xx;
 *   words: up to 8 distinct capitalised words of at least 4 characters;
 *   raw:   the text with whitespace runs collapsed to single spaces.
 */
function extractHints (text) {
  const raw = String(text || '').replace(/\s+/g, ' ').trim();

  const names = KNOWN_TOKENS.filter(token => {
    const escaped = token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    return new RegExp('\\b' + escaped + '\\b', 'i').test(raw);
  });

  let m;

  const ages = [];
  const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi;
  while ((m = ageRe.exec(raw)) !== null) {
    if (!ages.includes(m[1])) ages.push(m[1]);
  }

  const years = [];
  const yearRe = /\b(19|20)\d{2}\b/g;
  while ((m = yearRe.exec(raw)) !== null) {
    if (!years.includes(m[0])) years.push(m[0]);
  }

  // Group 1 is the word. It must follow the start of the text or a non-word
  // character, end in a letter (no trailing hyphen), and must not be followed
  // by another letter, digit or underscore.
  const wordRe = /(?:^|[^A-Za-zÄÖÜäöüß0-9_])([A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{2,}[A-Za-zÄÖÜäöüß])(?![A-Za-zÄÖÜäöüß0-9_])/g;
  const seen = new Set();
  const words = [];
  while (words.length < 8 && (m = wordRe.exec(raw)) !== null) {
    const word = m[1];
    if (seen.has(word)) continue;
    seen.add(word);
    words.push(word);
  }

  return { names, ages, years, words, raw };
}
// =============================
// Search (3 passes) + fallbacks
// =============================
/**
 * HTML-escapes a value with mw.html.escape.
 * @param {*} s value to escape; falsy values become the empty string.
 * @returns {string} escaped text.
 */
function esc (s) {
  const text = String(s || '');
  return mw.html.escape(text);
}
/**
 * Builds the category filter appended to search queries.
 *
 * CirrusSearch ANDs separate `incategory:` terms, which would require a page
 * to be in every configured category at once. A single term with a
 * pipe-separated list matches pages in any of them (OR).
 *
 * @returns {string} `incategory:"A|B|…"`, or '' when no categories are configured.
 */
function incatStr () {
  const cats = (ADOS_CATEGORIES || []).filter(Boolean);
  return cats.length ? 'incategory:"' + cats.join('|') + '"' : '';
}
/**
 * Searches the wiki for pages matching the OCR hints, in three passes:
 *   1. title searches per known name (combined with each age / year),
 *   2. one full-text search over the strongest tokens,
 *   3. a title prefix search on the first name (or else the first word).
 * Passes 1 and 2 carry the category filter from incatStr(). Requests run one
 * after another; results are de-duplicated by title and the function returns
 * as soon as `limit` hits have been collected.
 *
 * @param {{names: string[], ages: string[], years: string[], words: string[]}} hints
 * @param {number} [limit] maximum number of hits (default 12).
 * @returns {Promise<Object[]>} search hits; prefix hits are `{ title, snippet: '' }`.
 */
async function searchWikiSmart (hints, limit) {
  await mw.loader.using(['mediawiki.api','mediawiki.util','mediawiki.html']);
  const client = new mw.Api();
  const MAIN_NS = 0;
  const cap = limit || 12;
  const catFilter = incatStr();

  // PASS 1: intitle combinations (precise)
  const queries = [];
  for (const name of hints.names) {
    for (const age of hints.ages) {
      queries.push(`intitle:"${name}" intitle:${age} ${catFilter}`);
    }
    for (const year of hints.years) {
      queries.push(`intitle:"${name}" "${year}" ${catFilter}`);
    }
    queries.push(`intitle:"${name}" ${catFilter}`);
  }

  // PASS 2: weighted full-text search
  const phrase = [
    ...hints.names.slice(0, 2),
    ...hints.ages.slice(0, 1),
    ...hints.years.slice(0, 1),
    ...hints.words.slice(0, 3)
  ].map(token => `"${token}"`).join(' ');
  if (phrase) queries.push(`${phrase} ${catFilter}`);

  // PASS 3: prefix on titles
  const prefixes = [];
  if (hints.names.length) prefixes.push(hints.names[0]);
  else if (hints.words.length) prefixes.push(hints.words[0]);

  const seenTitles = new Set();
  const collected = [];
  const remember = (title, entry) => {
    if (seenTitles.has(title)) return;
    seenTitles.add(title);
    collected.push(entry);
  };

  for (const q of queries) {
    const reply = await client.get({
      action: 'query', list: 'search', srsearch: q,
      srnamespace: MAIN_NS, srlimit: cap, formatversion: 2
    });
    for (const hit of (reply.query?.search || [])) remember(hit.title, hit);
    if (collected.length >= cap) return collected.slice(0, cap);
  }

  // Prefix (list=prefixsearch)
  for (const prefix of prefixes) {
    const reply = await client.get({
      action: 'query', list: 'prefixsearch', pssearch: prefix,
      psnamespace: MAIN_NS, pslimit: cap
    });
    for (const hit of (reply.query?.prefixsearch || [])) {
      const title = hit.title || hit['*'];
      remember(title, { title, snippet: '' });
    }
    if (collected.length >= cap) break;
  }
  return collected.slice(0, cap);
}
// very simple fuzzy fallback on search results
/**
 * Scores a page title by how many hints occur in it as substrings.
 * A known name counts 1.0; a word, an age or a year counts 0.4 each.
 * Names and words are compared case-insensitively.
 * @param {string} title page title.
 * @param {{names: string[], words: string[], ages: string[], years: string[]}} hints
 * @returns {number} the summed score (0 when nothing matches).
 */
function scoreTitle(title, hints){
  const haystack = String(title||'').toLowerCase();
  const lower = (v) => v.toLowerCase();
  const asText = (v) => String(v);
  // Summation order (names, words, ages, years) is kept fixed so the
  // floating-point total is reproducible.
  const groups = [
    [hints.names, 1.0, lower],
    [hints.words, 0.4, lower],
    [hints.ages, 0.4, asText],
    [hints.years, 0.4, asText]
  ];
  let total = 0;
  for (const [values, weight, normalise] of groups) {
    for (const value of values) {
      if (haystack.includes(normalise(value))) total += weight;
    }
  }
  return total;
}
/**
 * Fallback search: fetches up to 50 category-filtered full-text hits for the
 * strongest hint tokens and keeps the best `limit` by scoreTitle().
 *
 * Without any hint token every title would score 0 and be filtered out, so
 * the request is skipped. This also avoids sending an empty `srsearch`, which
 * the API rejects, when the OCR produced no text at all.
 *
 * @param {{names: string[], ages: string[], years: string[], words: string[]}} hints
 * @param {number} [limit] maximum number of hits (default 12).
 * @returns {Promise<Object[]>} hits with an added `_score`, best first.
 */
async function fallbackFuzzyTitles(hints, limit){
  const MAX = limit || 12;
  const tokens = [
    ...hints.names.slice(0,2),
    ...hints.ages.slice(0,1),
    ...hints.years.slice(0,1),
    ...hints.words.slice(0,3)
  ];
  if (!tokens.length) return [];

  await mw.loader.using('mediawiki.api');
  const api = new mw.Api();

  // Broad search over the tokens, restricted by the category filter
  const quoted = tokens.map(x => `"${x}"`).join(' ');
  const q = `${quoted} ${incatStr()}`.trim();
  const r = await api.get({ action:'query', list:'search', srsearch:q, srlimit:50, formatversion:2 });

  const scored = (r.query?.search || []).map(it => ({ ...it, _score: scoreTitle(it.title, hints) }));
  scored.sort((a,b)=> b._score - a._score);
  // Deliberately lenient cut-off: a single matching hint is enough.
  return scored.slice(0, MAX).filter(x => x._score >= 0.10);
}
/**
 * Last-resort search without the category filter.
 *
 * Uses the strongest hint tokens as quoted phrases, or the first six words of
 * the raw OCR text if there are no tokens. When the OCR produced no text at
 * all there is nothing to search for, so an empty list is returned rather
 * than sending an empty `srsearch`, which the API rejects.
 *
 * @param {{names: string[], ages: string[], years: string[], words: string[], raw: string}} hints
 * @param {number} [limit] maximum number of hits (default 12).
 * @returns {Promise<Object[]>} search hits, possibly empty.
 */
async function broadSearchNoCategory(hints, limit){
  const MAX = limit || 12;
  const parts = [
    ...hints.names.slice(0,2),
    ...hints.ages.slice(0,1),
    ...hints.years.slice(0,1),
    ...hints.words.slice(0,3)
  ].map(x => `"${x}"`);
  const q = parts.length
    ? parts.join(' ')
    : String(hints.raw || '').split(/\s+/).slice(0,6).join(' ').trim();
  if (!q) return [];

  await mw.loader.using('mediawiki.api');
  const api = new mw.Api();
  const r = await api.get({ action:'query', list:'search', srsearch:q, srlimit:MAX, formatversion:2 });
  return (r.query?.search || []);
}
// =============================
// Render results
// =============================
/**
 * Renders up to 12 search hits into #ados-scan-results.
 *
 * Every hit becomes `<div class="ados-hit"><b><a href>title</a></b>` plus an
 * optional `<div class="meta">snippet</div>`. Nodes are created through the
 * DOM and filled via textContent, so neither titles nor URLs pass through the
 * HTML parser. The API snippet (HTML with highlight spans and entities) is
 * reduced to plain text in an inert DOMParser document.
 *
 * Relies on `mw.util` having been loaded (searchWikiSmart requests it).
 *
 * @param {Array<{title: string, snippet: string}>} items hits to show; an
 *   empty or missing list renders the "no clear hits" message.
 */
function renderResults (items) {
  const box = document.getElementById('ados-scan-results');
  if (!box) return;
  box.textContent = '';

  function makeHit () {
    const hit = document.createElement('div');
    hit.className = 'ados-hit';
    return hit;
  }

  // Strips markup and decodes entities without executing anything.
  function snippetText (html) {
    const source = String(html || '');
    if (!source) return '';
    return new DOMParser().parseFromString(source, 'text/html').body.textContent || '';
  }

  if (!items || !items.length) {
    const empty = makeHit();
    empty.textContent = 'Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.';
    box.appendChild(empty);
    return;
  }

  items.slice(0, 12).forEach(function (it) {
    const title = it.title || '';
    const hit = makeHit();

    const anchor = document.createElement('a');
    anchor.href = mw.util.getUrl(title.replace(/ /g, '_'));
    anchor.textContent = title;
    const strong = document.createElement('b');
    strong.appendChild(anchor);
    hit.appendChild(strong);

    const snip = snippetText(it.snippet);
    if (snip) {
      const meta = document.createElement('div');
      meta.className = 'meta';
      meta.textContent = snip;
      hit.appendChild(meta);
    }
    box.appendChild(hit);
  });
}
// =============================
// Binding
// =============================
// Set once the handlers are attached; bind() is a no-op afterwards.
var BOUND = false;
/**
 * Attaches the gadget's event handlers once the scan UI is in the DOM.
 *
 * Safe to call repeatedly: it returns early while the UI is missing and after
 * the handlers have been attached (tracked by BOUND and by `data-bound` on
 * the run button).
 *
 * The scan can be started by the run button's `click` and by the form's
 * `submit`. Only the button is disabled during a run, so a `busy` flag
 * prevents a second, concurrent scan from being started through the form.
 */
function bind () {
  if (BOUND || !hasUI()) return;
  const runBtn = document.getElementById('ados-scan-run');
  const fileIn = document.getElementById('ados-scan-file');
  const bigBtn = document.getElementById('ados-scan-bigbtn');
  const form = document.getElementById('ados-scan-form');
  if (!runBtn || !fileIn) return;
  if (runBtn.dataset.bound === '1') return;
  runBtn.dataset.bound = '1';
  BOUND = true;

  // The big button only opens the (possibly hidden) file picker.
  if (bigBtn) bigBtn.addEventListener('click', function () { fileIn.click(); });
  fileIn.addEventListener('change', function () {
    if (fileIn.files && fileIn.files[0]) showPreview(fileIn.files[0]);
  });

  let busy = false;

  // OCR → hints → search with two fallbacks → render.
  async function scan (file) {
    try {
      runBtn.disabled = true; runBtn.textContent = 'Erkenne …';
      setStatus('Erkenne Label …');
      const text = await runOCR(file);
      showOCRText(text);
      setStatus('Suche im Wiki …');
      const hints = extractHints(text);
      let hits = await searchWikiSmart(hints, 12);
      if (!hits || !hits.length) {
        setStatus('Kein direkter Treffer – Fuzzy über Kategorien …');
        hits = await fallbackFuzzyTitles(hints, 12);
      }
      if (!hits || !hits.length) {
        setStatus('Kein Treffer – breite Suche ohne Kategorien …');
        hits = await broadSearchNoCategory(hints, 12);
      }
      renderResults(hits);
      setStatus('Fertig.');
    } catch (e) {
      console.error('[LabelScan]', e);
      setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
    } finally {
      busy = false;
      runBtn.disabled = false; runBtn.textContent = 'Erkennen & suchen';
    }
  }

  function onSubmit(ev){
    ev.preventDefault();
    if (busy) return; // a scan is already running
    if (!(fileIn.files && fileIn.files[0])) { alert('Bitte ein Foto auswählen oder aufnehmen.'); return; }
    busy = true;
    scan(fileIn.files[0]);
  }
  runBtn.addEventListener('click', onSubmit);
  if (form) form.addEventListener('submit', onSubmit);

  // Safety net: keep the button above overlapping elements and clickable.
  const wrap = document.getElementById('ados-labelscan');
  if (wrap) wrap.style.position = 'relative';
  runBtn.style.position = 'relative';
  runBtn.style.zIndex = '9999';
  runBtn.style.pointerEvents = 'auto';
}
// Start-up: try to bind now (or on DOMContentLoaded), retry twice shortly
// afterwards, and watch the DOM in case the scan UI is inserted later.
if (document.readyState === 'loading') {
  document.addEventListener('DOMContentLoaded', bind);
} else {
  bind();
}
setTimeout(bind, 250);
setTimeout(bind, 1000);
// bind() is a no-op once BOUND is set, so the document-wide observer is only
// needed until then; disconnect it instead of reacting to every later mutation.
var mo = new MutationObserver(function () {
  bind();
  if (BOUND) mo.disconnect();
});
if (!BOUND) {
  mo.observe(document.documentElement || document.body, { childList: true, subtree: true });
}
})();