MediaWiki:Gadget-LabelScan.js: Unterschied zwischen den Versionen
Admin (Diskussion | Beiträge) Keine Bearbeitungszusammenfassung |
Admin (Diskussion | Beiträge) Keine Bearbeitungszusammenfassung |
||
| Zeile 1: | Zeile 1: | ||
/* global mw, Tesseract */ | /* global mw, Tesseract */ | ||
(function(){ | (function () { | ||
'use strict'; | 'use strict'; | ||
// === ADOS: categories & tokens (extend as needed) ==========================
// Category names that the wiki search is restricted to (used to build the
// incategory: filter). Frozen so no function can modify the shared list by
// accident at runtime; to extend it, edit this literal.
const ADOS_CATEGORIES = Object.freeze([
  'Alle A Dream of Scotland Abfüllungen',
  'Alle A Dream of Ireland Abfüllungen',
  'Alle A Dream of... – Der Rest der Welt Abfüllungen',
  'Friendly Mr. Z Whiskytainment Abfüllungen',
  'Die Whisky Elfen Abfüllungen',
  'The Fine Art of Whisky Abfüllungen',
  'Alle Rumbastic Abfüllungen'
]);
// Distillery / region names that are looked up (case-insensitively) in the
// OCR text. Frozen for the same reason as the category list.
const KNOWN_TOKENS = Object.freeze([
  'Ardbeg', 'Ardmore', 'Arran', 'Auchroisk', 'Ben Nevis', 'Blair Athol', 'Bowmore',
  'Caol Ila', 'Clynelish', 'Glenallachie', 'Glenrothes', 'Longmorn', 'Lagavulin',
  'Tullibardine', 'Dalmore', 'Benrinnes', 'Mortlach', 'Glenlivet', 'Inchgower',
  'Islay', 'Speyside', 'Highland', 'Lowland', 'Campbeltown', 'Ireland'
]);
// === UI Präsenz =========================================================== | |||
/**
 * Reports whether the scanner controls are present in the document.
 *
 * @returns {boolean} true only if both the element with id `ados-scan-run`
 *   and the element with id `ados-scan-file` exist.
 */
function hasUI () {
  return ['ados-scan-run', 'ados-scan-file'].every(function (id) {
    return Boolean(document.getElementById(id));
  });
}
// | // === UI Helpers =========================================================== | ||
/**
 * Writes a status message into the element with id `ados-scan-status`.
 * Does nothing when that element is absent.
 *
 * @param {string} [t] Message to show; any falsy value clears the text.
 */
function setStatus (t) {
  const statusEl = document.getElementById('ados-scan-status');
  if (!statusEl) {
    return;
  }
  statusEl.textContent = t ? t : '';
}
/**
 * Shows, updates or hides the progress indicator (`ados-scan-progress`).
 * Does nothing when that element is absent.
 *
 * @param {?number} p Fraction in the range 0..1 (values outside are clamped).
 *   `null`/`undefined` hides the bar and resets it to 0.
 */
function setProgress (p) {
  const bar = document.getElementById('ados-scan-progress');
  if (!bar) return;
  if (p == null) {
    bar.hidden = true;
    bar.value = 0;
    return;
  }
  const fraction = Number(p);
  bar.hidden = false;
  // NaN would survive Math.min/Math.max and end up in bar.value. Presumably the
  // bar is a <progress> element, whose value setter rejects non-finite numbers,
  // so an unusable value is shown as "no progress yet" instead.
  bar.value = Number.isNaN(fraction) ? 0 : Math.max(0, Math.min(1, fraction));
}
/**
 * Displays the selected photo inside the element with id `ados-scan-preview`
 * and marks that container as visible to assistive technology.
 * Does nothing when the container is absent.
 *
 * @param {Blob} file Image file chosen by the user.
 */
function showPreview (file) {
  const prev = document.getElementById('ados-scan-preview');
  // Without a container there is nothing to show, so no object URL is created
  // (it could never be released again).
  if (!prev) return;

  const url = URL.createObjectURL(file);
  const img = document.createElement('img');
  // Once the browser has loaded (or failed to load) the image, the blob URL is
  // no longer needed; releasing it avoids leaking one blob per selected file.
  const release = function () { URL.revokeObjectURL(url); };
  img.onload = release;
  img.onerror = release;
  img.alt = 'Vorschau';
  img.src = url;

  prev.textContent = ''; // drop any previous preview
  prev.appendChild(img);
  prev.setAttribute('aria-hidden', 'false');
}
// | // === Tesseract bei Bedarf laden ========================================== | ||
var tesseractReady; | var tesseractReady; | ||
function ensureTesseract(){ | function ensureTesseract () { | ||
if (tesseractReady) return tesseractReady; | if (tesseractReady) return tesseractReady; | ||
tesseractReady = new Promise(function(resolve, reject){ | tesseractReady = new Promise(function (resolve, reject) { | ||
if (window.Tesseract) return resolve(); | if (window.Tesseract) return resolve(); | ||
var s = document.createElement('script'); | var s = document.createElement('script'); | ||
| Zeile 37: | Zeile 57: | ||
s.async = true; | s.async = true; | ||
s.onload = resolve; | s.onload = resolve; | ||
s.onerror = function(){ | s.onerror = function () { | ||
var s2 = document.createElement('script'); | var s2 = document.createElement('script'); | ||
s2.src = 'https://unpkg.com/tesseract.js@5/dist/tesseract.min.js'; | s2.src = 'https://unpkg.com/tesseract.js@5/dist/tesseract.min.js'; | ||
s2.async = true; | s2.async = true; | ||
s2.onload = resolve; | s2.onload = resolve; | ||
s2.onerror = function(){ reject(new Error('Tesseract konnte nicht geladen werden')); }; | s2.onerror = function () { reject(new Error('Tesseract konnte nicht geladen werden')); }; | ||
document.head.appendChild(s2); | document.head.appendChild(s2); | ||
}; | }; | ||
| Zeile 50: | Zeile 70: | ||
} | } | ||
// - | // === Bild-Vorverarbeitung (für bessere OCR) =============================== | ||
/**
 * Prepares a photo for OCR: decodes it, scales it so that its longest edge is
 * 1800 px, converts it to greyscale and slightly increases the contrast.
 *
 * @param {Blob} file Image file chosen by the user.
 * @returns {Promise<HTMLCanvasElement>} Canvas holding the processed image;
 *   it is handed to Tesseract as the recognition input.
 * @throws {Error} If the image cannot be loaded or has no pixels.
 */
async function preprocessImage (file) {
  const url = URL.createObjectURL(file);
  let img;
  try {
    img = await new Promise((resolve, reject) => {
      const o = new Image();
      o.onload = () => resolve(o);
      o.onerror = () => reject(new Error('Bild konnte nicht geladen werden'));
      o.src = url;
    });
  } finally {
    // The loaded image no longer depends on the blob URL; release it on
    // success and on failure so repeated scans do not leak memory.
    URL.revokeObjectURL(url);
  }

  // Scale the longest edge to ~1800px (sharper input for OCR).
  const MAX = 1800;
  const longest = Math.max(img.width, img.height);
  if (!(longest > 0)) {
    // A zero-sized image would otherwise produce an infinite scale factor.
    throw new Error('Bild hat keine Pixel');
  }
  const scale = MAX / longest;
  const w = Math.round(img.width * scale);
  const h = Math.round(img.height * scale);

  const c = document.createElement('canvas');
  c.width = w;
  c.height = h;
  const ctx = c.getContext('2d');
  ctx.imageSmoothingEnabled = true;
  ctx.drawImage(img, 0, 0, w, h);

  // Greyscale (weighted sum of R, G, B) plus a mild contrast boost around mid-grey.
  const id = ctx.getImageData(0, 0, w, h);
  const d = id.data;
  for (let i = 0; i < d.length; i += 4) {
    const g = 0.2126 * d[i] + 0.7152 * d[i + 1] + 0.0722 * d[i + 2];
    const v = Math.max(0, Math.min(255, (g - 128) * 1.15 + 128));
    d[i] = d[i + 1] = d[i + 2] = v; // alpha (d[i + 3]) is left untouched
  }
  ctx.putImageData(id, 0, 0);
  return c;
}
// === OCR (nutzt Vorverarbeitung) ========================================= | |||
/**
 * Runs OCR on a label photo and returns the recognised text.
 *
 * Loads Tesseract on demand, preprocesses the photo, and reports recognition
 * progress through setProgress.
 *
 * @param {Blob} file Image file chosen by the user.
 * @returns {Promise<string>} Recognised text, or '' if the result has none.
 * @throws Whatever ensureTesseract, preprocessImage or Tesseract.recognize
 *   reject with; the progress bar is hidden again in that case too.
 */
async function runOCR (file) {
  await ensureTesseract();
  setProgress(0);
  try {
    const canvas = await preprocessImage(file);
    const res = await Tesseract.recognize(canvas, 'deu+eng', {
      // PSM 6: treat the image as a single block of text, which is robust for labels.
      // NOTE(review): in tesseract.js v5 the third argument appears to be the
      // worker options (logger etc.); confirm that the tessedit_* keys are
      // honoured here, otherwise they must go through worker.setParameters().
      tessedit_pageseg_mode: 6,
      preserve_interword_spaces: 1,
      logger: function (m) {
        // Only the recognition phase carries a progress value we want to show.
        if (m && m.status === 'recognizing text' && typeof m.progress === 'number') {
          setProgress(m.progress);
        }
      }
    });
    return (res && res.data && res.data.text) || '';
  } finally {
    // Always hide the progress bar, also when preprocessing or OCR failed.
    setProgress(null);
  }
}
// === Extract hints from the OCR text ========================================
/**
 * Pulls searchable hints out of raw OCR text.
 *
 * @param {string} text Raw OCR output (any falsy value is treated as '').
 * @returns {{names: string[], ages: string[], years: string[], words: string[], raw: string}}
 *   names – entries of KNOWN_TOKENS that occur in the text (case-insensitive);
 *   ages  – distinct age statements as digit strings ("12 years", "12 yo",
 *           "12-year-old", "14 Jahre");
 *   years – distinct four-digit years starting with 19 or 20;
 *   words – up to 8 distinct capitalised words of at least 4 characters;
 *   raw   – the text with all whitespace runs collapsed to single spaces.
 */
function extractHints (text) {
  const raw = String(text || '').replace(/\s+/g, ' ').trim();

  // Distillery / brand tokens that really occur in the text.
  const escapeRe = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const names = KNOWN_TOKENS.filter((token) =>
    new RegExp('\\b' + escapeRe(token) + '\\b', 'i').test(raw));

  // Ages. The separator may be a space or a hyphen so that "12-year-old"
  // is recognised as well as "12 years".
  const ageRe = /\b([1-9]\d?)[\s-]?(?:years?|yo|jahre?)\b/gi;
  const ages = [...new Set(Array.from(raw.matchAll(ageRe), (m) => m[1]))];

  // Vintages.
  const years = [...new Set(raw.match(/\b(?:19|20)\d{2}\b/g) || [])];

  // A few prominent (capitalised) words. \b only knows ASCII word characters,
  // so it cannot delimit words that start with Ä/Ö/Ü or end in ß; explicit
  // look-arounds over the same letter set are used instead. The final
  // character must be a letter so that a trailing hyphen is not captured.
  const LETTER = 'A-Za-zÄÖÜäöüß';
  const wordRe = new RegExp(
    `(?<![${LETTER}0-9_])[A-ZÄÖÜ][${LETTER}-]{2,}[${LETTER}](?![${LETTER}0-9_])`,
    'g'
  );
  const words = [...new Set(raw.match(wordRe) || [])].slice(0, 8);

  return { names, ages, years, words, raw };
}
// | // === Smarte Wiki-Suche (3 Pässe) ========================================= | ||
/**
 * Searches the wiki for pages matching the OCR hints, in three passes of
 * decreasing precision, and returns the de-duplicated hits in the order found.
 *
 *   1. title search (intitle:) combining each name with ages / years,
 *   2. one full-text search over the strongest hints,
 *   3. title prefix search on the first name (or, failing that, first word).
 *
 * Passes 1 and 2 are restricted to the ADOS categories. Later passes only run
 * while fewer than the requested number of hits have been collected.
 *
 * @param {{names?: string[], ages?: string[], years?: string[], words?: string[]}} hints
 *   Output of extractHints; missing fields are treated as empty lists.
 * @param {number} [limit=12] Maximum number of hits to return.
 * @returns {Promise<Array<{title: string, snippet: string}>>} Search hits;
 *   prefix-search hits carry an empty snippet.
 */
async function searchWikiSmart (hints, limit) {
  await mw.loader.using('mediawiki.api');
  const api = new mw.Api();
  const NS_MAIN = 0;
  const MAX = limit || 12;
  const { names = [], ages = [], years = [], words = [] } = hints || {};

  // Separate incategory: filters are combined with AND, which would require a
  // page to sit in every ADOS category at once. Per the CirrusSearch help, one
  // incategory: keyword with "|"-separated names matches any of them.
  const inAnyCategory = 'incategory:"' + ADOS_CATEGORIES.join('|') + '"';

  // PASS 1: intitle combinations (precise).
  const pass1 = [];
  names.forEach((n) => {
    ages.forEach((a) => pass1.push(`intitle:"${n}" intitle:${a} ${inAnyCategory}`));
    years.forEach((y) => pass1.push(`intitle:"${n}" "${y}" ${inAnyCategory}`));
    pass1.push(`intitle:"${n}" ${inAnyCategory}`);
  });

  // PASS 2: full-text search over the strongest hints.
  const key = [...names.slice(0, 2), ...ages.slice(0, 1), ...years.slice(0, 1), ...words.slice(0, 3)]
    .map((x) => `"${x}"`)
    .join(' ');
  const pass2 = key ? [`${key} ${inAnyCategory}`] : [];

  // PASS 3: title prefix; the first name wins, otherwise the first word.
  const pass3 = names.length ? [names[0]] : words.slice(0, 1);

  const seen = new Set();
  const out = [];
  // Appends a hit unless a page with the same title was already collected.
  function collect (hit) {
    if (seen.has(hit.title)) return;
    seen.add(hit.title);
    out.push(hit);
  }

  async function runSr (q) {
    const r = await api.get({
      action: 'query',
      list: 'search',
      srsearch: q,
      srnamespace: NS_MAIN,
      srlimit: MAX,
      formatversion: 2
    });
    (r.query?.search || []).forEach(collect);
  }

  for (const q of [...pass1, ...pass2]) {
    await runSr(q);
    if (out.length >= MAX) return out.slice(0, MAX);
  }

  // Prefix search (list=prefixsearch).
  for (const p of pass3) {
    const r = await api.get({
      action: 'query',
      list: 'prefixsearch',
      pssearch: p,
      psnamespace: NS_MAIN,
      pslimit: MAX
    });
    (r.query?.prefixsearch || []).forEach((it) => {
      collect({ title: it.title || it['*'], snippet: '' });
    });
    if (out.length >= MAX) break;
  }
  return out.slice(0, MAX);
}
// === HTML escaping & hit rendering ==========================================
/**
 * Escapes a value for safe insertion into HTML via mw.html.escape.
 *
 * @param {*} s Value to escape; null and undefined become ''.
 * @returns {string} The escaped string.
 */
function esc (s) {
  // ?? instead of ||: only null/undefined fall back to '', so a numeric 0
  // is rendered as "0" rather than silently disappearing.
  return mw.html.escape(String(s ?? ''));
}
/**
 * Renders search hits into the element with id `ados-scan-results`.
 * Does nothing when that element is absent. Any previous content is replaced.
 *
 * @param {Array<{title: string, snippet?: string}>} items Search hits; at most
 *   the first 12 are shown. An empty or missing list shows a "no hits" notice.
 */
function renderResults (items) {
  const box = document.getElementById('ados-scan-results');
  if (!box) return;
  box.innerHTML = '';
  if (!items || !items.length) {
    box.innerHTML = '<div class="ados-hit">Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.</div>';
    return;
  }
  items.slice(0, 12).forEach(function (it) {
    const title = it.title || '';
    const link = mw.util.getUrl(title.replace(/ /g, '_'));
    // Drop the highlight <span> wrappers of the search snippet; the rest of the
    // snippet is inserted as HTML, so entities in it are decoded by the browser.
    const snip = String(it.snippet || '').replace(/<\/?span[^>]*>/g, '');
    const div = document.createElement('div');
    div.className = 'ados-hit';
    // Both the href and the link text are escaped: the markup is assembled as a
    // string, so neither value may be able to break out of its context.
    div.innerHTML =
      '<b><a href="' + esc(link) + '">' + esc(title) + '</a></b>' +
      (snip ? '<div class="meta">' + snip + '</div>' : '');
    box.appendChild(div);
  });
}
// | // === Binding ============================================================== | ||
var BOUND = false; | var BOUND = false; | ||
function bind(){ | function bind () { | ||
if (BOUND || !hasUI()) return; | if (BOUND || !hasUI()) return; | ||
var runBtn = document.getElementById('ados-scan-run'); | var runBtn = document.getElementById('ados-scan-run'); | ||
| Zeile 138: | Zeile 254: | ||
if (!runBtn || !fileIn) return; | if (!runBtn || !fileIn) return; | ||
if (runBtn.dataset.bound === '1') return; | if (runBtn.dataset.bound === '1') return; | ||
runBtn.dataset.bound = '1'; BOUND = true; | runBtn.dataset.bound = '1'; BOUND = true; | ||
if (bigBtn) bigBtn.addEventListener('click', function(){ fileIn.click(); }); | if (bigBtn) bigBtn.addEventListener('click', function () { fileIn.click(); }); | ||
fileIn.addEventListener('change', function(){ | fileIn.addEventListener('change', function () { | ||
if (this.files && this.files[0]) showPreview(this.files[0]); | if (this.files && this.files[0]) showPreview(this.files[0]); | ||
}); | }); | ||
runBtn.addEventListener('click', async function(ev){ | runBtn.addEventListener('click', async function (ev) { | ||
ev.preventDefault(); | ev.preventDefault(); | ||
if (!(fileIn.files && fileIn.files[0])){ alert('Bitte ein Foto auswählen oder aufnehmen.'); return; } | if (!(fileIn.files && fileIn.files[0])) { alert('Bitte ein Foto auswählen oder aufnehmen.'); return; } | ||
var f = fileIn.files[0]; | var f = fileIn.files[0]; | ||
try{ | try { | ||
runBtn.disabled = true; runBtn.textContent = 'Erkenne …'; | runBtn.disabled = true; runBtn.textContent = 'Erkenne …'; | ||
setStatus('Erkenne Label …'); | setStatus('Erkenne Label …'); | ||
var text | var text = await runOCR(f); | ||
setStatus('Suche im Wiki …'); | setStatus('Suche im Wiki …'); | ||
var hints = extractHints(text); | var hints = extractHints(text); | ||
var | var hits = await searchWikiSmart(hints, 12); | ||
renderResults(hits); | renderResults(hits); | ||
setStatus('Fertig.'); | setStatus('Fertig.'); | ||
} catch (e){ | } catch (e) { | ||
console.error('[LabelScan]', e); | console.error('[LabelScan]', e); | ||
setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.'); | setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.'); | ||
| Zeile 177: | Zeile 291: | ||
} | } | ||
// Erstbindung + Fallbacks + Observer | // Erstbindung + Fallbacks + Observer | ||
if (document.readyState === 'loading'){ | if (document.readyState === 'loading') { | ||
document.addEventListener('DOMContentLoaded', bind); | document.addEventListener('DOMContentLoaded', bind); | ||
} else { | } else { | ||
| Zeile 185: | Zeile 299: | ||
setTimeout(bind, 250); | setTimeout(bind, 250); | ||
setTimeout(bind, 1000); | setTimeout(bind, 1000); | ||
var mo = new MutationObserver(function () { if (!BOUND) bind(); }); | |||
var mo = new MutationObserver(function(){ if (!BOUND) bind(); }); | mo.observe(document.documentElement || document.body, { childList: true, subtree: true }); | ||
mo.observe(document.documentElement || document.body, { childList:true, subtree:true }); | |||
})(); | })(); | ||