Zum Inhalt springen

MediaWiki:Gadget-LabelScan.js

Aus ADOS Wiki

Hinweis: Leere nach dem Veröffentlichen den Browser-Cache, um die Änderungen sehen zu können.

  • Firefox/Safari: Umschalttaste drücken und gleichzeitig Aktualisieren anklicken oder entweder Strg+F5 oder Strg+R (⌘+R auf dem Mac) drücken
  • Google Chrome: Umschalttaste+Strg+R (⌘+Umschalttaste+R auf dem Mac) drücken
  • Edge: Strg+F5 drücken oder Strg drücken und gleichzeitig Aktualisieren anklicken
/* global mw, Tesseract */
(function () {
  'use strict';

  // === ADOS: categories & tokens (extend as needed) =========================
  // Wiki category names; the search queries are restricted to these.
  const ADOS_CATEGORIES = [
    'Alle A Dream of Scotland Abfüllungen',
    'Alle A Dream of Ireland Abfüllungen',
    'Alle A Dream of... – Der Rest der Welt Abfüllungen',
    'Friendly Mr. Z Whiskytainment Abfüllungen',
    'Die Whisky Elfen Abfüllungen',
    'The Fine Art of Whisky Abfüllungen',
    'Alle Rumbastic Abfüllungen'
  ];

  // Name tokens that are looked up (case-insensitively) in the OCR text.
  const KNOWN_TOKENS = [
    'Ardbeg',
    'Ardmore',
    'Arran',
    'Auchroisk',
    'Ben Nevis',
    'Blair Athol',
    'Bowmore',
    'Caol Ila',
    'Clynelish',
    'Glenallachie',
    'Glenrothes',
    'Longmorn',
    'Lagavulin',
    'Tullibardine',
    'Dalmore',
    'Benrinnes',
    'Mortlach',
    'Glenlivet',
    'Inchgower',
    'Islay',
    'Speyside',
    'Highland',
    'Lowland',
    'Campbeltown',
    'Ireland'
  ];

  // === UI presence ==========================================================
  /**
   * Reports whether the scan widget's required controls are in the document.
   *
   * @returns {boolean} true only when both the run button (#ados-scan-run)
   *   and the file input (#ados-scan-file) exist.
   */
  function hasUI () {
    const requiredIds = ['ados-scan-run', 'ados-scan-file'];
    return requiredIds.every(function (id) {
      return Boolean(document.getElementById(id));
    });
  }

  // === UI helpers ===========================================================
  /**
   * Writes a message into the status element (#ados-scan-status), if present.
   *
   * @param {string} [t] - Text to display; any falsy value clears the status.
   */
  function setStatus (t) {
    const statusEl = document.getElementById('ados-scan-status');
    if (!statusEl) return;
    statusEl.textContent = t ? t : '';
  }
  /**
   * Updates the progress element (#ados-scan-progress), if present.
   *
   * @param {?number} p - Progress value, clamped to the range 0..1. Passing
   *   null or undefined hides the bar and resets its value to 0.
   */
  function setProgress (p) {
    const bar = document.getElementById('ados-scan-progress');
    if (!bar) return;

    const hide = (p === null || p === undefined);
    bar.hidden = hide;
    bar.value = hide ? 0 : Math.min(Math.max(p, 0), 1);
  }
  /**
   * Shows a preview of the chosen image inside #ados-scan-preview.
   *
   * The image is referenced through a temporary object URL, which is released
   * as soon as the browser has loaded (or failed to load) the image so that
   * repeated selections do not accumulate blob references.
   *
   * @param {Blob} file - The image file picked by the user.
   */
  function showPreview (file) {
    const prev = document.getElementById('ados-scan-preview');
    // Without a preview container there is nothing to show; bail out before
    // allocating an object URL that nobody would ever release.
    if (!prev) return;

    const url = URL.createObjectURL(file);
    const img = document.createElement('img');
    img.alt = 'Vorschau';
    // Once loaded, the picture stays visible without the blob URL.
    img.onload = img.onerror = function () { URL.revokeObjectURL(url); };
    img.src = url;

    // Replace any previous preview with the new element (no HTML string building).
    prev.textContent = '';
    prev.appendChild(img);
    prev.setAttribute('aria-hidden', 'false');
  }

  // === Load Tesseract on demand =============================================
  // Cached promise of the in-flight or completed load; cleared again on failure.
  var tesseractReady;

  /**
   * Makes sure the Tesseract.js library is available on the page.
   *
   * Resolves immediately when window.Tesseract already exists. Otherwise the
   * script is loaded from jsDelivr, falling back to unpkg when that fails.
   * Concurrent and later calls share one promise. A failed attempt is not
   * cached, so the next call tries to load the script again.
   *
   * @returns {Promise<void>} Resolves once the library script has loaded;
   *   rejects with an Error when every source failed.
   */
  function ensureTesseract () {
    if (tesseractReady) return tesseractReady;

    var sources = [
      'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js',
      'https://unpkg.com/tesseract.js@5/dist/tesseract.min.js'
    ];

    var pending = new Promise(function (resolve, reject) {
      if (window.Tesseract) return resolve();

      // Try the sources one after another; only the last failure rejects.
      (function tryNext (index) {
        if (index >= sources.length) {
          reject(new Error('Tesseract konnte nicht geladen werden'));
          return;
        }
        var s = document.createElement('script');
        s.src = sources[index];
        s.async = true;
        s.onload = function () { resolve(); };
        s.onerror = function () { tryNext(index + 1); };
        document.head.appendChild(s);
      })(0);
    });

    tesseractReady = pending;
    // Forget a failed attempt so a later call (e.g. the user clicking again
    // after a network hiccup) starts a fresh load instead of failing forever.
    pending.catch(function () {
      if (tesseractReady === pending) tesseractReady = undefined;
    });
    return pending;
  }

  // === Image preprocessing (for better OCR) =================================
  /**
   * Decodes an image file and prepares it for OCR.
   *
   * The image is rescaled so that its longest edge is 1800 px (smaller images
   * are scaled up as well), drawn onto a canvas, converted to grayscale and
   * given a slight contrast boost.
   *
   * @param {Blob} file - Image file to process.
   * @returns {Promise<HTMLCanvasElement>} Canvas holding the processed image.
   * @throws {Error} When the image cannot be loaded or has no usable size.
   */
  async function preprocessImage (file) {
    const url = URL.createObjectURL(file);
    let img;
    try {
      img = await new Promise((res, rej) => {
        const o = new Image();
        o.onload = () => res(o);
        o.onerror = () => rej(new Error('Bild konnte nicht geladen werden'));
        o.src = url;
      });
    } finally {
      // The loaded image no longer needs the blob URL; release it on success
      // and on failure so repeated scans do not leak file references.
      URL.revokeObjectURL(url);
    }

    // Scale the longest edge to ~1800px (sharper input for OCR).
    const MAX = 1800;
    const longest = Math.max(img.width, img.height);
    if (!(longest > 0)) {
      // A zero-sized image would yield NaN canvas dimensions further down.
      throw new Error('Bild hat keine gültigen Abmessungen');
    }
    const scale = MAX / longest;
    const w = Math.round(img.width * scale);
    const h = Math.round(img.height * scale);

    const c = document.createElement('canvas');
    c.width = w;
    c.height = h;
    const ctx = c.getContext('2d');
    ctx.imageSmoothingEnabled = true;
    ctx.drawImage(img, 0, 0, w, h);

    // Grayscale (weighted RGB sum) plus a 15% contrast boost around mid-gray;
    // the alpha channel is left untouched.
    const id = ctx.getImageData(0, 0, w, h);
    const d = id.data;
    for (let i = 0; i < d.length; i += 4) {
      const g = 0.2126 * d[i] + 0.7152 * d[i + 1] + 0.0722 * d[i + 2];
      const v = Math.max(0, Math.min(255, (g - 128) * 1.15 + 128));
      d[i] = d[i + 1] = d[i + 2] = v;
    }
    ctx.putImageData(id, 0, 0);

    return c; // the canvas is handed to Tesseract
  }

  // === OCR (uses the preprocessing step) ====================================
  /**
   * Runs OCR (German + English) on an image file and returns the raw text.
   *
   * The progress bar is shown while recognition runs and is always hidden
   * again afterwards, including when loading, preprocessing or recognition
   * fails.
   *
   * NOTE(review): written against the tesseract.js v5 worker API
   * (createWorker(langs, oem, options) / setParameters / terminate), which is
   * the version ensureTesseract() loads. Per the v5 API docs the options
   * object of Tesseract.recognize() goes to the worker constructor, so
   * Tesseract parameters such as the page segmentation mode have to be set
   * through worker.setParameters() to take effect.
   *
   * @param {Blob} file - Image file to recognize.
   * @returns {Promise<string>} Recognized text, or '' when none was returned.
   */
  async function runOCR (file) {
    await ensureTesseract();
    setProgress(0);

    let worker;
    try {
      const canvas = await preprocessImage(file);

      // 1 = LSTM-only engine, the tesseract.js default.
      worker = await Tesseract.createWorker('deu+eng', 1, {
        logger: function (m) {
          if (m && m.status === 'recognizing text' && typeof m.progress === 'number') {
            setProgress(m.progress);
          }
        }
      });

      await worker.setParameters({
        // PSM 6: a single uniform block of text, robust for labels.
        tessedit_pageseg_mode: (Tesseract.PSM && Tesseract.PSM.SINGLE_BLOCK) || '6',
        preserve_interword_spaces: '1'
      });

      const res = await worker.recognize(canvas);
      return (res && res.data && res.data.text) || '';
    } finally {
      // Hide the bar on success and failure alike, then free the worker.
      setProgress(null);
      if (worker) await worker.terminate();
    }
  }

  // === Extract hints from OCR text ==========================================
  /**
   * Pulls search hints out of raw OCR text.
   *
   * @param {string} text - OCR output; null/undefined is treated as ''.
   * @returns {{names: string[], ages: string[], years: string[],
   *   words: string[], raw: string}}
   *   names - entries of KNOWN_TOKENS found in the text (whole words,
   *           case-insensitive), in KNOWN_TOKENS order;
   *   ages  - unique age numbers (as strings) in order of appearance;
   *   years - unique four-digit years 19xx/20xx in order of appearance;
   *   words - up to 8 unique capitalized words of at least 4 characters;
   *   raw   - the text with whitespace runs collapsed to single spaces.
   */
  function extractHints (text) {
    const raw = String(text || '').replace(/\s+/g, ' ').trim();

    // Escape regex metacharacters so tokens are matched literally.
    const escapeRe = s => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

    // Unique full matches (or one capture group) in order of first appearance.
    const uniqueMatches = (re, group) =>
      [...new Set(Array.from(raw.matchAll(re), m => m[group]))];

    // Distillery/brand tokens that actually occur in the text.
    const names = KNOWN_TOKENS.filter(
      t => new RegExp('\\b' + escapeRe(t) + '\\b', 'i').test(raw)
    );

    // Age: "12 years", "12 yo", "12yo", "12-year-old", "14 Jahre".
    // The separator may be whitespace or a hyphen so hyphenated forms match too.
    const ages = uniqueMatches(/\b([1-9]\d?)[\s-]?(?:years?|yo|jahr(?:e)?)\b/gi, 1);

    // Vintages / bottling years.
    const years = uniqueMatches(/\b(19|20)\d{2}\b/g, 0);

    // A few prominent capitalized words as fallback search terms.
    const words = uniqueMatches(/\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{3,}\b/g, 0).slice(0, 8);

    return { names, ages, years, words, raw };
  }

  // === Smart wiki search (3 passes) =========================================
  /**
   * Searches the wiki for pages matching the OCR hints, in three passes that
   * go from precise to fuzzy. Passes run one after another and stop as soon
   * as enough unique titles have been collected.
   *
   *   1. intitle: combinations of name + age / name + year / name alone
   *   2. one full-text query built from the strongest hints
   *   3. title prefix search on the first name (or the first word)
   *
   * Passes 1 and 2 are restricted to the ADOS categories.
   *
   * @param {{names: string[], ages: string[], years: string[], words: string[]}} hints
   *   Hints as produced by extractHints().
   * @param {number} [limit=12] - Maximum number of results.
   * @returns {Promise<Array<{title: string, snippet: string}>>} Unique hits in
   *   the order found; search hits carry whatever fields the API returned,
   *   prefix hits carry an empty snippet.
   */
  async function searchWikiSmart (hints, limit) {
    await mw.loader.using('mediawiki.api');
    const api = new mw.Api();
    const ns0 = 0;
    const MAX = limit || 12;

    // One incategory: keyword with "|"-separated names matches pages in ANY
    // of the categories (CirrusSearch syntax). Repeating the keyword once per
    // category would AND them, i.e. require a page to be in every category
    // at once.
    const inCat = ADOS_CATEGORIES.length
      ? 'incategory:"' + ADOS_CATEGORIES.join('|') + '"'
      : '';

    // PASS 1: intitle combinations (precise)
    const pass1 = [];
    for (const n of hints.names) {
      for (const a of hints.ages) pass1.push(`intitle:"${n}" intitle:${a} ${inCat}`);
      for (const y of hints.years) pass1.push(`intitle:"${n}" "${y}" ${inCat}`);
      pass1.push(`intitle:"${n}" ${inCat}`);
    }

    // PASS 2: full-text search over the strongest hints
    const key = []
      .concat(hints.names.slice(0, 2), hints.ages.slice(0, 1), hints.years.slice(0, 1), hints.words.slice(0, 3))
      .map(x => `"${x}"`).join(' ');
    const pass2 = key ? [ `${key} ${inCat}` ] : [];

    // PASS 3: title prefix
    const pass3 = [];
    if (hints.names.length) pass3.push(hints.names[0]);
    if (!pass3.length && hints.words.length) pass3.push(hints.words[0]);

    const seen = new Set();
    const out = [];

    // Appends hits whose title has not been collected yet.
    function collect (items, toHit) {
      items.forEach(it => {
        const hit = toHit(it);
        if (seen.has(hit.title)) return;
        seen.add(hit.title);
        out.push(hit);
      });
    }

    async function runSr (q) {
      const r = await api.get({ action: 'query', list: 'search', srsearch: q, srnamespace: ns0, srlimit: MAX, formatversion: 2 });
      collect(r.query?.search || [], it => it);
    }

    for (const q of pass1) { await runSr(q); if (out.length >= MAX) return out.slice(0, MAX); }
    for (const q of pass2) { await runSr(q); if (out.length >= MAX) return out.slice(0, MAX); }

    // Prefix (list=prefixsearch)
    for (const p of pass3) {
      const r = await api.get({ action: 'query', list: 'prefixsearch', pssearch: p, psnamespace: ns0, pslimit: MAX });
      // Older result formats expose the title under '*'.
      collect(r.query?.prefixsearch || [], it => ({ title: it.title || it['*'], snippet: '' }));
      if (out.length >= MAX) break;
    }

    return out.slice(0, MAX);
  }

  // === HTML escaping & result rendering =====================================
  /**
   * Escapes a value for use in HTML via mw.html.escape.
   *
   * @param {*} s - Value to escape; falsy values are treated as ''.
   * @returns {string} The escaped string.
   */
  function esc (s) {
    const text = String(s ? s : '');
    return mw.html.escape(text);
  }

  /**
   * Renders search hits into #ados-scan-results, replacing earlier content.
   *
   * At most 12 hits are shown. Each hit becomes a linked title plus, when a
   * snippet exists, the snippet with its <span> tags removed. An empty or
   * missing list produces a "no clear hits" message instead.
   *
   * @param {Array<{title: string, snippet: string}>} items - Hits to show.
   */
  function renderResults (items) {
    const box = document.getElementById('ados-scan-results');
    if (!box) return;

    box.innerHTML = '';
    const hasHits = Boolean(items && items.length);
    if (!hasHits) {
      box.innerHTML = '<div class="ados-hit">Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.</div>';
      return;
    }

    for (const it of items.slice(0, 12)) {
      const title = it.title || '';
      const link = mw.util.getUrl(title.replace(/ /g, '_'));
      // Drop the highlighting spans and turn &quot; back into a plain quote.
      const snip = String(it.snippet || '')
        .replace(/<\/?span[^>]*>/g, '')
        .replace(/&quot;/g, '"');

      const div = document.createElement('div');
      div.className = 'ados-hit';

      const heading = `<b><a href="${link}">${esc(title)}</a></b>`;
      const meta = snip ? `<div class="meta">${snip}</div>` : '';
      div.innerHTML = heading + meta;

      box.appendChild(div);
    }
  }

  // === Binding ==============================================================
  // Set once the handlers are attached; later bind() calls become no-ops.
  let BOUND = false;

  /**
   * Attaches the scan widget's event handlers, at most once.
   *
   * Does nothing when already bound or when hasUI() reports that the run
   * button / file input are missing. The run button is also tagged with
   * data-bound="1" and is not wired again when that tag is already present.
   */
  function bind () {
    if (BOUND || !hasUI()) return;

    const runBtn = document.getElementById('ados-scan-run');
    const fileIn = document.getElementById('ados-scan-file');
    const bigBtn = document.getElementById('ados-scan-bigbtn');
    if (!runBtn || !fileIn || runBtn.dataset.bound === '1') return;

    runBtn.dataset.bound = '1';
    BOUND = true;

    // Currently selected file, or undefined when nothing has been chosen.
    function selectedFile () {
      return (fileIn.files && fileIn.files[0]) ? fileIn.files[0] : undefined;
    }

    // Full pipeline: OCR -> hint extraction -> wiki search -> rendering.
    async function onRunClick (ev) {
      ev.preventDefault();
      const f = selectedFile();
      if (!f) {
        alert('Bitte ein Foto auswählen oder aufnehmen.');
        return;
      }
      try {
        runBtn.disabled = true;
        runBtn.textContent = 'Erkenne …';
        setStatus('Erkenne Label …');
        const text = await runOCR(f);
        setStatus('Suche im Wiki …');
        const hits = await searchWikiSmart(extractHints(text), 12);
        renderResults(hits);
        setStatus('Fertig.');
      } catch (e) {
        console.error('[LabelScan]', e);
        setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
      } finally {
        // Re-enable the button whether or not the run succeeded.
        runBtn.disabled = false;
        runBtn.textContent = 'Erkennen & suchen';
      }
    }

    // The optional big button just opens the file picker.
    if (bigBtn) {
      bigBtn.addEventListener('click', () => { fileIn.click(); });
    }
    fileIn.addEventListener('change', () => {
      const f = selectedFile();
      if (f) showPreview(f);
    });
    runBtn.addEventListener('click', onRunClick);

    // Safeguard against overlays covering the button.
    const wrap = document.getElementById('ados-labelscan');
    if (wrap) wrap.style.position = 'relative';
    Object.assign(runBtn.style, {
      position: 'relative',
      zIndex: '9999',
      pointerEvents: 'auto'
    });
  }

  // Initial binding + timed fallbacks + observer
  if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', bind);
  } else {
    bind();
  }
  // Timed retries in case the widget markup shows up shortly after load.
  setTimeout(bind, 250);
  setTimeout(bind, 1000);

  // Keep watching for the widget being inserted later, but only while it is
  // still unbound: an observer on the whole document subtree fires for every
  // DOM change, so it is disconnected as soon as binding has succeeded.
  if (!BOUND) {
    const mo = new MutationObserver(function () {
      bind();
      if (BOUND) mo.disconnect();
    });
    mo.observe(document.documentElement || document.body, { childList: true, subtree: true });
  }
})();