Zum Inhalt springen

MediaWiki:Gadget-LabelScan.js: Unterschied zwischen den Versionen

Aus ADOS Wiki
Keine Bearbeitungszusammenfassung
Markierung: Manuelle Zurücksetzung
Keine Bearbeitungszusammenfassung
Zeile 18: Zeile 18:
   ];
   ];


  const KNOWN_TOKENS = [
const KNOWN_TOKENS = [
    // Serien / Reihen
  // Distillery / Herkunft / Regionen
    'The Tasteful 8', 'Heroes of Childhood', 'A Dream of Scotland',
  'Ireland','Irland','Irish','Single Malt','Bourbon Barrel',
    'A Dream of Ireland', 'The Fine Art of Whisky', 'Friendly Mr. Z',
  'Cask Strength','1st Fill','First Fill',
    'Die Whisky Elfen', 'Rumbastic',
  'Aged','Years','Yo',


    // Brennereien / Regionen
  // ADOS Serien / Motivserien
    'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore',
  'A Dream of Scotland','A Dream of Ireland',
    'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn','Lagavulin',
  'The Tasteful 8','Heroes of Childhood',
    'Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet','Inchgower',
  'Space Girls','Fine Art of Whisky','The Fine Art of Whisky',
    'Islay','Speyside','Highland','Lowland','Campbeltown','Ireland','Irland',
  'Friendly Mr. Z','Whiskytainment','Rumbastic',


    // typische Label-Wörter
  // Häufige Motivwörter
    'Cask Strength','First Fill','Bourbon Barrel','Sherry','PX','Oloroso'
  'Unicorn','Bull','Hero','Childhood',
   ];
 
  // Distillery Namen, universell
  'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore',
   'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn',
  'Lagavulin','Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet',
  'Inchgower','Bunnahabhain','Springbank','Caperdonich','Linkwood','Glen Scotia'
];


   // =============================
   // =============================
Zeile 232: Zeile 238:
   function extractHints (text) {
   function extractHints (text) {
     const raw = String(text || '').replace(/\s+/g, ' ').trim();
     const raw = String(text || '').replace(/\s+/g, ' ').trim();
// Speziell für "The Tasteful 8 / Heroes of Childhood"
if (/TASTEFUL\s*8/i.test(raw)) {
  if (!raw.includes('The Tasteful 8')) raw += ' The Tasteful 8';
}
if (/HEROES\s+OF\s+CHILDHOOD/i.test(raw)) {
  if (!raw.includes('Heroes of Childhood')) raw += ' Heroes of Childhood';
}


     const foundNames = [];
     const foundNames = [];

Version vom 6. November 2025, 00:22 Uhr

/* global mw, Tesseract */
(function () {
  'use strict';

  // =============================
  //   KONFIGURATION
  // =============================

  // Wiki category names used to scope the search queries (read by incatStr()).
  // ← For testing, leave this empty: const ADOS_CATEGORIES = [];
  const ADOS_CATEGORIES = [
    'Alle A Dream of Scotland Abfüllungen',
    'Alle A Dream of Ireland Abfüllungen',
    'Alle A Dream of... – Der Rest der Welt Abfüllungen',
    'Friendly Mr. Z Whiskytainment Abfüllungen',
    'Die Whisky Elfen Abfüllungen',
    'The Fine Art of Whisky Abfüllungen',
    'Alle Rumbastic Abfüllungen'
  ];

// Phrases that extractHints() looks for in the OCR text. Each entry is matched
// case-insensitively between word boundaries and, when found, reported as a
// "name" hint for the wiki search.
const KNOWN_TOKENS = [
  // Distillery / origin / regions
  'Ireland','Irland','Irish','Single Malt','Bourbon Barrel',
  'Cask Strength','1st Fill','First Fill',
  'Aged','Years','Yo',

  // ADOS series / motif series
  'A Dream of Scotland','A Dream of Ireland',
  'The Tasteful 8','Heroes of Childhood',
  'Space Girls','Fine Art of Whisky','The Fine Art of Whisky',
  'Friendly Mr. Z','Whiskytainment','Rumbastic',

  // Common motif words
  'Unicorn','Bull','Hero','Childhood',

  // Distillery names, universal
  'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore',
  'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn',
  'Lagavulin','Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet',
  'Inchgower','Bunnahabhain','Springbank','Caperdonich','Linkwood','Glen Scotia'
];

  // =============================
  //   UI-Hilfen
  // =============================

  /**
   * Tells whether the scanner widget is present on the page.
   *
   * @returns {boolean} true only if both the run button and the file input exist.
   */
  function hasUI () {
    const requiredIds = ['ados-scan-run', 'ados-scan-file'];
    return requiredIds.every(function (id) {
      return Boolean(document.getElementById(id));
    });
  }

  /**
   * Writes a status message into the status element, if that element exists.
   *
   * @param {string} [t] Message to show; any falsy value clears the text.
   */
  function setStatus (t) {
    const statusEl = document.getElementById('ados-scan-status');
    if (!statusEl) {
      return;
    }
    statusEl.textContent = t || '';
  }

  /**
   * Updates the progress element, if present.
   *
   * @param {?number} p Fraction in [0, 1] (values outside are clamped);
   *   null or undefined hides the bar and resets it to 0.
   */
  function setProgress (p) {
    const meter = document.getElementById('ados-scan-progress');
    if (!meter) {
      return;
    }
    const idle = (p == null); // deliberately loose: matches null and undefined
    meter.hidden = idle;
    meter.value = idle ? 0 : Math.max(0, Math.min(1, p));
  }

  /**
   * Shows a thumbnail of the chosen file inside the preview container.
   *
   * The object URL is created only when the preview container exists, and it
   * is released as soon as the image has loaded (or failed to load), so
   * repeated file selections do not accumulate blob URLs.
   *
   * @param {Blob} file Image file picked by the user.
   */
  function showPreview (file) {
    const prev = document.getElementById('ados-scan-preview');
    if (!prev) {
      return; // nothing to render into, so do not allocate an object URL
    }

    const url = URL.createObjectURL(file);
    const release = function () { URL.revokeObjectURL(url); };

    const img = document.createElement('img');
    img.alt = 'Vorschau';
    img.onload = release;
    img.onerror = release;
    img.src = url;

    // Replace any earlier preview without going through an HTML string.
    prev.textContent = '';
    prev.appendChild(img);
    prev.setAttribute('aria-hidden', 'false');
  }

  /**
   * Displays the recognised text in the OCR output element, if it exists.
   *
   * @param {string} [t] Raw OCR text; surrounding whitespace is trimmed.
   */
  function showOCRText (t) {
    const target = document.getElementById('ados-scan-ocr');
    if (target) {
      const cleaned = (t || '').trim();
      target.textContent = cleaned;
    }
  }

  // =============================
  //   Tesseract laden (nur 1x)
  // =============================

  // Cached load promise; cleared again when loading fails so a later call can retry.
  var tesseractReady;

  /**
   * Makes sure the Tesseract.js library is available, loading it at most once.
   *
   * If `window.Tesseract` is already set, resolves immediately. Otherwise the
   * script is requested from jsDelivr and, if that fails, from unpkg.
   * A failed attempt is not cached: the next call starts a fresh load instead
   * of returning the same rejected promise forever.
   *
   * @returns {Promise<void>} Resolves once a script has loaded; rejects with
   *   Error('Tesseract konnte nicht geladen werden') if both sources fail.
   */
  function ensureTesseract () {
    if (tesseractReady) return tesseractReady;

    const primarySrc = 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js';
    const fallbackSrc = 'https://unpkg.com/tesseract.js@5/dist/tesseract.min.js';

    // Injects one async <script> tag and settles when it loads or errors.
    const loadScript = function (src) {
      return new Promise(function (resolve, reject) {
        const tag = document.createElement('script');
        tag.src = src;
        tag.async = true;
        tag.onload = function () { resolve(); };
        tag.onerror = function () { reject(new Error('Failed to load ' + src)); };
        document.head.appendChild(tag);
      });
    };

    const attempt = window.Tesseract
      ? Promise.resolve()
      : loadScript(primarySrc).catch(function () { return loadScript(fallbackSrc); });

    tesseractReady = attempt.catch(function () {
      tesseractReady = undefined; // allow a retry on the next call
      throw new Error('Tesseract konnte nicht geladen werden');
    });
    return tesseractReady;
  }

  // =============================
  //   Vorverarbeitung (OCR)
  //   Graustufen + Unsharp + adaptive Schwelle
  // =============================

  /**
   * Prepares a photo for OCR: downscale, grayscale, light sharpening and a
   * local-mean threshold that turns the image into pure black/white.
   *
   * @param {Blob} file Image file to process.
   * @returns {Promise<HTMLCanvasElement>} Canvas holding the binarised image
   *   (longest side at most 1800 px).
   */
  async function preprocessImage (file) {
    // Load the file into an <img>; release the object URL once loading settled.
    const objectUrl = URL.createObjectURL(file);
    let img;
    try {
      img = await new Promise((res, rej) => {
        const o = new Image();
        o.onload = () => res(o);
        o.onerror = rej;
        o.src = objectUrl;
      });
    } finally {
      URL.revokeObjectURL(objectUrl);
    }

    // Scale down (never up) so that the longer side is at most MAX pixels.
    const MAX = 1800;
    const s = Math.min(1, (img.width > img.height) ? MAX / img.width : MAX / img.height);
    const w = Math.round(img.width * s), h = Math.round(img.height * s);

    const c = document.createElement('canvas'); c.width = w; c.height = h;
    const g = c.getContext('2d', { willReadFrequently: true });
    g.imageSmoothingEnabled = true;
    g.drawImage(img, 0, 0, w, h);

    // → Grayscale (Rec. 709 luma weights), written to all three colour channels
    let id = g.getImageData(0, 0, w, h), d = id.data;
    for (let i=0;i<d.length;i+=4){
      const y = 0.2126*d[i] + 0.7152*d[i+1] + 0.0722*d[i+2];
      d[i]=d[i+1]=d[i+2]=y;
    }
    g.putImageData(id, 0, 0);

    // → Unsharp (light high-pass): add 0.3 × 4-neighbour Laplacian.
    //   Reads from a snapshot so already-sharpened pixels do not feed back.
    id = g.getImageData(0,0,w,h); d = id.data;
    const copy = new Uint8ClampedArray(d);
    const idx = (x,y)=>4*(y*w+x);
    for (let y=1;y<h-1;y++){
      for (let x=1;x<w-1;x++){
        const i0=idx(x,y), a=copy[i0], b=copy[idx(x-1,y)], c0=copy[idx(x+1,y)],
              d0=copy[idx(x,y-1)], e=copy[idx(x,y+1)];
        const lap = 4*a - b - c0 - d0 - e;
        const v = Math.max(0, Math.min(255, a + 0.3*lap));
        d[i0]=d[i0+1]=d[i0+2]=v;
      }
    }
    g.putImageData(id,0,0);

    // → Adaptive threshold (local mean, sampled every 5th pixel).
    // NOTE(review): `half` equals `win`, not win/2, so the sampled window spans
    // ±25 px. Kept as-is because the intended window size is not documented.
    const win = 25, half = (win|0);
    id = g.getImageData(0,0,w,h); d = id.data;
    // Snapshot of the sharpened gray values: the local mean must be computed
    // from unmodified pixels, not from rows that were already set to 0/255.
    const gray = new Uint8ClampedArray(d);
    for (let y=0;y<h;y++){
      for (let x=0;x<w;x++){
        let sum=0, cnt=0;
        for (let yy=Math.max(0,y-half); yy<=Math.min(h-1,y+half); yy+=5){
          for (let xx=Math.max(0,x-half); xx<=Math.min(w-1,x+half); xx+=5){
            sum += gray[4*(yy*w+xx)];
            cnt++;
          }
        }
        const thr = (sum/cnt) - 6; // small bias below the local mean
        const i = 4*(y*w+x);
        const v = gray[i] < thr ? 0 : 255;
        d[i]=d[i+1]=d[i+2]=v;
      }
    }
    g.putImageData(id,0,0);

    return c;
  }

  // Hilfsfunktionen für Varianten
  /**
   * Copies a rectangular region of a canvas into a new canvas of that size.
   *
   * @param {HTMLCanvasElement} canvas Source canvas.
   * @param {number} x Left edge of the region in the source.
   * @param {number} y Top edge of the region in the source.
   * @param {number} w Region width (also the width of the result).
   * @param {number} h Region height (also the height of the result).
   * @returns {HTMLCanvasElement} New canvas containing the region.
   */
  function crop(canvas, x, y, w, h){
    const target = document.createElement('canvas');
    target.width = w;
    target.height = h;
    const ctx = target.getContext('2d');
    ctx.drawImage(canvas, x, y, w, h, 0, 0, w, h);
    return target;
  }
  /**
   * Draws a canvas rotated about its centre into a new canvas.
   *
   * For angles that are multiples of 180° the result keeps the source
   * dimensions; for every other angle width and height are swapped.
   *
   * @param {HTMLCanvasElement} canvas Source canvas.
   * @param {number} deg Rotation angle in degrees.
   * @returns {HTMLCanvasElement} New canvas with the rotated image.
   */
  function rotate(canvas, deg){
    const keepsOrientation = (deg % 180 === 0);
    const out = document.createElement('canvas');
    const pen = out.getContext('2d');

    out.width = keepsOrientation ? canvas.width : canvas.height;
    out.height = keepsOrientation ? canvas.height : canvas.width;

    // Move the origin to the centre, turn, then draw the source centred on it.
    pen.translate(out.width / 2, out.height / 2);
    pen.rotate(deg * Math.PI / 180);
    pen.drawImage(canvas, -canvas.width / 2, -canvas.height / 2);
    return out;
  }

  /**
   * Runs one OCR pass over a canvas with the given language(s).
   *
   * The Tesseract engine parameters are applied with worker.setParameters():
   * the third argument of the Tesseract.recognize() shortcut is worker
   * options, so passing them there has no effect on recognition.
   *
   * @param {HTMLCanvasElement} canvas Image to recognise.
   * @param {string} lang Language code(s), e.g. 'deu+eng'.
   * @returns {Promise<{text: string, conf: number}>} Trimmed text and the
   *   reported confidence (0 when none is reported).
   */
  async function ocrOne(canvas, lang) {
    const worker = await Tesseract.createWorker(lang);
    try {
      await worker.setParameters({
        // Sparse text (PSM 11) works for labels with differently oriented text blocks
        tessedit_pageseg_mode: '11',
        preserve_interword_spaces: '1'
      });
      const res = await worker.recognize(canvas);
      return { text: (res?.data?.text || '').trim(), conf: res?.data?.confidence || 0 };
    } finally {
      // Always free the worker, also when recognition throws.
      await worker.terminate();
    }
  }

  // =============================
  //   Mehrfach-OCR (Rotationen/Regionen) + Fallback-Sprache
  // =============================

  /**
   * Recognises label text by running OCR over several variants of the image
   * and returning the text of the most confident pass.
   *
   * Variants: the full image, the left 40 % column and the bottom 28 % banner,
   * each unrotated and rotated by +90° / −90°, each with 'deu+eng' and 'eng'.
   * A failing pass is logged and skipped. The progress bar advances after each
   * pass and is always hidden again, even when preprocessing throws.
   *
   * @param {Blob} file Image file chosen by the user.
   * @returns {Promise<string>} Best recognised text, or '' if no pass succeeded.
   */
  async function runOCR(file){
    await ensureTesseract();
    setProgress(0.01);
    try {
      const base = await preprocessImage(file);

      // Candidate regions
      const regions = [
        base, // full image
        crop(base, 0, 0, Math.round(base.width*0.4), base.height), // left column
        crop(base, 0, Math.round(base.height*0.72), base.width, Math.round(base.height*0.28)) // bottom banner
      ];

      // + rotations
      const candidates = [];
      for (const region of regions){
        candidates.push(region, rotate(region, 90), rotate(region, -90));
      }

      // Try two language modes per candidate
      const langs = ['deu+eng','eng'];
      const total = candidates.length * langs.length;
      let done = 0;
      let best = null;
      for (const canv of candidates){
        for (const lang of langs){
          try {
            const r = await ocrOne(canv, lang);
            // Strict ">" keeps the earliest pass on ties.
            if (!best || (r.conf||0) > (best.conf||0)) best = r;
          } catch (e) {
            // Best effort: a single failed pass must not abort the scan.
            console.warn('[LabelScan] OCR pass failed (' + lang + ')', e);
          }
          done += 1;
          setProgress(done / total);
        }
      }

      return (best && best.text) || '';
    } finally {
      setProgress(null);
    }
  }

  // =============================
  //   Hinweise aus OCR
  // =============================

  /**
   * Derives search hints from raw OCR text.
   *
   * @param {string} text OCR output (any falsy value is treated as '').
   * @returns {{names: string[], ages: string[], years: string[], words: string[], raw: string}}
   *   names – KNOWN_TOKENS entries found (case-insensitive, whole words);
   *   ages  – distinct age statements such as "12" from "12 Years"/"12 yo"/"12 Jahre";
   *   years – distinct four-digit years starting with 19 or 20;
   *   words – up to 8 distinct capitalised words of at least 4 characters;
   *   raw   – whitespace-normalised text, including any canonical series
   *           names appended by the normalisation step below.
   */
  function extractHints (text) {
    // `let`, because the series normalisation below appends to it
    // (with `const` the `+=` threw a TypeError).
    let raw = String(text || '').replace(/\s+/g, ' ').trim();

    // Special handling for "The Tasteful 8" / "Heroes of Childhood": if a loose
    // variant is present but the canonical spelling is not, append it so the
    // token matching below can find it.
    if (/TASTEFUL\s*8/i.test(raw) && !raw.includes('The Tasteful 8')) {
      raw += ' The Tasteful 8';
    }
    if (/HEROES\s+OF\s+CHILDHOOD/i.test(raw) && !raw.includes('Heroes of Childhood')) {
      raw += ' Heroes of Childhood';
    }

    const foundNames = KNOWN_TOKENS.filter(t => {
      const re = new RegExp('\\b' + t.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + '\\b', 'i');
      return re.test(raw);
    });

    const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi;
    const ages = [...new Set(Array.from(raw.matchAll(ageRe), m => m[1]))];

    const yearRe = /\b(19|20)\d{2}\b/g;
    const years = [...new Set(raw.match(yearRe) || [])];

    const wordRe = /\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{3,}\b/g;
    const words = [...new Set(raw.match(wordRe) || [])].slice(0, 8);

    return { names: foundNames, ages, years, words, raw };
  }

  // =============================
  //   Suche (3 Pässe) + Fallbacks
  // =============================

  /**
   * HTML-escapes a value with mw.html.escape.
   *
   * @param {*} s Value to escape; falsy values become the empty string.
   * @returns {string} Escaped text.
   */
  function esc (s) {
    const text = String(s || '');
    return mw.html.escape(text);
  }

  /**
   * Builds the category filter appended to search queries.
   *
   * All categories go into ONE `incategory:` clause separated by "|", which
   * the search backend treats as "in any of these categories". Separate
   * space-joined `incategory:` clauses are ANDed, which would require a page
   * to be in every ADOS category at once.
   *
   * @returns {string} e.g. 'incategory:"Cat A|Cat B"', or '' when
   *   ADOS_CATEGORIES is empty.
   */
  function incatStr () {
    const cats = (ADOS_CATEGORIES || []).filter(Boolean);
    if (!cats.length) return '';
    return 'incategory:"' + cats.join('|') + '"';
  }

  /**
   * Searches the wiki in three passes and returns de-duplicated hits.
   *
   * 1. Title queries: for each name, `intitle:` combined with every age, then
   *    with every year, then on its own.
   * 2. One full-text query built from up to 2 names, 1 age, 1 year and 3 words.
   * 3. A title prefix search for the first name (or, without names, the first word).
   *
   * Queries run one after another; the function returns as soon as the limit
   * is reached during passes 1–2.
   *
   * @param {{names: string[], ages: string[], years: string[], words: string[]}} hints
   * @param {number} [limit] Maximum number of hits (12 when falsy).
   * @returns {Promise<Array<{title: string, snippet?: string}>>} Search items
   *   in discovery order; prefix hits carry an empty snippet.
   */
  async function searchWikiSmart (hints, limit) {
    await mw.loader.using(['mediawiki.api','mediawiki.util','mediawiki.html']);
    const client = new mw.Api();
    const mainNs = 0;
    const cap = limit || 12;

    // Pass 1: precise intitle combinations
    const titleQueries = [];
    for (const name of hints.names) {
      for (const age of hints.ages) {
        titleQueries.push(`intitle:"${name}" intitle:${age} ${incatStr()}`);
      }
      for (const year of hints.years) {
        titleQueries.push(`intitle:"${name}" "${year}" ${incatStr()}`);
      }
      titleQueries.push(`intitle:"${name}" ${incatStr()}`);
    }

    // Pass 2: weighted full-text search
    const phrase = [
      ...hints.names.slice(0, 2),
      ...hints.ages.slice(0, 1),
      ...hints.years.slice(0, 1),
      ...hints.words.slice(0, 3)
    ].map(x => `"${x}"`).join(' ');
    const fulltextQueries = phrase ? [ `${phrase} ${incatStr()}` ] : [];

    // Pass 3: title prefix (at most one term)
    let prefixTerms = [];
    if (hints.names.length) {
      prefixTerms = [hints.names[0]];
    } else if (hints.words.length) {
      prefixTerms = [hints.words[0]];
    }

    const seenTitles = new Set();
    const hits = [];
    // Adds an entry unless a hit with the same title was collected already.
    const remember = (title, entry) => {
      if (seenTitles.has(title)) return;
      seenTitles.add(title);
      hits.push(entry);
    };

    for (const query of [...titleQueries, ...fulltextQueries]) {
      const resp = await client.get({
        action: 'query',
        list: 'search',
        srsearch: query,
        srnamespace: mainNs,
        srlimit: cap,
        formatversion: 2
      });
      for (const item of (resp.query?.search || [])) {
        remember(item.title, item);
      }
      if (hits.length >= cap) return hits.slice(0, cap);
    }

    // Prefix (list=prefixsearch)
    for (const term of prefixTerms) {
      const resp = await client.get({
        action: 'query',
        list: 'prefixsearch',
        pssearch: term,
        psnamespace: mainNs,
        pslimit: cap
      });
      for (const item of (resp.query?.prefixsearch || [])) {
        const title = item.title || item['*'];
        remember(title, { title, snippet: '' });
      }
      if (hits.length >= cap) break;
    }

    return hits.slice(0, cap);
  }

  // ganz einfacher Fuzzy-Fallback auf Suchergebnissen
  /**
   * Scores how well a page title matches the extracted hints, using
   * case-insensitive substring checks.
   *
   * Each matching name adds 1.0; each matching word, age or year adds 0.4.
   *
   * @param {string} title Page title (falsy values are treated as '').
   * @param {{names: string[], words: string[], ages: Array, years: Array}} hints
   * @returns {number} Accumulated score, 0 when nothing matches.
   */
  function scoreTitle(title, hints){
    const haystack = String(title||'').toLowerCase();
    let total = 0;

    // Adds `weight` once for every value whose needle occurs in the title.
    const tally = (values, weight, toNeedle) => {
      for (const value of values) {
        if (haystack.includes(toNeedle(value))) total += weight;
      }
    };
    const lower = (v) => v.toLowerCase();

    tally(hints.names, 1.0, lower);
    tally(hints.words, 0.4, lower);
    tally(hints.ages, 0.4, String);
    tally(hints.years, 0.4, String);
    return total;
  }

  /**
   * Fallback search: runs one broad query (up to 50 results) and ranks the
   * returned titles with scoreTitle().
   *
   * The query is the quoted hint tokens plus the category filter; if that is
   * empty, the first six whitespace-separated words of hints.raw are used.
   *
   * @param {{names: string[], ages: string[], years: string[], words: string[], raw: string}} hints
   * @param {number} [limit] Maximum number of hits (12 when falsy).
   * @returns {Promise<Array<Object>>} Search items with an added `_score`,
   *   best first, keeping only those scoring at least 0.10.
   */
  async function fallbackFuzzyTitles(hints, limit){
    await mw.loader.using('mediawiki.api');
    const client = new mw.Api();
    const cap = limit || 12;

    // Broad search with tokens (with/without category)
    const quotedTokens = [
      ...hints.names.slice(0,2),
      ...hints.ages.slice(0,1),
      ...hints.years.slice(0,1),
      ...hints.words.slice(0,3)
    ].map(x => `"${x}"`).join(' ');
    const scoped = `${quotedTokens} ${incatStr()}`.trim();
    const searchText = scoped || hints.raw.split(/\s+/).slice(0,6).join(' ');

    const resp = await client.get({
      action: 'query',
      list: 'search',
      srsearch: searchText,
      srlimit: 50,
      formatversion: 2
    });

    const ranked = (resp.query?.search || [])
      .map(item => ({ ...item, _score: scoreTitle(item.title, hints) }))
      .sort((a,b)=> b._score - a._score);

    // Cut to the limit first, then drop non-matching titles (generous threshold).
    return ranked.slice(0, cap).filter(entry => entry._score >= 0.10);
  }

  /**
   * Last-resort search without any category filter.
   *
   * Uses the quoted hint tokens (up to 2 names, 1 age, 1 year, 3 words) or,
   * if there are none, the first six whitespace-separated words of hints.raw.
   *
   * @param {{names: string[], ages: string[], years: string[], words: string[], raw: string}} hints
   * @param {number} [limit] Maximum number of hits (12 when falsy).
   * @returns {Promise<Array<Object>>} Search items as returned by the API.
   */
  async function broadSearchNoCategory(hints, limit){
    await mw.loader.using('mediawiki.api');
    const client = new mw.Api();
    const cap = limit || 12;

    const tokens = [
      ...hints.names.slice(0,2),
      ...hints.ages.slice(0,1),
      ...hints.years.slice(0,1),
      ...hints.words.slice(0,3)
    ];

    let searchText;
    if (tokens.length) {
      searchText = tokens.map(x => `"${x}"`).join(' ');
    } else {
      searchText = hints.raw.split(/\s+/).slice(0,6).join(' ');
    }

    const resp = await client.get({
      action: 'query',
      list: 'search',
      srsearch: searchText,
      srlimit: cap,
      formatversion: 2
    });
    return (resp.query?.search || []);
  }

  // =============================
  //   Ergebnisse rendern
  // =============================

  /**
   * Renders up to 12 search hits into the results container.
   *
   * Does nothing when the container is missing. An empty or missing list
   * shows a "no clear hits" notice instead. Snippets have their <span> tags
   * removed and &quot; turned back into a double quote before being inserted
   * as markup.
   *
   * @param {Array<{title: string, snippet?: string}>} items Search hits.
   */
  function renderResults (items) {
    const box = document.getElementById('ados-scan-results');
    if (!box) return;

    box.innerHTML = '';
    if (!items || !items.length) {
      box.innerHTML = '<div class="ados-hit">Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.</div>';
      return;
    }

    for (const hit of items.slice(0, 12)) {
      const title = hit.title || '';
      const href = mw.util.getUrl(title.replace(/ /g, '_'));
      const snippet = String(hit.snippet || '')
        .replace(/<\/?span[^>]*>/g, '')
        .replace(/&quot;/g, '"');
      const meta = snippet ? `<div class="meta">${snippet}</div>` : '';

      const row = document.createElement('div');
      row.className = 'ados-hit';
      row.innerHTML = `<b><a href="${href}">${esc(title)}</a></b>${meta}`;
      box.appendChild(row);
    }
  }

  // =============================
  //   Binding
  // =============================

  // Set to true once this script instance has wired up the widget.
  var BOUND = false;

  /**
   * Wires the scanner widget's controls to the OCR + search workflow.
   *
   * Safe to call repeatedly: it returns early when already bound, when the
   * widget is not on the page, or when the run button is already marked as
   * bound via its data-bound attribute.
   */
  function bind () {
    if (BOUND) return;
    if (!hasUI()) return;

    const byId = (id) => document.getElementById(id);
    const runBtn = byId('ados-scan-run');
    const fileIn = byId('ados-scan-file');
    const bigBtn = byId('ados-scan-bigbtn');
    const form = byId('ados-scan-form');

    if (!runBtn || !fileIn) return;
    if (runBtn.dataset.bound === '1') return;
    runBtn.dataset.bound = '1';
    BOUND = true;

    // The large button just opens the file picker.
    if (bigBtn) {
      bigBtn.addEventListener('click', () => { fileIn.click(); });
    }

    // Show a preview as soon as a file has been chosen.
    fileIn.addEventListener('change', () => {
      const picked = fileIn.files && fileIn.files[0];
      if (picked) showPreview(picked);
    });

    // Full workflow for one file: OCR, hint extraction, then search with two
    // progressively broader fallbacks. Errors are reported via the status line.
    async function recognizeAndSearch (file) {
      try {
        runBtn.disabled = true;
        runBtn.textContent = 'Erkenne …';
        setStatus('Erkenne Label …');
        const text = await runOCR(file);
        showOCRText(text);

        setStatus('Suche im Wiki …');
        const hints = extractHints(text);

        let hits = await searchWikiSmart(hints, 12);
        if (!hits || !hits.length) {
          setStatus('Kein direkter Treffer – Fuzzy über Kategorien …');
          hits = await fallbackFuzzyTitles(hints, 12);
        }
        if (!hits || !hits.length) {
          setStatus('Kein Treffer – breite Suche ohne Kategorien …');
          hits = await broadSearchNoCategory(hints, 12);
        }

        renderResults(hits);
        setStatus('Fertig.');
      } catch (e) {
        console.error('[LabelScan]', e);
        setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
      } finally {
        runBtn.disabled = false;
        runBtn.textContent = 'Erkennen & suchen';
      }
    }

    function onSubmit (ev) {
      ev.preventDefault();
      const chosen = fileIn.files && fileIn.files[0];
      if (!chosen) {
        alert('Bitte ein Foto auswählen oder aufnehmen.');
        return;
      }
      recognizeAndSearch(chosen);
    }

    runBtn.addEventListener('click', onSubmit);
    if (form) form.addEventListener('submit', onSubmit);

    // Safety: keep the run button above overlapping elements and clickable.
    const wrap = byId('ados-labelscan');
    if (wrap) wrap.style.position = 'relative';
    Object.assign(runBtn.style, {
      position: 'relative',
      zIndex: '9999',
      pointerEvents: 'auto'
    });
  }

  // Start-up: try to bind now (or on DOMContentLoaded), retry twice on timers,
  // and watch the DOM for a widget that is inserted later.
  if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', bind);
  } else {
    bind();
  }
  setTimeout(bind, 250);
  setTimeout(bind, 1000);

  // Observe the document only while still unbound. Once bind() has succeeded
  // the observer is disconnected, so it no longer fires for every DOM mutation
  // on the page.
  const mo = new MutationObserver(function () {
    if (!BOUND) bind();
    if (BOUND) mo.disconnect();
  });
  if (!BOUND) {
    mo.observe(document.documentElement || document.body, { childList: true, subtree: true });
  }

})();