Zum Inhalt springen

MediaWiki:Gadget-LabelScan.js

Aus ADOS Wiki

Hinweis: Leere nach dem Veröffentlichen den Browser-Cache, um die Änderungen sehen zu können.

  • Firefox/Safari: Umschalttaste drücken und gleichzeitig Aktualisieren anklicken oder entweder Strg+F5 oder Strg+R (⌘+R auf dem Mac) drücken
  • Google Chrome: Umschalttaste+Strg+R (⌘+Umschalttaste+R auf dem Mac) drücken
  • Edge: Strg+F5 drücken oder Strg drücken und gleichzeitig Aktualisieren anklicken
/* global mw, Tesseract */
(function () {
  'use strict';

  // ========================================================================
  //  KONFIG
  // ========================================================================
  /**
   * Wiki categories the label search is restricted to.
   * @type {string[]}
   */
  const ADOS_CATEGORIES = [
    'Alle A Dream of Scotland Abfüllungen',
    'Alle A Dream of Ireland Abfüllungen',
    'Alle A Dream of... – Der Rest der Welt Abfüllungen',

    'Friendly Mr. Z Whiskytainment Abfüllungen',
    'Die Whisky Elfen Abfüllungen',
    'The Fine Art of Whisky Abfüllungen',

    'Alle Rumbastic Abfüllungen',
  ];

  /**
   * Frequently seen distillery / series tokens. They are matched against the
   * OCR text and used as hooks for the wiki search; the order is significant
   * because matches are reported in this order.
   * @type {string[]}
   */
  const KNOWN_TOKENS = [
    'Ardbeg', 'Ardmore', 'Arran', 'Auchroisk',
    'Ben Nevis', 'Blair Athol', 'Bowmore', 'Caol Ila',
    'Clynelish', 'Glenallachie', 'Glenrothes', 'Longmorn',
    'Lagavulin', 'Tullibardine', 'Dalmore', 'Benrinnes',
    'Mortlach', 'Glenlivet', 'Inchgower',

    'Islay', 'Speyside', 'Highland', 'Lowland', 'Campbeltown', 'Ireland',

    'A Dream of Scotland', 'A Dream of Ireland',
    'The Fine Art of Whisky', 'The Tasteful 8',
    'Friendly Mr. Z', 'Die Whisky Elfen', 'Rumbastic',
  ];

  // Debug: when true, the raw OCR text is shown at the bottom.
  // A truthy value that was set before this script ran is kept; anything else becomes false.
  window.ADOS_SCAN_DEBUG = window.ADOS_SCAN_DEBUG || false;

  // ========================================================================
  //  DOM-Helfer
  // ========================================================================
  /**
   * Shorthand for an element lookup by id.
   * @param {string} id - Id of the element to find.
   * @returns {HTMLElement|null} The result of `document.getElementById(id)`.
   */
  function byId(id) {
    const found = document.getElementById(id);
    return found;
  }
  /**
   * Tells whether the scan UI is present on the page.
   * @returns {boolean} True only when both the run button and the file input exist.
   */
  function hasUI() {
    const requiredIds = ['ados-scan-run', 'ados-scan-file'];
    return requiredIds.every((id) => Boolean(byId(id)));
  }
  /**
   * Writes a status message into the status element, if that element exists.
   * @param {string} [t] - Message to show; any falsy value clears the text.
   */
  function setStatus(t) {
    const label = byId('ados-scan-status');
    if (!label) {
      return;
    }
    label.textContent = t ? t : '';
  }
  /**
   * Updates the progress bar element, if it exists.
   * @param {?number} p - Progress value, clamped to the range 0..1.
   *   `null` or `undefined` hides the bar and resets it to 0.
   */
  function setProgress(p) {
    const meter = byId('ados-scan-progress');
    if (!meter) {
      return;
    }
    if (p === null || p === undefined) {
      meter.hidden = true;
      meter.value = 0;
      return;
    }
    meter.hidden = false;
    const capped = Math.min(1, p);
    meter.value = Math.max(0, capped);
  }
  /**
   * Shows a preview of the chosen image inside the preview element.
   *
   * Does nothing when the preview element is missing, so no object URL is
   * created that nobody would ever release. The object URL is revoked as soon
   * as the image has loaded (or failed to load); the already rendered picture
   * is not affected by that.
   *
   * @param {Blob} file - Image file picked or dropped by the user.
   */
  function showPreview(file) {
    const prev = byId('ados-scan-preview');
    if (!prev) {
      return;
    }

    const url = URL.createObjectURL(file);
    const release = () => URL.revokeObjectURL(url);

    // Build the element through the DOM instead of concatenating markup.
    const img = document.createElement('img');
    img.addEventListener('load', release, { once: true });
    img.addEventListener('error', release, { once: true });
    img.alt = 'Vorschau';
    img.src = url;

    // Replace any earlier preview.
    prev.textContent = '';
    prev.appendChild(img);
    prev.setAttribute('aria-hidden', 'false');
  }

  // ========================================================================
  //  TESSERACT sauber als WORKER laden (deu+eng)
  // ========================================================================
  // Cached promise for the shared OCR worker. It is reset to null when set-up
  // fails, so that a later call can try again.
  let _ocrWorkerPromise = null;

  /**
   * Injects a script tag into the document head.
   * @param {string} src - URL of the script.
   * @returns {Promise<void>} Resolves on load, rejects with an Error on failure.
   */
  function loadExternalScript(src) {
    return new Promise((resolve, reject) => {
      const tag = document.createElement('script');
      tag.src = src;
      tag.async = true;
      tag.onload = () => resolve();
      tag.onerror = () => reject(new Error('Failed to load script: ' + src));
      document.head.appendChild(tag);
    });
  }

  /**
   * Makes sure `window.Tesseract` is available, loading it from jsDelivr and
   * falling back to unpkg when the first CDN fails.
   * @returns {Promise<void>}
   */
  async function ensureTesseractLoaded() {
    if (window.Tesseract) {
      return;
    }
    try {
      await loadExternalScript('https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js');
    } catch (primaryError) {
      console.warn('[LabelScan]', primaryError);
      await loadExternalScript('https://unpkg.com/tesseract.js@5/dist/tesseract.min.js');
    }
    if (!window.Tesseract) {
      throw new Error('Tesseract script loaded but window.Tesseract is missing');
    }
  }

  /**
   * Returns the shared Tesseract worker (German + English), creating it on
   * first use.
   *
   * The scripts loaded here are tesseract.js v5. In v5 the languages and the
   * OCR engine mode are passed to `createWorker(langs, oem, options)`, and the
   * separate `loadLanguage` / `initialize` steps of v4 are no longer needed.
   *
   * @returns {Promise<object>} The initialised worker. The promise rejects
   *   when the library or the worker cannot be set up; the cache is cleared in
   *   that case so the next call retries.
   */
  function getOcrWorker() {
    if (_ocrWorkerPromise) {
      return _ocrWorkerPromise;
    }

    const pending = (async () => {
      await ensureTesseractLoaded();
      const worker = await window.Tesseract.createWorker(['deu', 'eng'], 1 /* OEM: LSTM only */, {
        workerPath: 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/worker.min.js',
        // NOTE(review): v5 also accepts a directory here and then picks the
        // SIMD/LSTM build itself; a fixed file pins one build. Confirm intent.
        corePath: 'https://cdn.jsdelivr.net/npm/tesseract.js-core@5/tesseract-core.wasm.js',
        // NOTE(review): confirm that this path serves deu/eng traineddata.
        langPath: 'https://tessdata.projectnaptha.com/5',
        logger: (m) => {
          // Map recognition progress (0..1) onto the 5 %..95 % span of the bar.
          if (m && m.status === 'recognizing text' && typeof m.progress === 'number') {
            setProgress(0.05 + m.progress * 0.9);
          }
        },
      });
      await worker.setParameters({ tessedit_pageseg_mode: '11', user_defined_dpi: '300' });
      return worker;
    })();

    _ocrWorkerPromise = pending;
    // Do not keep a rejected promise around: callers still see the rejection
    // through `pending`, but the next call starts from scratch.
    pending.catch(() => {
      if (_ocrWorkerPromise === pending) {
        _ocrWorkerPromise = null;
      }
    });
    return pending;
  }

  // ========================================================================
  //  Bild-Vorverarbeitung: Skalierung + adaptives Thresholding
  // ========================================================================
  /**
   * Draws an image onto a new canvas, shrinking it so that its longer side is
   * at most `maxSide` pixels. Images that are already small enough keep their
   * size (the scale factor never exceeds 1).
   *
   * @param {{width: number, height: number}} img - Drawable image source.
   * @param {number} [maxSide=2000] - Upper bound for the longer side.
   * @returns {HTMLCanvasElement} Canvas holding the (possibly scaled) image.
   */
  function scaleToCanvas(img, maxSide = 2000) {
    const longerSide = Math.max(img.width, img.height);
    const factor = Math.min(1, maxSide / longerSide);
    const targetWidth = Math.round(img.width * factor);
    const targetHeight = Math.round(img.height * factor);

    const canvas = document.createElement('canvas');
    canvas.width = targetWidth;
    canvas.height = targetHeight;

    const context = canvas.getContext('2d');
    context.imageSmoothingEnabled = true;
    context.drawImage(img, 0, 0, targetWidth, targetHeight);
    return canvas;
  }

  /**
   * Binarises a canvas with a local (adaptive) mean threshold.
   *
   * Steps: convert to grey values, build a summed-area table, then compare
   * every pixel with the mean of the surrounding window. A pixel darker than
   * `mean - 7` becomes black, every other pixel white; alpha is set to 255.
   *
   * @param {HTMLCanvasElement} src - Source canvas; it is only read.
   * @returns {HTMLCanvasElement} New canvas of the same size with the black/white image.
   */
  function adaptiveThreshold(src) {
    const width = src.width;
    const height = src.height;
    const stride = width + 1; // row length of the summed-area table

    const result = document.createElement('canvas');
    result.width = width;
    result.height = height;
    const readCtx = src.getContext('2d');
    const writeCtx = result.getContext('2d');

    // 1) Grey values (Rec. 709 weights, truncated to an integer).
    const rgba = readCtx.getImageData(0, 0, width, height).data;
    const luma = new Uint8ClampedArray(width * height);
    let pixel = 0;
    for (let offset = 0; offset < rgba.length; offset += 4) {
      luma[pixel] = (0.2126 * rgba[offset] + 0.7152 * rgba[offset + 1] + 0.0722 * rgba[offset + 2]) | 0;
      pixel += 1;
    }

    // 2) Summed-area table with one extra leading row and column of zeros.
    const integral = new Uint32Array(stride * (height + 1));
    for (let row = 0; row < height; row++) {
      let rowSum = 0;
      for (let col = 0; col < width; col++) {
        rowSum += luma[row * width + col];
        integral[(row + 1) * stride + (col + 1)] = integral[row * stride + (col + 1)] + rowSum;
      }
    }

    // 3) Threshold against the local mean.
    const radius = Math.max(15, Math.round(Math.min(width, height) / 24));
    const OFFSET = 7;
    const target = writeCtx.createImageData(width, height);
    const outPixels = target.data;

    for (let row = 0; row < height; row++) {
      const top = Math.max(0, row - radius);
      const bottom = Math.min(height - 1, row + radius);
      for (let col = 0; col < width; col++) {
        const left = Math.max(0, col - radius);
        const right = Math.min(width - 1, col + radius);

        const topLeft = integral[top * stride + left];
        const bottomLeft = integral[(bottom + 1) * stride + left];
        const topRight = integral[top * stride + (right + 1)];
        const bottomRight = integral[(bottom + 1) * stride + (right + 1)];

        const area = (right - left + 1) * (bottom - top + 1);
        const mean = (bottomRight + topLeft - bottomLeft - topRight) / area;

        const index = row * width + col;
        const value = luma[index] < (mean - OFFSET) ? 0 : 255;
        const out = index * 4;
        outPixels[out] = value;
        outPixels[out + 1] = value;
        outPixels[out + 2] = value;
        outPixels[out + 3] = 255;
      }
    }

    writeCtx.putImageData(target, 0, 0);
    return result;
  }

  // ========================================================================
  //  OCR: mehrere Strategien (PSM 11 → 6 → 4, binarisiert & farbig)
  // ========================================================================
  /**
   * Runs OCR on an image file and returns the best text found.
   *
   * The image is scaled down and also binarised; both versions are tried with
   * the page segmentation modes 11, 6 and 4 (binarised first). The longest
   * trimmed result wins, and the loop stops early once a result is longer
   * than 40 characters.
   *
   * The temporary object URL is released as soon as the image has been drawn,
   * and the progress bar is hidden again even when a step throws.
   *
   * @param {Blob} file - Image file to recognise.
   * @returns {Promise<string>} Best OCR text, possibly empty.
   * @throws {Error} When the image cannot be loaded or the OCR worker fails.
   */
  async function runOCR(file){
    setProgress(0.02);
    let best = '';

    try {
      const objectUrl = URL.createObjectURL(file);
      let base;
      try {
        const img = await new Promise((resolve, reject) => {
          const probe = new Image();
          probe.onload = () => resolve(probe);
          probe.onerror = () => reject(new Error('Image could not be loaded for OCR'));
          probe.src = objectUrl;
        });
        base = scaleToCanvas(img, 2000);
      } finally {
        // The pixels live on the canvas now (or loading failed): free the URL.
        URL.revokeObjectURL(objectUrl);
      }
      const bin = adaptiveThreshold(base);

      const worker = await getOcrWorker();
      const candidates = [
        { canvas: bin,  psm: '11' },
        { canvas: base, psm: '11' },
        { canvas: bin,  psm: '6'  },
        { canvas: base, psm: '6'  },
        { canvas: bin,  psm: '4'  },
        { canvas: base, psm: '4'  }
      ];

      for (let i = 0; i < candidates.length; i++){
        const c = candidates[i];
        await worker.setParameters({ tessedit_pageseg_mode: c.psm });
        const { data } = await worker.recognize(c.canvas);
        const txt = (data && data.text ? data.text : '').trim();
        if (txt.length > best.length) best = txt;
        if (best.length > 40) break; // good enough, skip the remaining strategies
        setProgress(0.96 + i * 0.008);
      }
    } finally {
      setProgress(null);
    }

    if (window.ADOS_SCAN_DEBUG){
      const box = byId('ados-scan-ocr');
      if (box) box.textContent = best || '(leer)';
    }
    return best;
  }

  // ========================================================================
  //  Hinweise aus OCR destillieren
  // ========================================================================
  /**
   * Distils search hints from raw OCR text.
   *
   * @param {string} text - Raw OCR output; null/undefined is treated as ''.
   * @returns {{names: string[], ages: string[], years: string[], words: string[], raw: string}}
   *   - names: entries of KNOWN_TOKENS found in the text (case-insensitive, KNOWN_TOKENS order)
   *   - ages:  distinct 1-2 digit numbers followed by year(s)/yo/Jahr(e)
   *   - years: distinct four-digit years starting with 19 or 20
   *   - words: up to 8 distinct capitalised words of at least 4 characters
   *   - raw:   the text with whitespace collapsed and trimmed
   */
  function extractHints(text){
    const raw = String(text || '').replace(/\s+/g, ' ').trim();

    const names = KNOWN_TOKENS.filter((token) => {
      const escaped = token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      return new RegExp('\\b' + escaped + '\\b', 'i').test(raw);
    });

    const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi;
    const ages = [...new Set(Array.from(raw.matchAll(ageRe), (m) => m[1]))];

    const yearRe = /\b(19|20)\d{2}\b/g;
    const years = [...new Set(Array.from(raw.matchAll(yearRe), (m) => m[0]))];

    // `\b` only knows ASCII word characters, so it never matches in front of
    // Ä/Ö/Ü and words such as "Österreich" were dropped. The word boundaries
    // are therefore spelled out with the full letter set. Lookbehind is
    // avoided on purpose; the leading separator is consumed instead and the
    // word itself is capture group 1.
    const wordRe = /(?:^|[^0-9A-Za-z_ÄÖÜäöüß])([A-ZÄÖÜ][A-Za-zÄÖÜäöüß-]{2,}[A-Za-zÄÖÜäöüß])(?![0-9A-Za-z_ÄÖÜäöüß])/g;
    const words = [];
    const seenWords = new Set();
    for (const match of raw.matchAll(wordRe)) {
      const word = match[1];
      if (seenWords.has(word)) continue;
      seenWords.add(word);
      words.push(word);
      if (words.length >= 8) break;
    }

    return { names, ages, years, words, raw };
  }

  // ========================================================================
  //  Suche im Wiki (3 Pässe)
  // ========================================================================
  /**
   * Searches the wiki for pages matching the OCR hints, in three passes:
   *   1. title searches per known name (combined with age / year when present),
   *   2. one keyword search built from names, age, year and a few words,
   *   3. a prefix search on the first name (or the first word as fallback).
   * Passes 1 and 2 are restricted to the configured categories. Results are
   * de-duplicated by title and the search stops once `limit` hits exist.
   *
   * The category restriction is a single `incategory:"A|B|…"` clause. With
   * CirrusSearch, separate space-joined `incategory:` clauses must all match,
   * so a page would have had to be in every category at once; the
   * pipe-separated form matches pages in any of them.
   *
   * @param {{names: string[], ages: string[], years: string[], words: string[]}} hints
   * @param {number} [limit=12] - Maximum number of hits.
   * @returns {Promise<Array<{title: string, snippet?: string}>>}
   */
  async function searchWikiSmart(hints, limit){
    await mw.loader.using('mediawiki.api');
    const api = new mw.Api();
    const MAX = limit || 12;
    const NS_MAIN = 0;

    const categoryFilter = ADOS_CATEGORIES.length
      ? 'incategory:"' + ADOS_CATEGORIES.join('|') + '"'
      : '';

    // Pass 1: title searches, most specific query first.
    const pass1 = [];
    for (const name of hints.names) {
      for (const age of hints.ages) {
        pass1.push(`intitle:"${name}" intitle:${age} ${categoryFilter}`);
      }
      for (const year of hints.years) {
        pass1.push(`intitle:"${name}" "${year}" ${categoryFilter}`);
      }
      pass1.push(`intitle:"${name}" ${categoryFilter}`);
    }

    // Pass 2: one combined keyword query.
    const key = [
      ...hints.names.slice(0, 2),
      ...hints.ages.slice(0, 1),
      ...hints.years.slice(0, 1),
      ...hints.words.slice(0, 3),
    ].map((x) => `"${x}"`).join(' ');
    const pass2 = key ? [`${key} ${categoryFilter}`] : [];

    // Pass 3: prefix search without category restriction.
    const pass3 = [];
    if (hints.names.length) pass3.push(hints.names[0]);
    if (!pass3.length && hints.words.length) pass3.push(hints.words[0]);

    const seen = new Set();
    const out = [];
    const remember = (hit) => {
      if (seen.has(hit.title)) return;
      seen.add(hit.title);
      out.push(hit);
    };

    async function runSr(q){
      const r = await api.get({
        action: 'query', list: 'search', srsearch: q,
        srnamespace: NS_MAIN, srlimit: MAX, formatversion: 2,
      });
      (r.query?.search || []).forEach(remember);
    }

    for (const q of [...pass1, ...pass2]) {
      await runSr(q);
      if (out.length >= MAX) return out.slice(0, MAX);
    }

    for (const p of pass3) {
      const r = await api.get({
        action: 'query', list: 'prefixsearch', pssearch: p,
        psnamespace: NS_MAIN, pslimit: MAX,
      });
      (r.query?.prefixsearch || []).forEach((it) => {
        // Without formatversion=2 the title may arrive under the '*' key.
        remember({ title: it.title || it['*'], snippet: '' });
      });
      if (out.length >= MAX) break;
    }

    return out.slice(0, MAX);
  }

  // ========================================================================
  //  Treffer rendern
  // ========================================================================
  /**
   * HTML-escapes a value with `mw.html.escape`.
   * @param {*} s - Value to escape; any falsy value is treated as the empty string.
   * @returns {string} Escaped text.
   */
  function esc(s) {
    const text = s ? String(s) : '';
    return mw.html.escape(text);
  }
  /**
   * Renders the search hits into the results element (no-op when it is missing).
   * At most 12 hits are shown; an empty or missing list shows a hint instead.
   *
   * @param {Array<{title?: string, snippet?: string}>} items - Hits to display.
   */
  function renderResults(items) {
    const container = byId('ados-scan-results');
    if (!container) {
      return;
    }
    container.innerHTML = '';

    const hasHits = Boolean(items) && Boolean(items.length);
    if (!hasHits) {
      container.innerHTML = '<div class="ados-hit">Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.</div>';
      return;
    }

    for (const hit of items.slice(0, 12)) {
      const title = hit.title || '';
      const href = mw.util.getUrl(title.replace(/ /g, '_'));

      // Remove the highlight <span> tags of the search snippet and turn
      // &quot; back into plain quotes.
      // NOTE(review): the rest of the snippet is inserted as HTML as-is.
      const snippet = String(hit.snippet || '')
        .replace(/<\/?span[^>]*>/g, '')
        .replace(/&quot;/g, '"');

      let markup = '<b><a href="' + href + '">' + esc(title) + '</a></b>';
      if (snippet) {
        markup += '<div class="meta">' + snippet + '</div>';
      }

      const row = document.createElement('div');
      row.className = 'ados-hit';
      row.innerHTML = markup;
      container.appendChild(row);
    }
  }

  // ========================================================================
  //  BINDING
  // ========================================================================
  // Set once the event handlers have been attached.
  let BOUND=false;

  /**
   * Wires the scan UI: file picker, drag & drop, and the "recognise & search"
   * button. Safe to call repeatedly; it does nothing when the UI is not on
   * the page yet or when the handlers are already attached.
   */
  function bind() {
    if (BOUND) return;
    if (!hasUI()) return;

    const runBtn = byId('ados-scan-run');
    const fileIn = byId('ados-scan-file');
    const bigBtn = byId('ados-scan-bigbtn');
    const drop = byId('ados-scan-drop');

    if (!runBtn || !fileIn) return;
    // The marker on the button guards against a second copy of this script.
    if (runBtn.dataset.bound === '1') return;
    runBtn.dataset.bound = '1';
    BOUND = true;

    if (bigBtn) {
      bigBtn.addEventListener('click', () => {
        fileIn.click();
      });
    }

    fileIn.addEventListener('change', () => {
      const chosen = fileIn.files;
      if (chosen && chosen[0]) showPreview(chosen[0]);
    });

    // Drag & drop
    if (drop) {
      const highlight = (on) => (event) => {
        event.preventDefault();
        if (on) {
          drop.classList.add('is-drag');
        } else {
          drop.classList.remove('is-drag');
        }
      };
      for (const type of ['dragenter', 'dragover']) {
        drop.addEventListener(type, highlight(true));
      }
      for (const type of ['dragleave', 'drop']) {
        drop.addEventListener(type, highlight(false));
      }
      drop.addEventListener('drop', (event) => {
        const transfer = event.dataTransfer;
        const first = transfer && transfer.files && transfer.files[0];
        if (!first) return;
        fileIn.files = transfer.files;
        showPreview(first);
      });
    }

    // Click on "recognise & search"
    runBtn.addEventListener('click', async (ev) => {
      ev.preventDefault();
      const picked = fileIn.files && fileIn.files[0];
      if (!picked) {
        alert('Bitte ein Foto auswählen oder aufnehmen.');
        return;
      }
      const f = fileIn.files[0];
      try {
        runBtn.disabled = true;
        runBtn.textContent = 'Erkenne …';
        setStatus('Erkenne Label …');
        const text = await runOCR(f);
        setStatus('Suche im Wiki …');
        const hints = extractHints(text);
        const hits = await searchWikiSmart(hints, 12);
        renderResults(hits);
        setStatus('Fertig.');
      } catch (e) {
        console.error('[LabelScan]', e);
        setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
      } finally {
        runBtn.disabled = false;
        runBtn.textContent = '🔍 Erkennen & suchen';
      }
    });

    // Safety net against overlays covering the button.
    const wrap = byId('ados-labelscan');
    if (wrap) wrap.style.position = 'relative';
    Object.assign(runBtn.style, { position: 'relative', zIndex: '9999', pointerEvents: 'auto' });
  }

  // Initial binding, two timed fallbacks, and an observer for markup that is
  // inserted into the page later.
  if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', bind);
  } else {
    bind();
  }
  setTimeout(bind, 250);
  setTimeout(bind, 1000);

  // The observer watches the whole document, so it is switched off as soon as
  // the UI is bound; otherwise its callback would keep running on every DOM
  // mutation for the lifetime of the page. bind() never rebinds once BOUND is
  // set, so nothing is lost by disconnecting.
  const mo = new MutationObserver(() => {
    if (!BOUND) bind();
    if (BOUND) mo.disconnect();
  });
  if (!BOUND) {
    mo.observe(document.documentElement || document.body, { childList: true, subtree: true });
  }

})();