Zum Inhalt springen

MediaWiki:Gadget-LabelScan.js: Unterschied zwischen den Versionen

Aus ADOS Wiki
Keine Bearbeitungszusammenfassung
Keine Bearbeitungszusammenfassung
Zeile 3: Zeile 3:
   'use strict';
   'use strict';


   // ------------------------------------------------------------
   // ========================================================================
   // 0) Konfiguration
   // KONFIG
   // ------------------------------------------------------------
   // ========================================================================
  // Debug-Ausgabe der reinen OCR-Texte (Optional: im Browser einstellen)
   // In welchen Kategorien wird gesucht?
  // window.ADOS_SCAN_DEBUG = true;
 
   // In diesen Kategorien sollen Treffer bevorzugt gesucht werden:
   const ADOS_CATEGORIES = [
   const ADOS_CATEGORIES = [
     'Alle A Dream of Scotland Abfüllungen',
     'Alle A Dream of Scotland Abfüllungen',
Zeile 20: Zeile 17:
   ];
   ];


   // Distillery-/Marken-Tokens (wird für „hints“ verwendet)
   // Häufige Distillery-/Serien-Tokens (zum „Einhaken“ in die Suche)
   const KNOWN_TOKENS = [
   const KNOWN_TOKENS = [
     'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore',
     'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore',
     'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn','Lagavulin',
     'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn','Lagavulin',
     'Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet','Inchgower',
     'Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet','Inchgower',
     'Islay','Speyside','Highland','Lowland','Campbeltown','Ireland'
     'Islay','Speyside','Highland','Lowland','Campbeltown','Ireland',
    'A Dream of Scotland','A Dream of Ireland','The Fine Art of Whisky',
    'The Tasteful 8','Friendly Mr. Z','Die Whisky Elfen','Rumbastic'
   ];
   ];


   // ------------------------------------------------------------
   // Debug: Roh-OCR unten anzeigen, wenn true
   // 1) UI Helpers
  window.ADOS_SCAN_DEBUG = window.ADOS_SCAN_DEBUG || false;
   // ------------------------------------------------------------
 
   function hasUI () {
   // ========================================================================
     return !!document.getElementById('ados-scan-run') &&
   // DOM-Helfer
          !!document.getElementById('ados-scan-file');
  // ========================================================================
  }
  function byId(id){ return document.getElementById(id); }
  function setStatus (t) {
   function hasUI(){
    var el = document.getElementById('ados-scan-status');
     return !!byId('ados-scan-run') && !!byId('ados-scan-file');
    if (el) el.textContent = t || '';
   }
   }
   function setProgress (p) {
  function setStatus(t){ const el = byId('ados-scan-status'); if (el) el.textContent = t || ''; }
     var bar = document.getElementById('ados-scan-progress');
   function setProgress(p){
    if (!bar) return;
     const bar = byId('ados-scan-progress'); if (!bar) return;
     if (p == null) { bar.hidden = true; bar.value = 0; }
     if (p == null){ bar.hidden = true; bar.value = 0; }
     else { bar.hidden = false; bar.value = Math.max(0, Math.min(1, p)); }
     else { bar.hidden = false; bar.value = Math.max(0, Math.min(1, p)); }
   }
   }
   function showPreview (file) {
   function showPreview(file){
     var url = URL.createObjectURL(file);
     const url = URL.createObjectURL(file);
     var prev = document.getElementById('ados-scan-preview');
     const prev = byId('ados-scan-preview');
     if (prev) {
     if (prev){ prev.innerHTML = '<img alt="Vorschau" src="'+url+'">'; prev.setAttribute('aria-hidden','false'); }
      prev.innerHTML = '<img alt="Vorschau" style="max-width:100%;height:auto;border-radius:8px" src="' + url + '">';
      prev.setAttribute('aria-hidden', 'false');
    }
   }
   }
  function esc (s) { return mw.html.escape(String(s || '')); }


   // ------------------------------------------------------------
   // ========================================================================
   // 2) Tesseract bei Bedarf laden
   // TESSERACT sauber als WORKER laden (deu+eng)
   // ------------------------------------------------------------
   // ========================================================================
   var tesseractReady;
   let _ocrWorkerPromise = null;
   function ensureTesseract () {
   function getOcrWorker(){
     if (tesseractReady) return tesseractReady;
     if (_ocrWorkerPromise) return _ocrWorkerPromise;
     tesseractReady = new Promise(function (resolve, reject) {
     _ocrWorkerPromise = (async () => {
       if (window.Tesseract) return resolve();
       if (!window.Tesseract){
      var s = document.createElement('script');
        await new Promise((res, rej) => {
      s.src = 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js';
          const s=document.createElement('script');
      s.async = true;
          s.src='https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js';
      s.onload = resolve;
          s.async=true; s.onload=res; s.onerror=() => {
      s.onerror = function () {
            const s2=document.createElement('script');
        var s2 = document.createElement('script');
            s2.src='https://unpkg.com/tesseract.js@5/dist/tesseract.min.js';
        s2.src = 'https://unpkg.com/tesseract.js@5/dist/tesseract.min.js';
            s2.async=true; s2.onload=res; s2.onerror=rej; document.head.appendChild(s2);
        s2.async = true;
          };
        s2.onload = resolve;
          document.head.appendChild(s);
        s2.onerror = function () { reject(new Error('Tesseract konnte nicht geladen werden')); };
        });
        document.head.appendChild(s2);
      }
       };
      const { createWorker } = Tesseract;
       document.head.appendChild(s);
      const worker = await createWorker({
     });
        workerPath: 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/worker.min.js',
     return tesseractReady;
        corePath:  'https://cdn.jsdelivr.net/npm/tesseract.js-core@5/tesseract-core.wasm.js',
        langPath:  'https://tessdata.projectnaptha.com/5',
        logger: m => {
          if (m && m.status === 'recognizing text' && typeof m.progress === 'number'){
            setProgress(0.05 + m.progress * 0.9);
          }
        }
      });
      await worker.loadLanguage('deu+eng');
       await worker.initialize('deu+eng');
       await worker.setParameters({ tessedit_pageseg_mode: '11', user_defined_dpi: '300' });
      return worker;
     })();
     return _ocrWorkerPromise;
   }
   }


   // ------------------------------------------------------------
   // ========================================================================
   // 3) Bild-Vorverarbeitung
   // Bild-Vorverarbeitung: Skalierung + adaptives Thresholding
  //    - skalieren
   // ========================================================================
  //    - adaptives Thresholding (besser gegen Glanz/Folie)
   function scaleToCanvas(img, maxSide = 2000){
   //   - relative Crops zum Auslesen bestimmter Zonen
     const s = Math.min(1, maxSide / Math.max(img.width, img.height));
  // ------------------------------------------------------------
     const w = Math.round(img.width * s), h = Math.round(img.height * s);
   function fixCanvasOrientation(img, maxSide=2200) {
     const c = document.createElement('canvas'); c.width=w; c.height=h;
     const scale = Math.min(1, maxSide / Math.max(img.width, img.height));
     const ctx=c.getContext('2d'); ctx.imageSmoothingEnabled = true;
     const w = Math.round(img.width * scale);
     ctx.drawImage(img,0,0,w,h);
    const h = Math.round(img.height * scale);
     const c = document.createElement('canvas');
    c.width = w; c.height = h;
     const ctx = c.getContext('2d');
    ctx.imageSmoothingEnabled = true;
     ctx.drawImage(img, 0, 0, w, h);
     return c;
     return c;
   }
   }
  function cropRel(srcCanvas, x, y, w, h) {
    const sw = srcCanvas.width, sh = srcCanvas.height;
    const cx = Math.round(x * sw), cy = Math.round(y * sh);
    const cw = Math.round(w * sw), ch = Math.round(h * sh);
    const out = document.createElement('canvas');
    out.width = cw; out.height = ch;
    const octx = out.getContext('2d');
    octx.drawImage(srcCanvas, cx, cy, cw, ch, 0, 0, cw, ch);
    return out;
  }
  function adaptiveThreshold(srcCanvas) {
    const w = srcCanvas.width, h = srcCanvas.height;
    const out = document.createElement('canvas'); out.width = w; out.height = h;
    const sctx = srcCanvas.getContext('2d');
    const octx = out.getContext('2d');
    const id = sctx.getImageData(0,0,w,h);
    const d = id.data;


     const gray = new Uint8ClampedArray(w*h);
  function adaptiveThreshold(src){
     for (let i=0,j=0;i<d.length;i+=4,++j) {
    const w=src.width, h=src.height;
       gray[j] = (0.2126*d[i] + 0.7152*d[i+1] + 0.0722*d[i+2])|0;
    const out=document.createElement('canvas'); out.width=w; out.height=h;
    const sctx=src.getContext('2d'), octx=out.getContext('2d');
    const id=sctx.getImageData(0,0,w,h), d=id.data;
     const gray=new Uint8ClampedArray(w*h);
     for(let i=0,j=0;i<d.length;i+=4,++j){
       gray[j]=(0.2126*d[i]+0.7152*d[i+1]+0.0722*d[i+2])|0;
     }
     }
     const S = new Uint32Array((w+1)*(h+1));
     const S=new Uint32Array((w+1)*(h+1));
     for (let y=1;y<=h;y++) {
     for(let y=1;y<=h;y++){
       let rowsum = 0;
       let row=0;
       for (let x=1;x<=w;x++) {
       for(let x=1;x<=w;x++){
         const v = gray[(y-1)*w + (x-1)];
         const v=gray[(y-1)*w + (x-1)];
         rowsum += v;
         row+=v; S[y*(w+1)+x]=S[(y-1)*(w+1)+x]+row;
        S[y*(w+1)+x] = S[(y-1)*(w+1)+x] + rowsum;
       }
       }
     }
     }
     const win = Math.max(15, Math.round(Math.min(w,h)/24));
     const win=Math.max(15, Math.round(Math.min(w,h)/24));
     const outD = octx.createImageData(w,h); const od = outD.data;
     const outId=octx.createImageData(w,h), od=outId.data;
     const C = 7;
     const C=7;
 
     for(let y=0;y<h;y++){
     for (let y=0;y<h;y++) {
       const y0=Math.max(0,y-win), y1=Math.min(h-1,y+win);
       const y0 = Math.max(0, y - win), y1 = Math.min(h-1, y + win);
       for(let x=0;x<w;x++){
       for (let x=0;x<w;x++) {
         const x0=Math.max(0,x-win), x1=Math.min(w-1,x+win);
         const x0 = Math.max(0, x - win), x1 = Math.min(w-1, x + win);
         const A=S[y0*(w+1)+x0], B=S[(y1+1)*(w+1)+x0], Cc=S[y0*(w+1)+(x1+1)], Dd=S[(y1+1)*(w+1)+(x1+1)];
         const A = S[y0*(w+1)+x0];
         const area=(x1-x0+1)*(y1-y0+1);
        const B = S[(y1+1)*(w+1)+x0];
         const mean=(Dd + A - B - Cc)/area;
        const Cc= S[y0*(w+1)+(x1+1)];
         const g=gray[y*w+x];
        const Dd= S[(y1+1)*(w+1)+(x1+1)];
         const area = (x1-x0+1)*(y1-y0+1);
         const mean = ((Dd + A - B - Cc) / area);
         const g = gray[y*w + x];
         const pix = g < (mean - C) ? 0 : 255;
         const pix = g < (mean - C) ? 0 : 255;
         const k = (y*w + x)*4;
         const k=(y*w+x)*4; od[k]=od[k+1]=od[k+2]=pix; od[k+3]=255;
        od[k]=od[k+1]=od[k+2]=pix; od[k+3]=255;
       }
       }
     }
     }
     octx.putImageData(outD,0,0);
     octx.putImageData(outId,0,0);
     return out;
     return out;
   }
   }
   async function preprocessImage(file) {
 
  // ========================================================================
  //  OCR: mehrere Strategien (PSM 11 → 6 → 4, binarisiert & farbig)
  // ========================================================================
   async function runOCR(file){
    setProgress(0.02);
     const img = await new Promise((res, rej) => {
     const img = await new Promise((res, rej) => {
       const o = new Image();
       const o=new Image(); o.onload=()=>res(o); o.onerror=rej;
      o.onload = () => res(o);
       o.src=URL.createObjectURL(file);
      o.onerror = rej;
       o.src = URL.createObjectURL(file);
     });
     });
     const base = fixCanvasOrientation(img, 2200);
 
     const base = scaleToCanvas(img, 2000);
     const bin  = adaptiveThreshold(base);
     const bin  = adaptiveThreshold(base);
    return { base, bin };
  }
  // ------------------------------------------------------------
  // 4) OCR (Mehrzonen, Whitelists)
  // ------------------------------------------------------------
  async function runOCR(file) {
    await ensureTesseract();
    setProgress(0);
    const { base, bin } = await preprocessImage(file);


     const zones = [
     const worker = await getOcrWorker();
       { name:'header',  crop:[0.00,0.00,1.00,0.28],  psm:6, whitelist:'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 -&.,’\'' },
    const candidates = [
       { name:'body',   crop:[0.00,0.28,1.00,0.52],  psm:6, whitelist:null },
       { canvas: bin,  psm: '11' },
       { name:'footer', crop:[0.00,0.80,1.00,0.20],  psm:6, whitelist:'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 %°.,-’\'' },
       { canvas: base, psm: '11' },
      { canvas: bin,  psm: '6'  },
       { canvas: base, psm: '6' },
      { canvas: bin,  psm: '4'  },
      { canvas: base, psm: '4' }
     ];
     ];


     const texts = [];
     let best = '';
     let step = 0, total = zones.length*2;
     for (let i=0;i<candidates.length;i++){
 
       const c = candidates[i];
    for (const z of zones) {
       await worker.setParameters({ tessedit_pageseg_mode: c.psm });
       const cropBin  = cropRel(bin,  ...z.crop);
      const { data } = await worker.recognize(c.canvas);
       const cropBase = cropRel(base, ...z.crop);
      const txt = (data && data.text ? data.text : '').trim();
 
      if (txt.length > best.length) best = txt;
      async function pass(canvas) {
       if (best.length > 40) break;
        const opts = { tessedit_pageseg_mode: z.psm, preserve_interword_spaces: 1 };
       setProgress(0.96 + i * 0.008);
        if (z.whitelist) opts.tessedit_char_whitelist = z.whitelist;
        const out = await Tesseract.recognize(canvas, 'deu+eng', {
          logger: m => { if(m.status==='recognizing text') setProgress((step + m.progress)/total); }
        , ...opts });
        step += 1;
        return out.data?.text || '';
       }
 
      const t1 = await pass(cropBin);
       const t2 = await pass(cropBase);
      texts.push(t1, t2);
     }
     }
     setProgress(null);
     setProgress(null);
    const full = texts.join('\n');


     // Optionales Debug auf der Seite
     if (window.ADOS_SCAN_DEBUG){
    try {
      const box = byId('ados-scan-ocr');
      if (window.ADOS_SCAN_DEBUG) {
      if (box) box.textContent = best || '(leer)';
        const box = document.getElementById('ados-scan-ocr');
     }
        if (box) box.textContent = full;
     return best;
      }
     } catch (e) {}
 
     return full;
   }
   }


   // ------------------------------------------------------------
   // ========================================================================
   // 5) Hints extrahieren (mit Normalisierung & Fuzzy-Fixes)
   // Hinweise aus OCR destillieren
   // ------------------------------------------------------------
   // ========================================================================
   function extractHints (text) {
   function extractHints(text){
     const raw = String(text || '').replace(/\s+/g, ' ').trim();
     const raw = String(text||'').replace(/\s+/g,' ').trim();
 
    // Aggressive Normalisierung
    let norm = raw
      .replace(/[“”„‟]/g,'"')
      .replace(/[’‘´`]/g,"'")
      .replace(/[|]/g,'I')
      .replace(/[\u2010-\u2015]/g,'-')
      .replace(/\s+/g,' ')
      .trim();
 
    // Häufige Fixes
    const fixes = [
      [/T[\s]*A[\s]*S[\s]*T[\s]*E[\s]*F[\s]*U[\s]*L[\s]*8/i, 'The Tasteful 8'],
      [/HEROE?S?\s+OF\s+CHILDHOOD/i, 'Heroes of Childhood'],
      [/IR(E|I)LAND/i, 'Ireland'],
      [/O?LOROSO/i, 'Oloroso'],
      [/PX/i, 'PX'],
      [/1ST\s*FILL/i, '1st Fill'],
      [/\b([12][0-9])\s*(?:Y(?:EARS?)?|YO|JAHRE?)\b/ig, (m,p)=>`${p} Years`],
    ];
    for (const [re, rep] of fixes) norm = norm.replace(re, rep);


    // Tokens, die im Text vorkommen
     const names = [];
     const foundNames = [];
     KNOWN_TOKENS.forEach(t=>{
     KNOWN_TOKENS.forEach(t => {
       const re = new RegExp('\\b'+t.replace(/[.*+?^${}()|[\]\\]/g,'\\$&')+'\\b','i');
       const re = new RegExp('\\b' + t.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + '\\b', 'i');
       if (re.test(raw)) names.push(t);
       if (re.test(norm)) foundNames.push(t);
     });
     });


    // Serien
    if (/The Tasteful 8/i.test(norm) && !foundNames.includes('The Tasteful 8')) foundNames.push('The Tasteful 8');
    if (/Heroes of Childhood/i.test(norm) && !foundNames.includes('Heroes of Childhood')) foundNames.push('Heroes of Childhood');
    if (/Ireland/i.test(norm) && !foundNames.includes('Ireland')) foundNames.push('Ireland');
    // Alter
    const ages = [];
    let m;
     const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi;
     const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi;
     while ((m = ageRe.exec(norm)) !== null) { const n = m[1]; if (!ages.includes(n)) ages.push(n); }
     const ages=[]; let m; while((m=ageRe.exec(raw))!==null){ if(!ages.includes(m[1])) ages.push(m[1]); }


    // Jahrgänge
    const years = [];
     const yearRe = /\b(19|20)\d{2}\b/g;
     const yearRe = /\b(19|20)\d{2}\b/g;
     while ((m = yearRe.exec(norm)) !== null) { if (!years.includes(m[0])) years.push(m[0]); }
     const years=[]; while((m=yearRe.exec(raw))!==null){ if(!years.includes(m[0])) years.push(m[0]); }


    // ein paar markante Wörter
     const wordRe = /\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{3,}\b/g;
     const wordRe = /\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{3,}\b/g;
     const uniq = new Set(); let w; const words = [];
     const uniq=new Set(); const words=[]; let w;
     while ((w = wordRe.exec(norm)) !== null) {
     while((w=wordRe.exec(raw))!==null){ const s=w[0]; if(!uniq.has(s)){ uniq.add(s); words.push(s); if(words.length>=8) break; } }
      const s = w[0];
      if (!uniq.has(s)) { uniq.add(s); words.push(s); if (words.length >= 8) break; }
    }


     return { names: foundNames, ages, years, words, raw: norm };
     return { names, ages, years, words, raw };
   }
   }


   // ------------------------------------------------------------
   // ========================================================================
   // 6) Suche im Wiki (3 Pässe)
   // Suche im Wiki (3 Pässe)
   // ------------------------------------------------------------
   // ========================================================================
   async function searchWikiSmart (hints, limit) {
   async function searchWikiSmart(hints, limit){
     await mw.loader.using('mediawiki.api');
     await mw.loader.using('mediawiki.api');
     const api = new mw.Api();
     const api = new mw.Api();
    const ns0 = 0;
     const MAX = limit || 12, ns0=0;
     const MAX = limit || 12;


     function incatStr () {
     function incats(){
       return ADOS_CATEGORIES.map(c => 'incategory:"' + c + '"').join(' ');
       return ADOS_CATEGORIES.map(c => 'incategory:"'+c+'"').join(' ');
     }
     }


     const pass1 = [];
     const pass1=[];
     if (hints.names.length) {
     if (hints.names.length){
       hints.names.forEach(n => {
       hints.names.forEach(n=>{
         if (hints.ages.length) hints.ages.forEach(a => pass1.push(`intitle:"${n}" intitle:${a} ${incatStr()}`));
         if (hints.ages.length) hints.ages.forEach(a=> pass1.push(`intitle:"${n}" intitle:${a} ${incats()}`));
         if (hints.years.length) hints.years.forEach(y => pass1.push(`intitle:"${n}" "${y}" ${incatStr()}`));
         if (hints.years.length) hints.years.forEach(y=> pass1.push(`intitle:"${n}" "${y}" ${incats()}`));
         pass1.push(`intitle:"${n}" ${incatStr()}`);
         pass1.push(`intitle:"${n}" ${incats()}`);
       });
       });
     }
     }


     const key = []
     const key=[].concat(hints.names.slice(0,2), hints.ages.slice(0,1), hints.years.slice(0,1), hints.words.slice(0,3))
      .concat(hints.names.slice(0, 2), hints.ages.slice(0, 1), hints.years.slice(0, 1), hints.words.slice(0, 3))
                .map(x=>`"${x}"`).join(' ');
      .map(x => `"${x}"`).join(' ');
     const pass2 = key ? [ `${key} ${incats()}` ] : [];
     const pass2 = key ? [ `${key} ${incatStr()}` ] : [];


     const pass3 = [];
     const pass3=[];
     if (hints.names.length) pass3.push(hints.names[0]);
     if (hints.names.length) pass3.push(hints.names[0]);
     if (!pass3.length && hints.words.length) pass3.push(hints.words[0]);
     if (!pass3.length && hints.words.length) pass3.push(hints.words[0]);


     const seen = new Set(); const out = [];
     const seen=new Set(), out=[];


     async function runSr (q) {
     async function runSr(q){
       const r = await api.get({ action: 'query', list: 'search', srsearch: q, srnamespace: ns0, srlimit: MAX, formatversion: 2 });
       const r = await api.get({ action:'query', list:'search', srsearch:q, srnamespace:ns0, srlimit:MAX, formatversion:2 });
       (r.query?.search || []).forEach(it => {
       (r.query?.search || []).forEach(it=>{
         const k = it.title;
         const k=it.title; if (seen.has(k)) return; seen.add(k); out.push(it);
        if (seen.has(k)) return;
        seen.add(k);
        out.push(it);
       });
       });
     }
     }


     for (const q of pass1) { await runSr(q); if (out.length >= MAX) return out.slice(0, MAX); }
     for (const q of pass1){ await runSr(q); if (out.length>=MAX) return out.slice(0,MAX); }
     for (const q of pass2) { await runSr(q); if (out.length >= MAX) return out.slice(0, MAX); }
     for (const q of pass2){ await runSr(q); if (out.length>=MAX) return out.slice(0,MAX); }


     for (const p of pass3) {
     for (const p of pass3){
       const r = await api.get({ action: 'query', list: 'prefixsearch', pssearch: p, psnamespace: ns0, pslimit: MAX });
       const r = await api.get({ action:'query', list:'prefixsearch', pssearch:p, psnamespace:ns0, pslimit:MAX });
       (r.query?.prefixsearch || []).forEach(it => {
       (r.query?.prefixsearch || []).forEach(it=>{
         const title = it.title || it['*'];
         const title = it.title || it['*']; const k=title;
        const k = title;
         if (seen.has(k)) return; seen.add(k); out.push({ title, snippet:'' });
         if (seen.has(k)) return;
        seen.add(k);
        out.push({ title, snippet: '' });
       });
       });
       if (out.length >= MAX) break;
       if (out.length>=MAX) break;
     }
     }


     return out.slice(0, MAX);
     return out.slice(0,MAX);
   }
   }


   // ------------------------------------------------------------
   // ========================================================================
   // 7) Treffer rendern
   // Treffer rendern
   // ------------------------------------------------------------
   // ========================================================================
   function renderResults (items) {
  function esc(s){ return mw.html.escape(String(s||'')); }
     var box = document.getElementById('ados-scan-results');
   function renderResults(items){
    if (!box) return;
     const box = byId('ados-scan-results'); if (!box) return;
     box.innerHTML = '';
     box.innerHTML='';
     if (!items || !items.length) {
     if (!items || !items.length){
       box.innerHTML = '<div class="ados-hit">Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.</div>';
       box.innerHTML='<div class="ados-hit">Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.</div>';
       return;
       return;
     }
     }
     items.slice(0, 12).forEach(function (it) {
     items.slice(0,12).forEach(it=>{
       var title = it.title || '';
       const title = it.title || '';
       var link = mw.util.getUrl(title.replace(/ /g, '_'));
       const link = mw.util.getUrl(title.replace(/ /g,'_'));
       var snip = String(it.snippet || '').replace(/<\/?span[^>]*>/g, '').replace(/&quot;/g, '"');
       const snip = String(it.snippet||'').replace(/<\/?span[^>]*>/g,'').replace(/&quot;/g,'"');
       var div = document.createElement('div');
       const div=document.createElement('div'); div.className='ados-hit';
      div.className = 'ados-hit';
       div.innerHTML = '<b><a href="'+link+'">'+esc(title)+'</a></b>' + (snip?'<div class="meta">'+snip+'</div>':'');
       div.innerHTML =
        '<b><a href="' + link + '">' + esc(title) + '</a></b>' +
        (snip ? '<div class="meta">' + snip + '</div>' : '');
       box.appendChild(div);
       box.appendChild(div);
     });
     });
   }
   }


   // ------------------------------------------------------------
   // ========================================================================
   // 8) Bindings (Buttons, Dropzone, Fallbacks)
   // BINDING
   // ------------------------------------------------------------
   // ========================================================================
   var BOUND = false;
   let BOUND=false;
   function bind () {
   function bind(){
     if (BOUND || !hasUI()) return;
     if (BOUND || !hasUI()) return;
     var runBtn = document.getElementById('ados-scan-run');
     const runBtn = byId('ados-scan-run');
     var fileIn = document.getElementById('ados-scan-file');
     const fileIn = byId('ados-scan-file');
     var bigBtn = document.getElementById('ados-scan-bigbtn');
     const bigBtn = byId('ados-scan-bigbtn');
     var drop  = document.getElementById('ados-scan-drop');
     const drop  = byId('ados-scan-drop');


     if (!runBtn || !fileIn) return;
     if (!runBtn || !fileIn) return;
     if (runBtn.dataset.bound === '1') return;
     if (runBtn.dataset.bound === '1') return;
     runBtn.dataset.bound = '1'; BOUND = true;
     runBtn.dataset.bound='1'; BOUND=true;


     if (bigBtn) bigBtn.addEventListener('click', function () { fileIn.click(); });
     if (bigBtn) bigBtn.addEventListener('click', () => fileIn.click());
     fileIn.addEventListener('change', function () {
     fileIn.addEventListener('change', function(){ if (this.files && this.files[0]) showPreview(this.files[0]); });
      if (this.files && this.files[0]) showPreview(this.files[0]);
    });


     // Drag&Drop
     // Drag&Drop
     if (drop) {
     if (drop){
       ['dragenter','dragover'].forEach(ev =>
       ['dragenter','dragover'].forEach(ev=> drop.addEventListener(ev, e=>{ e.preventDefault(); drop.classList.add('is-drag'); }));
        drop.addEventListener(ev, e => { e.preventDefault(); drop.classList.add('is-over'); }));
       ['dragleave','drop'].forEach(ev=> drop.addEventListener(ev, e=>{ e.preventDefault(); drop.classList.remove('is-drag'); }));
       ['dragleave','drop'].forEach(ev =>
       drop.addEventListener('drop', e=>{
        drop.addEventListener(ev, e => { e.preventDefault(); drop.classList.remove('is-over'); }));
         const f = e.dataTransfer && e.dataTransfer.files && e.dataTransfer.files[0];
       drop.addEventListener('drop', e => {
         if (f){ fileIn.files = e.dataTransfer.files; showPreview(f); }
         const f = e.dataTransfer?.files?.[0];
         if (f) { fileIn.files = e.dataTransfer.files; showPreview(f); }
       });
       });
     }
     }


     runBtn.addEventListener('click', async function (ev) {
    // Klick „Erkennen & suchen“
     runBtn.addEventListener('click', async function(ev){
       ev.preventDefault();
       ev.preventDefault();
       if (!(fileIn.files && fileIn.files[0])) { alert('Bitte ein Foto auswählen oder aufnehmen.'); return; }
       if (!(fileIn.files && fileIn.files[0])){ alert('Bitte ein Foto auswählen oder aufnehmen.'); return; }
       var f = fileIn.files[0];
       const f = fileIn.files[0];
       try {
       try{
         runBtn.disabled = true; runBtn.textContent = 'Erkenne …';
         runBtn.disabled=true; runBtn.textContent='Erkenne …';
         setStatus('Erkenne Label …');
         setStatus('Erkenne Label …');
         var text = await runOCR(f);
         const text = await runOCR(f);
        if (window.ADOS_SCAN_DEBUG) {
          const dbg = document.getElementById('ados-scan-ocr');
          if (dbg) dbg.textContent = text;
        }
         setStatus('Suche im Wiki …');
         setStatus('Suche im Wiki …');
         var hints = extractHints(text);
         const hints = extractHints(text);
         var hits = await searchWikiSmart(hints, 12);
         const hits = await searchWikiSmart(hints, 12);
         renderResults(hits);
         renderResults(hits);
         setStatus('Fertig.');
         setStatus('Fertig.');
       } catch (e) {
       } catch(e){
         console.error('[LabelScan]', e);
         console.error('[LabelScan]', e);
         setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
         setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
       } finally {
       } finally {
         runBtn.disabled = false; runBtn.textContent = '🔍 Erkennen & suchen';
         runBtn.disabled=false; runBtn.textContent='🔍 Erkennen & suchen';
       }
       }
     });
     });


     // Sicherheit gegen Overlays
     // Sicherheit gegen Overlays
     var wrap = document.getElementById('ados-labelscan');
     const wrap = byId('ados-labelscan'); if (wrap) wrap.style.position='relative';
    if (wrap) wrap.style.position = 'relative';
     runBtn.style.position='relative'; runBtn.style.zIndex='9999'; runBtn.style.pointerEvents='auto';
     runBtn.style.position = 'relative';
    runBtn.style.zIndex = '9999';
    runBtn.style.pointerEvents = 'auto';
   }
   }


   // initial & Fallback-Bindings
   // Erstbindung + Fallbacks + Observer
   if (document.readyState === 'loading') {
   if (document.readyState === 'loading'){ document.addEventListener('DOMContentLoaded', bind); } else { bind(); }
    document.addEventListener('DOMContentLoaded', bind);
   setTimeout(bind, 250); setTimeout(bind, 1000);
  } else {
   const mo = new MutationObserver(() => { if (!BOUND) bind(); });
    bind();
   mo.observe(document.documentElement || document.body, { childList:true, subtree:true });
  }
 
   setTimeout(bind, 250);
  setTimeout(bind, 1000);
   var mo = new MutationObserver(function () { if (!BOUND) bind(); });
   mo.observe(document.documentElement || document.body, { childList: true, subtree: true });
})();
})();

Version vom 6. November 2025, 00:57 Uhr

/* global mw, Tesseract */
(function () {
  'use strict';

  // ========================================================================
  //  KONFIG
  // ========================================================================
  // Wiki categories in which the label search looks for matches.
  // The entries are category names as plain strings (without a namespace
  // prefix); they are runtime data and must match the wiki's category titles
  // exactly — presumably including the German wording and the en dash; verify
  // against the wiki before editing.
  const ADOS_CATEGORIES = [
    'Alle A Dream of Scotland Abfüllungen',
    'Alle A Dream of Ireland Abfüllungen',
    'Alle A Dream of... – Der Rest der Welt Abfüllungen',
    'Friendly Mr. Z Whiskytainment Abfüllungen',
    'Die Whisky Elfen Abfüllungen',
    'The Fine Art of Whisky Abfüllungen',
    'Alle Rumbastic Abfüllungen'
  ];

  // Common distillery / series tokens (used as "hooks" into the search).
  // The list mixes distillery names, whisky regions and bottling-series names;
  // multi-word entries are kept as a single token.
  const KNOWN_TOKENS = [
    'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore',
    'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn','Lagavulin',
    'Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet','Inchgower',
    'Islay','Speyside','Highland','Lowland','Campbeltown','Ireland',
    'A Dream of Scotland','A Dream of Ireland','The Fine Art of Whisky',
    'The Tasteful 8','Friendly Mr. Z','Die Whisky Elfen','Rumbastic'
  ];

  // Debug: show the raw OCR text at the bottom of the page when true.
  // A truthy value already present on `window` (e.g. set from the browser
  // console before this script runs) is kept; otherwise the flag becomes false.
  window.ADOS_SCAN_DEBUG = window.ADOS_SCAN_DEBUG || false;

  // ========================================================================
  //  DOM-Helfer
  // ========================================================================
  /**
   * Shorthand for `document.getElementById`.
   * @param {string} id - Element id to look up.
   * @returns {HTMLElement|null} The matching element, or null when none exists.
   */
  function byId(id) {
    const node = document.getElementById(id);
    return node;
  }
  /**
   * Reports whether the scanner UI is present on the current page.
   * Both the run button (#ados-scan-run) and the file input (#ados-scan-file)
   * must exist.
   * @returns {boolean} true only when both elements are found.
   */
  function hasUI() {
    const requiredIds = ['ados-scan-run', 'ados-scan-file'];
    return requiredIds.every((id) => Boolean(byId(id)));
  }
  // Writes a status message into #ados-scan-status (falsy input clears it).
  // Does nothing when the status element is not on the page.
  function setStatus(t){
    const statusEl = byId('ados-scan-status');
    if (!statusEl) return;
    statusEl.textContent = t || '';
  }
  // Updates the #ados-scan-progress bar.
  //   p == null  -> hide the bar and reset it to 0
  //   otherwise  -> show the bar with p clamped into [0, 1]
  // Does nothing when the bar element is not on the page.
  function setProgress(p){
    const bar = byId('ados-scan-progress');
    if (!bar) return;

    const idle = (p === null || p === undefined);
    bar.hidden = idle;
    bar.value = idle ? 0 : Math.max(0, Math.min(1, p));
  }
  // Shows a thumbnail of the chosen file inside #ados-scan-preview.
  //
  // file: the File/Blob picked or dropped by the user.
  //
  // The blob URL is only created when the preview container exists and is
  // revoked as soon as the <img> has loaded (or failed), so repeated picks do
  // not accumulate blob URLs for the lifetime of the page.
  function showPreview(file){
    const prev = byId('ados-scan-preview');
    if (!prev) return;

    const url = URL.createObjectURL(file);
    const img = document.createElement('img');
    img.alt = 'Vorschau';
    // The decoded image stays visible after the URL is revoked.
    const release = () => URL.revokeObjectURL(url);
    img.onload = release;
    img.onerror = release;
    img.src = url;

    prev.textContent = '';
    prev.appendChild(img);
    prev.setAttribute('aria-hidden','false');
  }

  // ========================================================================
  //  TESSERACT sauber als WORKER laden (deu+eng)
  // ========================================================================
  // Memoised promise for the single shared Tesseract worker. It is reset to
  // null when initialisation fails so that a later call can retry.
  let _ocrWorkerPromise = null;

  // Lazily loads tesseract.js (jsDelivr first, unpkg as fallback) and creates
  // one worker with the German + English models. All callers share the same
  // promise, so the library and worker are set up at most once per successful
  // initialisation.
  //
  // Returns: Promise resolving to the ready-to-use Tesseract worker.
  // Rejects: when neither CDN delivers the library or the worker cannot be
  //          created. The cached promise is cleared in that case, so the
  //          user's next attempt starts from scratch instead of replaying the
  //          old failure.
  function getOcrWorker(){
    if (_ocrWorkerPromise) return _ocrWorkerPromise;

    // Injects a <script> tag; settles when it has loaded or failed.
    const injectScript = src => new Promise((resolve, reject) => {
      const tag = document.createElement('script');
      tag.src = src;
      tag.async = true;
      tag.onload = resolve;
      tag.onerror = () => reject(new Error('Failed to load script: ' + src));
      document.head.appendChild(tag);
    });

    const pending = (async () => {
      if (!window.Tesseract){
        try {
          await injectScript('https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js');
        } catch (primaryErr){
          console.warn('[LabelScan] primary CDN failed, trying fallback', primaryErr);
          await injectScript('https://unpkg.com/tesseract.js@5/dist/tesseract.min.js');
        }
      }
      const { createWorker } = Tesseract;
      // tesseract.js v5 signature is createWorker(langs, oem, options): the
      // languages are loaded and initialised by createWorker itself, so the
      // separate loadLanguage()/initialize() calls of older major versions
      // are no longer needed. OEM 1 = LSTM only (the v5 default).
      const worker = await createWorker('deu+eng', 1, {
        workerPath: 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/worker.min.js',
        corePath:   'https://cdn.jsdelivr.net/npm/tesseract.js-core@5/tesseract-core.wasm.js',
        // NOTE(review): confirm this path actually serves deu/eng traineddata
        // for v5; dropping langPath falls back to the library default.
        langPath:   'https://tessdata.projectnaptha.com/5',
        logger: m => {
          // Map recognition progress (0..1) onto the 5%..95% band of the bar.
          if (m && m.status === 'recognizing text' && typeof m.progress === 'number'){
            setProgress(0.05 + m.progress * 0.9);
          }
        }
      });
      await worker.setParameters({ tessedit_pageseg_mode: '11', user_defined_dpi: '300' });
      return worker;
    })();

    _ocrWorkerPromise = pending;
    // Forget a failed initialisation; callers still receive the rejection
    // through the promise returned below.
    pending.catch(() => {
      if (_ocrWorkerPromise === pending) _ocrWorkerPromise = null;
    });
    return pending;
  }

  // ========================================================================
  //  Bild-Vorverarbeitung: Skalierung + adaptives Thresholding
  // ========================================================================
  // Draws `img` onto a fresh canvas, shrinking it proportionally so that its
  // longer side is at most `maxSide` pixels. Images that already fit are
  // copied at their original size (never upscaled).
  //
  // img:     any drawable exposing width/height (e.g. a loaded Image).
  // maxSide: upper bound for the longer edge, default 2000.
  // Returns: the new canvas element.
  function scaleToCanvas(img, maxSide = 2000){
    const longestEdge = Math.max(img.width, img.height);
    const factor = Math.min(1, maxSide / longestEdge);
    const targetW = Math.round(img.width * factor);
    const targetH = Math.round(img.height * factor);

    const canvas = document.createElement('canvas');
    canvas.width = targetW;
    canvas.height = targetH;

    const ctx = canvas.getContext('2d');
    ctx.imageSmoothingEnabled = true;
    ctx.drawImage(img, 0, 0, targetW, targetH);
    return canvas;
  }

  // Binarises a canvas with a local-mean threshold and returns the result as
  // a new canvas of the same size.
  //
  // Steps:
  //   1. convert RGBA to 8-bit luma (Rec. 709 weights, truncated),
  //   2. build a summed-area table so any window sum is four lookups,
  //   3. for each pixel, compare its luma with the mean of the surrounding
  //      window (clamped at the image borders) minus a fixed bias; darker
  //      pixels become black, everything else white, alpha fully opaque.
  function adaptiveThreshold(src){
    const width = src.width;
    const height = src.height;

    const result = document.createElement('canvas');
    result.width = width;
    result.height = height;
    const readCtx = src.getContext('2d');
    const writeCtx = result.getContext('2d');
    const rgba = readCtx.getImageData(0, 0, width, height).data;

    // 1) Luma plane, one byte per pixel.
    const luma = new Uint8ClampedArray(width * height);
    let pixelNo = 0;
    for (let off = 0; off < rgba.length; off += 4){
      luma[pixelNo] = (0.2126*rgba[off] + 0.7152*rgba[off+1] + 0.0722*rgba[off+2]) | 0;
      pixelNo += 1;
    }

    // 2) Summed-area table with an extra zero row/column at index 0.
    const stride = width + 1;
    const integral = new Uint32Array(stride * (height + 1));
    for (let row = 0; row < height; row++){
      let running = 0;
      for (let col = 0; col < width; col++){
        running += luma[row * width + col];
        integral[(row + 1) * stride + (col + 1)] = integral[row * stride + (col + 1)] + running;
      }
    }

    // 3) Threshold against the local mean.
    const radius = Math.max(15, Math.round(Math.min(width, height) / 24));
    const BIAS = 7;
    const target = writeCtx.createImageData(width, height);
    const px = target.data;

    for (let row = 0; row < height; row++){
      const top = Math.max(0, row - radius);
      const bottomEx = Math.min(height - 1, row + radius) + 1;   // exclusive
      for (let col = 0; col < width; col++){
        const left = Math.max(0, col - radius);
        const rightEx = Math.min(width - 1, col + radius) + 1;   // exclusive

        const windowSum = integral[bottomEx * stride + rightEx]
                        + integral[top * stride + left]
                        - integral[bottomEx * stride + left]
                        - integral[top * stride + rightEx];
        const area = (rightEx - left) * (bottomEx - top);
        const localMean = windowSum / area;

        const idx = row * width + col;
        const shade = luma[idx] < (localMean - BIAS) ? 0 : 255;
        const o = idx * 4;
        px[o] = shade;
        px[o + 1] = shade;
        px[o + 2] = shade;
        px[o + 3] = 255;
      }
    }

    writeCtx.putImageData(target, 0, 0);
    return result;
  }

  // ========================================================================
  //  OCR: mehrere Strategien (PSM 11 → 6 → 4, binarisiert & farbig)
  // ========================================================================
  // Runs OCR on an image file and returns the longest text any strategy found.
  //
  // The image is downscaled (longer side <= 2000 px) and additionally
  // binarised; both variants are tried with page-segmentation modes 11, 6 and
  // 4 in that order. The loop stops early once more than 40 characters have
  // been recognised.
  //
  // file:    File/Blob containing the photo.
  // Returns: Promise<string> with the best (longest, trimmed) OCR text, or ''.
  // Rejects: when the image cannot be decoded or the OCR worker fails.
  //
  // The temporary blob URL is always revoked and the progress bar is always
  // hidden again, including when decoding or recognition throws.
  async function runOCR(file){
    setProgress(0.02);
    const srcUrl = URL.createObjectURL(file);
    let best = '';

    try {
      const img = await new Promise((resolve, reject) => {
        const probe = new Image();
        probe.onload = () => resolve(probe);
        probe.onerror = () => reject(new Error('Image could not be loaded for OCR'));
        probe.src = srcUrl;
      });

      const base = scaleToCanvas(img, 2000);
      const bin  = adaptiveThreshold(base);

      const worker = await getOcrWorker();
      const candidates = [
        { canvas: bin,  psm: '11' },
        { canvas: base, psm: '11' },
        { canvas: bin,  psm: '6'  },
        { canvas: base, psm: '6'  },
        { canvas: bin,  psm: '4'  },
        { canvas: base, psm: '4'  }
      ];

      for (let i = 0; i < candidates.length; i++){
        const c = candidates[i];
        await worker.setParameters({ tessedit_pageseg_mode: c.psm });
        const { data } = await worker.recognize(c.canvas);
        const txt = (data && data.text ? data.text : '').trim();
        if (txt.length > best.length) best = txt;
        if (best.length > 40) break;   // good enough, skip remaining strategies
        setProgress(0.96 + i * 0.008);
      }
    } finally {
      // The canvases hold their own pixel copy, so the URL is no longer needed.
      URL.revokeObjectURL(srcUrl);
      setProgress(null);
    }

    if (window.ADOS_SCAN_DEBUG){
      const box = byId('ados-scan-ocr');
      if (box) box.textContent = best || '(leer)';
    }
    return best;
  }

  // ========================================================================
  //  Hinweise aus OCR destillieren
  // ========================================================================
  // Distils search hints from raw OCR text.
  //
  // text: OCR output (any value; null/undefined are treated as '').
  // Returns an object with:
  //   names - entries of KNOWN_TOKENS found in the text (case-insensitive,
  //           on word boundaries), in KNOWN_TOKENS order
  //   ages  - distinct 1-2 digit age statements ("12 Years", "10yo", "8 Jahre")
  //   years - distinct four-digit years starting with 19 or 20
  //   words - up to 8 distinct capitalised words of at least 4 letters
  //   raw   - the text with whitespace runs collapsed to single spaces
  function extractHints(text){
    const raw = String(text||'').replace(/\s+/g,' ').trim();

    const names = KNOWN_TOKENS.filter(token => {
      const escaped = token.replace(/[.*+?^${}()|[\]\\]/g,'\\$&');
      return new RegExp('\\b' + escaped + '\\b', 'i').test(raw);
    });

    const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi;
    const ages = [...new Set(Array.from(raw.matchAll(ageRe), m => m[1]))];

    const yearRe = /\b(19|20)\d{2}\b/g;
    const years = [...new Set(Array.from(raw.matchAll(yearRe), m => m[0]))];

    // `\b` only knows ASCII word characters, so it never matches in front of
    // "Über" or after "Spaß" and such words used to be dropped. Word edges
    // are therefore spelled out with the same letter set the word itself
    // uses: group 1 is the (non-letter) character before the word, group 2
    // the word, which must start with a capital and end in a letter. A
    // leading capture group is used instead of a lookbehind so the pattern
    // also parses in engines without lookbehind support.
    const wordRe = /(^|[^A-Za-z0-9_ÄÖÜäöüß])([A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{2,}[A-Za-zÄÖÜäöüß])(?![A-Za-z0-9_ÄÖÜäöüß])/g;
    const uniqueWords = new Set();
    for (const m of raw.matchAll(wordRe)){
      uniqueWords.add(m[2]);
      if (uniqueWords.size >= 8) break;
    }
    const words = [...uniqueWords];

    return { names, ages, years, words, raw };
  }

  // ========================================================================
  //  Suche im Wiki (3 Pässe)
  // ========================================================================
  // Searches the wiki for pages matching the OCR hints, in three passes of
  // decreasing precision; results are de-duplicated by title and capped.
  //
  //   Pass 1: title search per known name, optionally narrowed by age / year.
  //   Pass 2: one full-text query combining the strongest hints.
  //   Pass 3: prefix search on the first name (or first capitalised word).
  //
  // hints: object from extractHints() ({ names, ages, years, words }).
  // limit: maximum number of hits (falsy -> 12).
  // Returns: Promise<Array<{ title, snippet, ... }>> with at most `limit`
  //          entries; prefix-search hits carry an empty snippet.
  // Rejects: when loading mediawiki.api or an API request fails.
  async function searchWikiSmart(hints, limit){
    await mw.loader.using('mediawiki.api');
    const api = new mw.Api();
    const MAX = limit || 12, ns0=0;

    // One filter listing all categories separated by "|" means "in ANY of
    // them". Separate `incategory:` keywords are combined with AND, which no
    // page satisfies because a bottling belongs to a single brand category,
    // so passes 1 and 2 would come back empty.
    // NOTE(review): relies on CirrusSearch keyword syntax - confirm the wiki
    // runs CirrusSearch.
    const catFilter = ADOS_CATEGORIES.length
      ? 'incategory:"' + ADOS_CATEGORIES.join('|') + '"'
      : '';
    const withCats = q => (catFilter ? q + ' ' + catFilter : q);

    const pass1=[];
    hints.names.forEach(n=>{
      hints.ages.forEach(a=> pass1.push(withCats(`intitle:"${n}" intitle:${a}`)));
      hints.years.forEach(y=> pass1.push(withCats(`intitle:"${n}" "${y}"`)));
      pass1.push(withCats(`intitle:"${n}"`));
    });

    const key = [
      ...hints.names.slice(0,2),
      ...hints.ages.slice(0,1),
      ...hints.years.slice(0,1),
      ...hints.words.slice(0,3)
    ].map(x=>`"${x}"`).join(' ');
    const pass2 = key ? [ withCats(key) ] : [];

    const pass3=[];
    if (hints.names.length) pass3.push(hints.names[0]);
    if (!pass3.length && hints.words.length) pass3.push(hints.words[0]);

    const seen=new Set(), out=[];

    // Runs one full-text query and appends hits not seen before.
    async function runSr(q){
      const r = await api.get({ action:'query', list:'search', srsearch:q, srnamespace:ns0, srlimit:MAX, formatversion:2 });
      (r.query?.search || []).forEach(it=>{
        const k=it.title; if (seen.has(k)) return; seen.add(k); out.push(it);
      });
    }

    // Queries run one after another on purpose: more precise queries come
    // first and we stop as soon as enough hits have been collected.
    for (const q of [...pass1, ...pass2]){
      await runSr(q);
      if (out.length>=MAX) return out.slice(0,MAX);
    }

    for (const p of pass3){
      const r = await api.get({ action:'query', list:'prefixsearch', pssearch:p, psnamespace:ns0, pslimit:MAX });
      (r.query?.prefixsearch || []).forEach(it=>{
        // Without formatversion=2 the title may arrive under the '*' key.
        const title = it.title || it['*'];
        if (seen.has(title)) return;
        seen.add(title); out.push({ title, snippet:'' });
      });
      if (out.length>=MAX) break;
    }

    return out.slice(0,MAX);
  }

  // ========================================================================
  //  Treffer rendern
  // ========================================================================
  // HTML-escapes a value via mw.html.escape; falsy input is treated as ''.
  function esc(s){
    const text = String(s || '');
    return mw.html.escape(text);
  }
  // Renders search hits into #ados-scan-results (at most 12 entries).
  //
  // items: array of { title, snippet? } as returned by searchWikiSmart().
  // An empty or missing list shows a "no clear hits" message instead. Does
  // nothing when the results container is not on the page.
  function renderResults(items){
    const box = byId('ados-scan-results');
    if (!box) return;

    box.innerHTML='';
    if (!items || !items.length){
      box.innerHTML='<div class="ados-hit">Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.</div>';
      return;
    }

    for (const hit of items.slice(0,12)){
      const title = hit.title || '';
      const href = mw.util.getUrl(title.replace(/ /g,'_'));
      // The API snippet is HTML: drop the highlight <span> wrappers and
      // turn &quot; back into plain quotes.
      const snippet = String(hit.snippet||'')
        .replace(/<\/?span[^>]*>/g,'')
        .replace(/&quot;/g,'"');

      const row = document.createElement('div');
      row.className = 'ados-hit';
      const heading = `<b><a href="${href}">${esc(title)}</a></b>`;
      const meta = snippet ? `<div class="meta">${snippet}</div>` : '';
      row.innerHTML = heading + meta;
      box.appendChild(row);
    }
  }

  // ========================================================================
  //  BINDING
  // ========================================================================
  // Set to true once the UI handlers have been attached.
  let BOUND=false;

  // Attaches all event handlers of the label-scan UI exactly once.
  //
  // Safe to call repeatedly: it returns immediately when already bound, when
  // the run button or file input is not in the DOM yet, or when the run
  // button is already marked with data-bound="1".
  function bind(){
    if (BOUND) return;
    if (!hasUI()) return;

    const runBtn = byId('ados-scan-run');
    const fileIn = byId('ados-scan-file');
    const bigBtn = byId('ados-scan-bigbtn');
    const drop   = byId('ados-scan-drop');

    if (!runBtn || !fileIn) return;
    if (runBtn.dataset.bound === '1') return;
    runBtn.dataset.bound = '1';
    BOUND = true;

    // The big button simply opens the native file picker.
    if (bigBtn){
      bigBtn.addEventListener('click', function openPicker(){ fileIn.click(); });
    }
    fileIn.addEventListener('change', () => {
      const picked = fileIn.files && fileIn.files[0];
      if (picked) showPreview(picked);
    });

    // Drag & drop: highlight while dragging, then hand the files to the input.
    if (drop){
      const toggleHighlight = active => e => {
        e.preventDefault();
        if (active) drop.classList.add('is-drag');
        else drop.classList.remove('is-drag');
      };
      for (const type of ['dragenter','dragover']) drop.addEventListener(type, toggleHighlight(true));
      for (const type of ['dragleave','drop']) drop.addEventListener(type, toggleHighlight(false));
      drop.addEventListener('drop', e => {
        const transfer = e.dataTransfer;
        const first = transfer && transfer.files && transfer.files[0];
        if (!first) return;
        fileIn.files = transfer.files;
        showPreview(first);
      });
    }

    // "Recognise & search" click: OCR -> hints -> wiki search -> render.
    async function onRunClick(ev){
      ev.preventDefault();
      const chosen = fileIn.files && fileIn.files[0];
      if (!chosen){
        alert('Bitte ein Foto auswählen oder aufnehmen.');
        return;
      }
      try {
        runBtn.disabled = true;
        runBtn.textContent = 'Erkenne …';
        setStatus('Erkenne Label …');
        const text = await runOCR(chosen);
        setStatus('Suche im Wiki …');
        const hints = extractHints(text);
        const hits = await searchWikiSmart(hints, 12);
        renderResults(hits);
        setStatus('Fertig.');
      } catch (e){
        console.error('[LabelScan]', e);
        setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
      } finally {
        runBtn.disabled = false;
        runBtn.textContent = '🔍 Erkennen & suchen';
      }
    }
    runBtn.addEventListener('click', onRunClick);

    // Keep the run button clickable even if another element overlays it.
    const wrap = byId('ados-labelscan');
    if (wrap) wrap.style.position = 'relative';
    runBtn.style.position = 'relative';
    runBtn.style.zIndex = '9999';
    runBtn.style.pointerEvents = 'auto';
  }

  // Initial bind + timed retries + observer for UI that is inserted later.
  if (document.readyState === 'loading'){ document.addEventListener('DOMContentLoaded', bind); } else { bind(); }
  setTimeout(bind, 250); setTimeout(bind, 1000);
  // Watch the document only until the UI has been bound. Without the
  // disconnect the callback would keep firing on every DOM mutation for the
  // rest of the page's lifetime.
  const mo = new MutationObserver(() => {
    bind();
    if (BOUND) mo.disconnect();
  });
  if (!BOUND){
    mo.observe(document.documentElement || document.body, { childList:true, subtree:true });
  }

})();