Zum Inhalt springen

MediaWiki:Gadget-LabelScan.js

Aus ADOS Wiki

Hinweis: Leere nach dem Veröffentlichen den Browser-Cache, um die Änderungen sehen zu können.

  • Firefox/Safari: Umschalttaste drücken und gleichzeitig Aktualisieren anklicken oder entweder Strg+F5 oder Strg+R (⌘+R auf dem Mac) drücken
  • Google Chrome: Umschalttaste+Strg+R (⌘+Umschalttaste+R auf dem Mac) drücken
  • Edge: Strg+F5 drücken oder Strg drücken und gleichzeitig Aktualisieren anklicken
/* global mw */
(() => {
  'use strict';

  // Gadget settings. A page may override the index title by defining
  // window.LabelScanConfig.indexTitle before this script runs.
  const CFG = {
    // Wiki page that holds the JSON index.
    indexTitle: window.LabelScanConfig?.indexTitle || 'MediaWiki:Gadget-LabelScan-index.json',
    // Number of ranked hits kept by rankByCosine().
    topK: 8,
    // Module URL that is dynamically imported to obtain the pipeline factory.
    transformersURL: 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.15.0',
    // Model identifier handed to the pipeline factory.
    modelId: 'Xenova/clip-vit-base-patch32',
    // Longest edge (in pixels) an image is scaled down to before analysis.
    maxSide: 1024,
    // Enables log()/warn() console output.
    debug: true,

    // For debugging: load the model even if the index has no embeddings.
    forceModelWarmup: true
  };

  /** Debug-only console.log, prefixed with the gadget tag. */
  function log(...args) {
    if (!CFG.debug) return;
    console.log('[LabelScan]', ...args);
  }

  /** Debug-only console.warn, prefixed with the gadget tag. */
  function warn(...args) {
    if (!CFG.debug) return;
    console.warn('[LabelScan]', ...args);
  }

  /** console.error with the gadget tag; emitted regardless of CFG.debug. */
  function err(...args) {
    console.error('[LabelScan]', ...args);
  }

  /** Shorthand for document.getElementById(id). */
  function qs(id) {
    const node = document.getElementById(id);
    return node;
  }
  /**
   * Write the status text into #ados-scan-status; a falsy value clears it.
   * Does nothing when the element is not on the page.
   */
  function setStatus(t) {
    const statusNode = qs('ados-scan-status');
    if (!statusNode) return;
    statusNode.textContent = t || '';
  }
  /**
   * Update the #ados-scan-progress element.
   * null/undefined hides it and resets the value to 0; any other value shows
   * it with the value clamped to the range 0..1.
   */
  function setProgress(p) {
    const meter = qs('ados-scan-progress');
    if (!meter) return;

    if (p == null) {
      meter.hidden = true;
      meter.value = 0;
      return;
    }

    meter.hidden = false;
    meter.value = Math.max(0, Math.min(1, p));
  }
  /**
   * Show a preview of the selected image inside #ados-scan-preview.
   *
   * The container's previous content is replaced by a single <img> that points
   * at a temporary object URL for `file`. Nothing happens (and no object URL is
   * allocated) when the container is not on the page.
   *
   * @param {Blob} file - image file or blob accepted by URL.createObjectURL.
   */
  function showPreview(file){
    const prev=qs('ados-scan-preview');
    // Bail out before allocating: an object URL created here could never be released.
    if(!prev) return;

    const url=URL.createObjectURL(file);
    const img=document.createElement('img');
    img.alt='Vorschau';
    img.style.cssText='max-width:100%;height:auto;border-radius:8px;';

    // The object URL is only needed until the image has loaded (or failed to);
    // releasing it afterwards avoids keeping one blob reference per preview.
    const release=()=>{ URL.revokeObjectURL(url); };
    img.onload=release;
    img.onerror=release;
    img.src=url;

    // Built via DOM instead of an HTML string so the URL is never parsed as markup.
    prev.textContent='';
    prev.appendChild(img);
    prev.setAttribute('aria-hidden','false');
  }

  // Parsed index entries and, at the same positions, their decoded embeddings
  // (null where an entry has no usable embedding).
  let INDEX=[];
  let INDEX_EMB=[];

  /**
   * Fetch and decode the label index once; later calls return the cached array.
   *
   * The index is read as raw JSON from the wiki page named by CFG.indexTitle.
   * Each entry's base64 `embed` string (if present) is decoded into a
   * Float32Array. Entries that are null, lack `embed`, or fail to decode get a
   * null embedding instead of aborting the load.
   *
   * Side effects: updates INDEX / INDEX_EMB and window._LabelScan.indexInfo;
   * with `ui` enabled also updates the status text and progress bar.
   *
   * @param {{ui?: boolean}} [options] - `ui` (default true) toggles status/progress updates.
   * @returns {Promise<Array>} the parsed index array.
   * @throws {Error} when the HTTP response is not OK or the JSON is not an array.
   */
  async function loadIndex({ ui=true } = {}){
    if(INDEX.length) return INDEX;
    if(ui){ setStatus('Index laden …'); setProgress(0.03); }

    const rawURL = mw.util.getUrl(CFG.indexTitle,{action:'raw',ctype:'application/json'});
    const res = await fetch(rawURL,{cache:'reload'});
    if(!res.ok) throw new Error('Index nicht ladbar: '+res.status);
    const json = await res.json();
    if(!Array.isArray(json)) throw new Error('Index ist keine Array-JSON');

    // Decode into a local first: if anything below threw after INDEX was already
    // assigned, the early return above would serve an index without embeddings.
    const embeddings = json.map((it,i)=>{
      // A null / non-object entry simply has no embedding.
      const embed = it ? it.embed : undefined;
      if(typeof embed==='string' && embed.length){
        try{ return base64ToFloat32(embed); }
        catch(e){ warn('Embed-Decode',i,it.title,e); return null; }
      }
      return null;
    });

    // Publish both arrays together so they always describe the same entries.
    INDEX = json;
    INDEX_EMB = embeddings;

    const withEmb = INDEX_EMB.filter(v=>v && v.length).length;
    log('Index geladen:', INDEX.length, 'Einträge');
    log('Embeddings vorhanden:', withEmb, '/', INDEX.length);

    // Expose debug info on the window object.
    window._LabelScan = window._LabelScan || {};
    window._LabelScan.indexInfo = { total: INDEX.length, withEmbeddings: withEmb };

    if(ui) setProgress(0.06);
    return INDEX;
  }

  /**
   * Decode a base64 string into a Float32Array that views the decoded bytes.
   * The Float32Array constructor throws a RangeError when the decoded byte
   * count is not a multiple of 4.
   */
  function base64ToFloat32(b64) {
    const bytes = Uint8Array.from(atob(b64), (ch) => ch.charCodeAt(0));
    return new Float32Array(bytes.buffer);
  }

  // ------------------------- CLIP / Transformers -------------------------
  // In-flight or completed load of the transformers module and pipeline.
  // null until first requested, and again after a failed attempt.
  let _clipModulePromise=null;

  /**
   * Load the transformers module and create the feature-extraction pipeline,
   * sharing one load between all callers.
   *
   * A failed load is not cached: the next call starts a fresh attempt, so a
   * transient network error does not disable scanning until the page reloads.
   *
   * @returns {Promise<{mod: object, pipe: Function}>} the imported module and the pipeline.
   * @throws rethrows whatever the import or pipeline creation failed with.
   */
  async function ensureClipExtractor(){
    if(_clipModulePromise) return _clipModulePromise;

    setStatus('Modell laden …');
    setProgress(0.08);

    const attempt = (async()=>{
      try{
        const mod = await import(/* webpackIgnore: true */ CFG.transformersURL);

        // NOTE(review): confirm these names against the library's `env` docs —
        // in particular whether `remoteModels` is a recognised setting and
        // whether a null `localModelPath` is supported.
        mod.env.localModelPath=null;
        mod.env.remoteModels=true;
        mod.env.allowRemoteModels=true;
        mod.env.useBrowserCache=true;

        const pipe = await mod.pipeline(
          'feature-extraction',
          CFG.modelId,
          { quantized:true }
        );

        log('CLIP ready:', pipe.model?.constructor?.name || 'unknown');
        return { mod, pipe };
      } catch(e){
        err('CLIP load failed:', e);
        throw e;
      }
    })();

    _clipModulePromise = attempt;
    // Forget a rejected attempt so a later call can retry. Registered here
    // (not inside the async body) so it always runs after the assignment above.
    attempt.catch(()=>{ if(_clipModulePromise===attempt) _clipModulePromise=null; });

    return attempt;
  }

  /**
   * Produce a normalized embedding vector for an image file.
   *
   * Steps: make sure the pipeline is loaded, decode the file into an image,
   * draw it onto a canvas whose longest edge is at most CFG.maxSide, run the
   * pipeline on that canvas, and normalize the resulting vector. Status text
   * and progress are updated along the way.
   *
   * @param {Blob} file - image file accepted by URL.createObjectURL.
   * @returns {Promise<Float32Array>} the result of normalize() on the pipeline output.
   * @throws {Error} 'Embedding-Format unerwartet' when the pipeline output's
   *   `data` is neither a Float32Array nor an Array.
   */
  async function embedFileImage(file){
    // Decode the blob into an Image; the temporary object URL is released
    // on success and on failure alike.
    const decode = (blob) => new Promise((resolve, reject) => {
      const objectUrl = URL.createObjectURL(blob);
      const picture = new Image();
      picture.crossOrigin = 'anonymous';
      picture.onload = () => {
        URL.revokeObjectURL(objectUrl);
        resolve(picture);
      };
      picture.onerror = (event) => {
        URL.revokeObjectURL(objectUrl);
        reject(event);
      };
      picture.src = objectUrl;
    });

    // Draw the image onto a fresh canvas, shrinking (never enlarging) so the
    // longest edge does not exceed `limit`.
    const downscale = (picture, limit) => {
      const surface = document.createElement('canvas');
      const { width: srcW, height: srcH } = picture;
      const factor = Math.min(1, limit / Math.max(srcW, srcH));
      const targetW = Math.round(srcW * factor);
      const targetH = Math.round(srcH * factor);
      surface.width = targetW;
      surface.height = targetH;
      const ctx = surface.getContext('2d');
      ctx.imageSmoothingEnabled = true;
      ctx.drawImage(picture, 0, 0, targetW, targetH);
      return surface;
    };

    const { pipe } = await ensureClipExtractor();
    setStatus('Bild vorbereiten …');
    setProgress(0.20);

    const picture = await decode(file);
    const canvas = downscale(picture, CFG.maxSide);

    setStatus('Bild analysieren …');
    setProgress(0.38);

    const output = await pipe(canvas);
    const data = output && output.data;

    const isTyped = data instanceof Float32Array;
    if (!isTyped && !Array.isArray(data)) {
      throw new Error('Embedding-Format unerwartet');
    }

    let vector = data;
    if (!isTyped) {
      // Plain arrays: nested rows are mean-pooled, flat ones converted directly.
      vector = Array.isArray(data[0]) ? meanPool2D(data) : new Float32Array(data);
    }
    return normalize(vector);
  }

  /**
   * Column-wise mean of an array of rows, returned as a Float32Array.
   * The output width is the length of the first row; missing, zero or NaN
   * cells count as 0. An empty input yields an empty Float32Array.
   */
  function meanPool2D(arr2d) {
    const rowCount = arr2d.length;
    const width = rowCount === 0 ? 0 : arr2d[0].length;
    const totals = new Float32Array(width);

    for (let r = 0; r < rowCount; r += 1) {
      const cells = arr2d[r];
      for (let c = 0; c < width; c += 1) {
        totals[c] += cells[c] || 0;
      }
    }

    const divisor = rowCount || 1;
    return totals.map((total) => total / divisor);
  }

  /**
   * Return a new Float32Array holding `v` scaled to unit Euclidean length.
   * A vector whose norm is 0 (or NaN) is divided by 1, i.e. copied unchanged,
   * so the function never divides by zero.
   *
   * @param {ArrayLike<number>} v - input vector; not modified.
   * @returns {Float32Array} scaled copy of `v`.
   */
  function normalize(v){
    let sumSq=0;
    for(let i=0;i<v.length;i++) sumSq+=v[i]*v[i];
    // `|| 1` covers both a zero norm and a NaN norm.
    const norm=Math.sqrt(sumSq)||1;
    const o=new Float32Array(v.length);
    for(let i=0;i<v.length;i++) o[i]=v[i]/norm;
    return o;
  }
  /**
   * Cosine similarity of two vectors over their shared leading dimensions.
   *
   * Dividing by both norms makes the score independent of vector length, so it
   * stays a true cosine even when one side (e.g. a decoded index embedding)
   * was not scaled to unit length. For two unit vectors this equals their dot
   * product.
   *
   * @param {ArrayLike<number>} a
   * @param {ArrayLike<number>} b
   * @returns {number} similarity in [-1, 1]; 0 when either vector has zero norm.
   */
  function cosine(a,b){
    const L=Math.min(a.length,b.length);
    let dot=0, na=0, nb=0;
    for(let i=0;i<L;i++){
      dot+=a[i]*b[i];
      na+=a[i]*a[i];
      nb+=b[i]*b[i];
    }
    const denom=Math.sqrt(na)*Math.sqrt(nb);
    // A zero-length vector has no direction; report "no similarity" instead of NaN.
    return denom ? dot/denom : 0;
  }

  // ------------------------- Ranking / Render -------------------------
  /**
   * Score every index entry that has an embedding against the query vector
   * and return the CFG.topK best as `{ i, score }`, highest score first.
   * `i` is the entry's position in INDEX.
   */
  function rankByCosine(q) {
    const positions = Array.from({ length: INDEX.length }, (_, pos) => pos);
    const scored = positions
      .filter((pos) => INDEX_EMB[pos])
      .map((pos) => ({ i: pos, score: cosine(q, INDEX_EMB[pos]) }));

    scored.sort((left, right) => right.score - left.score);
    return scored.slice(0, CFG.topK);
  }

  /**
   * Render ranked hits into #ados-scan-results, replacing its previous content.
   *
   * Each hit shows an optional thumbnail, the entry title linked to its wiki
   * page, and the score with three decimals. An empty result list renders a
   * "no clear hits" placeholder. Does nothing when the container is missing.
   *
   * @param {Array<{i: number, score: number}>} r - hits; `i` indexes into INDEX.
   */
  function renderResults(r){
    const box=qs('ados-scan-results');
    if(!box) return;
    if(!r.length){ box.innerHTML='<div class="empty">Keine klaren Treffer.</div>'; return; }

    // Build the markup once and assign it in one go; appending with
    // `innerHTML +=` would re-parse the whole container for every hit.
    box.innerHTML=r.map(({i,score})=>{
      const it=INDEX[i];
      const title=it.title||'';
      // Index data is interpolated into attribute values, so escape it there
      // too — otherwise a quote in `thumb` could break out of the attribute.
      const link=mw.html.escape(mw.util.getUrl(title.replace(/ /g,'_')));
      const thumb=it.thumb?mw.html.escape(String(it.thumb)):'';
      return `<div class="ados-hit" style="display:grid;grid-template-columns:60px 1fr auto;gap:10px;align-items:center;padding:.35rem 0;">
          ${thumb?`<img src="${thumb}" style="width:60px;border-radius:6px;">`:`<div></div>`}
          <div><b><a href="${link}">${mw.html.escape(title)}</a></b></div>
          <div style="color:#666;font-variant-numeric:tabular-nums">${score.toFixed(3)}</div>
        </div>`;
    }).join('');
  }

  // ------------------------- UI Bindings -------------------------
  let BOUND=false;
  function bindUI(){
    if(BOUND) return;
    const btnRun=qs('ados-scan-run');
    const inCam=qs('ados-scan-file-c