Zum Inhalt springen

MediaWiki:Gadget-LabelScanIndexer.js: Unterschied zwischen den Versionen

Aus ADOS Wiki
Keine Bearbeitungszusammenfassung
Keine Bearbeitungszusammenfassung
Zeile 1: Zeile 1:
/* Gadget: LabelScanIndexer (Auto-Save, ES5)
/* Gadget: LabelScanIndexer (Auto-Save, ES5, ohne import())
  * Erzeugt Embeddings lokal (CLIP) und speichert in MediaWiki:Gadget-LabelScan-index.json
  * Erzeugt Embeddings lokal (CLIP) und speichert in MediaWiki:Gadget-LabelScan-index.json
  * Läuft nur auf der Seite "Hilfe:LabelScan-Indexer"
  * Läuft nur auf "Hilfe:LabelScan-Indexer"
  */
  */


Zeile 12: Zeile 12:
   var TITLE = mw.config.get('wgTitle');        // Titel ohne Namespace
   var TITLE = mw.config.get('wgTitle');        // Titel ohne Namespace
   var ON_PAGE = (NS === 12 && TITLE === 'LabelScan-Indexer');
   var ON_PAGE = (NS === 12 && TITLE === 'LabelScan-Indexer');
   if (!ON_PAGE) {
   if (!ON_PAGE) { return; }
    return;
  }


   var INDEX_TITLE = 'MediaWiki:Gadget-LabelScan-index.json';
   var INDEX_TITLE = 'MediaWiki:Gadget-LabelScan-index.json';


   // ---------- Modell / Pfade ----------
   // ---------- Pfade / Modell ----------
   var TRANSFORMERS_URL = 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.15.0';
  // WICHTIG: UMD-Bundle laden (global: window.transformers)
  var TRANSFORMERS_UMD = 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.15.0/dist/transformers.min.js';
   var WASM_DIR          = 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.15.0/dist/';
 
   var MODEL_ID = 'Xenova/clip-vit-base-patch32';
   var MODEL_ID = 'Xenova/clip-vit-base-patch32';
   var LOCAL_MODEL_PATH = '/models';
   var LOCAL_MODEL_PATH = '/models';
Zeile 77: Zeile 78:
   }
   }


   // ---------- Transformers laden (einmalig) ----------
   function canvasToBlobPromise(canvas) {
  var _modelPromise = null;
    if (canvas.convertToBlob) {
  function ensureModel() {
      return canvas.convertToBlob({ type: 'image/jpeg', quality: 0.95 });
     if (_modelPromise) return _modelPromise;
    }
    return new Promise(function (resolve) {
      canvas.toBlob(function (b) { resolve(b); }, 'image/jpeg', 0.95);
     });
  }


     _modelPromise = import(TRANSFORMERS_URL).then(function (mod) {
  // ---------- Transformers laden (ohne import) ----------
       // Nur lokale Modelle zulassen
  var _libPromise = null;
       mod.env.allowLocalModels = true;
  function ensureLib() {
       mod.env.allowRemoteModels = false;
    if (_libPromise) return _libPromise;
       mod.env.localModelPath = LOCAL_MODEL_PATH;
     _libPromise = mw.loader.getScript(TRANSFORMERS_UMD).then(function () {
      if (!window.transformers) throw new Error('Transformers-UMD nicht verfügbar.');
       // Env konfigurieren
       var env = window.transformers.env;
      env.allowLocalModels = true;
       env.allowRemoteModels = false;
       env.localModelPath   = LOCAL_MODEL_PATH;


       // WASM-Runtime-Pfad
       // WASM-Runtime-Pfad
       mod.env.backends = mod.env.backends || {};
       env.backends = env.backends || {};
       mod.env.backends.onnx = mod.env.backends.onnx || {};
       env.backends.onnx = env.backends.onnx || {};
       mod.env.backends.onnx.wasm = mod.env.backends.onnx.wasm || {};
       env.backends.onnx.wasm = env.backends.onnx.wasm || {};
       mod.env.backends.onnx.wasm.wasmPaths =
       env.backends.onnx.wasm.wasmPaths = WASM_DIR;
        'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.15.0/dist/';


      return window.transformers;
    });
    return _libPromise;
  }
  var _modelPromise = null;
  function ensureModel() {
    if (_modelPromise) return _modelPromise;
    _modelPromise = ensureLib().then(function (tf) {
       return Promise.all([
       return Promise.all([
         mod.AutoProcessor.from_pretrained(MODEL_ID),
         tf.AutoProcessor.from_pretrained(MODEL_ID),
         mod.CLIPVisionModelWithProjection.from_pretrained(MODEL_ID, { quantized: true })
         tf.CLIPVisionModelWithProjection.from_pretrained(MODEL_ID, { quantized: true })
       ]).then(function (arr) {
       ]).then(function (arr) {
         var pack = { mod: mod, processor: arr[0], model: arr[1] };
         var pack = { mod: tf, processor: arr[0], model: arr[1] };
        // Debug
         try {
         try {
           var backend = pack.model && pack.model.session && pack.model.session.executionProvider || 'unknown';
           var backend = (pack.model && pack.model.session && pack.model.session.executionProvider) || 'unknown';
           console.log('[LabelScanIndexer] Modell geladen | Backend:', backend);
           console.log('[LabelScanIndexer] Modell geladen | Backend:', backend);
         } catch (e) { console.log('[LabelScanIndexer] Modell geladen'); }
         } catch (e) { console.log('[LabelScanIndexer] Modell geladen'); }
Zeile 108: Zeile 126:
       });
       });
     });
     });
     return _modelPromise;
     return _modelPromise;
  }
  function canvasToBlobPromise(canvas) {
    if (canvas.convertToBlob) {
      return canvas.convertToBlob({ type: 'image/jpeg', quality: 0.95 });
    }
    return new Promise(function (resolve) {
      canvas.toBlob(function (b) { resolve(b); }, 'image/jpeg', 0.95);
    });
   }
   }



Version vom 9. November 2025, 16:39 Uhr

/* Gadget: LabelScanIndexer (Auto-Save, ES5, ohne import())
 * Erzeugt Embeddings lokal (CLIP) und speichert in MediaWiki:Gadget-LabelScan-index.json
 * Läuft nur auf "Hilfe:LabelScan-Indexer"
 */

/* global mw */
(function () {
  'use strict';

  // ---------- Page detection ----------
  // The gadget only does anything on one specific page; everywhere else the
  // enclosing function returns immediately.
  var NS = mw.config.get('wgNamespaceNumber'); // 12 = Help ("Hilfe") namespace
  var TITLE = mw.config.get('wgTitle');        // page title without the namespace prefix
  var ON_PAGE = (NS === 12 && TITLE === 'LabelScan-Indexer');
  if (!ON_PAGE) { return; }

  // Wiki page that holds the embedding index.
  var INDEX_TITLE = 'MediaWiki:Gadget-LabelScan-index.json';

  // ---------- Paths / model ----------
  // IMPORTANT: load the UMD bundle (expected global: window.transformers)
  var TRANSFORMERS_UMD = 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.15.0/dist/transformers.min.js';
  // Directory handed to the ONNX WASM backend as its wasmPaths setting.
  var WASM_DIR          = 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.15.0/dist/';

  // Model identifier passed to from_pretrained(), and the path configured as
  // the library's localModelPath.
  var MODEL_ID = 'Xenova/clip-vit-base-patch32';
  var LOCAL_MODEL_PATH = '/models';

  // ---------- UI helpers ----------
  /**
   * Shorthand for document.getElementById.
   * @param {string} id Id of the element to look up.
   * @returns {?Element} Whatever document.getElementById returns for `id`.
   */
  function $(id) {
    var node = document.getElementById(id);
    return node;
  }

  /**
   * Shows a status message in the element with id "idx-status".
   * Does nothing when that element is not on the page.
   * @param {string} [t] Message text; any falsy value clears the message.
   */
  function status(t) {
    var target = $('idx-status');
    if (!target) {
      return;
    }
    target.textContent = t ? t : '';
  }

  /**
   * Checks the current user's groups (wgUserGroups) for permission to run
   * the indexer.
   * @returns {boolean} true when the user is in the 'interface-admin' or
   *   'sysop' group, false otherwise (including when no groups are set).
   */
  function hasInterfaceRight() {
    var userGroups = mw.config.get('wgUserGroups') || [];
    var remaining = userGroups.length;
    var group;
    // Order does not matter for a membership test, so walk from the end.
    while (remaining > 0) {
      remaining -= 1;
      group = userGroups[remaining];
      if (group === 'sysop' || group === 'interface-admin') {
        return true;
      }
    }
    return false;
  }

  /**
   * Encodes the raw bytes of a Float32Array as a base64 string.
   * @param {Float32Array} vec Vector to encode.
   * @returns {string} base64 of exactly the bytes that `vec` covers.
   */
  function float32ToBase64(vec) {
    // Honour byteOffset/byteLength: a typed array may be a view onto part of
    // a larger buffer (e.g. the result of subarray()), and wrapping the bare
    // buffer would then encode bytes that do not belong to `vec`.
    var bytes = new Uint8Array(vec.buffer, vec.byteOffset, vec.byteLength);
    var bin = '';
    // Convert in chunks so Function.prototype.apply never receives an
    // oversized argument list.
    var chunk = 0x8000;
    for (var i = 0; i < bytes.length; i += chunk) {
      bin += String.fromCharCode.apply(null, bytes.subarray(i, i + chunk));
    }
    return btoa(bin);
  }

  /**
   * Draws an image file onto a canvas.
   *
   * When createImageBitmap is available the file is decoded with
   * imageOrientation 'from-image' and drawn onto an OffscreenCanvas (if the
   * browser has one) or a regular canvas element. Otherwise the file is
   * loaded through an Image element and an object URL.
   *
   * @param {Blob} file Image file, e.g. taken from a file input.
   * @returns {Promise} Resolves with the canvas; rejects when decoding or
   *   loading fails (the fallback path rejects with an Error).
   */
  function fileToCanvasExif(file) {
    // Creates a DOM canvas of the given size and paints `source` onto it.
    function drawOnDomCanvas(source, width, height) {
      var canvas = document.createElement('canvas');
      canvas.width = width;
      canvas.height = height;
      canvas.getContext('2d').drawImage(source, 0, 0);
      return canvas;
    }

    return new Promise(function (resolve, reject) {
      if ('createImageBitmap' in window) {
        createImageBitmap(file, { imageOrientation: 'from-image' }).then(function (bmp) {
          var canvas;
          try {
            if ('OffscreenCanvas' in window) {
              canvas = new OffscreenCanvas(bmp.width, bmp.height);
              canvas.getContext('2d').drawImage(bmp, 0, 0);
            } else {
              canvas = drawOnDomCanvas(bmp, bmp.width, bmp.height);
            }
          } finally {
            // The pixels now live on the canvas; release the decoded bitmap
            // right away instead of waiting for garbage collection.
            if (typeof bmp.close === 'function') { bmp.close(); }
          }
          resolve(canvas);
        })["catch"](reject);
        return;
      }

      var url = URL.createObjectURL(file);
      var im = new Image();
      im.onload = function () {
        var canvas;
        try {
          canvas = drawOnDomCanvas(im, im.width, im.height);
        } catch (err) {
          URL.revokeObjectURL(url);
          reject(err);
          return;
        }
        URL.revokeObjectURL(url);
        resolve(canvas);
      };
      im.onerror = function () {
        URL.revokeObjectURL(url);
        // Reject with a real Error: the raw error event carries no message,
        // so callers printing e.message would show "[object Event]".
        reject(new Error('Bild konnte nicht geladen werden.'));
      };
      im.src = url;
    });
  }

  /**
   * Exports a canvas as a JPEG blob (quality 0.95).
   *
   * Uses convertToBlob() when the canvas offers it, otherwise the
   * callback-based toBlob().
   *
   * @param {Object} canvas Canvas providing convertToBlob() or toBlob().
   * @returns {Promise} Resolves with the blob; rejects when toBlob() reports
   *   failure by passing null to its callback.
   */
  function canvasToBlobPromise(canvas) {
    if (canvas.convertToBlob) {
      return canvas.convertToBlob({ type: 'image/jpeg', quality: 0.95 });
    }
    return new Promise(function (resolve, reject) {
      canvas.toBlob(function (b) {
        // toBlob() signals failure with null; surface that here instead of
        // letting a later step fail on a missing blob.
        if (!b) {
          reject(new Error('Canvas konnte nicht als JPEG exportiert werden.'));
          return;
        }
        resolve(b);
      }, 'image/jpeg', 0.95);
    });
  }

  // ---------- Load Transformers (without import()) ----------
  var _libPromise = null;

  /**
   * Loads the Transformers bundle once and configures its environment:
   * local models only (no remote download), model path LOCAL_MODEL_PATH,
   * and the ONNX WASM files from WASM_DIR.
   *
   * The pending/fulfilled promise is cached so the script is requested only
   * once. A failed attempt is not cached, so a later call retries.
   *
   * NOTE(review): this relies on the script at TRANSFORMERS_UMD defining
   * window.transformers — confirm for the pinned version.
   *
   * @returns {Promise} Resolves with window.transformers; rejects when the
   *   script cannot be loaded or the global is missing afterwards.
   */
  function ensureLib() {
    if (_libPromise) return _libPromise;

    var loading = mw.loader.getScript(TRANSFORMERS_UMD).then(function () {
      if (!window.transformers) throw new Error('Transformers-UMD nicht verfügbar.');
      // Configure the environment
      var env = window.transformers.env;
      env.allowLocalModels = true;
      env.allowRemoteModels = false;
      env.localModelPath   = LOCAL_MODEL_PATH;

      // WASM runtime path (create the nested settings objects if absent)
      env.backends = env.backends || {};
      env.backends.onnx = env.backends.onnx || {};
      env.backends.onnx.wasm = env.backends.onnx.wasm || {};
      env.backends.onnx.wasm.wasmPaths = WASM_DIR;

      return window.transformers;
    });
    _libPromise = loading;

    // Drop a failed attempt from the cache; otherwise one transient error
    // (e.g. a network hiccup) would make every later call fail until the
    // page is reloaded. The caller still receives the rejection via `loading`.
    loading.then(null, function () {
      if (_libPromise === loading) _libPromise = null;
    });

    return loading;
  }

  var _modelPromise = null;

  /**
   * Loads the processor and the CLIP vision model (quantized) for MODEL_ID
   * once and caches the resulting promise. A failed attempt is not cached,
   * so a later call retries.
   *
   * @returns {Promise} Resolves with { mod, processor, model }, where `mod`
   *   is the library object returned by ensureLib().
   */
  function ensureModel() {
    if (_modelPromise) return _modelPromise;

    var loading = ensureLib().then(function (tf) {
      return Promise.all([
        tf.AutoProcessor.from_pretrained(MODEL_ID),
        tf.CLIPVisionModelWithProjection.from_pretrained(MODEL_ID, { quantized: true })
      ]).then(function (arr) {
        var pack = { mod: tf, processor: arr[0], model: arr[1] };
        // Purely informational; never let logging break model loading.
        try {
          var backend = (pack.model && pack.model.session && pack.model.session.executionProvider) || 'unknown';
          console.log('[LabelScanIndexer] Modell geladen | Backend:', backend);
        } catch (e) { console.log('[LabelScanIndexer] Modell geladen'); }
        return pack;
      });
    });
    _modelPromise = loading;

    // Forget a failed load so the next call can try again instead of
    // returning the same rejection forever.
    loading.then(null, function () {
      if (_modelPromise === loading) _modelPromise = null;
    });

    return loading;
  }

  /**
   * Computes a unit-length image embedding for an image file.
   *
   * Steps: load the model, draw the file onto a canvas, export the canvas as
   * a blob, wrap it in a RawImage, run the processor and the model's forward
   * pass, then scale the resulting vector to length 1.
   *
   * @param {Blob} file Image file to embed.
   * @returns {Promise} Resolves with a new Float32Array; rejects with
   *   'Embedding-Format unerwartet' when the model output holds no
   *   Float32Array.
   */
  function buildEmbeddingFromFile(file) {
    var loaded; // { mod, processor, model }, shared by the steps below

    // Returns a copy of `values` divided by its Euclidean length
    // (an all-zero vector is returned unscaled).
    function toUnitLength(values) {
      var count = values.length;
      var sumOfSquares = 0;
      var k;
      for (k = 0; k < count; k++) {
        sumOfSquares += values[k] * values[k];
      }
      var length = Math.sqrt(sumOfSquares) || 1;
      var scaled = new Float32Array(count);
      for (k = 0; k < count; k++) {
        scaled[k] = values[k] / length;
      }
      return scaled;
    }

    return ensureModel()
      .then(function (pack) {
        loaded = pack;
        return fileToCanvasExif(file);
      })
      .then(function (canvas) {
        return canvasToBlobPromise(canvas);
      })
      .then(function (blob) {
        return loaded.mod.RawImage.fromBlob(blob);
      })
      .then(function (rawImage) {
        return loaded.processor(rawImage, { return_tensors: 'pt' });
      })
      .then(function (inputs) {
        return loaded.model.forward({ pixel_values: inputs.pixel_values });
      })
      .then(function (out) {
        // Accept either an object carrying the values in `.data` or the
        // typed array itself.
        var embeds = out && out.image_embeds;
        var values = (embeds && embeds.data) || embeds;
        if (!(values instanceof Float32Array)) {
          throw new Error('Embedding-Format unerwartet');
        }
        return toUnitLength(values);
      });
  }

  // ---------- Index laden/speichern ----------
  /**
   * Downloads the current index page (raw, uncached) and parses it.
   *
   * @returns {Promise} Resolves with the index array. An empty page (or a
   *   page holding a falsy JSON value such as null) yields []. Rejects when
   *   the request fails, the text is not valid JSON, or the JSON is not an
   *   array.
   */
  function fetchIndexJSON() {
    var url = mw.util.getUrl(INDEX_TITLE, { action: 'raw', ctype: 'application/json' });
    return fetch(url, { cache: 'no-store' }).then(function (res) {
      if (!res.ok) throw new Error('Index nicht ladbar: ' + res.status);
      return res.text();
    }).then(function (txt) {
      var body = String(txt || '').trim();
      if (!body) return [];

      var parsed;
      try {
        parsed = JSON.parse(body);
      } catch (e) {
        // Do NOT fall back to []: the caller appends one entry and saves the
        // result, which would silently replace the whole existing index.
        throw new Error('Index enthält kein gültiges JSON: ' + e.message);
      }
      if (!parsed) return [];
      if (!Array.isArray(parsed)) {
        throw new Error('Index hat ein unerwartetes Format (Array erwartet).');
      }
      return parsed;
    });
  }

  /**
   * Replaces the index page with the given array, serialized as
   * pretty-printed JSON (2-space indent, trailing newline), through an edit
   * with a CSRF token.
   *
   * @param {Array} newArray Complete new index contents.
   * @param {string} [summary] Edit summary; a default is used when omitted.
   * @returns {Promise} Settles with the outcome of the API edit request.
   */
  function saveIndexJSON(newArray, summary) {
    return mw.loader.using(['mediawiki.api']).then(function () {
      var api = new mw.Api();
      var text = JSON.stringify(newArray, null, 2) + '\n';
      // `nocreate` is deliberately not sent: Action API boolean parameters
      // count as true whenever they are present, so the previous
      // `nocreate: 0` actually switched nocreate ON.
      return api.postWithToken('csrf', {
        action: 'edit',
        title: INDEX_TITLE,
        text: text,
        summary: summary || 'LabelScan: +1 embedding (Auto-Indexer)',
        bot: 1
      });
    });
  }

  // ---------- Click handler ----------

  /**
   * Looks up the #idx-run button and attaches the indexing click handler.
   *
   * On click: check the user's groups, read title / thumbnail / file from
   * the form, compute the embedding, show the new entry in #idx-out, append
   * it to the freshly fetched index and save the index. Progress and errors
   * are reported through status(); errors additionally through alert().
   */
  function initRunButton() {
    var runBtn = document.getElementById('idx-run');
    if (!runBtn) {
      console.warn('[LabelScanIndexer] Button #idx-run nicht gefunden – ist das HTML auf der Seite eingebunden?');
      return;
    }

    runBtn.addEventListener('click', function () {
      if (!hasInterfaceRight()) {
        alert('⚠️ Du brauchst Admin/Interface-Rechte (editinterface).');
        return;
      }

      var titleEl = $('idx-title');
      var thumbEl = $('idx-thumb');
      var fileEl  = $('idx-file');

      var title = titleEl ? String(titleEl.value || '').trim() : '';
      var thumb = thumbEl ? String(thumbEl.value || '').trim() : '';
      var file  = (fileEl && fileEl.files && fileEl.files[0]) ? fileEl.files[0] : null;

      if (!title) { alert('Titel fehlt.'); return; }
      if (!file)  { alert('Bitte eine Bilddatei wählen.'); return; }

      // Block repeated clicks while a run is in progress.
      runBtn.disabled = true;
      status('Embedding berechnen …');

      buildEmbeddingFromFile(file).then(function (vec) {
        var b64 = float32ToBase64(vec);
        var outBox = $('idx-out');
        if (outBox) outBox.value = JSON.stringify({ title: title, thumb: thumb, embed: b64 }, null, 2);

        status('Index laden …');
        return fetchIndexJSON().then(function (arr) {
          arr.push({ title: title, thumb: thumb, embed: b64 });
          status('Speichern …');
          return saveIndexJSON(arr, 'LabelScan: +1 embedding für "' + title + '"');
        });
      }).then(function () {
        status('Gespeichert ✅');
      })["catch"](function (e) {
        console.error(e);
        status('Fehler ❌ ' + (e && e.message ? e.message : e));
        alert('Fehler: ' + (e && e.message ? e.message : e));
      }).then(function () {
        // Runs after success and after a handled error alike.
        runBtn.disabled = false;
      });
    });
  }

  // The script may execute before the page body has been parsed; in that
  // case the button does not exist yet, so wait for the DOM first.
  if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', initRunButton);
  } else {
    initRunButton();
  }

  console.log('[LabelScanIndexer] bereit');
})();