
@edsu
Last active October 1, 2025 21:49

If you execute ./run.sh, browsertrix-crawler will start up, crawl https://www.trm.dk/nyheder, and run a custom behavior that pages through all of the news results and feeds every discovered URL into the crawl queue.

class TrmBehavior {
  static id = "TrmBehavior";

  // only run this behavior on the news archive page
  static isMatch() {
    return document.location.href === "https://www.trm.dk/nyheder";
  }

  static init() {
    return {};
  }

  static runInIframes = false;

  async* run(ctx) {
    const { sleep, getState, addLink } = ctx.Lib;

    // keep clicking the button for more news stories from the archive
    while (true) {
      window.scrollTo({ top: document.body.scrollHeight, behavior: "smooth" });
      const button = document.querySelector("button#pagenation");
      if (button && button.checkVisibility()) {
        yield getState(ctx, "clicking pagination button");
        button.click();
      } else {
        yield getState(ctx, "finished pagination");
        break;
      }
      // give the page a moment to load the next batch of stories
      await sleep(1000);
    }

    // add each news story URL to the crawl queue
    for (const a of document.querySelectorAll(".col-md-8 a")) {
      yield getState(ctx, `adding ${a.href}`, "links");
      await addLink(a.href);
    }
  }
}
#!/bin/bash
docker pull webrecorder/browsertrix-crawler:latest
docker run -p 9037:9037 --rm -v "$PWD":/crawls/ webrecorder/browsertrix-crawler:latest crawl \
--url https://www.trm.dk/nyheder \
--scopeType any \
--generateWACZ true \
--screencastPort 9037 \
--behaviors siteSpecific \
--behaviorTimeout 3600 \
--customBehaviors /crawls/behavior.js