davidefiocco · November 5, 2020 17:59
diff --git a/copyedits.jsonl b/copyedits.jsonl
 {"removed":"These results suggested that the distribution of bacterial communities was driven more by sample types than the separate caves from which samples were collected.","added":"These results suggest that the distribution of bacterial communities is driven more by sample types than the separate caves from which samples were collected.","meta":{"score":1}}
diff --git a/diff_text.py b/diff_text.py
 import prodigy
 from prodigy.components.loaders import JSONL

 # run with 
 # python -m prodigy diff-textcat copyedits copyedits.jsonl  -F .\diff_text.py

 def add_label_to_stream(stream, label):
    """Yield each task from ``stream`` with its ``"label"`` key set.

    Args:
        stream: Iterable of mutable task mappings; each one is updated
            in place before being yielded.
        label: Either a single label string, or a sequence whose first
            element is used. The label coming from the command line is
            expected to be a list, and only one label is assumed.

    Yields:
        The same task objects, in order, after ``"label"`` has been set.

    Raises:
        IndexError: If ``label`` is an empty sequence and the stream
            produces at least one task.
    """
    for eg in stream:
        # Indexing a bare string would keep only its first character, so
        # a string is used whole; anything else is treated as a list.
        eg["label"] = label if isinstance(label, str) else label[0]
        yield eg

 @prodigy.recipe(
    "diff-textcat",
    dataset=("The dataset to use", "positional", None, str),
    source=("The source data as a JSONL file", "positional", None, str),
 )
 def copyedit(dataset, source):
    """Build the components for the "diff-textcat" recipe.

    Args:
        dataset: Name of the dataset, passed through unchanged.
        source: Path of the JSONL file the task stream is loaded from.

    Returns:
        A dict with the task stream, the dataset name, and a "blocks"
        view configured with a "diff" block followed by a
        "classification" block.
    """
    stream = JSONL(source)
    # NOTE(review): no label is attached to the tasks here, and
    # add_label_to_stream in this file is never called — confirm whether
    # the "classification" block needs one.
    blocks = [
        {"view_id": "diff"},
        {"view_id": "classification"},
    ]
    return {
        "stream": stream,
        "dataset": dataset,
        "view_id": "blocks",
        "config": {
            "blocks": blocks,
        },
    }
	import prodigy
	from prodigy.components.loaders import JSONL

	# run with
	# python -m prodigy diff-textcat copyedits copyedits.jsonl -F .\diff_text.py

	def add_label_to_stream(stream, label):
	    """Yield each task from ``stream`` with its ``"label"`` key set.

	    Args:
	        stream: Iterable of mutable task mappings; each one is updated
	            in place before being yielded.
	        label: Either a single label string, or a sequence whose first
	            element is used. The label coming from the command line is
	            expected to be a list, and only one label is assumed.

	    Yields:
	        The same task objects, in order, after ``"label"`` has been set.

	    Raises:
	        IndexError: If ``label`` is an empty sequence and the stream
	            produces at least one task.
	    """
	    for eg in stream:
	        # Indexing a bare string would keep only its first character, so
	        # a string is used whole; anything else is treated as a list.
	        eg["label"] = label if isinstance(label, str) else label[0]
	        yield eg

	@prodigy.recipe(
	    "diff-textcat",
	    dataset=("The dataset to use", "positional", None, str),
	    source=("The source data as a JSONL file", "positional", None, str),
	)
	def copyedit(dataset, source):
	    """Build the components for the "diff-textcat" recipe.

	    Args:
	        dataset: Name of the dataset, passed through unchanged.
	        source: Path of the JSONL file the task stream is loaded from.

	    Returns:
	        A dict with the task stream, the dataset name, and a "blocks"
	        view configured with a "diff" block followed by a
	        "classification" block.
	    """
	    stream = JSONL(source)
	    # NOTE(review): no label is attached to the tasks here, and
	    # add_label_to_stream in this file is never called — confirm whether
	    # the "classification" block needs one.
	    blocks = [
	        {"view_id": "diff"},
	        {"view_id": "classification"},
	    ]
	    return {
	        "stream": stream,
	        "dataset": dataset,
	        "view_id": "blocks",
	        "config": {
	            "blocks": blocks,
	        },
	    }