afparsons · January 23, 2023 22:01
diff --git a/bibliographer.py b/bibliographer.py
 """
 A quick excerpt demonstrating usage of a custom `RegularExpressionMatcher` for spaCy 3.
 This is from one of my personal projects (HaleyNLP/Irnerius). Module-level imports and other code blocks have been elided.
 """

 class ComponentExtractionBibliographer(
    AbstractComponentMatcher,
    matcher=RegularExpressionMatcher,
 ):
    """
    Component matcher, configured with `RegularExpressionMatcher`, for citations.

    Registers a `citation` extension on `Span` and publishes the span of every
    citation annotation found on a document under `doc.spans['citations']`.
    """
    def __init__(self, nlp: Language, matcher_rules):
        """
        Set up the base matcher and register the `citation` span extension.

        :param nlp: the pipeline object; forwarded unchanged to the base class.
        :param matcher_rules: the matcher rules; forwarded unchanged to the base class.
        """
        super().__init__(nlp, matcher_rules)
        # `force=True` lets the extension be registered again without an error
        # when another instance has already registered it.
        Span.set_extension('citation', force=True, default=None)

    @classmethod
    def _make_span_groups(cls, doc: Doc) -> None:
        """
        Collect the citation spans of `doc` into the `citations` span group.

        Each annotation in `doc._.annotations.citations` is attached to its own
        span through the `citation` extension, so the annotation can be reached
        again from the span.

        :param doc: the document to update in place.
        """
        citation_spans: List[Span] = []

        # noinspection PyProtectedMember
        for citation in doc._.annotations.citations:  # type: Annotation
            # Back-reference first, then collect; the two steps are independent.
            # noinspection PyProtectedMember
            citation.span._.set(
                name='citation',
                value=citation,
            )
            citation_spans.append(citation.span)

        doc.spans['citations'] = citation_spans


 @Language.factory(
    name='haleynlp_extraction_bibliographer',
    requires=['doc._.annotations', 'doc._.db'],
    default_config={'matcher_rules': {'@misc': 'haleynlp.en.component.config.bibliography'}},
 )
 def produce_component_extraction_bibliographer(
    nlp: Language,
    name: str,
    matcher_rules: Tuple,
 ) -> ComponentExtractionBibliographer:
    """
    Factory registered as `haleynlp_extraction_bibliographer`.

    :param nlp: the pipeline object handed to the component.
    :param name: the component's name in the pipeline; not used in this body.
    :param matcher_rules: the matcher rules; the default config points them at
        the `@misc` entry `haleynlp.en.component.config.bibliography`.
    :return: a new `ComponentExtractionBibliographer`.
    """
    bibliographer = ComponentExtractionBibliographer(nlp=nlp, matcher_rules=matcher_rules)
    return bibliographer
	"""
	A quick excerpt demonstrating usage of a custom `RegularExpressionMatcher` for spaCy 3.
	This is from one of my personal projects (HaleyNLP/Irnerius). Module-level imports and other code blocks have been elided.
	"""

	class ComponentExtractionBibliographer(
	    AbstractComponentMatcher,
	    matcher=RegularExpressionMatcher,
	):
	    """
	    Citation matcher component configured with `RegularExpressionMatcher`.

	    On construction it registers a `citation` extension on `Span`. Its
	    `_make_span_groups` hook stores the span of every citation annotation of
	    a document in `doc.spans['citations']`.
	    """
	    def __init__(self, nlp: Language, matcher_rules):
	        """
	        Initialise the base matcher and register the `citation` extension.

	        :param nlp: the pipeline object, passed on to the base class.
	        :param matcher_rules: the matcher rules, passed on to the base class.
	        """
	        super().__init__(nlp, matcher_rules)
	        # `force=True` overwrites an existing registration instead of raising,
	        # so creating the component more than once is safe.
	        Span.set_extension('citation', default=None, force=True)

	    @classmethod
	    def _make_span_groups(cls, doc: Doc) -> None:
	        """
	        Fill the `citations` span group of `doc` from its citation annotations.

	        Every annotation in `doc._.annotations.citations` contributes its
	        `span`, and that span's `citation` extension is set to the annotation.

	        :param doc: the document to update in place.
	        """
	        citations: List[Span] = []

	        # noinspection PyProtectedMember
	        for annotation in doc._.annotations.citations:  # type: Annotation
	            citations.append(annotation.span)
	            # Link the span back to the annotation it came from.
	            # noinspection PyProtectedMember
	            annotation.span._.set(
	                name='citation',
	                value=annotation,
	            )

	        doc.spans['citations'] = citations


	@Language.factory(
	    name='haleynlp_extraction_bibliographer',
	    default_config={'matcher_rules': {'@misc': 'haleynlp.en.component.config.bibliography'}},
	    requires=['doc._.annotations', 'doc._.db']
	)
	def produce_component_extraction_bibliographer(
	    nlp: Language,
	    name: str,
	    matcher_rules: Tuple,
	) -> ComponentExtractionBibliographer:
	    """
	    Build the component registered as `haleynlp_extraction_bibliographer`.

	    :param nlp: the pipeline object the component is created for.
	    :param name: the component's name in the pipeline; unused in this body.
	    :param matcher_rules: the matcher rules; by default taken from the
	        `@misc` entry `haleynlp.en.component.config.bibliography`.
	    :return: a new `ComponentExtractionBibliographer`.
	    """
	    return ComponentExtractionBibliographer(nlp=nlp, matcher_rules=matcher_rules)
No results found