Example of an academic poster made with beamerposter in LaTeX. See the rendered poster here: https://scholar.social/@tmalsburg/115129303831486619
\documentclass{beamer}
\usepackage[orientation=portrait,size=a0,scale=1.8]{beamerposter} % A0 portrait poster; scale enlarges the fonts
\usetheme{Darmstadt}      % Beamer theme (layout)
\usecolortheme{seahorse}  % Light color palette
\usepackage{booktabs} % Better table formatting
\setbeamertemplate{blocks}[rounded][shadow=true] % Rounded blocks with drop shadow
\setbeamertemplate{navigation symbols}{}         % No navigation symbols on a poster
% Biblatex setup:
\usepackage[backend=biber,style=authoryear,sorting=nyt]{biblatex}
\addbibresource{xliterature.bib}
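% Note: with backend=biber, the document needs three passes (assuming this
% file is saved as poster.tex): lualatex or pdflatex, then "biber poster",
% then lualatex/pdflatex again so the citations are resolved.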
\usepackage{paralist}
% Adjust column separation:
\setlength{\columnsep}{2cm}
% Title, Author, Affiliation:
\title{Transformers Fail to Predict Consistent Effects for Agreement Attraction Configurations}
\author{
\parbox{0.45\linewidth}{
\centering
\textbf{Titus von der Malsburg}\\
Institute of Linguistics\\
University of Stuttgart\\
[email protected]
}
\hspace{0.03\linewidth}
\parbox{0.45\linewidth}{
\centering
\textbf{Sebastian Padó}\\
Institute for Natural Language Processing\\
University of Stuttgart\\
[email protected]
}
}
\begin{document}
\begin{frame}[t]
% Title block
\begin{columns}[t]
\begin{column}{0.98\textwidth}
\vspace{-2em}
\centering
{\huge \color{black} \textbf{\inserttitle}\\[0.3em]}
\insertauthor
\vspace{1em}
\end{column}
\end{columns}
\vspace{-0.5em}
% Motivation and research question:
\begin{block}{\large Background and Motivation}
Recurrent neural language models have been shown to capture hallmark effects of human sentence processing such as the agreement attraction effect \parencite{LinzenLeonard2018CogSci, ArehalliLinzen2020CogSci}.
Meanwhile, transformers have far surpassed recurrent networks in language modeling performance and some work suggests that they, too, capture human sentence processing difficulty in agreement attraction configurations \parencite{BazhukovEtAl2024CoNLL, RyuLewis2021CMCL, TimkeyLinzen2023EMNLP}.
However, previous work has tested only subsets of the theoretically relevant syntactic configurations (a--d) and has mostly tested just a single transformer, GPT-2.
To be viable as cognitive models, transformers \textbf{1.}\ must capture human performance across all relevant configurations and \textbf{2.}~must make consistent predictions across models.
\end{block}
\vspace{1em}
\centering
{\bf \Large \color{blue} RQ: Are predictions of autoregressive transformers for agreement attraction configurations consistent with human performance and stable across models?}
\vspace{1em}
% Design:
\begin{block}{\large Design}
We used 384 sentences (48 item sets $\times$ 8 conditions) like (a--d) from Experiment 3 in \textcite{WagersEtAl2009}.
Factors were RC subject number, grammaticality, and attraction (2$\times$2$\times$2 design), yielding a complete sweep of all theoretically relevant configurations.
Eleven pre-trained autoregressive transformers from different architectures were tested, including monolingual (GPT, GPT-2) and multilingual (Bloom, XGLM, Gemma 3) model families.
The crucial subject-verb dependency, \textit{officer(s) want(s)}, was in the relative clause (RC). The attractor noun, \textit{marine(s)}, appeared in the main clause.
\vspace{0.2em}
\begin{compactenum}[a.]
\item \phantom{*}The \textit{marine(s)} who the \textbf{officer wants} promote … \hfill (singular RC subject, grammatical)
\item *The \textit{marine(s)} who the \textbf{officer want} promote … \hfill (singular RC subject, ungrammatical)
\item \phantom{*}The \textit{marine(s)} who the \textbf{officers want} promote … \hfill (plural RC subject, grammatical)
\item *The \textit{marine(s)} who the \textbf{officers wants} promote … \hfill (plural RC subject, ungrammatical)
\end{compactenum}
\vspace{0.2em}
\textbf{Predictions:} Transformers’ surprisal at the RC verb should \textbf{1.}\ match human performance, i.e.\ show an attraction effect in (b) but not in (a), (c), or (d), and \textbf{2.}\ be consistent across transformer models.
\end{block}
\vspace{-0.4em}
\centering
\begin{columns}[t]
\begin{column}{0.515\linewidth}
\begin{block}{\large Surprisal by Model and Configuration}
\centering
\includegraphics{plots_simulation_results}
\end{block}
\vspace{0.3em}
\begin{block}{\large Summary\phantom{g}}
\begin{compactenum}
\item Most models predicted the classic agreement attraction effect in the singular/ungrammatical configuration (b).
\item BUT: None of the models predicted the overall pattern of effects observed in humans across configurations (a--d). All models predicted effects not seen in humans.
\item Inconsistent predictions across model families, but some consistency within families. Little evidence for a role of parameter count.
\end{compactenum}
Our results add to existing concerns about the suitability of transformers as cognitive models of human sentence processing \parencite[e.g.,][]{OhSchuler2023, CuskleyEtAl2024}.
\end{block}
\end{column}
\begin{column}{0.475\linewidth}
\begin{block}{\large Observed vs.\ Predicted Effects \phantom{g}}
\begin{table}[tbh!]
\centering
\scriptsize
\begin{tabular}{lcccc}
& \multicolumn{2}{c}{\textbf{Singular RC subject}} & \multicolumn{2}{c}{\textbf{Plural RC subject}} \\
& \textbf{Grammatical} & \textbf{Ungrammatical} & \textbf{Grammatical} & \textbf{Ungrammatical} \\
% \cmidrule(r){2-3}
% \cmidrule(l){4-5}
\midrule
Human, Wagers et al.\ 2009 & & \color{red}$\searrow$ & & \\
\midrule
Direction consistent with & \color{red}$\nearrow$ & \color{red}$\searrow$ & \color{red}$\nearrow$ & \color{red}$\searrow$ \\
attraction principles & & & & \\
\midrule
\midrule
gpt (120M) & & $\searrow$ & $\searrow$ & $\searrow$ \\
gpt2 (127M) & & $\searrow$ & $\searrow$ & $\searrow$ \\
gpt2-large (744M) & & $\searrow$ & $\searrow$ & \\
\midrule
bloom (560M) & $\searrow$ & $\searrow$ & & $\searrow$ \\
bloom (1.7B) & & $\searrow$ & $\searrow$ & $\searrow$ \\
bloom (3B) & & $\searrow$ & $\searrow$ & \\
\midrule
xglm (564M) & $\searrow$ & & $\searrow$ & $\searrow$ \\
xglm (1.7B) & & & $\searrow$ & $\searrow$ \\
xglm (2.9B) & & & $\searrow$ & $\searrow$ \\
\midrule
gemma-3 (1B) & $\nearrow$ & $\searrow$ & $\searrow$ & $\nearrow$ \\
gemma-3 (4B) & $\nearrow$ & $\searrow$ & $\searrow$ & \\
\bottomrule
\end{tabular}
\vspace{1em}
\caption{
Observed and predicted agreement attraction effects in the four theoretically relevant configurations (a--d, columns 1--4).
Arrows indicate significant effects.
Upward arrow: inhibitory agreement attraction effect (slowdown).
Downward arrow: facilitatory effect (faster processing).
First row shows human findings.
Second row shows direction of (counterfactual) effects that could be explained in terms of attraction.
% None of the predicted effects in the plural/grammatical configuration can be explained by attraction.
}
\label{tab:results}
\end{table}
\end{block}
\centering
\vspace{1em}
{\bf \Large \color{blue} Transformers deviate significantly \phantom{mm}from human performance.\newline
\newline
Predictions inconsistent across transformer models.}
\end{column}
\end{columns}
% Bibliography:
\vfill
\begin{columns}
\begin{column}{\textwidth}
\color{gray}
\textbf{Refs:}
Wagers et al.\ (2009), JML;
Cuskley et al.\ (2024), Open Mind;
Linzen, Leonard (2018), CogSci;
Arehalli, Linzen (2020), CogSci;
Bazhukov et al.\ (2024), CoNLL;
Timkey, Linzen (2023), EMNLP;
Ryu, Lewis (2021), CMCL;
Oh, Schuler (2023), TACL.
\end{column}
\end{columns}
\end{frame}
\end{document}
@Article{WagersEtAl2009,
author = {Matthew W. Wagers and Ellen F. Lau and Colin Phillips},
title = {Agreement attraction in comprehension: {Representations} and processes},
journal      = {Journal of Memory and Language},
year = {2009},
volume = {61},
number = {2},
pages = {206--237},
issn = {0749-596X},
doi = {10.1016/j.jml.2009.04.002},
keywords = {Agreement, Comprehension, Syntax, Retrieval, Prediction},
}
@Article{CuskleyEtAl2024,
author = {Cuskley, Christine and Woods, Rebecca and Flaherty, Molly},
title = {The Limitations of Large Language Models for Understanding Human Language and Cognition},
journal = {Open Mind},
year = {2024},
volume = {8},
pages        = {1058--1083},
month = {08},
issn = {2470-2986},
doi = {10.1162/opmi_a_00160},
}
@InProceedings{LinzenLeonard2018CogSci,
author = {Tal Linzen and Brian Leonard},
title = {Distinct patterns of syntactic agreement errors in recurrent networks and humans},
booktitle    = {Proceedings of the 40th Annual Meeting of the Cognitive Science Society},
year = {2018},
editor = {Chuck Kalish and Martina Rau and Tim Rogers and Jerry Zhu},
address = {Madison, WI, USA},
organization= {Cognitive Science Society},
publisher = {Cognitive Science Society},
}
@InProceedings{ArehalliLinzen2020CogSci,
author = {Suhas Arehalli and Tal Linzen},
title        = {Neural Language Models Capture Some, But Not All, Agreement Attraction Effects},
booktitle    = {Proceedings of the 42nd Annual Meeting of the Cognitive Science Society},
year = {2020},
editor = {Stephanie Denison and Michael Mack and Yang Xu and Blair Armstrong},
organization= {Cognitive Science Society},
publisher = {Cognitive Science Society},
}
@Article{HuangEtAl2024,
author = {Huang, Kuan-Jung and Arehalli, Suhas and Kugemoto, Mari and Muxica, Christian and Prasad, Grusha and Dillon, Brian and Linzen, Tal},
title        = {Large-scale benchmark yields no evidence that language model surprisal explains syntactic disambiguation difficulty},
journal = {Journal of Memory and Language},
year = {2024},
volume = {137},
pages = {104510},
DOI = {10.1016/j.jml.2024.104510},
publisher = {Elsevier BV},
}
@InProceedings{RyuLewis2021CMCL,
author = {Ryu, Soo Hyun and Lewis, Richard},
title = {Accounting for Agreement Phenomena in Sentence Comprehension with Transformer Language Models: Effects of Similarity-based Interference on Surprisal and Attention},
booktitle = {Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics},
year = {2021},
editor = {Chersoni, Emmanuele and Hollenstein, Nora and Jacobs, Cassandra and Oseki, Yohei and Pr{\'e}vot, Laurent and Santus, Enrico},
pages = {61--71},
month = jun,
address = {Online},
publisher = {Association for Computational Linguistics},
doi = {10.18653/v1/2021.cmcl-1.6},
}
@InProceedings{TimkeyLinzen2023EMNLP,
author = {Timkey, William and Linzen, Tal},
title = {A Language Model with Limited Memory Capacity Captures Interference in Human Sentence Processing},
booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
year = {2023},
editor = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
pages = {8705--8720},
month = dec,
address = {Singapore},
publisher = {Association for Computational Linguistics},
doi = {10.18653/v1/2023.findings-emnlp.582},
}
@InProceedings{BazhukovEtAl2024CoNLL,
author = {Bazhukov, Maxim and Voloshina, Ekaterina and Pletenev, Sergey and Anisimov, Arseny and Serikov, Oleg and Toldova, Svetlana},
title = {Of Models and Men: Probing Neural Networks for Agreement Attraction with Psycholinguistic Data},
booktitle = {Proceedings of the 28th Conference on Computational Natural Language Learning},
year = {2024},
editor = {Barak, Libby and Alikhani, Malihe},
pages = {280--290},
month = nov,
address = {Miami, FL, USA},
publisher = {Association for Computational Linguistics},
doi = {10.18653/v1/2024.conll-1.22},
}
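
For anyone who wants to reproduce this kind of measure, below is a minimal, illustrative Python sketch of how per-word surprisal at the RC verb can be computed with a pretrained autoregressive transformer via the Hugging Face transformers library. It is not the pipeline used for the poster; the model name (gpt2), the example prefix, and the target word are assumptions chosen purely for illustration.

import math

import torch
import torch.nn.functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer

# Illustrative model choice; the poster tested eleven models (GPT, GPT-2,
# Bloom, XGLM, Gemma 3) -- any autoregressive checkpoint can be swapped in.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
model.eval()

def word_surprisal(prefix: str, word: str) -> float:
    """Surprisal (in bits) of `word` given `prefix`, summed over subword tokens."""
    prefix_ids = tokenizer(prefix, return_tensors="pt").input_ids
    # Leading space so the word is tokenized as it appears mid-sentence.
    word_ids = tokenizer(" " + word, add_special_tokens=False).input_ids
    ids = torch.cat([prefix_ids, torch.tensor([word_ids])], dim=1)
    with torch.no_grad():
        log_probs = F.log_softmax(model(ids).logits, dim=-1)
    nats = 0.0
    for i, tok in enumerate(word_ids):
        # Logits at position t predict the token at position t + 1.
        pos = prefix_ids.shape[1] + i - 1
        nats -= log_probs[0, pos, tok].item()
    return nats / math.log(2)  # convert nats to bits

# Hypothetical usage, condition (b): singular RC subject, ungrammatical RC verb.
print(word_surprisal("The marines who the officer", "want"))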