illy · September 3, 2013 20:29
diff --git a/twitter_ling.bib b/twitter_ling.bib
 %% This BibTeX bibliography file in UTF-8 format was created using Papers.
 %% http://mekentosj.com/papers/

 %% Workshop paper (EMNLP 2012 Student Research Workshop). The venue is a
 %% proceedings volume, so the entry is typed inproceedings with the venue in booktitle.
 @inproceedings{Williams:2012p29780,
   author = {Williams, J.},
   title = {Extracting fine-grained durations for verbs from {Twitter}},
   booktitle = {Proceedings of the 2012 Student Research Workshop, EMNLP2012},
   pages = {49},
   year = {2012},
   abstract = {We seek to automatically estimate typical durations for events and habits described in Twitter tweets. A corpus of more than 14 million tweets containing temporal duration information was collected. These tweets were classified as to their habituality status using a bootstrapped, decision tree. For each verb lemma, associated duration information was collected for episodic and habitual uses of the verb. Summary statistics for 483 verb lemmas and their typical habit and episode durations has been compiled and made available. This automatically generated duration information is broadly comparable to hand-annotation.},
   date-added = {2012-10-25 16:07:35 +0100},
   date-modified = {2013-06-11 11:50:03 +0100},
   pmid = {related:XFyMUXyrE6EJ},
   local-url = {file://localhost/Users/acepor/Dropbox/Papers/2012/Williams/Extracting%20fine-grained%20durations%20for%20verbs%20from%20Twitter.pdf},
   uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p29780},
   rating = {0}
 }

 %% The only venue information exported for this item is the download host
 %% (gradworks.umi.com), which is not a journal; it is kept in howpublished of a misc entry.
 %% NOTE(review): the host suggests a thesis/dissertation record -- confirm, then retype
 %% as a thesis entry once the school is known.
 @misc{Williams:2012p29819,
   author = {Williams, J.},
   title = {Extracting and modeling typical durations of events and habits from {Twitter}},
   howpublished = {gradworks.umi.com},
   year = {2012},
   month = jan,
   url = {http://gradworks.umi.com/15/14/1514611.html},
   date-added = {2012-10-26 12:49:49 +0100},
   date-modified = {2013-06-11 11:49:45 +0100},
   local-url = {file://localhost/Users/acepor/Dropbox/Papers/2012/Williams/Extracting%20and%20modeling%20typical%20durations%20of%20events%20and%20habits%20from.pdf},
   uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p29819},
   rating = {0}
 }

 %% Item distributed from ark.cs.cmu.edu; that host name is not a journal, so it is kept
 %% in howpublished of a misc entry. Percent signs in the abstract are escaped so the
 %% field is safe if a style prints it.
 %% NOTE(review): the URL file name (tr12) hints at a technical report -- confirm, then
 %% retype with the issuing institution.
 @misc{Owoputi:2012p28831,
   author = {Owoputi, O. and O'Connor, B. and Dyer, C. and Gimpel, K. and Schneider, N.},
   title = {Part-of-Speech Tagging for {Twitter}: Word Clusters and Other Advances},
   howpublished = {ark.cs.cmu.edu},
   year = {2012},
   month = jan,
   url = {http://www.ark.cs.cmu.edu/TweetNLP/owoputi+etal.tr12.pdf},
   abstract = {We present improvements to a Twitter part-of-speech tagger, making use of several new features and large-scale word clustering. With these changes, the tagging accuracy increased from 89.2\% to 92.8\% and the tagging speed is 40 times faster. In addition, we expanded our Twitter tokenizer to support a broader range of Unicode characters, emoticons, and URLs. Finally, we annotate and evaluate on a new tweet dataset, DAILYTWEET547, that is more statistically representative of English-language Twitter as a whole. The new tagger is released as TweetNLP version 0.3, along with the new annotated data and large-scale word clusters at http://www.ark.cs.cmu.edu/TweetNLP.},
   date-added = {2012-09-30 22:01:58 +0100},
   date-modified = {2012-09-30 22:03:25 +0100},
   local-url = {file://localhost/Users/acepor/Dropbox/Papers/2012/Owoputi/Part-of-Speech%20Tagging%20for%20Twitter%20Word%20Clusters%20and%20Other%20Advances.pdf},
   uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p28831},
   read = {Yes},
   rating = {0}
 }

 %% Journal article. The exported URL was percent-encoded twice (%25EF...); it is stored
 %% here with a single level of encoding.
 %% NOTE(review): confirm the link still resolves.
 @article{Wigley:2011p24215,
   author = {Wigley, R.},
   title = {Novel noise? {A} systems-theoretical approach to {Twitter}},
   journal = {Cultural Policy, Criticism and Management Research},
   year = {2011},
   url = {http://culturalpolicyjournal.org/current-issue/%EF%BB%BFnovel-noise/},
   abstract = {This paper explores instances where communication using the medium of Twitter is shown to be in tension with communicative codes of the mass media and law, and asks whether the micro-blogging service can be described as a novel system of communication. Utilising Niklas Luhmann's systems-theoretical approach to sociological analysis to analyse specific cases, the paper assesses Twitter's potential stability as a social system based on communication. Evidence regarding the basic conditions of system formation is sought in three cases where Twitter may be identified as a conduit for communication resulting in action or dissent. In asking whether Twitter fulfils the properties required for system formation, this paper suggests that Luhmann's systems theory provides a valuable framework for deeper analysis of social media tools.},
   date-added = {2011-10-06 15:15:38 +0100},
   date-modified = {2012-04-18 14:09:33 +0100},
   local-url = {file://localhost/Users/acepor/Dropbox/Papers/2011/Wigley/Novel%20noise?%20A%20systems-theoretical%20approach%20to%20Twitter.pdf},
   uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p24215},
   rating = {0}
 }

 %% Journal article (Discourse and Communication 6(2)). The ampersand in the journal name
 %% is written as a balanced brace group; the exported form produced a stray closing
 %% brace in LaTeX output.
 @article{page_linguistics_2012,
   author = {Page, Ruth},
   title = {The Linguistics of Self-Branding and Micro-Celebrity in {Twitter}: The Role of Hashtags},
   journal = {Discourse {\&} Communication},
   volume = {6},
   number = {2},
   pages = {181--201},
   year = {2012},
   doi = {10.1177/1750481312437441},
   url = {http://dcm.sagepub.com/cgi/content/abstract/6/2/181},
   note = {Journal Article},
   abstract = {Twitter is a linguistic marketplace (Bourdieu, 1977) in which the processes of self-branding and micro-celebrity (Marwick, 2010) depend on visibility as a means of increasing social and economic gain. Hashtags are a potent resource within this system for promoting the visibility of a Twitter update (and, by implication, the update's author). This study analyses the frequency, types and grammatical context of hashtags which occurred in a dataset of approximately 92,000 tweets, taken from 100 publically available Twitter accounts, comparing the discourse styles of corporations, celebrity practitioners and `ordinary' Twitter members. The results suggest that practices of self-branding and micro-celebrity operate on a continuum which reflects and reinforces the social and economic hierarchies which exist in offline contexts. Despite claims that hashtags are `conversational', this study suggests that participatory culture in Twitter is not evenly distributed, and that the discourse of celebrity practitioners and corporations exhibits the synthetic personalization (Fairclough, 1989) typical of mainstream media forms of broadcast talk.},
   date-added = {2013-04-13 21:02:21 +0100},
   date-modified = {2013-04-13 21:02:22 +0100},
   local-url = {file://localhost/Users/acepor/Dropbox/Papers/2012/Page/The%20Linguistics%20of%20Self-Branding%20and%20Micro-Celebrity%20in%20Twitter%20The%20Role.pdf},
   uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p30615},
   read = {Yes},
   rating = {0}
 }

 %% No venue was exported for this item. An article entry requires a journal field, so
 %% the record is typed misc until the venue is known.
 @misc{Altman:2012p29802,
   author = {Altman, E. and Portilla, Y.},
   title = {Geo-linguistic fingerprint and the evolution of languages in {Twitter}},
   year = {2012},
   abstract = {Having access to content of messages sent by some given group of subscribers of a social network may be used to identify (and quantify) some features of that group. The feature can stand for the level of interest in some event or product, or for the popularity of some idea, or a musical hit or of a political figure. The feature can also stand for the way the written language is used and transformed, the way words are spelled and the way new grammatical rules appear. This paper has two goals. First, we identify features of groups of subscribers that have their geographic location and their language in common. We develop a methodology that allows one to perform such a study using freely available statistical tools which makes use of a part of all tweets which Twitter makes available for free over the Internet. The methodology is based on the fact that one can differentiate among some geographic areas according to the activity pattern of tweets during the time of the day. The second objective is to present our findings on the way spelling and new words have are used in Twitter. We analyze differences in appearance of new spellings among communities that are characterized by different locations but have a common language.},
   date-added = {2012-10-26 12:49:46 +0100},
   date-modified = {2012-11-03 14:57:29 +0000},
   pmid = {related:sZZbBTZ9EyQJ},
   local-url = {file://localhost/Users/acepor/Dropbox/Papers/2012/Altman/Geo-linguistic%20fingerprint%20and%20the%20evolution%20of%20languages%20in%20Twitter.pdf},
   uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p29802},
   rating = {0}
 }

 %% Journal article in New Media and Society.
 %% NOTE(review): volume = 1 / number = 19 look unusual next to the 2011 early-access
 %% URL -- confirm against the publisher record before citing.
 @article{Zappavigna:2011p18968,
   author = {Zappavigna, M.},
   title = {Ambient affiliation: A linguistic perspective on {Twitter}},
   journal = {New Media {\&} Society},
   volume = {1},
   number = {19},
   year = {2011},
   month = jan,
   doi = {10.1177/1461444810385097},
   url = {http://nms.sagepub.com/content/early/2011/05/26/1461444810385097.abstract},
   abstract = {This article explores how language is used to build community with the microblogging service, Twitter (www.twitter.com). Systemic Functional Linguistic (SFL), a theory of language use in its social context, is employed to analyse the structure and meaning of `tweets' (posts to Twitter) in a corpus of 45,000 tweets collected in the 24 hours after the announcement of Barak Obama's victory in the 2008 US presidential elections. This analysis examines the evaluative language used to affiliate in tweets. The article shows how a typographic convention, the hashtag, has extended its meaning potential to operate as a linguistic marker referencing the target of evaluation in a tweet (e.g. {\#}Obama). This both renders the language searchable and is used to upscale the call to affiliate with values expressed in the tweet. We are currently witnessing a cultural shift in electronic discourse from online conversation to such `searchable talk'.},
   date-added = {2011-06-06 22:47:47 +0100},
   date-modified = {2013-07-14 10:22:32 +0100},
   local-url = {file://localhost/Users/acepor/Dropbox/Papers/2011/Zappavigna/Ambient%20affiliation%20A%20linguistic%20perspective%20on%20Twitter.pdf},
   uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p18968},
   read = {Yes},
   rating = {0}
 }

 %% No venue was exported for this item. An article entry requires a journal field, so
 %% the record is typed misc until the venue is known. Line-break hyphenation left over
 %% from PDF extraction has been removed from the abstract.
 @misc{Cogan:2012p29800,
   author = {Cogan, P. and Andrews, M. and Bradonjic, M. and Tucci, G. and Kennedy, W. S. and Sala, A.},
   title = {Reconstruction and Analysis of {Twitter} Conversation Graphs},
   year = {2012},
   abstract = {User interactions over social networks has been an emergent theme over the last several years. In contrast to previous work we focus on characterizing user communications patterns around an initial post, or conversation root. Specifically, we focus on how other users respond to these roots and how the complete conversation initiated by this root evolves over time. For this purpose we focus our investigation on Twitter, the biggest micro-blogging social network. To the best of our knowledge this is the first such method that is able to reconstruct complete conversations around initial tweets. We propose a robust approach for reconstructing complete conversations and compare the resulting graph structures against those obtained from previous crawling strategies based on keyword searches. Our crawl provides a large scale dataset, ideal for computer scientists to run large scale experimental evaluations, however our dataset is made of a collection of small scale, highly controlled and complete conversation graphs ideal for a sociological investigation. We believe our work will provide the proper dataset to establish concrete collaborations with interdisciplinary expertise.},
   date-added = {2012-10-26 12:49:49 +0100},
   date-modified = {2013-06-11 15:35:49 +0100},
   pmid = {related:GYyodHFyEW0J},
   local-url = {file://localhost/Users/acepor/Dropbox/Papers/2012/Cogan/Reconstruction%20and%20Analysis%20of%20Twitter%20Conversation%20Graphs.pdf},
   uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p29800},
   rating = {0}
 }

 %% No venue was exported for this item. An article entry requires a journal field, so
 %% the record is typed misc until the venue is known.
 @misc{Lake:2010p18816,
   author = {Lake, T.},
   title = {Status Report: {Twitter} {NLP}},
   year = {2010},
   abstract = {Analysis of natural language is a historically difficult tasks for computers. One could easily make the argument that the complete and accurate analysis of any piece of natural language for non-numerical information would require a class of machines which can `understand' natural language, thus such machines would need be bestowed with human like intelligence capabilities. However, the previous statement does not imply useful knowledge or behavior can not be obtained from such an endeavor without satisfying the latter condition. This paper will describe the status, methods, goals, and real world applicability of such a project.},
   date-added = {2011-06-04 23:00:39 +0100},
   date-modified = {2012-04-18 14:09:19 +0100},
   pmid = {related:JidgCuAOwdIJ},
   local-url = {file://localhost/Users/acepor/Dropbox/Papers/2010/Lake/Status%20Report%20Twitter%20NLP.pdf},
   uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p18816},
   read = {Yes},
   rating = {0}
 }

 %% Conference paper (ACM CSCW 2012). The venue is a proceedings volume, so the entry is
 %% typed inproceedings with the venue in booktitle. Percent signs in the abstract are
 %% escaped so the field is safe if a style prints it.
 @inproceedings{Andre:2012p27691,
   author = {Andr{\'e}, P. and Bernstein, M. and Luther, K.},
   title = {Who gives a tweet?: evaluating microblog content value},
   booktitle = {Proceedings of the ACM 2012 conference on Computer Supported Cooperative Work},
   pages = {471--474},
   year = {2012},
   abstract = {While microblog readers have a wide variety of reactions to the content they see, studies have tended to focus on extremes such as retweeting and unfollowing. To understand the broad continuum of reactions in-between, which are typically not shared publicly, we designed a website that collected the first large corpus of follower ratings on Twitter updates. Using our dataset of over 43,000 voluntary ratings, we find that nearly 36\% of the rated tweets are worth reading, 25\% are not, and 39\% are middling. These results suggest that users tolerate a large amount of less-desired content in their feeds. We find that users value information sharing and random thoughts above me-oriented or presence updates. We also offer insight into evolving social norms, such as lack of context and misuse of @mentions and hashtags. We discuss implications for emerging practice and tool design.},
   date-added = {2012-04-24 15:00:43 +0100},
   date-modified = {2013-06-11 10:06:44 +0100},
   pmid = {15094236176906694364related:3K5fiNR_edEJ},
   local-url = {file://localhost/Users/acepor/Dropbox/Papers/2012/Andr%C3%A9/Who%20gives%20a%20tweet?%20evaluating%20microblog%20content%20value.pdf},
   uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p27691},
   read = {Yes},
   rating = {0}
 }

 %% Journal article, EPJ Data Science 2(1). The proper noun in the title is
 %% brace-protected so sentence-casing styles keep its capital.
 @article{Bryden:2013p31150,
   author = {Bryden, John and Funk, Sebastian and Jansen, Vincent},
   title = {Word usage mirrors community structure in the online social network {Twitter}},
   journal = {EPJ Data Science},
   volume = {2},
   number = {1},
   pages = {1--9},
   year = {2013},
   abstract = {Background: Language has functions that transcend the transmission of information and varies with social context. To find out how language and social network structure interlink, we studied communication on Twitter, a broadly-used online messaging service.
   Results: We show that the network emerging from user communication can be structured into a hierarchy of communities, and that the frequencies of words used within those communities closely replicate this pattern. Consequently, communities can be characterised by their most significantly used words. The words used by an individual user, in turn, can be used to predict the community of which that user is a member.
   Conclusions: This indicates a relationship between human language and social networks, and suggests that the study of online communication offers vast potential for understanding the fabric of human society. Our approach can be used for enriching community detection with word analysis, which provides the ability to automate the classification of communities in social networks and identify emerging social groups.},
   date-added = {2013-06-04 17:39:36 +0100},
   date-modified = {2013-06-11 10:06:31 +0100},
   pmid = {related:gO0q9HyhYMcJ},
   local-url = {file://localhost/Users/acepor/Dropbox/Papers/2013/Bryden/Word%20usage%20mirrors%20community%20structure%20in%20the%20online%20social%20network.pdf},
   uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p31150},
   rating = {0}
 }

 %% Item distributed from tigger.uic.edu; that host name is not a journal, so it is kept
 %% in howpublished of a misc entry. The abstract is the search-result snippet that was
 %% exported with the record and is reproduced unchanged.
 @misc{Papacharissi:2011p23060,
   author = {Papacharissi, Z. and Oliveira, M.},
   title = {The Rhythms of News Storytelling on {Twitter}: Coverage of the {January} 25th {Egyptian} uprising on {Twitter}},
   howpublished = {tigger.uic.edu},
   year = {2011},
   month = jan,
   url = {http://tigger.uic.edu/~zizi/Site/Research_files/RhythmsNewsStorytellingTwitterWAPORZPMO.pdf},
   abstract = {Page 1. The Rhythms of News  Storytelling on Twitter : Coverage of the January  25th Egyptian  uprising on Twitter Zizi Papacharissi, PhD Professor and Head, Communication, University of Illinois‐Chicago Maria de Fatima Oliveira ...},
   date-added = {2011-09-18 11:07:23 +0100},
   date-modified = {2012-04-18 14:10:08 +0100},
   local-url = {file://localhost/Users/acepor/Dropbox/Papers/2011/Papacharissi/The%20Rhythms%20of%20News%20Storytelling.pdf},
   uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p23060},
   read = {Yes},
   rating = {0}
 }

 %% Item distributed from lrec-conf.org; that host name is not a journal, so it is kept
 %% in howpublished of a misc entry.
 %% NOTE(review): the URL path (proceedings/lrec2012) suggests an LREC 2012 proceedings
 %% paper -- confirm, then retype as inproceedings with the proper booktitle.
 @misc{Williams:2012p28367,
   author = {Williams, J. and Katz, G.},
   title = {A New {Twitter} Verb Lexicon for Natural Language Processing},
   howpublished = {lrec-conf.org},
   year = {2012},
   url = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/1076_Paper.pdf},
   abstract = {We describe in-progress work on the creation of a new lexical resource that contains a list of 486 verbs annotated with quantified temporal durations for the events that they describe. This resource is being compiled from more than 14 million tweets from the Twitter microblogging site. We are creating this lexicon of verbs and typical durations to address a gap in the available information that is represented in existing research. The data that is contained in this lexicon is unlike any existing resources, which have been traditionally comprised of literature excerpts, news stories, and full-length weblogs. This kind of knowledge about how long an event lasts is crucial for natural language processing and is especially useful when the temporal duration of an event is implied. We are using data from Twitter because Twitter is a rich resource since people are publicly posting real events and real durations of those events throughout the day.},
   date-added = {2012-06-06 12:46:36 +0100},
   date-modified = {2012-07-14 12:44:04 +0100},
   local-url = {file://localhost/Users/acepor/Dropbox/Papers/2012/Williams/A%20New%20Twitter%20Verb%20Lexicon%20for%20Natural%20Language%20Processing.pdf},
   uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p28367},
   read = {Yes},
   rating = {0}
 }

 %% Item distributed from knoesis.wright.edu; that host name is not a journal, so it is
 %% kept in howpublished of a misc entry. Line-break hyphenation left over from PDF
 %% extraction has been removed from the abstract.
 %% NOTE(review): year is 2009 but the URL file name mentions ISWC 2010 -- confirm the
 %% publication year and venue.
 @misc{Jadhav:2009p1793,
   author = {Jadhav, A. and Purohit, H. and Kapanipathi, P. and Ananthram, Pramod and Ranabahu, A. and Nguyen, V. and Mendes, P. N. and Smith, A. G. and Cooney, M. and Sheth, A.},
   title = {{Twitris} 2.0: Semantically Empowered System for Understanding Perceptions From Social Data},
   howpublished = {knoesis.wright.edu},
   year = {2009},
   url = {http://knoesis.wright.edu/library/download/Twitris_ISWC_2010.pdf},
   abstract = {We present Twitris 2.0 1, a Semantic Web application that facilitates understanding of social perceptions by Semantics-based processing of massive amounts of event-centric data. Twitris 2.0 addresses challenges in large scale processing of social data, preserving spatio-temporal-thematic properties. Twitris 2.0 also covers context based semantic integration of multiple Web resources and expose semantically enriched social data to the public domain. Semantic Web technologies enable the system's integration and analysis abilities.},
   date-added = {2010-11-04 10:18:02 +0000},
   date-modified = {2012-04-18 14:09:13 +0100},
   pmid = {related:IZmwaw6OqykJ},
   local-url = {file://localhost/Users/acepor/Dropbox/Papers/2009/Jadhav/Twitris%202.0%20Semantically%20Empowered%20System.pdf},
   uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p1793},
   read = {Yes},
   rating = {0}
 }

 %% Item distributed from lnu.diva-portal.org; that host name is not a journal, so it is
 %% kept in howpublished of a misc entry.
 %% NOTE(review): the host looks like a university publication portal -- confirm whether
 %% this is a thesis and retype accordingly.
 @misc{Lindqvist:2011p24220,
   author = {Lindqvist, A.},
   title = {Trickin', wanna and ain't: Gender differences in the use of vernacular verb forms on {Twitter}},
   howpublished = {lnu.diva-portal.org},
   year = {2011},
   url = {http://lnu.diva-portal.org/smash/get/diva2:431162/FULLTEXT02},
   abstract = {The Internet site Twitter carries features of spontaneous and unedited informal language which makes it perfect for linguistic studies because people tend to write without trying to correct the language. Men and women use language differently in, for example, online interactions but there also exist similarities between the two sexes. The aim of this study is to investigate these differences and similarities between men's and women's use of vernacular forms on Twitter. In order to achieve the aim, 4000 tweets were collected and analysed from participants of both sexes to establish their different use of vernacular verb forms. The result turned out to be very surprising because previous studies, for example the study by Fischer (1958) within the same area, have shown that men tend to use vernacular forms more than women. Note, that the previous studies are based on offline interactions while this study is based on an online communication. However, this study showed that women used more vernacular verb forms than men on Twitter. This could be the case because women might want to sound more aggressive and masculine for some reason by using the non-standard forms on Twitter. If a different investigation within the same area were carried out, the result might be different because this study is limited to the analysis of only one Internet source.},
   date-added = {2011-10-13 02:55:42 +0100},
   date-modified = {2012-04-18 14:09:13 +0100},
   local-url = {file://localhost/Users/acepor/Dropbox/Papers/2011/Lindqvist/Trickin'%20wanna%20and%20ain't%20Gender%20differences%20in%20the%20use%20of.pdf},
   uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p24220},
   read = {Yes},
   rating = {0}
 }

 %% Conference paper (WWW 2010). The venue is a conference, so the entry is typed
 %% inproceedings with the venue in booktitle. The abstract has its percent sign escaped
 %% and PDF-extraction debris (split numerals, line-break hyphen) repaired.
 @inproceedings{Kwak:2010p8155,
   author = {Kwak, H. and Lee, C. and Park, H. and Moon, S.},
   title = {What is {Twitter}, a social network or a news media?},
   booktitle = {WWW 2010},
   year = {2010},
   month = jan,
   url = {http://portal.acm.org/citation.cfm?id=1772690.1772751},
   abstract = {Twitter, a microblogging service less than three years old, commands more than 41 million users as of July 2009 and is growing fast. Twitter users tweet about any topic within the 140-character limit and follow others to receive their tweets. The goal of this paper is to study the topological characteristics of Twitter and its power as a new medium of information sharing.
   We have crawled the entire Twitter site and obtained 41.7 million user profiles, 1.47 billion social relations, 4,262 trending topics, and 106 million tweets. In its follower-following topology analysis we have found a non-power-law follower distribution, a short effective diameter, and low reciprocity, which all mark a deviation from known characteristics of human social networks [28]. In order to identify influentials on Twitter, we have ranked users by the number of followers and by PageRank and found two rankings to be similar. Ranking by retweets differs from the previous two rankings, indicating a gap in influence inferred from the number of followers and that from the popularity of one's tweets. We have analyzed the tweets of top trending topics and reported on their temporal behavior and user participation. We have classified the trending topics based on the active period and the tweets and show that the majority (over 85\%) of topics are headline news or persistent news in nature. A closer look at retweets reveals that any retweeted tweet is to reach an average of 1,000 users no matter what the number of followers is of the original tweet. Once retweeted, a tweet gets retweeted almost instantly on next hops, signifying fast diffusion of information after the 1st retweet.
   To the best of our knowledge this work is the first quantitative study on the entire Twittersphere and information diffusion on it.},
   date-added = {2011-01-17 15:28:58 +0000},
   date-modified = {2013-06-11 14:19:18 +0100},
   pmid = {9291640701307833453related:bcwDFv2J8oAJ},
   local-url = {file://localhost/Users/acepor/Dropbox/Papers/2010/Kwak/What%20is%20Twitter%20a%20social.pdf},
   uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p8155},
   read = {Yes},
   rating = {0}
 }

 %% Journal article in MSEUF Research Studies. The exported URL was percent-encoded twice
 %% (%255B%255D); it is stored here with a single level of encoding.
 %% NOTE(review): confirm the link still resolves.
 @article{Jalbuena:2013p30963,
   author = {Jalbuena, M.},
   title = {Linguistic Features of {English} in {Twitter}},
   journal = {MSEUF Research Studies},
   year = {2013},
   month = jan,
   url = {http://www.ejournals.ph/index.php?journal=MSEUFRS&page=article&op=viewArticle&path%5B%5D=6258},
   abstract = {This paper looked into the tweets of five prominent personalities in each of the following fields - education, entertainment, social life, politics and personal level -and analyzed the tone as well as the typing styles embedded in the lexical, grammatical and rhetorical features of the tweets. The content words or lexical features of English used in the five categories of tweets studied were neutral number nouns, singulars and plurals and proper nouns; unmarked adverbs, adverb particles and wh- adverbs; unmarked adjectives, comparatives and superlatives; the base form of the verb ``be'', past form of the verb ``be'', -ing form of the verb ``be'', infinitive of the verb ``be'', past participle of the verb ``be'', -s form of the verb ``be'', base form of the verb ``do'', infinitive of the verb ``do'', infinitive form of the verb ``have'', base form of the lexical verb, past tense form of the lexical verb, -ing form of the lexical verb, infinitive of the lexical verb , past participle form of lexical verb and -s form of the lexical verb. Majority of the Twitter users from the five categories used lexical verbs followed by nouns, adjectives and adverbs in their tweets. The dominant grammatical features of English used in Twitter are prepositions; indefinite, personal, reflexive and wh-pronouns; auxiliary verbs, the base form of the verb ``be'', past form of the verb ``be'', -ing form of the verb ``be'', infinitive of the verb ``be'', past participle of the verb ``be'', -s form of the verb ``be'', base form of the verb ``do'', past form of the verb ``do'', infinitive of the verb ``do'', infinitive form of the verb ``have'', base form of the lexical verb, past tense form of the lexical verb, -ing form of the lexical verb, infinitive of lexical verb, past participle form of the lexical verb and -s form of the lexical verb; conjunctions; articles and interjections. Among the tweets analyzed, more posts utilized formal rather than informal language.
   More emoticons than punctuation marks were used by Twitter users to express themselves. Moreover, the Twitter users analyzed had more positive than negative sentiments in their tweet posts. Future researchers can expand this study and look into the other grammatical features of Twitter English that may be a basis for instructional materials development.},
   date-added = {2013-05-08 18:10:04 +0100},
   date-modified = {2013-06-11 09:57:33 +0100},
   local-url = {file://localhost/Users/acepor/Dropbox/Papers/2013/Jalbuena/Linguistic%20Features%20of%20English%20in%20Twitter.pdf},
   uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p30963},
   read = {Yes},
   rating = {0}
 }

 %% Conference paper (Proceedings of WWW). The venue is a proceedings volume, so the entry
 %% is typed inproceedings with the venue in booktitle. The word after the exclamation
 %% mark is brace-protected so sentence-casing styles keep its capital.
 @inproceedings{DanescuNiculescuMizil:2011p14485,
   author = {Danescu-Niculescu-Mizil, C. and Gamon, M. and Dumais, S.},
   title = {Mark My Words! {Linguistic} Style Accommodation in Social Media},
   booktitle = {Proceedings of WWW},
   pages = {141--150},
   year = {2011},
   annote = {The data citations in this paper are wrongly cited! },
   abstract = {The psycholinguistic theory of communication accommodation accounts for the general observation that participants in conversations tend to converge to one another's communicative behavior: they coordinate in a variety of dimensions including choice of words, syntax, utterance length, pitch and gestures. In its almost forty years of existence, this theory has been empirically supported exclusively through small-scale or controlled laboratory studies. Here we address this phenomenon in the context of Twitter conversations. Undoubtedly, this setting is unlike any other in which accommodation was observed and, thus, challenging to the theory. Its novelty comes not only from its size, but also from the non real-time nature of conversations, from the 140 character length restriction, from the wide variety of social relation types, and from a design that was initially not geared towards conversation at all. Given such constraints, it is not clear a priori whether accommodation is robust enough to occur given the constraints of this new environment. To investigate this, we develop a probabilistic framework that can model accommodation and measure its effects. We apply it to a large Twitter conversational dataset specifically developed for this task. This is the first time the hypothesis of linguistic style accommodation has been examined (and verified) in a large scale, real world setting.
   Furthermore, when investigating concepts such as stylistic influence and symmetry of accommodation, we discover a complexity of the phenomenon which was never observed before. We also explore the potential relation between stylistic influence and network features commonly associated with social status.},
   date-added = {2011-03-14 20:51:41 +0000},
   date-modified = {2012-04-18 14:09:49 +0100},
   pmid = {related:f_ENXI69eUEJ},
   local-url = {file://localhost/Users/acepor/Dropbox/Papers/2011/Danescu-Niculescu-Mizil/Mark%20My%20Words!%20Linguistic%20Style%20Accommodation%20in%20Social%20Media.pdf},
   uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p14485},
   read = {Yes},
   rating = {5}
 }

 %% No venue was exported for this item. An article entry requires a journal field, so
 %% the record is typed misc until the venue is known. The surname O'Connor is spelled
 %% with its apostrophe, and the abstract has its percent sign escaped and extraction
 %% debris repaired.
 @misc{Gimpel:2011p17813,
   author = {Gimpel, Kevin and Schneider, Nathan and O'Connor, Brendan and Das, Dipanjan and Mills, Daniel and Eisenstein, Jacob and Heilman, Michael and Yogatama, Dani and Flanigan, Jeffrey and Smith, Noah A.},
   title = {Part-of-speech tagging for {Twitter}: Annotation, features, and experiments},
   year = {2011},
   abstract = {We address the problem of part-of-speech tagging for English data from the popular micro-blogging service Twitter. We develop a tagset, annotate data, develop features, and report tagging results nearing 90\% accuracy. The data and tools have been made available to the research community with the goal of enabling richer text analysis of Twitter and related social media data sets.},
   date-added = {2011-05-03 17:25:53 +0100},
   date-modified = {2013-07-26 10:01:46 +0100},
   pmid = {18210696637056143703related:V9EhZBtkufwJ},
   local-url = {file://localhost/Users/acepor/Dropbox/Papers/2011/Gimpel/Part-of-speech%20tagging%20for%20twitter%20Annotation%20features%20and%20experiments.pdf},
   uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p17813},
   read = {Yes},
   rating = {0}
 }

 %% Workshop paper (first workshop on Online social networks). The venue is a proceedings
 %% volume, so the entry is typed inproceedings with the venue in booktitle. Line-break
 %% hyphenation left over from PDF extraction has been removed from the abstract.
 @inproceedings{Krishnamurthy:2008p4789,
   author = {Krishnamurthy, B. and Gill, P. and Arlitt, Martin},
   title = {A few chirps about {Twitter}},
   booktitle = {Proceedings of the first workshop on Online social networks},
   pages = {19--24},
   year = {2008},
   abstract = {Web 2.0 has brought about several new applications that have enabled arbitrary subsets of users to communicate with each other on a social basis. Such communication increasingly happens not just on Facebook and MySpace but on several smaller network applications such as Twitter and Dodgeball. We present a detailed characterization of Twitter, an application that allows users to send short messages. We gathered three datasets (covering nearly 100,000 users) including constrained crawls of the Twitter network using two different methodologies, and a sampled collection from the publicly available timeline. We identify distinct classes of Twitter users and their behaviors, geographic growth patterns and current size of the network, and compare crawl results obtained under rate limiting constraints.},
   date-added = {2010-12-16 17:40:06 +0000},
   date-modified = {2013-06-11 14:19:51 +0100},
   pmid = {386010660995067638related:9u4fqKZiWwUJ},
   local-url = {file://localhost/Users/acepor/Dropbox/Papers/2008/Krishnamurthy/A%20few%20chirps%20about%20twitter.pdf},
   uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p4789},
   read = {Yes},
   rating = {0}
 }

 %% NOTE(review): booktitle expanded from the truncated "Proceedings of NAACL- ..." and month set from the NAACL-HLT 2013 conference date -- verify against the ACL Anthology record N13-1039.
 @inproceedings{Owoputi:2013p31156,
 author = {Owoputi, O. and O'Connor, B. and Dyer, C. and Gimpel, K. and Schneider, N. and Smith, N. A.},
 booktitle = {Proceedings of the 2013 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
 title = {Improved part-of-speech tagging for online conversational text with word clusters},
 abstract = {We consider the problem of part-of-speech tagging for informal, online conversational text. We systematically evaluate the use of large-scale unsupervised word clustering and new lexical features to improve tagging accuracy. With these features, our system achieves state-of-the-art tagging results on both Twitter and IRC POS tagging tasks; Twitter tagging is improved from 90\% to 93\% accuracy (more than 3\% absolute). Qualitative analysis of these word clusters yields insights about NLP and linguistic phenomena in this genre. Additionally, we contribute the first POS annotation guidelines for such text and release a new dataset of English language tweets annotated using these guidelines. Tagging software, annotation guidelines, and large-scale word clusters are available at: http://www.ark.cs.cmu.edu/TweetNLP This paper describes release 0.3 of the ``CMU Twitter Part-of-Speech Tagger'' and annotated data.},
 year = {2013},
 month = jun,
 date-added = {2013-06-04 17:51:32 +0100},
 date-modified = {2013-06-11 10:00:30 +0100},
 pmid = {1899427335040502113},
 url = {http://www.aclweb.org/anthology/N/N13/N13-1039.pdf},
 local-url = {file://localhost/Users/acepor/Dropbox/Papers/2013/Owoputi/Improved%20part-of-speech%20tagging%20for%20online%20conversational%20text%20with%20word%20clusters.pdf},
 uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p31156},
 rating = {0}
 }

 @inproceedings{Pak:2010p2,
 author = {Pak, A. and Paroubek, P.},
 booktitle = {Proceedings of {LREC} 2010},
 title = {{Twitter} as a corpus for sentiment analysis and opinion mining},
 abstract = {Microblogging today has become a very popular communication tool among Internet users. Millions of users share opinions on different aspects of life everyday. Therefore microblogging web-sites are rich sources of data for opinion mining and sentiment analysis. Because microblogging has appeared relatively recently, there are a few research works that were devoted to this topic. In our paper, we focus on using Twitter, the most popular microblogging platform, for the task of sentiment analysis. We show how to automatically collect a corpus for sentiment analysis and opinion mining purposes. We perform linguistic analysis of the collected corpus and explain discovered phenomena. Using the corpus, we build a sentiment classifier, that is able to determine positive, negative and neutral sentiments for a document. Experimental evaluations show that our proposed techniques are efficient and performs better than previously proposed methods. In our research, we worked with English, however, the proposed technique can be used with any other language.},
 year = {2010},
 keywords = {sa, corpus},
 date-added = {2010-10-26 19:48:35 +0100},
 date-modified = {2013-07-13 13:12:12 +0100},
 pmid = {3316879687933033692related:3Pz0qYDsBy4J},
 local-url = {file://localhost/Users/acepor/Dropbox/Papers/2010/Pak/Twitter%20as%20a%20corpus%20for%20sentiment%20analysis%20and%20opinion%20mining.pdf},
 uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p2},
 read = {Yes},
 rating = {4}
 }

 %% NOTE(review): month changed from Dec to Oct to match the arXiv identifier 1210.0848 (YYMM = 2012-10).
 @misc{Doan:2012p29778,
 author = {Doan, S. and Ohno-Machado, L. and Collier, N.},
 title = {Enhancing {Twitter} Data Analysis with Simple Semantic Filtering: Example in Tracking Influenza-Like Illnesses},
 abstract = {Systems that exploit publicly available user generated content such as Twitter messages have been successful in tracking seasonal influenza. We developed a novel filtering method for Influenza-Like-Illnesses (ILI)-related messages using 587 million messages from Twitter micro-blogs. We first filtered messages based on syndrome keywords from the BioCaster Ontology, an extant knowledge model of laymen's terms. We then filtered the messages according to semantic features such as negation, hashtags, emoticons, humor and geography. The data covered 36 weeks for the US 2009 influenza season from 30th August 2009 to 8th May 2010. Results showed that our system achieved the highest Pearson correlation coefficient of 98.46\% (p-value$<$2.2e-16), an improvement of 3.98\% over the previous state-of-the-art method. The results indicate that simple NLP-based enhancements to existing approaches to mine Twitter data can increase the value of this inexpensive resource.},
 eprint = {1210.0848},
 archiveprefix = {arXiv},
 year = {2012},
 month = oct,
 date-added = {2012-10-18 01:25:15 +0100},
 date-modified = {2012-10-18 01:27:15 +0100},
 url = {http://arxiv.org/abs/1210.0848},
 local-url = {file://localhost/Users/acepor/Dropbox/Papers/2012/Doan/Enhancing%20Twitter%20Data%20Analysis%20with%20Simple%20Semantic%20Filtering%20Example%20in.pdf},
 uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p29778},
 read = {Yes},
 rating = {0}
 }

 %% NOTE(review): the original @article had no journal; the venue (LREC 2012) was added from outside knowledge -- verify.
 @inproceedings{Roberts:2012p29833,
 author = {Roberts, K. and Roach, M. A. and Johnson, J. and Guthrie, J. and Harabagiu, S. M.},
 booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC} 2012)},
 title = {{EmpaTweet}: Annotating and Detecting Emotions on {Twitter}},
 abstract = {The rise of micro-blogging in recent years has resulted in significant access to emotion-laden text. Unlike emotion expressed in other textual sources (e.g., blogs, quotes in newswire, email, product reviews, or even clinical text), micro-blogs differ by (1) placing a strict limit on length, resulting radically in new forms of emotional expression, and (2) encouraging users to express their daily thoughts in real-time, often resulting in far more emotion statements than might normally occur. In this paper, we introduce a corpus collected from Twitter with annotated micro-blog posts (or ``tweets'') annotated at the tweet-level with seven emotions: ANGER, DISGUST, FEAR, JOY, LOVE, SADNESS, and SURPRISE. We analyze how emotions are distributed in the data we annotated and compare it to the distributions in other emotion-annotated corpora. We also used the annotated corpus to train a classifier that automatically discovers the emotions in tweets. In addition, we present an analysis of the linguistic style used for expressing emotions our corpus. We hope that these observations will lead to the design of novel emotion detection techniques that account for linguistic style and psycholinguistic theories.},
 year = {2012},
 date-added = {2012-10-26 12:49:46 +0100},
 date-modified = {2012-11-03 14:46:52 +0000},
 pmid = {related:edB5jSq3_f4J},
 local-url = {file://localhost/Users/acepor/Dropbox/Papers/2012/Roberts/EmpaTweet%20Annotating%20and%20Detecting%20Emotions%20on%20Twitter.pdf},
 uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p29833},
 rating = {0}
 }

 %% NOTE(review): booktitle derived from the proceedings file name in the URL; month set from the SIGIR 2010 conference date -- verify.
 @inproceedings{Kcman:2010p1800,
 author = {K{\i}c{\i}man, E.},
 booktitle = {Proceedings of the {SIGIR} 2010 Web N-gram Workshop},
 title = {Language Differences and Metadata Features on {Twitter}},
 abstract = {In the past several years, microblogging services like Twitter and Facebook have become a popular method of communication, allowing users to disseminate and gather information to and from hundreds or thousands (or even millions) of people, often in real-time. As much of the content on microblogging services is publicly accessible, we have recently seen many secondary services being built atop them, including services that perform significant content analysis, such as real-time search engines and trend analysis services. With the eventual goal of building more accurate and less expensive models of microblog streams, this paper investigates the degree to which language variance is related to the metadata of microblog content. We hypothesize that if a strong relationship exists between metadata features and language then we will be able to use this metadata as a trivial classifier to match individual messages with specialized, more accurate language models. To investigate the validity of this hypothesis, we analyze a corpus of over 72M Twitter messages, building language models conditioned on a variety of available message metadata.},
 year = {2010},
 month = jul,
 date-added = {2010-11-04 10:13:51 +0000},
 date-modified = {2013-06-11 14:21:02 +0100},
 pmid = {related:nn6fjcMBG7YJ},
 url = {http://research.microsoft.com/en-us/events/webngram/sigir2010web_ngram_workshop_proceedings.pdf#page=55},
 local-url = {file://localhost/Users/acepor/Dropbox/Papers/2010/K%C4%B1c%C4%B1man/Language%20Differences%20and%20Metadata%20Features%20on%20Twitter.pdf},
 uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p1800},
 read = {Yes},
 rating = {0}
 }

 %% NOTE(review): booktitle expanded from the truncated "Proceedings of Recent ..." and month set from the RANLP 2013 conference date -- verify.
 @inproceedings{Derczynski:2013p32369,
 author = {Derczynski, L. and Ritter, A. and Clark, S. and Bontcheva, K.},
 booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing ({RANLP} 2013)},
 title = {{Twitter} Part-of-Speech Tagging for All: Overcoming Sparse and Noisy Data},
 abstract = {Part-of-speech information is a pre-requisite in many NLP algorithms. However, Twitter text is difficult to part-of-speech tag: it is noisy, with linguistic errors and idiosyncratic style. We present a detailed error analysis of existing taggers, motivating a series of tagger augmentations which are demonstrated to improve performance. We identify and evaluate techniques for improving English part-of-speech tagging performance in this genre.
 Further, we present a novel approach to system combination for the case where available taggers use different tagsets, based on vote-constrained bootstrapping with unlabeled data. Coupled with assigning prior probabilities to some tokens and handling of unknown words and slang, we reach 88.7\% tagging accuracy (90.5\% on development data). This is a new high in PTB-compatible tweet part-of-speech tagging, reducing token error by 26.8\% and sentence error by 12.2\%. The model, training data and tools are made available.},
 year = {2013},
 month = sep,
 date-added = {2013-07-27 00:10:02 +0100},
 date-modified = {2013-07-27 00:10:38 +0100},
 pmid = {7780626220088589015},
 url = {http://derczynski.com/sheffield/papers/twitter_pos.pdf},
 local-url = {file://localhost/Users/acepor/Dropbox/Papers/2013/Derczynski/Twitter%20Part-of-Speech%20Tagging%20for%20All%20Overcoming%20Sparse%20and%20Noisy%20Data.pdf},
 uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p32369},
 read = {Yes},
 rating = {0}
 }

 %% NOTE(review): journal was the bare host name "tmrfindia.org"; journal name, volume and number were inferred from the URL path (ijcsa/v9i15) -- verify.
 @article{Yerva:2012p26830,
 author = {Yerva, S. and Miklos, Z. and Aberer, K.},
 journal = {International Journal of Computer Science and Applications},
 title = {Entity-based Classification of {Twitter} Messages},
 abstract = {Twitter is a popular micro-blogging service on the Web, where people can enter short messages, which then become visible to some other users of the service. While the topics of these messages varies, there are a lot of messages where the users express their opinions about some companies or their products. These messages are a rich source of information for companies for sentiment analysis or opinion mining. There is however a great obstacle for analyzing the messages directly: as the company names are often ambiguous (e.g. apple, the fruit vs. Apple Inc.), one needs first to identify, which messages are related to the company. In this paper we address this question. We present various techniques for classifying tweet messages containing a given keyword, whether they are related to a particular company with that name or not. We first present simple techniques, which make use of company profiles, which we created semi-automatically from external Web sources. Our advanced techniques take ambiguity estimations into account and also automatically extend the company profiles from the twitter stream itself. We demonstrate the effectiveness of our methods through an extensive set of experiments. Moreover, we extensively analyze the sources of errors in the classification. The analysis not only brings further improvement, but also enables to use the human input more efficiently.},
 volume = {9},
 number = {1},
 year = {2012},
 date-added = {2012-02-07 12:43:21 +0000},
 date-modified = {2012-04-18 14:10:56 +0100},
 url = {http://www.tmrfindia.org/ijcsa/v9i15.pdf},
 local-url = {file://localhost/Users/acepor/Dropbox/Papers/2012/Yerva/Entity-based%20Classification%20of%20Twitter%20Messages.pdf},
 uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p26830},
 read = {Yes},
 rating = {0}
 }

 @misc{Mocanu:2012p30459,
 author = {Mocanu, Delia and Baronchelli, Andrea and Gon{\c c}alves, Bruno and Perra, Nicola and Vespignani, Alessandro},
 title = {The {Twitter} of {Babel}: Mapping World Languages through Microblogging Platforms},
 abstract = {Large scale analysis and statistics of socio-technical systems that just a few short years ago would have required the use of consistent economic and human resources can nowadays be conveniently performed by mining the enormous amount of digital data produced by human activities. Although a characterization of several aspects of our societies is emerging from the data revolution, a number of questions concerning the reliability and the biases inherent to the big data ``proxies'' of social life are still open. Here, we survey worldwide linguistic indicators and trends through the analysis of a large-scale dataset of microblogging posts. We show that available data allow for the study of language geography at scales ranging from country-level aggregation to specific city neighborhoods. The high resolution and coverage of the data allows us to investigate different indicators such as the linguistic homogeneity of different countries, the touristic seasonal patterns within countries and the geographical distribution of different languages in multilingual regions. This work highlights the potential of geolocalized studies of open data sources to improve current analysis and develop indicators for major social phenomena in specific communities.},
 eprint = {1212.5238v1},
 archiveprefix = {arXiv},
 primaryclass = {physics.soc-ph},
 year = {2012},
 month = dec,
 keywords = {cs.CL, cs.SI, physics.soc-ph},
 date-added = {2013-01-25 11:50:35 +0000},
 date-modified = {2013-01-25 11:51:00 +0000},
 pmid = {1212.5238v1},
 url = {http://arxiv.org/abs/1212.5238v1},
 local-url = {file://localhost/Users/acepor/Dropbox/Papers/2012/Mocanu/The%20Twitter%20of%20Babel%20Mapping%20World%20Languages%20through%20Microblogging%20Platforms.pdf},
 uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p30459},
 rating = {0}
 }

 %% NOTE(review): month changed from the exporter's "Jan" placeholder to the NAACL HLT 2010 conference month -- verify.
 @inproceedings{Ritter:2010p14612,
 author = {Ritter, A. and Cherry, C. and Dolan, B.},
 booktitle = {Human Language Technologies: The 2010 Annual Conference of the North American Chapter of the {ACL}},
 title = {Unsupervised modeling of {Twitter} conversations},
 abstract = {We propose the first unsupervised approach to the problem of modeling dialogue acts in an open domain. Trained on a corpus of noisy Twitter conversations, our method discovers dialogue acts by clustering raw utterances. Because it accounts for the sequential behaviour of these acts, the learned model can provide insight into the shape of communication in a new medium. We address the challenge of evaluating the emergent model with a qualitative visualization and an intrinsic conversation ordering task. This work is inspired by a corpus of 1.3 million Twitter conversations, which will be made publicly available. This huge amount of data, available only because Twitter blurs the line between chatting and publishing, highlights the need to be able to adapt quickly to a new medium.},
 pages = {172--180},
 year = {2010},
 month = jun,
 date-added = {2011-03-24 17:22:00 +0000},
 date-modified = {2012-04-18 14:09:45 +0100},
 pmid = {11539495314679928038related:5hTIjtmFJKAJ},
 url = {http://portal.acm.org/citation.cfm?id=1858019},
 local-url = {file://localhost/Users/acepor/Dropbox/Papers/2010/Ritter/Unsupervised%20modeling%20of%20twitter%20conversations.pdf},
 uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p14612},
 read = {Yes},
 rating = {0}
 }

 @inproceedings{GonzalezIbanez:2011p25577,
 author = {Gonz{\'a}lez-Ib{\'a}{\~n}ez, R. and Muresan, S. and Wacholder, N.},
 booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies: short papers},
 title = {Identifying sarcasm in {Twitter}: a closer look},
 abstract = {Sarcasm transforms the polarity of an apparently positive or negative utterance into its opposite. We report on a method for constructing a corpus of sarcastic Twitter messages in which determination of the sarcasm of each message has been made by its author. We use this reliable corpus to compare sarcastic utterances in Twitter to utterances that express positive or negative attitudes without sarcasm. We investigate the impact of lexical and pragmatic factors on machine learning effectiveness for identifying sarcastic utterances and we compare the performance of machine learning techniques and human judges on this task. Perhaps unsurprisingly, neither the human judges nor the machine learning techniques perform very well.},
 pages = {581--586},
 volume = {2},
 year = {2011},
 date-added = {2012-01-01 20:42:24 +0000},
 date-modified = {2012-04-18 14:10:39 +0100},
 pmid = {related:RABAegYleLgJ},
 local-url = {file://localhost/Users/acepor/Dropbox/Papers/2011/Gonz%C3%A1lez-Ib%C3%A1%C3%B1ez/Identifying%20sarcasm%20in%20Twitter%20a.pdf},
 uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p25577},
 read = {Yes},
 rating = {0}
 }

 %% NOTE(review): month changed from the exporter's "Jan" placeholder to the CIKM 2009 conference month -- verify.
 @inproceedings{Carvalho:2009p6363,
 author = {Carvalho, Paula and Sarmento, L. and Silva, M. J. and Oliveira, E.},
 booktitle = {Proceedings of the 1st International {CIKM} Workshop on Topic-Sentiment Analysis for Mass Opinion ({TSA} '09)},
 title = {Clues for detecting irony in user-generated contents: oh...!! it's so easy;-)},
 abstract = {We investigate the accuracy of a set of surface patterns in identifying ironic sentences in comments submitted by users to an on-line newspaper. The initial focus is on identifying irony in sentences containing positive predicates since these sentences are more exposed to irony, making their true polarity harder to recognize. We show that it is possible to find ironic sentences with relatively high precision (from 45\% to 85\%) by exploring certain oral or gestural clues in user comments, such as emoticons, onomatopoeic expressions for laughter, heavy punctuation marks, quotation marks and positive interjections. We also demonstrate that clues based on deeper linguistic information are relatively inefficient in capturing irony in user-generated content, which points to the need for exploring additional types of oral clues.},
 pages = {53--56},
 publisher = {ACM},
 address = {New York, NY, USA},
 year = {2009},
 month = nov,
 date-added = {2011-01-13 15:32:12 +0000},
 date-modified = {2013-06-11 15:47:46 +0100},
 doi = {10.1145/1651461.1651471},
 pmid = {3975998442169591000related:2GQGdYSVLTcJ},
 url = {http://portal.acm.org/citation.cfm?id=1651461.1651471},
 local-url = {file://localhost/Users/acepor/Dropbox/Papers/2009/Carvalho/Clues%20for%20detecting%20irony%20in%20user-generated%20contents%20oh...!!%20it's%20so.pdf},
 uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p6363},
 read = {Yes},
 rating = {4}
 }

 %% NOTE(review): month changed from the exporter's "Jan" placeholder to May to match the arXiv identifier 1205.6396 (YYMM = 2012-05).
 @misc{Choy:2012p28377,
 author = {Choy, M.},
 title = {Effective Listings of Function Stop words for {Twitter}},
 abstract = {Many words in documents recur very frequently but are essentially meaningless as they are used to join words together in a sentence. It is commonly understood that stop words do not contribute to the context or content of textual documents. Due to their high frequency of occurrence, their presence in text mining presents an obstacle to the understanding of the content in the documents. To eliminate the bias effects, most text mining software or approaches make use of stop words list to identify and remove those words. However, the development of such top words list is difficult and inconsistent between textual sources. This problem is further aggravated by sources such as Twitter which are highly repetitive or similar in nature. In this paper, we will be examining the original work using term frequency, inverse document frequency and term adjacency for developing a stop words list for the Twitter data source. We propose a new technique using combinatorial values as an alternative measure to effectively list out stop words.},
 eprint = {1205.6396},
 archiveprefix = {arXiv},
 year = {2012},
 month = may,
 date-added = {2012-06-06 12:46:36 +0100},
 date-modified = {2012-07-14 13:24:30 +0100},
 url = {http://arxiv.org/abs/1205.6396},
 local-url = {file://localhost/Users/acepor/Dropbox/Papers/2012/Choy/Effective%20Listings%20of%20Function%20Stop%20words%20for%20Twitter.pdf},
 uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p28377},
 rating = {0}
 }

 @article{bruns_quantitative_2012,
 author = {Bruns, Axel and Stieglitz, Stefan},
 journal = {Journal of Technology in Human Services},
 title = {Quantitative Approaches to Comparing Communication Patterns on {Twitter}},
 abstract = {To date, the available literature mainly discusses Twitter activity patterns in the context of individual case studies, while comparative research on a large number of communicative events and their dynamics and patterns is missing. By conducting a comparative study of more than 40 different cases (covering topics such as elections, natural disasters, corporate crises, and televised events) we identify a number of distinct types of discussion that can be observed on Twitter. Drawing on a range of communicative metrics, we show that thematic and contextual factors influence the usage of different communicative tools available to Twitter users, such as original tweets, @replies, retweets, and URLs. Based on this first analysis of the overall metrics of Twitter discussions, we also demonstrate stable patterns in the use of Twitter in the context of major topics and events.},
 number = {3--4},
 pages = {160--185},
 volume = {30},
 year = {2012},
 date-added = {2013-04-13 21:02:22 +0100},
 date-modified = {2013-04-13 21:02:22 +0100},
 doi = {10.1080/15228835.2012.744249},
 url = {http://dx.doi.org/10.1080/15228835.2012.744249},
 local-url = {file://localhost/Users/acepor/Dropbox/Papers/2012/Bruns/Quantitative%20Approaches%20to%20Comparing%20Communication%20Patterns%20on%20Twitter.pdf},
 uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p30602},
 rating = {0}
 }

 @article{Costajussa:2013p31939,
   author        = {Costa-juss{\`a}, Marta and Banchs, Rafael},
   title         = {Automatic normalization of short texts by combining statistical and rule-based techniques},
   journal       = {Language Resources and Evaluation},
   year          = {2013},
   pages         = {1--15},
   pmid          = {related:H7rwaPVhTw4J},
   date-added    = {2013-07-10 09:57:12 +0100},
   date-modified = {2013-07-10 09:57:17 +0100},
   local-url     = {file://localhost/Users/acepor/Dropbox/Papers/2013/Costa-juss%C3%A0/Automatic%20normalization%20of%20short%20texts%20by%20combining%20statistical%20and%20rule-based.pdf},
   uri           = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p31939},
   rating        = {0},
 }

 %% NOTE(review): the original @article had no journal and the venue is not recoverable from this record; typed as @misc until the venue is confirmed.
 @misc{Yang:2012p29793,
 author = {Yang, X. and Ghoting, A. and Ruan, Y. and Parthasarathy, S.},
 title = {Analysis of Streaming Data from {Twitter} Social Networks},
 abstract = {The Twitter social network is a dynamic network that can generate high speed data streams. In Twitter, the users can subscribe to the contents shared by their friends. The contents are in the form of messages written by their authors. All the messages in the network form a data stream and carry the highly dynamic behaviors of the users in the Twitter network.
 In this paper, we present our efforts to process the message stream of Twitter. We believe that in order to efficiently perform analysis on streaming data, we need an in-memory summary of the streaming data which can be used as input to mining algorithms. In this paper, we propose a novel summarization scheme to build such summary of the streaming data. We empirically demonstrate that our method can effectively summarize message stream data from the Twitter social networks with limited memory consumption and high summarization quality.},
 year = {2012},
 date-added = {2012-10-26 12:49:45 +0100},
 date-modified = {2012-11-03 15:05:18 +0000},
 pmid = {related:PZYdEcwlsUEJ},
 local-url = {file://localhost/Users/acepor/Dropbox/Papers/2012/Yang/Analysis%20of%20Streaming%20Data%20from%20Twitter%20Social%20Networks.pdf},
 uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p29793},
 rating = {0}
 }

 %% NOTE(review): month changed from the exporter's "Jan" placeholder to the CSCW 2010 conference month -- verify.
 @inproceedings{Naaman:2010p23670,
 author = {Naaman, M. and Boase, J. and Lai, C. H.},
 booktitle = {Proceedings of {CSCW} 2010},
 title = {Is it really about me?: message content in social awareness streams},
 abstract = {In this work we examine the characteristics of social activity and patterns of communication on Twitter, a prominent example of the emerging class of communication systems we call ``social awareness streams.'' We use system data and message content from over 350 Twitter users, applying human coding and quantitative analysis to provide a deeper understanding of the activity of individuals on the Twitter network. In particular, we develop a content-based categorization of the type of messages posted by Twitter users, based on which we examine users' activity. Our analysis shows two common types of user behavior in terms of the content of the posted messages, and exposes differences between users in respect to these activities.},
 year = {2010},
 month = feb,
 date-added = {2011-10-01 14:17:31 +0100},
 date-modified = {2012-04-18 14:09:13 +0100},
 pmid = {17476508525031357314related:gnuUtOgHifIJ},
 url = {http://portal.acm.org/citation.cfm?id=1718918.1718953},
 local-url = {file://localhost/Users/acepor/Dropbox/Papers/2010/Naaman/Is%20it%20really%20about%20me?%20message%20content%20in%20social%20awareness.pdf},
 uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p23670},
 read = {Yes},
 rating = {0}
 }

 @article{gillen_contact_2013,
 author = {Gillen, Julia and Merchant, Guy},
 journal = {Language Sciences},
 title = {Contact Calls: {Twitter} as a Dialogic Social and Linguistic Practice},
 abstract = {The rapid adoption of new forms of digital communication is now attracting the attention of researchers from a wide range of disciplines in the social sciences. In the landscape of social media, the microblogging application Twitter has rapidly become an accepted feature of everyday life with a broad appeal. This paper, from a dual autoethnography (Davies and Merchant, 2007) over one year, is a reflexive account of the experience of two academic Twitter users. We offer analyses of the functionalities of the semiotic environment and trace how our meaning making practices illuminate Bakhtinian (1986) principles of human communication, while at the same time constituting literacies that are distinctively new in character. We show how communication using Web 2.0 technologies can be described as semiotic and sociolinguistic practice and offer an appropriately dialogic and exploratory methodology to the study of New Literacies.},
 pages = {47--58},
 volume = {35},
 year = {2013},
 keywords = {Bakhtin, Literacies, Twitter, New Literacy Studies, New Literacies, Dialogue},
 date-added = {2013-04-13 21:02:22 +0100},
 date-modified = {2013-06-11 10:06:17 +0100},
 url = {http://www.sciencedirect.com/science/article/pii/S0388000112000642},
 local-url = {file://localhost/Users/acepor/Dropbox/Papers/2013/Gillen/Contact%20Calls%20Twitter%20as%20a%20Dialogic%20Social%20and%20Linguistic%20Practice.pdf},
 uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p30608},
 rating = {0}
 }

 %% NOTE(review): booktitle and publisher were added from outside knowledge (the original had only "Proceedings" and the city "Los Alamitos, CA") -- verify against the HICSS-42 record.
 @inproceedings{honeycutt_beyond_2009,
 author = {Honeycutt, Courtenay and Herring, Susan C.},
 booktitle = {Proceedings of the Forty-Second {Hawai'i} International Conference on System Sciences ({HICSS}-42)},
 title = {Beyond Microblogging: Conversation and Collaboration via {Twitter}},
 abstract = {The microblogging service Twitter is in the process of being appropriated for conversational interaction and is starting to be used for collaboration, as well. In order to determine how well Twitter supports user-to-user exchanges, what people are using Twitter for, and what usage or design modifications would make it (more) usable as a tool for collaboration, this study analyzes a corpus of naturally-occurring public Twitter messages (tweets), focusing on the functions and uses of the @ sign and the coherence of exchanges. The findings reveal a surprising degree of conversationality, facilitated especially by the use of @ as a marker of addressivity, and shed light on the limitations of Twitter's current design for collaborative use.},
 publisher = {IEEE Press},
 address = {Los Alamitos, CA},
 year = {2009},
 date-added = {2013-04-13 21:02:21 +0100},
 date-modified = {2013-06-11 14:49:00 +0100},
 url = {http://ella.slis.indiana.edu/~herring/honeycutt.herring.2009.pdf},
 internal-note = {The URL was labelled "Preprint:" in the original record},
 local-url = {file://localhost/Users/acepor/Dropbox/Papers/2009/Honeycutt/Beyond%20Microblogging%20Conversation%20and%20Collaboration%20via%20Twitter.pdf},
 uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p30614},
 read = {Yes},
 rating = {0}
 }

 %% NOTE(review): the original @article had no journal; the venue (COLING 2012) was added from outside knowledge -- verify, and confirm the author list against the published paper.
 @inproceedings{Mukherjee:2012p28823,
 author = {Mukherjee, S. and Bhattacharyya, P. and Balamurali, A. R.},
 booktitle = {Proceedings of {COLING} 2012},
 title = {Sentiment Analysis in {Twitter} with Lightweight Discourse Analysis},
 abstract = {We propose a lightweight method for using discourse relations for polarity detection of tweets. This method is targeted towards the web-based applications that deal with noisy and unstructured text, like the tweets, and cannot afford to use heavy linguistic resources like parsing due to the frequent failure of the parsers to handle noisy data. Most of the works in micro-blogs, like Twitter, use a bag-of-words model that ignores the discourse particles like but, since, although etc. In this work, we show how connectives, modals, conditionals and negation can be used to incorporate discourse information in any bag-of-words model, to improve sentiment classification accuracy. We first give a linguistic description of the various discourse relations which leads to conditions in rules and features in SVM. Discourse relations and corresponding rules are identified with minimal processing - just a list look up. We show that our discourse-based bag-of-words model performs well in a noisy medium (Twitter), where it performs better than an existing Twitter-based application. Furthermore, we show that our approach is beneficial to structured reviews as well, where we achieve a better accuracy than a state-of-the-art system in the travel review domain. Our system compares favorably with the state-of-the-art systems and has the additional attractiveness of being less resource intensive.},
 year = {2012},
 date-added = {2012-09-30 21:33:20 +0100},
 date-modified = {2013-07-10 09:23:37 +0100},
 pmid = {related:oTla0lpytgwJ},
 local-url = {file://localhost/Users/acepor/Dropbox/Papers/2012/Mukherjee/Sentiment%20Analysis%20in%20Twitter%20with%20Lightweight%20Discourse%20Analysis.pdf},
 uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p28823},
 read = {Yes},
 rating = {0}
 }

 %% NOTE(review): the original @article had no journal; the venue (ICWSM 2011) was added from outside knowledge -- verify before citing.
 @inproceedings{Perreault:2011p18811,
 author = {Perreault, M. and Ruths, D.},
 booktitle = {Proceedings of the Fifth International {AAAI} Conference on Weblogs and Social Media ({ICWSM})},
 title = {The Effect of Mobile Platforms on {Twitter} Content Generation},
 abstract = {The increased popularity of feature-rich mobile devices in recent years has enabled widespread consumption and production of social media content via mobile devices. Because mobile devices and mobile applications change context within which an individual generates and consumes microblog content, we might expect microblogging behavior to differ depending on whether the user is using a mobile device. To our knowledge, little has been established about what, if any, effects such mobile interfaces have on microblogging.
 In this paper, we investigate this question within the context of Twitter, among the most popular microblogging platforms. This work makes three specific contributions. First, we quantify the ways in which user profiles are effected by the mobile context: (1) the extent to which users tend to be either fully non-mobile or mobile and (2) the relative activity of the mobile Twitter community. Second, we assess the differences in content between mobile and non-mobile tweets (posts to the Twitter platform). Our results show that mobile platforms produce very different patterns of Twitter usage.
 As part of our analysis, we propose and apply a classification system for tweets. We consider this to be the third contribution of this work. While other classification systems have been proposed, ours is the first to permit the independent encoding of a tweet's form, content, and intended audience. In this paper we apply this system to show how tweets differ between mobile and non-mobile contexts. However, because of its flexibility and breadth, the schema may be useful to researchers studying Twitter content in other contexts as well.},
 year = {2011},
 date-added = {2011-06-04 22:55:17 +0100},
 date-modified = {2012-04-18 14:09:27 +0100},
 pmid = {related:aaj4bg4p_6EJ},
 local-url = {file://localhost/Users/acepor/Dropbox/Papers/2011/Perreault/The%20Effect%20of%20Mobile%20Platforms.pdf},
 uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p18811},
 read = {Yes},
 rating = {0}
 }

 %% Yang, Ghoting, Ruan and Parthasarathy (2012). Typed as a conference paper:
 %% the exported record carried a page range but no journal.
 %% NOTE(review): booktitle (KDD 2012) was not in the export -- confirm against the proceedings.
 @inproceedings{Yang:2012p29840,
 author = {Yang, Xintian and Ghoting, Amol and Ruan, Yiye and Parthasarathy, Srinivasan},
 title = {A Framework for Summarizing and Analyzing {Twitter} Feeds},
 booktitle = {Proceedings of the 18th {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
 abstract = {The firehose of data generated by users on social networking and microblogging sites such as Facebook and Twitter is enormous. Real-time analytics on such data is challenging with most current efforts largely focusing on the efficient querying and retrieval of data produced recently. In this paper, we present a dynamic pattern driven approach to summarize data produced by Twitter feeds. We develop a novel approach to maintain an in-memory summary while retaining sufficient information to facilitate a range of user-specific and topic-specific temporal analytics. We empirically compare our approach with several state-of-the-art pattern summarization approaches along the axes of storage cost, query accuracy, query flexibility, and efficiency using real data from Twitter. We find that the proposed approach is not only scalable but also outperforms existing approaches by a large margin.},
 pages = {370--378},
 year = {2012},
 date-added = {2012-10-26 12:49:49 +0100},
 date-modified = {2013-01-01 13:36:52 +0000},
 pmid = {related:I2PaHQSWixwJ},
 local-url = {file://localhost/Users/acepor/Dropbox/Papers/2012/Yang/A%20framework%20for%20summarizing%20and%20analyzing%20twitter%20feeds.pdf},
 uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p29840},
 rating = {0}
 }

 %% Kaufmann (2010). The only source recorded is an REU final-paper PDF (see URL),
 %% so the entry is typed as a report instead of a journal article with no journal.
 %% NOTE(review): institution is inferred from the URL host (cs.uccs.edu) -- confirm.
 %% NOTE(review): month looks like an importer default -- confirm or drop.
 @techreport{Kaufmann:2010p1734,
 author = {Kaufmann, Max},
 title = {Syntactic Normalization of {Twitter} Messages},
 type = {{REU} Final Paper},
 institution = {University of Colorado at Colorado Springs},
 abstract = {The use of computer mediated communication such as emailing, microblogs, Short Messaging System (SMS), and chat rooms has created corpora which contain incredibly noisy text. Tweets, messages sent by users on Twitter.com, are an especially noisy form of communication. Twitter.com contains billions of these tweets, but in their current state they contain so much noise that it is difficult to extract useful information. Tweets often contain highly irregular syntax and nonstandard use of English. This paper describes a novel system which normalizes these Twitter posts, converting them into a more standard form of English, so that standard machine translation (MT) and natural language processing (NLP) techniques can be more easily applied to them. In order to normalize Twitter tweets, we take a two step approach. We first preprocess tweets to remove as much noise as possible and then feed them into a machine translation model to convert them into standard English. Together, these two steps allow us to achieve improvement in BLEU scores comporable to the improvements achieved by SMS normalization},
 year = {2010},
 month = jan,
 date-added = {2010-11-04 10:00:38 +0000},
 date-modified = {2012-04-18 14:10:07 +0100},
 pmid = {related:bZXYMsT9nz0J},
 URL = {http://www.cs.uccs.edu/~kalita/work/reu/REUFinalPapers2010/Kaufmann.pdf},
 local-url = {file://localhost/Users/acepor/Dropbox/Papers/2010/Kaufmann/Syntactic%20Normalization%20of%20Twitter%20Messages.pdf},
 uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p1734},
 read = {Yes},
 rating = {0}
 }

 %% Waters and Jamal (2011), Public Relations Review.
 %% The exported author field put the surname first without a comma, so BibTeX
 %% read the initials as the surname; names are now in "Last, First" form.
 %% The citation key is left as exported so existing citations keep resolving.
 %% NOTE(review): volume, number, pages and month were added by hand -- confirm via the DOI.
 @article{RD:2011p16766,
 author = {Waters, Richard D. and Jamal, Jia Y.},
 journal = {Public Relations Review},
 title = {Tweet, tweet, tweet: A content analysis of nonprofit organizations' {Twitter} updates},
 abstract = {Many of the relationship cultivation strategies and the dialogic principles assume symmetrical communication is taking place. However, significant amounts of information are shared in a one-way manner. Although they have fallen out of favor with many academics, the four models of public relations can provide significant insights into how organizations communicate. Using the models as the guiding framework, this brief study examines how nonprofit organizations from the Philanthropy 200 communicate on Twitter. The findings reveal that the organizations are more likely to use one-way models despite the potential for dialogue and community building on the social networking site.},
 volume = {37},
 number = {3},
 pages = {321--324},
 year = {2011},
 month = sep,
 date-added = {2011-04-20 21:48:01 +0100},
 date-modified = {2012-04-18 14:10:12 +0100},
 doi = {10.1016/j.pubrev.2011.03.002},
 URL = {http://linkinghub.elsevier.com/retrieve/pii/S0363811111000361},
 local-url = {file://localhost/Users/acepor/Dropbox/Papers/2011/R.D./Tweet%20tweet%20tweet%20A%20content%20analysis%20of%20nonprofit%20organizations'%20Twitter.pdf},
 uri = {papers://1BB16709-E0C1-4709-BEC5-06621A3EA216/Paper/p16766},
 read = {Yes},
 rating = {0}
 }