Displays information about a corpus, including attributes and metadata such as the number of texts, date of creation, and source.
# S3 method for corpus
summary(object, n = 100, tolower = FALSE, showmeta = TRUE, ...)
corpus to be summarized
maximum number of texts to describe, default=100
convert texts to lower case before counting types
set to TRUE to include document-level meta-data
additional arguments passed through to tokens()
summary(data_corpus_inaugural)
#> Corpus consisting of 59 documents, showing 59 documents:
#>
#> Text Types Tokens Sentences Year President FirstName
#> 1789-Washington 625 1537 23 1789 Washington George
#> 1793-Washington 96 147 4 1793 Washington George
#> 1797-Adams 826 2577 37 1797 Adams John
#> 1801-Jefferson 717 1923 41 1801 Jefferson Thomas
#> 1805-Jefferson 804 2380 45 1805 Jefferson Thomas
#> 1809-Madison 535 1261 21 1809 Madison James
#> 1813-Madison 541 1302 33 1813 Madison James
#> 1817-Monroe 1040 3677 121 1817 Monroe James
#> 1821-Monroe 1259 4886 131 1821 Monroe James
#> 1825-Adams 1003 3147 74 1825 Adams John Quincy
#> 1829-Jackson 517 1208 25 1829 Jackson Andrew
#> 1833-Jackson 499 1267 29 1833 Jackson Andrew
#> 1837-VanBuren 1315 4158 95 1837 Van Buren Martin
#> 1841-Harrison 1896 9125 210 1841 Harrison William Henry
#> 1845-Polk 1334 5186 153 1845 Polk James Knox
#> 1849-Taylor 496 1178 22 1849 Taylor Zachary
#> 1853-Pierce 1165 3636 104 1853 Pierce Franklin
#> 1857-Buchanan 945 3083 89 1857 Buchanan James
#> 1861-Lincoln 1075 3999 135 1861 Lincoln Abraham
#> 1865-Lincoln 360 775 26 1865 Lincoln Abraham
#> 1869-Grant 485 1229 40 1869 Grant Ulysses S.
#> 1873-Grant 552 1472 43 1873 Grant Ulysses S.
#> 1877-Hayes 831 2707 59 1877 Hayes Rutherford B.
#> 1881-Garfield 1021 3209 111 1881 Garfield James A.
#> 1885-Cleveland 676 1816 44 1885 Cleveland Grover
#> 1889-Harrison 1352 4721 157 1889 Harrison Benjamin
#> 1893-Cleveland 821 2125 58 1893 Cleveland Grover
#> 1897-McKinley 1232 4353 130 1897 McKinley William
#> 1901-McKinley 854 2437 100 1901 McKinley William
#> 1905-Roosevelt 404 1079 33 1905 Roosevelt Theodore
#> 1909-Taft 1437 5821 158 1909 Taft William Howard
#> 1913-Wilson 658 1882 68 1913 Wilson Woodrow
#> 1917-Wilson 549 1652 59 1917 Wilson Woodrow
#> 1921-Harding 1169 3719 148 1921 Harding Warren G.
#> 1925-Coolidge 1220 4440 196 1925 Coolidge Calvin
#> 1929-Hoover 1090 3860 158 1929 Hoover Herbert
#> 1933-Roosevelt 743 2057 85 1933 Roosevelt Franklin D.
#> 1937-Roosevelt 725 1989 96 1937 Roosevelt Franklin D.
#> 1941-Roosevelt 526 1519 68 1941 Roosevelt Franklin D.
#> 1945-Roosevelt 275 633 27 1945 Roosevelt Franklin D.
#> 1949-Truman 781 2504 116 1949 Truman Harry S.
#> 1953-Eisenhower 900 2743 119 1953 Eisenhower Dwight D.
#> 1957-Eisenhower 621 1907 92 1957 Eisenhower Dwight D.
#> 1961-Kennedy 566 1541 52 1961 Kennedy John F.
#> 1965-Johnson 568 1710 93 1965 Johnson Lyndon Baines
#> 1969-Nixon 743 2416 103 1969 Nixon Richard Milhous
#> 1973-Nixon 544 1995 68 1973 Nixon Richard Milhous
#> 1977-Carter 527 1370 52 1977 Carter Jimmy
#> 1981-Reagan 902 2781 129 1981 Reagan Ronald
#> 1985-Reagan 925 2909 123 1985 Reagan Ronald
#> 1989-Bush 795 2674 141 1989 Bush George
#> 1993-Clinton 642 1833 81 1993 Clinton Bill
#> 1997-Clinton 773 2436 111 1997 Clinton Bill
#> 2001-Bush 621 1806 97 2001 Bush George W.
#> 2005-Bush 772 2312 99 2005 Bush George W.
#> 2009-Obama 938 2689 110 2009 Obama Barack
#> 2013-Obama 814 2317 88 2013 Obama Barack
#> 2017-Trump 582 1660 88 2017 Trump Donald J.
#> 2021-Biden 812 2766 216 2021 Biden Joseph R.
#> Party
#> none
#> none
#> Federalist
#> Democratic-Republican
#> Democratic-Republican
#> Democratic-Republican
#> Democratic-Republican
#> Democratic-Republican
#> Democratic-Republican
#> Democratic-Republican
#> Democratic
#> Democratic
#> Democratic
#> Whig
#> Whig
#> Whig
#> Democratic
#> Democratic
#> Republican
#> Republican
#> Republican
#> Republican
#> Republican
#> Republican
#> Democratic
#> Republican
#> Democratic
#> Republican
#> Republican
#> Republican
#> Republican
#> Democratic
#> Democratic
#> Republican
#> Republican
#> Republican
#> Democratic
#> Democratic
#> Democratic
#> Democratic
#> Democratic
#> Republican
#> Republican
#> Democratic
#> Democratic
#> Republican
#> Republican
#> Democratic
#> Republican
#> Republican
#> Republican
#> Democratic
#> Democratic
#> Republican
#> Republican
#> Democratic
#> Democratic
#> Republican
#> Democratic
#>
summary(data_corpus_inaugural, n = 10)
#> Corpus consisting of 59 documents, showing 10 documents:
#>
#> Text Types Tokens Sentences Year President FirstName
#> 1789-Washington 625 1537 23 1789 Washington George
#> 1793-Washington 96 147 4 1793 Washington George
#> 1797-Adams 826 2577 37 1797 Adams John
#> 1801-Jefferson 717 1923 41 1801 Jefferson Thomas
#> 1805-Jefferson 804 2380 45 1805 Jefferson Thomas
#> 1809-Madison 535 1261 21 1809 Madison James
#> 1813-Madison 541 1302 33 1813 Madison James
#> 1817-Monroe 1040 3677 121 1817 Monroe James
#> 1821-Monroe 1259 4886 131 1821 Monroe James
#> 1825-Adams 1003 3147 74 1825 Adams John Quincy
#> Party
#> none
#> none
#> Federalist
#> Democratic-Republican
#> Democratic-Republican
#> Democratic-Republican
#> Democratic-Republican
#> Democratic-Republican
#> Democratic-Republican
#> Democratic-Republican
#>
corp <- corpus(data_char_ukimmig2010,
docvars = data.frame(party=names(data_char_ukimmig2010)))
summary(corp, showmeta = TRUE) # show the meta-data
#> Corpus consisting of 9 documents, showing 9 documents:
#>
#> Text Types Tokens Sentences party
#> BNP 1125 3280 88 BNP
#> Coalition 142 260 4 Coalition
#> Conservative 251 499 15 Conservative
#> Greens 322 679 21 Greens
#> Labour 298 683 29 Labour
#> LibDem 251 483 14 LibDem
#> PC 77 114 5 PC
#> SNP 88 134 4 SNP
#> UKIP 346 723 26 UKIP
#>
sumcorp <- summary(corp) # (quietly) assign the results
sumcorp$Types / sumcorp$Tokens # crude type-token ratio
#> [1] 0.3429878 0.5461538 0.5030060 0.4742268 0.4363104 0.5196687 0.6754386
#> [8] 0.6567164 0.4785615